hestia-earth-utils 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,14 +25,6 @@ def _text_to_csv(csv_content: str):
25
25
  return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
26
26
 
27
27
 
28
- def _csv_reader_converter(field_str_bytes):
29
- try:
30
- field_str = field_str_bytes if isinstance(field_str_bytes, str) else field_str_bytes.decode('utf-8')
31
- return _replace_missing_values(field_str)
32
- except Exception:
33
- return str(_MISSING)
34
-
35
-
36
28
  def _get_columns(csv_content: str):
37
29
  try:
38
30
  reader = _text_to_csv(csv_content)
@@ -42,26 +34,48 @@ def _get_columns(csv_content: str):
42
34
  return []
43
35
 
44
36
 
45
- def csv_str_to_recarray(csv_content: str) -> np.recarray:
37
+ def _get_rows(csv_content: str):
38
+ string_io = io.StringIO(csv_content.strip())
39
+ try:
40
+ next(string_io)
41
+ except StopIteration:
42
+ return
43
+
44
+ return csv.reader(string_io, delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
45
+
46
+
47
+ def _csv_str_to_recarray_chunks_numpy(csv_content: str, chunk_size: int = 5):
46
48
  names = _get_columns(csv_content)
47
49
  num_cols = len(names)
48
50
 
49
- converters_dict = {
50
- i: _csv_reader_converter
51
- for i in range(num_cols)
52
- }
53
-
54
- # TODO: find the maximum column size instead of using 1000
55
51
  max_size = 1000
56
- return np.loadtxt(
57
- io.StringIO(csv_content.strip()),
58
- delimiter=_DELIMITER,
59
- quotechar=_QUOTE_CHAR,
60
- skiprows=1,
61
- converters=converters_dict,
62
- dtype=[(name, f"U{max_size}") for name in names],
63
- encoding=ENCODING
64
- ).view(np.recarray)
52
+ dtype = [(name, f"U{max_size}") for name in names]
53
+
54
+ reader = _get_rows(csv_content)
55
+
56
+ # 4. Process the file in batches
57
+ chunk_rows = []
58
+ for row in reader:
59
+ if not row:
60
+ continue
61
+ if len(row) != num_cols:
62
+ continue
63
+
64
+ # replace missing values
65
+ processed_row = tuple(_replace_missing_values(field) for field in row)
66
+ chunk_rows.append(processed_row)
67
+
68
+ if len(chunk_rows) >= chunk_size:
69
+ yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
70
+ chunk_rows = []
71
+
72
+ if chunk_rows:
73
+ yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
74
+
75
+
76
+ def csv_str_to_recarray(csv_content: str) -> np.recarray:
77
+ array_rows = list(_csv_str_to_recarray_chunks_numpy(csv_content))
78
+ return np.hstack(array_rows).view(np.recarray)
65
79
 
66
80
 
67
81
  def csv_file_to_recarray(filepath: str):
@@ -43,7 +43,8 @@ def _download_lookup_data(filename: str):
43
43
 
44
44
  def fallback():
45
45
  url = request_url(f"{web_url()}/{filepath}")
46
- return requests.get(url).content.decode('utf-8')
46
+ data = requests.get(url).content.decode('utf-8')
47
+ return data if data and '<html' not in data else None
47
48
 
48
49
  try:
49
50
  data = _load_from_storage(filepath, glossary=True)
@@ -1 +1 @@
1
- VERSION = '0.16.4'
1
+ VERSION = '0.16.6'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hestia_earth_utils
3
- Version: 0.16.4
3
+ Version: 0.16.6
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
@@ -2,12 +2,12 @@ hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
2
2
  hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
3
3
  hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
4
4
  hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
5
- hestia_earth/utils/csv_utils.py,sha256=MXE8ZqLo02aGaGVebEVWR_hPBP8ADINcJixn_RgDlxs,1938
5
+ hestia_earth/utils/csv_utils.py,sha256=BK-tci1sALmsxamSR1Y7f9O6ajTTdhggLC2pBEWhYME,2310
6
6
  hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
7
7
  hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
8
8
  hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
9
9
  hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
10
- hestia_earth/utils/lookup.py,sha256=XKmxFEH9o1Rhi4oTLteQnMAwNXiObjSX7pMfrUw8q1I,9522
10
+ hestia_earth/utils/lookup.py,sha256=Fu-Xn0145cNhWNNGj_FK1b-KdZ9T58Kd3TJ1ukf8pl4,9584
11
11
  hestia_earth/utils/lookup_utils.py,sha256=_k3RZ1pK-gw7jq8wn9HrPWfDl4FlEWRb8bXmgaARu0w,6716
12
12
  hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
13
13
  hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
@@ -16,7 +16,7 @@ hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,3
16
16
  hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
17
17
  hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
18
18
  hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
19
- hestia_earth/utils/version.py,sha256=7beGZMTXE5jj-oFnFWj3G6qwJ6yzFEAre86VzGZDUvU,19
19
+ hestia_earth/utils/version.py,sha256=8vT6KCaFSfD7RiRbwuK0XrS9XtxPXVHE_oqjEj4SFqE,19
20
20
  hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
22
22
  hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
@@ -26,9 +26,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0K
26
26
  hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
27
27
  hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
28
28
  hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
29
- hestia_earth_utils-0.16.4.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
30
- hestia_earth_utils-0.16.4.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
31
- hestia_earth_utils-0.16.4.dist-info/METADATA,sha256=_bUWrWyGlmLGQ0xS5jBrcbLWYxIlaXf7Eo2UMBk4RBs,2030
32
- hestia_earth_utils-0.16.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
- hestia_earth_utils-0.16.4.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
34
- hestia_earth_utils-0.16.4.dist-info/RECORD,,
29
+ hestia_earth_utils-0.16.6.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
30
+ hestia_earth_utils-0.16.6.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
31
+ hestia_earth_utils-0.16.6.dist-info/METADATA,sha256=9ERQJRlEpH8aOQCdrD-FRDW8htf7tWLPGdp1Rt9bySE,2030
32
+ hestia_earth_utils-0.16.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ hestia_earth_utils-0.16.6.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
34
+ hestia_earth_utils-0.16.6.dist-info/RECORD,,