hestia-earth-utils 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,16 +25,6 @@ def _text_to_csv(csv_content: str):
25
25
  return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
26
26
 
27
27
 
28
- def _csv_reader_converter(field_str_bytes):
29
- field_str = field_str_bytes if isinstance(field_str_bytes, str) else field_str_bytes.decode('utf-8')
30
- reader = _text_to_csv(field_str)
31
-
32
- try:
33
- return _replace_missing_values(next(reader)[0].strip())
34
- except StopIteration:
35
- return str(_MISSING)
36
-
37
-
38
28
  def _get_columns(csv_content: str):
39
29
  try:
40
30
  reader = _text_to_csv(csv_content)
@@ -44,26 +34,48 @@ def _get_columns(csv_content: str):
44
34
  return []
45
35
 
46
36
 
47
- def csv_str_to_recarray(csv_content: str) -> np.recarray:
37
+ def _get_rows(csv_content: str):
38
+ string_io = io.StringIO(csv_content.strip())
39
+ try:
40
+ next(string_io)
41
+ except StopIteration:
42
+ return
43
+
44
+ return csv.reader(string_io, delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
45
+
46
+
47
+ def _csv_str_to_recarray_chunks_numpy(csv_content: str, chunk_size: int = 5):
48
48
  names = _get_columns(csv_content)
49
49
  num_cols = len(names)
50
50
 
51
- converters_dict = {
52
- i: _csv_reader_converter
53
- for i in range(num_cols)
54
- }
55
-
56
- # TODO: find the maximum column size instead of using 1000
57
51
  max_size = 1000
58
- return np.loadtxt(
59
- io.StringIO(csv_content.strip()),
60
- delimiter=_DELIMITER,
61
- quotechar=_QUOTE_CHAR,
62
- skiprows=1,
63
- converters=converters_dict,
64
- dtype=[(name, f"U{max_size}") for name in names],
65
- encoding=ENCODING
66
- ).view(np.recarray)
52
+ dtype = [(name, f"U{max_size}") for name in names]
53
+
54
+ reader = _get_rows(csv_content)
55
+
56
+ # 4. Process the file in batches
57
+ chunk_rows = []
58
+ for row in reader:
59
+ if not row:
60
+ continue
61
+ if len(row) != num_cols:
62
+ continue
63
+
64
+ # replace missing values
65
+ processed_row = tuple(_replace_missing_values(field) for field in row)
66
+ chunk_rows.append(processed_row)
67
+
68
+ if len(chunk_rows) >= chunk_size:
69
+ yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
70
+ chunk_rows = []
71
+
72
+ if chunk_rows:
73
+ yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
74
+
75
+
76
+ def csv_str_to_recarray(csv_content: str) -> np.recarray:
77
+ array_rows = list(_csv_str_to_recarray_chunks_numpy(csv_content))
78
+ return np.hstack(array_rows).view(np.recarray)
67
79
 
68
80
 
69
81
  def csv_file_to_recarray(filepath: str):
@@ -2,7 +2,6 @@ from functools import reduce
2
2
  from typing import Union
3
3
  import requests
4
4
  import numpy
5
- import traceback
6
5
 
7
6
  from .storage import _load_from_storage
8
7
  from .request import request_url, web_url
@@ -92,8 +91,6 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
92
91
  try:
93
92
  return _memory_wrapper(filename, load) if keep_in_memory else load()
94
93
  except Exception:
95
- stack = traceback.format_exc()
96
- print(stack)
97
94
  return None
98
95
 
99
96
 
@@ -1 +1 @@
1
- VERSION = '0.16.3'
1
+ VERSION = '0.16.5'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hestia_earth_utils
3
- Version: 0.16.3
3
+ Version: 0.16.5
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
@@ -2,12 +2,12 @@ hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
2
2
  hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
3
3
  hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
4
4
  hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
5
- hestia_earth/utils/csv_utils.py,sha256=nb_ihJaTj3K5hO7cxXO1xjTLVGVX1P13m9SgquO5-XY,1990
5
+ hestia_earth/utils/csv_utils.py,sha256=BK-tci1sALmsxamSR1Y7f9O6ajTTdhggLC2pBEWhYME,2310
6
6
  hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
7
7
  hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
8
8
  hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
9
9
  hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
10
- hestia_earth/utils/lookup.py,sha256=iZnNQn13_WWFWc3LyL4qPTiL2IA48Db3y4eat5kmcuw,9599
10
+ hestia_earth/utils/lookup.py,sha256=XKmxFEH9o1Rhi4oTLteQnMAwNXiObjSX7pMfrUw8q1I,9522
11
11
  hestia_earth/utils/lookup_utils.py,sha256=_k3RZ1pK-gw7jq8wn9HrPWfDl4FlEWRb8bXmgaARu0w,6716
12
12
  hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
13
13
  hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
@@ -16,7 +16,7 @@ hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,3
16
16
  hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
17
17
  hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
18
18
  hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
19
- hestia_earth/utils/version.py,sha256=2f5woLdAmKvk1TRWtU3KMO3z6ac2wJDnGXIOq8M0TMk,19
19
+ hestia_earth/utils/version.py,sha256=RKa3Cna34LUgL3Ye-ubIdZ9B2mS9iFURrl2snrPa3uY,19
20
20
  hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
22
22
  hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
@@ -26,9 +26,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0K
26
26
  hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
27
27
  hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
28
28
  hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
29
- hestia_earth_utils-0.16.3.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
30
- hestia_earth_utils-0.16.3.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
31
- hestia_earth_utils-0.16.3.dist-info/METADATA,sha256=mP1MsmL8u1Ybatz47kHMAEzVi3hdSXMsrJXfCPfVglg,2030
32
- hestia_earth_utils-0.16.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
- hestia_earth_utils-0.16.3.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
34
- hestia_earth_utils-0.16.3.dist-info/RECORD,,
29
+ hestia_earth_utils-0.16.5.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
30
+ hestia_earth_utils-0.16.5.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
31
+ hestia_earth_utils-0.16.5.dist-info/METADATA,sha256=PAHew6LMon2UUdlJlGUSE7plh0VzfcBXXsV3SPzLL5A,2030
32
+ hestia_earth_utils-0.16.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ hestia_earth_utils-0.16.5.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
34
+ hestia_earth_utils-0.16.5.dist-info/RECORD,,