hestia-earth-utils 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ import io
2
+ import csv
3
+ import re
4
+ import numpy as np
5
+
6
# Raw marker that denotes a cell without a value in the CSV content.
_MISSING_VALUE = '-'
# Sentinel that replaces `_MISSING_VALUE` (in its string form) while parsing.
_MISSING = -99999
# CSV dialect shared by the `csv.reader` and `np.loadtxt` calls in this module.
_DELIMITER = ','
_QUOTE_CHAR = '"'
# Encoding used to open CSV files; it is also passed to `np.loadtxt`.
ENCODING = 'ISO-8859-1'
# Characters stripped from column names by `_replace_chars`.
# Compared with the default list below, `+`, `-`, `[`, `\` and `]` are kept.
# NOTE(review): "default" presumably refers to numpy's `deletechars` default - confirm.
# default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
_DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
13
+
14
+
15
def is_missing_value(value):
    """
    Tell whether a value stands for a missing entry.

    A value is considered missing when it equals the raw marker
    `_MISSING_VALUE`, the sentinel `_MISSING`, or the string form of that
    sentinel.
    """
    return (
        value == _MISSING_VALUE
        or value == _MISSING
        or value == str(_MISSING)
    )
16
+
17
+
18
def _replace_missing_values(value: str):
    """
    Swap the raw missing marker for the string form of the `_MISSING` sentinel.

    Any other value is returned untouched.
    """
    if str(value) == _MISSING_VALUE:
        return str(_MISSING)
    return value
19
+
20
+
21
def _replace_chars(value: str):
    """
    Normalise a column name.

    Spaces are turned into underscores first, then every character listed in
    `_DELETE_CHARS` is removed.
    """
    forbidden = f'[{re.escape(_DELETE_CHARS)}]'
    underscored = value.replace(' ', '_')
    return re.sub(forbidden, '', underscored)
22
+
23
+
24
def _text_to_csv(csv_content: str):
    """
    Create a `csv.reader` over the given text.

    Leading and trailing whitespace of the whole text is stripped before
    reading; the module delimiter and quote character are used.
    """
    buffer = io.StringIO(csv_content.strip())
    return csv.reader(buffer, delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
26
+
27
+
28
def _csv_reader_converter(field_str_bytes):
    """
    Clean a single cell before it is stored in the array.

    Used as the per-column converter of `np.loadtxt` in `csv_str_to_recarray`.

    Parameters
    ----------
    field_str_bytes : str or bytes
        The raw cell content.

    Returns
    -------
    str
        The first CSV field found in the cell, stripped of surrounding
        whitespace, with the missing marker replaced by the string form of
        `_MISSING`. An empty cell also yields the string form of `_MISSING`.
    """
    if isinstance(field_str_bytes, str):
        field_str = field_str_bytes
    else:
        # Decode with the module encoding rather than UTF-8: the content is read as
        # ISO-8859-1 everywhere else, and every byte sequence is valid in that encoding,
        # whereas UTF-8 decoding raises on non-ASCII Latin-1 bytes.
        field_str = field_str_bytes.decode(ENCODING)

    reader = _text_to_csv(field_str)

    try:
        # NOTE(review): only the first field is kept, so a cell that still contains the
        # delimiter at this point loses everything after it - confirm this is intended.
        return _replace_missing_values(next(reader)[0].strip())
    except StopIteration:
        # the reader yields no row for an empty (or whitespace-only) cell
        return str(_MISSING)
36
+
37
+
38
def _get_columns(csv_content: str):
    """
    Extract the column names from the first row of the CSV content.

    Each name goes through `_replace_chars`. An empty list is returned when
    the content has no row at all.
    """
    header = next(_text_to_csv(csv_content), None)
    if header is None:
        return []
    return [_replace_chars(name) for name in header]
45
+
46
+
47
def csv_str_to_recarray(csv_content: str) -> np.recarray:
    """
    Convert CSV text into a `numpy.recarray` of strings.

    The first row provides the column names (cleaned by `_get_columns`); every
    following row is loaded with `np.loadtxt`, each cell going through
    `_csv_reader_converter`.

    Parameters
    ----------
    csv_content : str
        The CSV text, header row included.

    Returns
    -------
    numpy.recarray
        One unicode field per column. The field width is the length of the
        longest cell in the content (at least the length of the missing
        sentinel), so long values are not truncated and short tables do not
        reserve a fixed 1000 characters per cell.
    """
    names = _get_columns(csv_content)
    converters_dict = {index: _csv_reader_converter for index in range(len(names))}

    # The converter only ever strips or shortens a cell, or swaps it for the missing
    # sentinel, so the longest raw cell (with the sentinel length as a floor) bounds the
    # length of every stored value.
    longest_cell = max(
        (len(cell) for row in _text_to_csv(csv_content) for cell in row),
        default=0
    )
    max_size = max(longest_cell, len(str(_MISSING)))

    return np.loadtxt(
        io.StringIO(csv_content.strip()),
        delimiter=_DELIMITER,
        quotechar=_QUOTE_CHAR,
        skiprows=1,
        converters=converters_dict,
        dtype=[(name, f"U{max_size}") for name in names],
        encoding=ENCODING
    ).view(np.recarray)
67
+
68
+
69
def csv_file_to_recarray(filepath: str):
    """
    Load a CSV file and convert it with `csv_str_to_recarray`.

    The file is opened in text mode using the module `ENCODING` and read in
    full before being parsed.
    """
    with open(filepath, mode='r', encoding=ENCODING) as csv_file:
        raw_text = csv_file.read()
    return csv_str_to_recarray(raw_text)
@@ -1,51 +1,16 @@
1
1
  from functools import reduce
2
- from io import StringIO
3
2
  from typing import Union
4
- import re
5
3
  import requests
6
- import csv
7
4
  import numpy
5
+ import traceback
8
6
 
9
7
  from .storage import _load_from_storage
10
8
  from .request import request_url, web_url
9
+ from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars
11
10
 
12
- DELIMITER = '\t'
13
- ENCODING = 'ISO-8859-1'
14
- GLOSSARY_FOLDER = 'glossary/lookups'
11
+ _GLOSSARY_FOLDER = 'glossary/lookups'
15
12
  _memory = {}
16
- MISSING_VALUE = '-'
17
- MISSING = -99999
18
13
  _INDEX_COL = 'termid'
19
- # default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
20
- _DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
21
-
22
-
23
- def _is_missing_value(value): return value == MISSING_VALUE or value == MISSING or value == str(MISSING)
24
-
25
-
26
- def _replace_missing_values(value: str): return str(MISSING) if str(value) == '-' else value
27
-
28
-
29
- def _rewrite_csv_file_as_tab(filepath: str):
30
- with open(filepath, 'r', encoding=ENCODING) as fp:
31
- reader = csv.reader(fp)
32
- for row in reader:
33
- yield DELIMITER.join(list(map(_replace_missing_values, row)))
34
-
35
-
36
- def _rewrite_csv_text_as_tab(text: str):
37
- reader = csv.reader(StringIO(text))
38
- for row in reader:
39
- yield DELIMITER.join(list(map(_replace_missing_values, row)))
40
-
41
-
42
- def _recfromcsv(data): return numpy.recfromcsv(data,
43
- missing_values=MISSING_VALUE,
44
- filling_values=MISSING,
45
- delimiter=DELIMITER,
46
- encoding=ENCODING,
47
- case_sensitive=True,
48
- deletechars=_DELETE_CHARS)
49
14
 
50
15
 
51
16
  def _memory_wrapper(key: str, func):
@@ -70,12 +35,12 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
70
35
  numpy.recarray
71
36
  The `numpy.recarray` converted from the csv content.
72
37
  """
73
- def load(): return _recfromcsv(_rewrite_csv_file_as_tab(filepath))
38
+ def load(): return csv_file_to_recarray(filepath)
74
39
  return _memory_wrapper(filepath, load) if keep_in_memory else load()
75
40
 
76
41
 
77
42
  def _download_lookup_data(filename: str):
78
- filepath = f"{GLOSSARY_FOLDER}/{filename}"
43
+ filepath = f"{_GLOSSARY_FOLDER}/{filename}"
79
44
 
80
45
  def fallback():
81
46
  url = request_url(f"{web_url()}/{filepath}")
@@ -121,12 +86,14 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
121
86
  """
122
87
  def load():
123
88
  data = _download_lookup_data(filename)
124
- rec = _recfromcsv(_rewrite_csv_text_as_tab(data)) if data else None
89
+ rec = csv_str_to_recarray(data) if data else None
125
90
  return (_build_index(rec) if build_index else rec) if data else None
126
91
 
127
92
  try:
128
93
  return _memory_wrapper(filename, load) if keep_in_memory else load()
129
94
  except Exception:
95
+ stack = traceback.format_exc()
96
+ print(stack)
130
97
  return None
131
98
 
132
99
 
@@ -144,7 +111,19 @@ def column_name(key: str):
144
111
  str
145
112
  The column name that can be used in `get_table_value`.
146
113
  """
147
- return re.sub('[' + re.escape(_DELETE_CHARS) + ']', '', key.replace(' ', '_')) if key else ''
114
+ return _replace_chars(key) if key else ''
115
+
116
+
117
+ def _parse_value(value: str):
118
+ """ Automatically converts the value to float or bool if possible """
119
+ try:
120
+ return (
121
+ True if str(value).lower() == 'true' else
122
+ False if str(value).lower() == 'false' else
123
+ float(value)
124
+ )
125
+ except Exception:
126
+ return value
148
127
 
149
128
 
150
129
  def _get_single_table_value(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
@@ -163,7 +142,7 @@ def _get_multiple_table_values(data: Union[dict, numpy.recarray], col_match: str
163
142
  return reduce(reducer, enumerate(col_match), data)[col_val][0]
164
143
 
165
144
 
166
- def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
145
+ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val, default_value=''):
167
146
  """
168
147
  Get a value matched by one or more columns from a `numpy.recarray`.
169
148
 
@@ -179,6 +158,8 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
179
158
  Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match`.
180
159
  col_val: str
181
160
  The column which contains the value to look for.
161
+ default_value : Any
162
+ A value to return when none is found in the data.
182
163
 
183
164
  Returns
184
165
  -------
@@ -191,7 +172,7 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
191
172
  _get_single_table_value(lookup, col_match, col_match_with, col_val) if single else
192
173
  _get_multiple_table_values(lookup, col_match, col_match_with, col_val)
193
174
  )
194
- return None if _is_missing_value(value) else value
175
+ return default_value if is_missing_value(value) else _parse_value(value)
195
176
  except Exception:
196
177
  return None
197
178
 
@@ -251,7 +232,7 @@ def extract_grouped_data(data: str, key: str) -> str:
251
232
  **{curr.split(':')[0]: curr.split(':')[1]}
252
233
  }, data.split(';'), {}) if data is not None and isinstance(data, str) and len(data) > 1 else {}
253
234
  value = grouped_data.get(key)
254
- return None if _is_missing_value(value) else value
235
+ return None if is_missing_value(value) else _parse_value(value)
255
236
 
256
237
 
257
238
  def extract_grouped_data_closest_date(data: str, year: int) -> str:
@@ -278,13 +259,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
278
259
  lambda prev, curr: {
279
260
  **prev,
280
261
  **{curr.split(':')[0]: curr.split(':')[1]}
281
- } if len(curr) > 0 and not _is_missing_value(curr.split(':')[1]) else prev,
262
+ } if len(curr) > 0 and not is_missing_value(curr.split(':')[1]) else prev,
282
263
  data.split(';'),
283
264
  {}
284
265
  ) if data is not None and isinstance(data, str) and len(data) > 1 else {}
285
266
  dist_years = list(data_by_date.keys())
286
267
  closest_year = min(dist_years, key=lambda x: abs(int(x) - year)) if len(dist_years) > 0 else None
287
- return None if closest_year is None else data_by_date.get(closest_year)
268
+ return None if closest_year is None else _parse_value(data_by_date.get(closest_year))
288
269
 
289
270
 
290
271
  def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
@@ -2,6 +2,7 @@ import copy
2
2
  import json
3
3
  import re
4
4
  import numpy as np
5
+ import pandas as pd
5
6
  from hestia_earth.schema import UNIQUENESS_FIELDS, Term, NODE_TYPES
6
7
  from hestia_earth.schema.utils.sort import get_sort_key, SORT_CONFIG
7
8
  from flatten_json import flatten as flatten_json
@@ -11,17 +12,6 @@ from ..api import find_term_ids_by_names
11
12
  from ._shared import EXCLUDE_FIELDS, EXCLUDE_PRIVATE_FIELDS, _with_csv_formatting, _filter_emissions_not_relevant
12
13
 
13
14
 
14
- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
15
- try:
16
- import pandas as pd
17
-
18
- version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
19
- if version[0] < 1 or (version[0] == 1 and version[1] < 2):
20
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
21
- except ImportError:
22
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
23
-
24
-
25
15
  # We only want to pivot array items containing blank nodes
26
16
  # Assume these are all fields with uniqueness fields not of type Node
27
17
  def _get_blank_node_uniqueness_fields():
@@ -1,22 +1,12 @@
1
1
  from functools import reduce
2
2
  import numpy as np
3
+ import pandas as pd
3
4
  from hestia_earth.schema import NodeType
4
5
 
5
6
  # __package__ = "hestia_earth.utils" # required to run interactively in vscode
6
7
  from .tools import flatten
7
8
 
8
9
 
9
- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
10
- try:
11
- import pandas as pd
12
-
13
- version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
14
- if version[0] < 1 or (version[0] == 1 and version[1] < 2):
15
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
16
- except ImportError:
17
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
18
-
19
-
20
10
  def _replace_ids(df):
21
11
  # in columns, first letter is always lower case
22
12
  node_types = [e.value[0].lower() + e.value[1:] for e in NodeType]
@@ -74,11 +64,6 @@ def format_for_upload(filepath: str):
74
64
  pandas.DataFrame
75
65
  Formatted pandas dataframe
76
66
  """
77
- try:
78
- import pandas as pd
79
- except ImportError:
80
- raise ImportError("Run `pip install pandas~=1.2.0` to use this functionality")
81
-
82
67
  df = pd.read_csv(filepath, index_col=None, na_values="")
83
68
 
84
69
  # replace @id with id for top-level Node
@@ -1 +1 @@
1
- VERSION = '0.16.1'
1
+ VERSION = '0.16.3'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hestia_earth_utils
3
- Version: 0.16.1
3
+ Version: 0.16.3
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
@@ -13,8 +13,12 @@ Requires-Dist: hestia-earth-schema>=35.0.1
13
13
  Requires-Dist: requests>=2.24.0
14
14
  Requires-Dist: urllib3~=1.26.0
15
15
  Requires-Dist: python-dateutil>=2.8.1
16
- Requires-Dist: numpy<2,>=1.25.0
16
+ Requires-Dist: numpy>=2
17
17
  Requires-Dist: flatten_json
18
+ Provides-Extra: pivot-csv
19
+ Requires-Dist: pandas>=2; extra == "pivot-csv"
20
+ Provides-Extra: table
21
+ Requires-Dist: pandas>=2; extra == "table"
18
22
  Dynamic: author
19
23
  Dynamic: author-email
20
24
  Dynamic: classifier
@@ -22,6 +26,7 @@ Dynamic: description
22
26
  Dynamic: description-content-type
23
27
  Dynamic: home-page
24
28
  Dynamic: license
29
+ Dynamic: provides-extra
25
30
  Dynamic: requires-dist
26
31
  Dynamic: requires-python
27
32
  Dynamic: summary
@@ -2,32 +2,33 @@ hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
2
2
  hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
3
3
  hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
4
4
  hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
5
+ hestia_earth/utils/csv_utils.py,sha256=nb_ihJaTj3K5hO7cxXO1xjTLVGVX1P13m9SgquO5-XY,1990
5
6
  hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
6
7
  hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
7
8
  hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
8
9
  hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
9
- hestia_earth/utils/lookup.py,sha256=0RLqy3HPzkbhkRaO7fYoHU0jKhAYzI6QHMptMEbqTlg,10344
10
+ hestia_earth/utils/lookup.py,sha256=iZnNQn13_WWFWc3LyL4qPTiL2IA48Db3y4eat5kmcuw,9599
10
11
  hestia_earth/utils/lookup_utils.py,sha256=_k3RZ1pK-gw7jq8wn9HrPWfDl4FlEWRb8bXmgaARu0w,6716
11
12
  hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
12
13
  hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
13
14
  hestia_earth/utils/request.py,sha256=bu7hkWKmFdXl2_Feawiam_x32whlclA9oP0asJyC69k,626
14
15
  hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,34667
15
- hestia_earth/utils/table.py,sha256=RrTt-KF_QzjKiCpaAueoG6La1FG-Iusxw5NMDpoRBpQ,2861
16
+ hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
16
17
  hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
17
18
  hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
18
- hestia_earth/utils/version.py,sha256=xSEZ3N7McvEb7jAV8u8vNc-uFNjN6zu3mnEj2i-XYyk,19
19
+ hestia_earth/utils/version.py,sha256=2f5woLdAmKvk1TRWtU3KMO3z6ac2wJDnGXIOq8M0TMk,19
19
20
  hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
21
  hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
21
- hestia_earth/utils/pivot/pivot_csv.py,sha256=zaiDcig4I5lVSHPZ-2bXKKBcIRrayA0GUaw0c8H3D-w,12371
22
+ hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
22
23
  hestia_earth/utils/pivot/pivot_json.py,sha256=GBu5CFgCNdFjAuKGNsk2Phgds-xp4iREa5YIrplpFwA,9801
23
24
  hestia_earth/utils/storage/__init__.py,sha256=uNX6_EHWWnNUIm4Ng7L43-cQmuc6NGFAxXye85saIXQ,922
24
25
  hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0KvsXP7me1ZUBy00,1274
25
26
  hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
26
27
  hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
27
28
  hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
28
- hestia_earth_utils-0.16.1.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
29
- hestia_earth_utils-0.16.1.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
30
- hestia_earth_utils-0.16.1.dist-info/METADATA,sha256=ofiEuFahZ5b4fyOxvnJ0N40gck62LEBKWsZiSWaSzFU,1876
31
- hestia_earth_utils-0.16.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- hestia_earth_utils-0.16.1.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
33
- hestia_earth_utils-0.16.1.dist-info/RECORD,,
29
+ hestia_earth_utils-0.16.3.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
30
+ hestia_earth_utils-0.16.3.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
31
+ hestia_earth_utils-0.16.3.dist-info/METADATA,sha256=mP1MsmL8u1Ybatz47kHMAEzVi3hdSXMsrJXfCPfVglg,2030
32
+ hestia_earth_utils-0.16.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ hestia_earth_utils-0.16.3.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
34
+ hestia_earth_utils-0.16.3.dist-info/RECORD,,