hestia-earth-utils 0.15.16__tar.gz → 0.16.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/PKG-INFO +25 -8
  2. hestia_earth_utils-0.16.2/hestia_earth/utils/csv_utils.py +72 -0
  3. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/lookup.py +25 -46
  4. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/pivot_csv.py +1 -11
  5. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/table.py +1 -16
  6. hestia_earth_utils-0.16.2/hestia_earth/utils/version.py +1 -0
  7. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/PKG-INFO +26 -9
  8. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/SOURCES.txt +17 -4
  9. hestia_earth_utils-0.16.2/hestia_earth_utils.egg-info/requires.txt +12 -0
  10. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/top_level.txt +0 -1
  11. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/setup.py +9 -6
  12. hestia_earth_utils-0.16.2/tests/test_api.py +169 -0
  13. hestia_earth_utils-0.16.2/tests/test_blank_node.py +59 -0
  14. hestia_earth_utils-0.16.2/tests/test_calculation_status.py +40 -0
  15. hestia_earth_utils-0.16.2/tests/test_cycle.py +18 -0
  16. hestia_earth_utils-0.16.2/tests/test_date.py +17 -0
  17. hestia_earth_utils-0.16.2/tests/test_descriptive_stats.py +49 -0
  18. hestia_earth_utils-0.16.2/tests/test_emission.py +62 -0
  19. hestia_earth_utils-0.16.2/tests/test_lookup.py +150 -0
  20. hestia_earth_utils-0.16.2/tests/test_lookup_utils.py +104 -0
  21. hestia_earth_utils-0.16.2/tests/test_model.py +69 -0
  22. hestia_earth_utils-0.16.2/tests/test_pipeline.py +212 -0
  23. hestia_earth_utils-0.16.2/tests/test_request.py +9 -0
  24. hestia_earth_utils-0.16.2/tests/test_stats.py +194 -0
  25. hestia_earth_utils-0.16.2/tests/test_table.py +11 -0
  26. hestia_earth_utils-0.16.2/tests/test_term.py +19 -0
  27. hestia_earth_utils-0.16.2/tests/test_tools.py +153 -0
  28. hestia_earth_utils-0.15.16/hestia_earth/__init__.py +0 -1
  29. hestia_earth_utils-0.15.16/hestia_earth/utils/__init__.py +0 -3
  30. hestia_earth_utils-0.15.16/hestia_earth/utils/version.py +0 -1
  31. hestia_earth_utils-0.15.16/hestia_earth_utils.egg-info/requires.txt +0 -6
  32. hestia_earth_utils-0.15.16/tests/pivot/test_pivot_csv.py +0 -267
  33. hestia_earth_utils-0.15.16/tests/pivot/test_pivot_json.py +0 -231
  34. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/MANIFEST.in +0 -0
  35. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/README.md +0 -0
  36. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/bin/hestia-format-upload +0 -0
  37. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/bin/hestia-pivot-csv +0 -0
  38. {hestia_earth_utils-0.15.16/hestia_earth/utils/pivot → hestia_earth_utils-0.16.2/hestia_earth/utils}/__init__.py +0 -0
  39. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/api.py +0 -0
  40. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/blank_node.py +0 -0
  41. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/calculation_status.py +0 -0
  42. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/cycle.py +0 -0
  43. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/date.py +0 -0
  44. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/descriptive_stats.py +0 -0
  45. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/emission.py +0 -0
  46. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/lookup_utils.py +0 -0
  47. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/model.py +0 -0
  48. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pipeline.py +0 -0
  49. {hestia_earth_utils-0.15.16/tests → hestia_earth_utils-0.16.2/hestia_earth/utils}/pivot/__init__.py +0 -0
  50. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/_shared.py +0 -0
  51. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/pivot_json.py +0 -0
  52. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/request.py +0 -0
  53. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/stats.py +0 -0
  54. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/__init__.py +0 -0
  55. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_azure_client.py +0 -0
  56. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_local_client.py +0 -0
  57. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_s3_client.py +0 -0
  58. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_sns_client.py +0 -0
  59. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/term.py +0 -0
  60. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/tools.py +0 -0
  61. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/dependency_links.txt +0 -0
  62. {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/setup.cfg +0 -0
@@ -1,16 +1,35 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: hestia_earth_utils
3
- Version: 0.15.16
3
+ Version: 0.16.2
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
7
7
  Author-email: guillaumeroyer.mail@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
- Classifier: Programming Language :: Python :: 3.9
12
- Requires-Python: >=3.9
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Requires-Python: >=3.12
13
11
  Description-Content-Type: text/markdown
12
+ Requires-Dist: hestia-earth-schema>=35.0.1
13
+ Requires-Dist: requests>=2.24.0
14
+ Requires-Dist: urllib3~=1.26.0
15
+ Requires-Dist: python-dateutil>=2.8.1
16
+ Requires-Dist: numpy>=2
17
+ Requires-Dist: flatten_json
18
+ Provides-Extra: pivot-csv
19
+ Requires-Dist: pandas>=2; extra == "pivot-csv"
20
+ Provides-Extra: table
21
+ Requires-Dist: pandas>=2; extra == "table"
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: provides-extra
30
+ Dynamic: requires-dist
31
+ Dynamic: requires-python
32
+ Dynamic: summary
14
33
 
15
34
  # HESTIA Utils
16
35
 
@@ -60,5 +79,3 @@ from hestia_earth.utils.lookup import download_lookup
60
79
 
61
80
  df = download_lookup('crop.csv')
62
81
  ```
63
-
64
-
@@ -0,0 +1,72 @@
1
+ import io
2
+ import csv
3
+ import re
4
+ import numpy as np
5
+
6
+ _MISSING_VALUE = '-'
7
+ _MISSING = -99999
8
+ _DELIMITER = ','
9
+ _QUOTE_CHAR = '"'
10
+ ENCODING = 'ISO-8859-1'
11
+ # default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
12
+ _DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
13
+
14
+
15
+ def is_missing_value(value): return value == _MISSING_VALUE or value == _MISSING or value == str(_MISSING)
16
+
17
+
18
+ def _replace_missing_values(value: str): return str(_MISSING) if str(value) == _MISSING_VALUE else value
19
+
20
+
21
+ def _replace_chars(value: str): return re.sub(f'[{re.escape(_DELETE_CHARS)}]', '', value.replace(' ', '_'))
22
+
23
+
24
+ def _text_to_csv(csv_content: str):
25
+ return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
26
+
27
+
28
+ def _csv_reader_converter(field_str_bytes):
29
+ field_str = field_str_bytes if isinstance(field_str_bytes, str) else field_str_bytes.decode('utf-8')
30
+ reader = _text_to_csv(field_str)
31
+
32
+ try:
33
+ return _replace_missing_values(next(reader)[0].strip())
34
+ except StopIteration:
35
+ return str(_MISSING)
36
+
37
+
38
+ def _get_columns(csv_content: str):
39
+ try:
40
+ reader = _text_to_csv(csv_content)
41
+ names = next(reader)
42
+ return list(map(_replace_chars, names))
43
+ except StopIteration:
44
+ return []
45
+
46
+
47
+ def csv_str_to_recarray(csv_content: str) -> np.recarray:
48
+ names = _get_columns(csv_content)
49
+ num_cols = len(names)
50
+
51
+ converters_dict = {
52
+ i: _csv_reader_converter
53
+ for i in range(num_cols)
54
+ }
55
+
56
+ # TODO: find the maximum column size instead of using 1000
57
+ max_size = 1000
58
+ return np.loadtxt(
59
+ io.StringIO(csv_content.strip()),
60
+ delimiter=_DELIMITER,
61
+ quotechar=_QUOTE_CHAR,
62
+ skiprows=1,
63
+ converters=converters_dict,
64
+ dtype=[(name, f"U{max_size}") for name in names],
65
+ encoding=ENCODING
66
+ ).view(np.recarray)
67
+
68
+
69
+ def csv_file_to_recarray(filepath: str):
70
+ with open(filepath, 'r', encoding=ENCODING) as f:
71
+ content = f.read()
72
+ return csv_str_to_recarray(content)
@@ -1,51 +1,16 @@
1
1
  from functools import reduce
2
- from io import StringIO
3
2
  from typing import Union
4
- import re
5
3
  import requests
6
- import csv
7
4
  import numpy
5
+ import traceback
8
6
 
9
7
  from .storage import _load_from_storage
10
8
  from .request import request_url, web_url
9
+ from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars
11
10
 
12
- DELIMITER = '\t'
13
- ENCODING = 'ISO-8859-1'
14
- GLOSSARY_FOLDER = 'glossary/lookups'
11
+ _GLOSSARY_FOLDER = 'glossary/lookups'
15
12
  _memory = {}
16
- MISSING_VALUE = '-'
17
- MISSING = -99999
18
13
  _INDEX_COL = 'termid'
19
- # default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
20
- _DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
21
-
22
-
23
- def _is_missing_value(value): return value == MISSING_VALUE or value == MISSING or value == str(MISSING)
24
-
25
-
26
- def _replace_missing_values(value: str): return str(MISSING) if str(value) == '-' else value
27
-
28
-
29
- def _rewrite_csv_file_as_tab(filepath: str):
30
- with open(filepath, 'r', encoding=ENCODING) as fp:
31
- reader = csv.reader(fp)
32
- for row in reader:
33
- yield DELIMITER.join(list(map(_replace_missing_values, row)))
34
-
35
-
36
- def _rewrite_csv_text_as_tab(text: str):
37
- reader = csv.reader(StringIO(text))
38
- for row in reader:
39
- yield DELIMITER.join(list(map(_replace_missing_values, row)))
40
-
41
-
42
- def _recfromcsv(data): return numpy.recfromcsv(data,
43
- missing_values=MISSING_VALUE,
44
- filling_values=MISSING,
45
- delimiter=DELIMITER,
46
- encoding=ENCODING,
47
- case_sensitive=True,
48
- deletechars=_DELETE_CHARS)
49
14
 
50
15
 
51
16
  def _memory_wrapper(key: str, func):
@@ -70,12 +35,12 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
70
35
  numpy.recarray
71
36
  The `numpy.recarray` converted from the csv content.
72
37
  """
73
- def load(): return _recfromcsv(_rewrite_csv_file_as_tab(filepath))
38
+ def load(): return csv_file_to_recarray(filepath)
74
39
  return _memory_wrapper(filepath, load) if keep_in_memory else load()
75
40
 
76
41
 
77
42
  def _download_lookup_data(filename: str):
78
- filepath = f"{GLOSSARY_FOLDER}/{filename}"
43
+ filepath = f"{_GLOSSARY_FOLDER}/{filename}"
79
44
 
80
45
  def fallback():
81
46
  url = request_url(f"{web_url()}/{filepath}")
@@ -121,12 +86,14 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
121
86
  """
122
87
  def load():
123
88
  data = _download_lookup_data(filename)
124
- rec = _recfromcsv(_rewrite_csv_text_as_tab(data)) if data else None
89
+ rec = csv_str_to_recarray(data) if data else None
125
90
  return (_build_index(rec) if build_index else rec) if data else None
126
91
 
127
92
  try:
128
93
  return _memory_wrapper(filename, load) if keep_in_memory else load()
129
94
  except Exception:
95
+ stack = traceback.format_exc()
96
+ print(stack)
130
97
  return None
131
98
 
132
99
 
@@ -144,7 +111,19 @@ def column_name(key: str):
144
111
  str
145
112
  The column name that can be used in `get_table_value`.
146
113
  """
147
- return re.sub('[' + re.escape(_DELETE_CHARS) + ']', '', key.replace(' ', '_')) if key else ''
114
+ return _replace_chars(key) if key else ''
115
+
116
+
117
+ def _parse_value(value: str):
118
+ """ Automatically converts the value to float or bool if possible """
119
+ try:
120
+ return (
121
+ True if str(value).lower() == 'true' else
122
+ False if str(value).lower() == 'false' else
123
+ float(value)
124
+ )
125
+ except Exception:
126
+ return value
148
127
 
149
128
 
150
129
  def _get_single_table_value(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
@@ -191,7 +170,7 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
191
170
  _get_single_table_value(lookup, col_match, col_match_with, col_val) if single else
192
171
  _get_multiple_table_values(lookup, col_match, col_match_with, col_val)
193
172
  )
194
- return None if _is_missing_value(value) else value
173
+ return None if is_missing_value(value) else _parse_value(value)
195
174
  except Exception:
196
175
  return None
197
176
 
@@ -251,7 +230,7 @@ def extract_grouped_data(data: str, key: str) -> str:
251
230
  **{curr.split(':')[0]: curr.split(':')[1]}
252
231
  }, data.split(';'), {}) if data is not None and isinstance(data, str) and len(data) > 1 else {}
253
232
  value = grouped_data.get(key)
254
- return None if _is_missing_value(value) else value
233
+ return None if is_missing_value(value) else _parse_value(value)
255
234
 
256
235
 
257
236
  def extract_grouped_data_closest_date(data: str, year: int) -> str:
@@ -278,13 +257,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
278
257
  lambda prev, curr: {
279
258
  **prev,
280
259
  **{curr.split(':')[0]: curr.split(':')[1]}
281
- } if len(curr) > 0 and not _is_missing_value(curr.split(':')[1]) else prev,
260
+ } if len(curr) > 0 and not is_missing_value(curr.split(':')[1]) else prev,
282
261
  data.split(';'),
283
262
  {}
284
263
  ) if data is not None and isinstance(data, str) and len(data) > 1 else {}
285
264
  dist_years = list(data_by_date.keys())
286
265
  closest_year = min(dist_years, key=lambda x: abs(int(x) - year)) if len(dist_years) > 0 else None
287
- return None if closest_year is None else data_by_date.get(closest_year)
266
+ return None if closest_year is None else _parse_value(data_by_date.get(closest_year))
288
267
 
289
268
 
290
269
  def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
@@ -2,6 +2,7 @@ import copy
2
2
  import json
3
3
  import re
4
4
  import numpy as np
5
+ import pandas as pd
5
6
  from hestia_earth.schema import UNIQUENESS_FIELDS, Term, NODE_TYPES
6
7
  from hestia_earth.schema.utils.sort import get_sort_key, SORT_CONFIG
7
8
  from flatten_json import flatten as flatten_json
@@ -11,17 +12,6 @@ from ..api import find_term_ids_by_names
11
12
  from ._shared import EXCLUDE_FIELDS, EXCLUDE_PRIVATE_FIELDS, _with_csv_formatting, _filter_emissions_not_relevant
12
13
 
13
14
 
14
- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
15
- try:
16
- import pandas as pd
17
-
18
- version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
19
- if version[0] < 1 or (version[0] == 1 and version[1] < 2):
20
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
21
- except ImportError:
22
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
23
-
24
-
25
15
  # We only want to pivot array items containing blank nodes
26
16
  # Assume these are all fields with uniqueness fields not of type Node
27
17
  def _get_blank_node_uniqueness_fields():
@@ -1,22 +1,12 @@
1
1
  from functools import reduce
2
2
  import numpy as np
3
+ import pandas as pd
3
4
  from hestia_earth.schema import NodeType
4
5
 
5
6
  # __package__ = "hestia_earth.utils" # required to run interactively in vscode
6
7
  from .tools import flatten
7
8
 
8
9
 
9
- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
10
- try:
11
- import pandas as pd
12
-
13
- version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
14
- if version[0] < 1 or (version[0] == 1 and version[1] < 2):
15
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
16
- except ImportError:
17
- raise ImportError(PANDAS_IMPORT_ERROR_MSG)
18
-
19
-
20
10
  def _replace_ids(df):
21
11
  # in columns, first letter is always lower case
22
12
  node_types = [e.value[0].lower() + e.value[1:] for e in NodeType]
@@ -74,11 +64,6 @@ def format_for_upload(filepath: str):
74
64
  pandas.DataFrame
75
65
  Formatted pandas dataframe
76
66
  """
77
- try:
78
- import pandas as pd
79
- except ImportError:
80
- raise ImportError("Run `pip install pandas~=1.2.0` to use this functionality")
81
-
82
67
  df = pd.read_csv(filepath, index_col=None, na_values="")
83
68
 
84
69
  # replace @id with id for top-level Node
@@ -0,0 +1 @@
1
+ VERSION = '0.16.2'
@@ -1,16 +1,35 @@
1
- Metadata-Version: 2.1
2
- Name: hestia-earth-utils
3
- Version: 0.15.16
1
+ Metadata-Version: 2.4
2
+ Name: hestia_earth_utils
3
+ Version: 0.16.2
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
7
7
  Author-email: guillaumeroyer.mail@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
11
- Classifier: Programming Language :: Python :: 3.9
12
- Requires-Python: >=3.9
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Requires-Python: >=3.12
13
11
  Description-Content-Type: text/markdown
12
+ Requires-Dist: hestia-earth-schema>=35.0.1
13
+ Requires-Dist: requests>=2.24.0
14
+ Requires-Dist: urllib3~=1.26.0
15
+ Requires-Dist: python-dateutil>=2.8.1
16
+ Requires-Dist: numpy>=2
17
+ Requires-Dist: flatten_json
18
+ Provides-Extra: pivot-csv
19
+ Requires-Dist: pandas>=2; extra == "pivot-csv"
20
+ Provides-Extra: table
21
+ Requires-Dist: pandas>=2; extra == "table"
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: provides-extra
30
+ Dynamic: requires-dist
31
+ Dynamic: requires-python
32
+ Dynamic: summary
14
33
 
15
34
  # HESTIA Utils
16
35
 
@@ -60,5 +79,3 @@ from hestia_earth.utils.lookup import download_lookup
60
79
 
61
80
  df = download_lookup('crop.csv')
62
81
  ```
63
-
64
-
@@ -3,11 +3,11 @@ README.md
3
3
  setup.py
4
4
  bin/hestia-format-upload
5
5
  bin/hestia-pivot-csv
6
- hestia_earth/__init__.py
7
6
  hestia_earth/utils/__init__.py
8
7
  hestia_earth/utils/api.py
9
8
  hestia_earth/utils/blank_node.py
10
9
  hestia_earth/utils/calculation_status.py
10
+ hestia_earth/utils/csv_utils.py
11
11
  hestia_earth/utils/cycle.py
12
12
  hestia_earth/utils/date.py
13
13
  hestia_earth/utils/descriptive_stats.py
@@ -36,6 +36,19 @@ hestia_earth_utils.egg-info/SOURCES.txt
36
36
  hestia_earth_utils.egg-info/dependency_links.txt
37
37
  hestia_earth_utils.egg-info/requires.txt
38
38
  hestia_earth_utils.egg-info/top_level.txt
39
- tests/pivot/__init__.py
40
- tests/pivot/test_pivot_csv.py
41
- tests/pivot/test_pivot_json.py
39
+ tests/test_api.py
40
+ tests/test_blank_node.py
41
+ tests/test_calculation_status.py
42
+ tests/test_cycle.py
43
+ tests/test_date.py
44
+ tests/test_descriptive_stats.py
45
+ tests/test_emission.py
46
+ tests/test_lookup.py
47
+ tests/test_lookup_utils.py
48
+ tests/test_model.py
49
+ tests/test_pipeline.py
50
+ tests/test_request.py
51
+ tests/test_stats.py
52
+ tests/test_table.py
53
+ tests/test_term.py
54
+ tests/test_tools.py
@@ -0,0 +1,12 @@
1
+ hestia-earth-schema>=35.0.1
2
+ requests>=2.24.0
3
+ urllib3~=1.26.0
4
+ python-dateutil>=2.8.1
5
+ numpy>=2
6
+ flatten_json
7
+
8
+ [pivot-csv]
9
+ pandas>=2
10
+
11
+ [table]
12
+ pandas>=2
@@ -1,5 +1,5 @@
1
1
  import pathlib
2
- from setuptools import find_packages, setup
2
+ from setuptools import find_namespace_packages, setup
3
3
 
4
4
  from hestia_earth.utils.version import VERSION
5
5
 
@@ -23,15 +23,18 @@ setup(
23
23
  author_email='guillaumeroyer.mail@gmail.com',
24
24
  license='MIT',
25
25
  classifiers=[
26
- 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
27
- 'Programming Language :: Python :: 3.9',
26
+ 'Programming Language :: Python :: 3.12',
28
27
  ],
29
- packages=find_packages(exclude=('tests', 'scripts')),
28
+ packages=find_namespace_packages(include=['hestia_earth.*']),
29
+ python_requires='>=3.12',
30
30
  include_package_data=True,
31
31
  install_requires=REQUIRES,
32
- python_requires='>=3.9',
33
32
  scripts=[
34
33
  'bin/hestia-pivot-csv',
35
34
  'bin/hestia-format-upload'
36
- ]
35
+ ],
36
+ extras_require={
37
+ 'pivot-csv': ['pandas>=2'],
38
+ 'table': ['pandas>=2'],
39
+ }
37
40
  )
@@ -0,0 +1,169 @@
1
+ from unittest.mock import patch
2
+ import os
3
+ import requests
4
+ import json
5
+ from hestia_earth.schema import SchemaType
6
+ import pytest
7
+
8
+ from .utils import fixtures_path
9
+ from hestia_earth.utils.request import api_url
10
+ from hestia_earth.utils.api import (
11
+ search,
12
+ find_related,
13
+ download_hestia,
14
+ node_exists,
15
+ find_node,
16
+ find_node_exact,
17
+ find_term_ids_by_names,
18
+ )
19
+
20
+
21
+ fake_related_response = {'results': [[{'@id': 'related_id'}]]}
22
+ fake_download_response = {'@id': 'id', '@type': 'type'}
23
+
24
+
25
+ class FakeFindRelatedSuccess():
26
+ def json():
27
+ return fake_related_response
28
+
29
+
30
+ class FakeFindRelatedError():
31
+ def json():
32
+ return {}
33
+
34
+
35
+ class FakeFindRelatedException():
36
+ def json():
37
+ raise requests.exceptions.RequestException('error')
38
+
39
+
40
+ class FakeDownloadSuccess():
41
+ def json():
42
+ return fake_download_response
43
+
44
+
45
+ class FakeDownloadError():
46
+ def json():
47
+ raise requests.exceptions.RequestException('error')
48
+
49
+
50
+ class FakeNodeExistSuccess():
51
+ def json():
52
+ return fake_download_response
53
+
54
+
55
+ class FakeNodeExistError():
56
+ def json():
57
+ return {"message": "not-found", "details": {}}
58
+
59
+
60
+ class FakeElasticSearchEmptyResult:
61
+ def json():
62
+ return {"results": []}
63
+
64
+
65
+ @patch('requests.get', return_value=FakeFindRelatedSuccess)
66
+ def test_find_related_success(mock_get):
67
+ res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
68
+ assert res == fake_related_response.get('results')
69
+ mock_get.assert_called_once_with(
70
+ f"{api_url()}/cycles/id/sources?limit=100", headers={'Content-Type': 'application/json'}
71
+ )
72
+
73
+
74
+ @patch('requests.get', return_value=FakeFindRelatedError)
75
+ def test_find_related_error(*args):
76
+ res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
77
+ assert not res
78
+
79
+
80
+ @patch('requests.get', return_value=FakeFindRelatedException)
81
+ def test_find_related_exception(*args):
82
+ res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
83
+ assert not res
84
+
85
+
86
+ @patch('requests.get', return_value=FakeDownloadError) # make sure fallback is not enabled
87
+ def test_download_hestia_local_file(*args):
88
+ id = 'sandContent'
89
+ with open(f"{fixtures_path}/Term/{id}.jsonld", encoding='utf-8') as f:
90
+ expected = json.load(f)
91
+ os.environ['DOWNLOAD_FOLDER'] = fixtures_path
92
+ res = download_hestia(id, SchemaType.TERM)
93
+ assert res == expected
94
+ del os.environ['DOWNLOAD_FOLDER']
95
+
96
+
97
+ @patch('requests.get', return_value=FakeDownloadSuccess)
98
+ def test_download_hestia_success(mock_get):
99
+ res = download_hestia('id', SchemaType.SOURCE)
100
+ assert res == fake_download_response
101
+ mock_get.assert_called_once_with(
102
+ f"{api_url()}/sources/id", headers={'Content-Type': 'application/json'})
103
+
104
+
105
+ @patch('requests.get', return_value=FakeDownloadError)
106
+ def test_download_hestia_error(*args):
107
+ res = download_hestia('id', SchemaType.SOURCE)
108
+ assert not res
109
+
110
+
111
+ @patch('requests.get', return_value=FakeNodeExistError) # make sure fallback is not enabled
112
+ def test_node_exists_local_file(*args):
113
+ os.environ['DOWNLOAD_FOLDER'] = fixtures_path
114
+ id = 'sandContent'
115
+ assert node_exists(id, SchemaType.TERM)
116
+ del os.environ['DOWNLOAD_FOLDER']
117
+
118
+
119
+ @patch('requests.get', return_value=FakeNodeExistSuccess)
120
+ def test_node_exists_true(*args):
121
+ assert node_exists('id', SchemaType.SOURCE)
122
+
123
+
124
+ @patch('requests.get', return_value=FakeNodeExistError)
125
+ def test_node_exists_false(*args):
126
+ assert not node_exists('id', SchemaType.SOURCE)
127
+
128
+
129
+ def test_search():
130
+ name = 'Wheat'
131
+ res = search(query={
132
+ 'bool': {
133
+ 'must': [{'match': {'name': name}}]
134
+ }
135
+ }, limit=2)
136
+ assert res[0].get('name').startswith(name)
137
+
138
+
139
+ def test_find_node():
140
+ name = 'Wheat'
141
+ res = find_node(SchemaType.TERM, {'name': name}, 2)
142
+ assert res[0].get('name').startswith(name)
143
+
144
+
145
+ def test_find_node_exact():
146
+ name = 'Wheat'
147
+ res = find_node_exact(SchemaType.TERM, {'name': name})
148
+ assert not res
149
+
150
+ name = 'Wheat, grain'
151
+ res = find_node_exact(SchemaType.TERM, {'name': name})
152
+ assert res.get('name') == name
153
+
154
+
155
+ def test_find_term_ids_by_names():
156
+ names = ["Harris Termite Powder", "Wheat, grain", "Urea (kg N)"]
157
+ res = find_term_ids_by_names(names, 2)
158
+ assert res == {
159
+ "Wheat, grain": "wheatGrain",
160
+ "Harris Termite Powder": "harrisTermitePowder",
161
+ "Urea (kg N)": "ureaKgN",
162
+ }
163
+
164
+
165
+ @patch("requests.post", return_value=FakeElasticSearchEmptyResult)
166
+ def test_find_term_ids_by_names_error(mock):
167
+ names = ["id_not_found_name"]
168
+ with pytest.raises(Exception, match=names[0]):
169
+ find_term_ids_by_names(names)
@@ -0,0 +1,59 @@
1
+ # import os
2
+ # import json
3
+ # from pytest import mark
4
+ # from hestia_earth.schema import TermTermType
5
+
6
+ # from tests.utils import fixtures_path
7
+ # from hestia_earth.utils.blank_node import get_node_value, ArrayTreatment, get_blank_nodes_calculation_status
8
+
9
+ # fixtures_folder = os.path.join(fixtures_path, 'blank_node')
10
+ # calculation_status_folder = os.path.join(fixtures_folder, 'calculation_status')
11
+
12
+
13
+ # def test_get_node_value():
14
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10]}
15
+ # assert get_node_value(blank_node, 'value', default=None) == 10
16
+
17
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [0]}
18
+ # assert get_node_value(blank_node, 'value', default=None) == 0
19
+
20
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}}
21
+ # assert get_node_value(blank_node, 'value', default=None) is None
22
+
23
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, 20]}
24
+ # assert get_node_value(blank_node, 'value', default=None) == 15
25
+
26
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': True}
27
+ # assert get_node_value(blank_node, 'value', default=None) is True
28
+
29
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': 10}
30
+ # assert get_node_value(blank_node, 'value', default=None) == 10
31
+
32
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': None}
33
+ # assert get_node_value(blank_node, 'value', default=None) is None
34
+
35
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': None}
36
+ # assert get_node_value(blank_node, 'value', default=0) == 0
37
+
38
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, None, 20]}
39
+ # assert get_node_value(blank_node, 'value', default=None) == 15
40
+
41
+ # blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, None, 20]}
42
+ # assert get_node_value(blank_node, 'value', array_treatment=ArrayTreatment.SUM) == 30
43
+
44
+
45
+ # @mark.parametrize(
46
+ # 'folder,list_key,termType',
47
+ # [
48
+ # ('cycle', 'emissions', TermTermType.EMISSION),
49
+ # ]
50
+ # )
51
+ # def test_get_blank_nodes_calculation_status(folder: str, list_key: str, termType: TermTermType):
52
+ # with open(f"{calculation_status_folder}/{folder}/node.jsonld", encoding='utf-8') as f:
53
+ # node = json.load(f)
54
+
55
+ # with open(f"{calculation_status_folder}/{folder}/{list_key}-{termType.value}.json", encoding='utf-8') as f:
56
+ # expected = json.load(f)
57
+
58
+ # result = get_blank_nodes_calculation_status(node, list_key=list_key, termType=termType)
59
+ # assert result == expected, folder