hestia-earth-utils 0.15.16__tar.gz → 0.16.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/PKG-INFO +25 -8
- hestia_earth_utils-0.16.2/hestia_earth/utils/csv_utils.py +72 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/lookup.py +25 -46
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/pivot_csv.py +1 -11
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/table.py +1 -16
- hestia_earth_utils-0.16.2/hestia_earth/utils/version.py +1 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/PKG-INFO +26 -9
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/SOURCES.txt +17 -4
- hestia_earth_utils-0.16.2/hestia_earth_utils.egg-info/requires.txt +12 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/top_level.txt +0 -1
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/setup.py +9 -6
- hestia_earth_utils-0.16.2/tests/test_api.py +169 -0
- hestia_earth_utils-0.16.2/tests/test_blank_node.py +59 -0
- hestia_earth_utils-0.16.2/tests/test_calculation_status.py +40 -0
- hestia_earth_utils-0.16.2/tests/test_cycle.py +18 -0
- hestia_earth_utils-0.16.2/tests/test_date.py +17 -0
- hestia_earth_utils-0.16.2/tests/test_descriptive_stats.py +49 -0
- hestia_earth_utils-0.16.2/tests/test_emission.py +62 -0
- hestia_earth_utils-0.16.2/tests/test_lookup.py +150 -0
- hestia_earth_utils-0.16.2/tests/test_lookup_utils.py +104 -0
- hestia_earth_utils-0.16.2/tests/test_model.py +69 -0
- hestia_earth_utils-0.16.2/tests/test_pipeline.py +212 -0
- hestia_earth_utils-0.16.2/tests/test_request.py +9 -0
- hestia_earth_utils-0.16.2/tests/test_stats.py +194 -0
- hestia_earth_utils-0.16.2/tests/test_table.py +11 -0
- hestia_earth_utils-0.16.2/tests/test_term.py +19 -0
- hestia_earth_utils-0.16.2/tests/test_tools.py +153 -0
- hestia_earth_utils-0.15.16/hestia_earth/__init__.py +0 -1
- hestia_earth_utils-0.15.16/hestia_earth/utils/__init__.py +0 -3
- hestia_earth_utils-0.15.16/hestia_earth/utils/version.py +0 -1
- hestia_earth_utils-0.15.16/hestia_earth_utils.egg-info/requires.txt +0 -6
- hestia_earth_utils-0.15.16/tests/pivot/test_pivot_csv.py +0 -267
- hestia_earth_utils-0.15.16/tests/pivot/test_pivot_json.py +0 -231
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/MANIFEST.in +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/README.md +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/bin/hestia-format-upload +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/bin/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.15.16/hestia_earth/utils/pivot → hestia_earth_utils-0.16.2/hestia_earth/utils}/__init__.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/api.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/blank_node.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/calculation_status.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/cycle.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/date.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/descriptive_stats.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/emission.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/lookup_utils.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/model.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pipeline.py +0 -0
- {hestia_earth_utils-0.15.16/tests → hestia_earth_utils-0.16.2/hestia_earth/utils}/pivot/__init__.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/_shared.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/pivot_json.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/request.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/stats.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/__init__.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_azure_client.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_local_client.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_s3_client.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/storage/_sns_client.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/term.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/tools.py +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/dependency_links.txt +0 -0
- {hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/setup.cfg +0 -0
|
@@ -1,16 +1,35 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: hestia_earth_utils
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.16.2
|
|
4
4
|
Summary: HESTIA's utils library
|
|
5
5
|
Home-page: https://gitlab.com/hestia-earth/hestia-utils
|
|
6
6
|
Author: HESTIA Team
|
|
7
7
|
Author-email: guillaumeroyer.mail@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
-
Requires-Python: >=3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Requires-Python: >=3.12
|
|
13
11
|
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: hestia-earth-schema>=35.0.1
|
|
13
|
+
Requires-Dist: requests>=2.24.0
|
|
14
|
+
Requires-Dist: urllib3~=1.26.0
|
|
15
|
+
Requires-Dist: python-dateutil>=2.8.1
|
|
16
|
+
Requires-Dist: numpy>=2
|
|
17
|
+
Requires-Dist: flatten_json
|
|
18
|
+
Provides-Extra: pivot-csv
|
|
19
|
+
Requires-Dist: pandas>=2; extra == "pivot-csv"
|
|
20
|
+
Provides-Extra: table
|
|
21
|
+
Requires-Dist: pandas>=2; extra == "table"
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: provides-extra
|
|
30
|
+
Dynamic: requires-dist
|
|
31
|
+
Dynamic: requires-python
|
|
32
|
+
Dynamic: summary
|
|
14
33
|
|
|
15
34
|
# HESTIA Utils
|
|
16
35
|
|
|
@@ -60,5 +79,3 @@ from hestia_earth.utils.lookup import download_lookup
|
|
|
60
79
|
|
|
61
80
|
df = download_lookup('crop.csv')
|
|
62
81
|
```
|
|
63
|
-
|
|
64
|
-
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import csv
|
|
3
|
+
import re
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
_MISSING_VALUE = '-'
|
|
7
|
+
_MISSING = -99999
|
|
8
|
+
_DELIMITER = ','
|
|
9
|
+
_QUOTE_CHAR = '"'
|
|
10
|
+
ENCODING = 'ISO-8859-1'
|
|
11
|
+
# default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
|
|
12
|
+
_DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def is_missing_value(value): return value == _MISSING_VALUE or value == _MISSING or value == str(_MISSING)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _replace_missing_values(value: str): return str(_MISSING) if str(value) == _MISSING_VALUE else value
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _replace_chars(value: str): return re.sub(f'[{re.escape(_DELETE_CHARS)}]', '', value.replace(' ', '_'))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _text_to_csv(csv_content: str):
|
|
25
|
+
return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _csv_reader_converter(field_str_bytes):
|
|
29
|
+
field_str = field_str_bytes if isinstance(field_str_bytes, str) else field_str_bytes.decode('utf-8')
|
|
30
|
+
reader = _text_to_csv(field_str)
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
return _replace_missing_values(next(reader)[0].strip())
|
|
34
|
+
except StopIteration:
|
|
35
|
+
return str(_MISSING)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _get_columns(csv_content: str):
|
|
39
|
+
try:
|
|
40
|
+
reader = _text_to_csv(csv_content)
|
|
41
|
+
names = next(reader)
|
|
42
|
+
return list(map(_replace_chars, names))
|
|
43
|
+
except StopIteration:
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def csv_str_to_recarray(csv_content: str) -> np.recarray:
|
|
48
|
+
names = _get_columns(csv_content)
|
|
49
|
+
num_cols = len(names)
|
|
50
|
+
|
|
51
|
+
converters_dict = {
|
|
52
|
+
i: _csv_reader_converter
|
|
53
|
+
for i in range(num_cols)
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
# TODO: find the maximum column size instead of using 1000
|
|
57
|
+
max_size = 1000
|
|
58
|
+
return np.loadtxt(
|
|
59
|
+
io.StringIO(csv_content.strip()),
|
|
60
|
+
delimiter=_DELIMITER,
|
|
61
|
+
quotechar=_QUOTE_CHAR,
|
|
62
|
+
skiprows=1,
|
|
63
|
+
converters=converters_dict,
|
|
64
|
+
dtype=[(name, f"U{max_size}") for name in names],
|
|
65
|
+
encoding=ENCODING
|
|
66
|
+
).view(np.recarray)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def csv_file_to_recarray(filepath: str):
|
|
70
|
+
with open(filepath, 'r', encoding=ENCODING) as f:
|
|
71
|
+
content = f.read()
|
|
72
|
+
return csv_str_to_recarray(content)
|
|
@@ -1,51 +1,16 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
|
-
from io import StringIO
|
|
3
2
|
from typing import Union
|
|
4
|
-
import re
|
|
5
3
|
import requests
|
|
6
|
-
import csv
|
|
7
4
|
import numpy
|
|
5
|
+
import traceback
|
|
8
6
|
|
|
9
7
|
from .storage import _load_from_storage
|
|
10
8
|
from .request import request_url, web_url
|
|
9
|
+
from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
ENCODING = 'ISO-8859-1'
|
|
14
|
-
GLOSSARY_FOLDER = 'glossary/lookups'
|
|
11
|
+
_GLOSSARY_FOLDER = 'glossary/lookups'
|
|
15
12
|
_memory = {}
|
|
16
|
-
MISSING_VALUE = '-'
|
|
17
|
-
MISSING = -99999
|
|
18
13
|
_INDEX_COL = 'termid'
|
|
19
|
-
# default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
|
|
20
|
-
_DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def _is_missing_value(value): return value == MISSING_VALUE or value == MISSING or value == str(MISSING)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _replace_missing_values(value: str): return str(MISSING) if str(value) == '-' else value
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _rewrite_csv_file_as_tab(filepath: str):
|
|
30
|
-
with open(filepath, 'r', encoding=ENCODING) as fp:
|
|
31
|
-
reader = csv.reader(fp)
|
|
32
|
-
for row in reader:
|
|
33
|
-
yield DELIMITER.join(list(map(_replace_missing_values, row)))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _rewrite_csv_text_as_tab(text: str):
|
|
37
|
-
reader = csv.reader(StringIO(text))
|
|
38
|
-
for row in reader:
|
|
39
|
-
yield DELIMITER.join(list(map(_replace_missing_values, row)))
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _recfromcsv(data): return numpy.recfromcsv(data,
|
|
43
|
-
missing_values=MISSING_VALUE,
|
|
44
|
-
filling_values=MISSING,
|
|
45
|
-
delimiter=DELIMITER,
|
|
46
|
-
encoding=ENCODING,
|
|
47
|
-
case_sensitive=True,
|
|
48
|
-
deletechars=_DELETE_CHARS)
|
|
49
14
|
|
|
50
15
|
|
|
51
16
|
def _memory_wrapper(key: str, func):
|
|
@@ -70,12 +35,12 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
|
|
|
70
35
|
numpy.recarray
|
|
71
36
|
The `numpy.recarray` converted from the csv content.
|
|
72
37
|
"""
|
|
73
|
-
def load(): return
|
|
38
|
+
def load(): return csv_file_to_recarray(filepath)
|
|
74
39
|
return _memory_wrapper(filepath, load) if keep_in_memory else load()
|
|
75
40
|
|
|
76
41
|
|
|
77
42
|
def _download_lookup_data(filename: str):
|
|
78
|
-
filepath = f"{
|
|
43
|
+
filepath = f"{_GLOSSARY_FOLDER}/{filename}"
|
|
79
44
|
|
|
80
45
|
def fallback():
|
|
81
46
|
url = request_url(f"{web_url()}/{filepath}")
|
|
@@ -121,12 +86,14 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
|
|
|
121
86
|
"""
|
|
122
87
|
def load():
|
|
123
88
|
data = _download_lookup_data(filename)
|
|
124
|
-
rec =
|
|
89
|
+
rec = csv_str_to_recarray(data) if data else None
|
|
125
90
|
return (_build_index(rec) if build_index else rec) if data else None
|
|
126
91
|
|
|
127
92
|
try:
|
|
128
93
|
return _memory_wrapper(filename, load) if keep_in_memory else load()
|
|
129
94
|
except Exception:
|
|
95
|
+
stack = traceback.format_exc()
|
|
96
|
+
print(stack)
|
|
130
97
|
return None
|
|
131
98
|
|
|
132
99
|
|
|
@@ -144,7 +111,19 @@ def column_name(key: str):
|
|
|
144
111
|
str
|
|
145
112
|
The column name that can be used in `get_table_value`.
|
|
146
113
|
"""
|
|
147
|
-
return
|
|
114
|
+
return _replace_chars(key) if key else ''
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _parse_value(value: str):
|
|
118
|
+
""" Automatically converts the value to float or bool if possible """
|
|
119
|
+
try:
|
|
120
|
+
return (
|
|
121
|
+
True if str(value).lower() == 'true' else
|
|
122
|
+
False if str(value).lower() == 'false' else
|
|
123
|
+
float(value)
|
|
124
|
+
)
|
|
125
|
+
except Exception:
|
|
126
|
+
return value
|
|
148
127
|
|
|
149
128
|
|
|
150
129
|
def _get_single_table_value(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
|
|
@@ -191,7 +170,7 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
|
|
|
191
170
|
_get_single_table_value(lookup, col_match, col_match_with, col_val) if single else
|
|
192
171
|
_get_multiple_table_values(lookup, col_match, col_match_with, col_val)
|
|
193
172
|
)
|
|
194
|
-
return None if
|
|
173
|
+
return None if is_missing_value(value) else _parse_value(value)
|
|
195
174
|
except Exception:
|
|
196
175
|
return None
|
|
197
176
|
|
|
@@ -251,7 +230,7 @@ def extract_grouped_data(data: str, key: str) -> str:
|
|
|
251
230
|
**{curr.split(':')[0]: curr.split(':')[1]}
|
|
252
231
|
}, data.split(';'), {}) if data is not None and isinstance(data, str) and len(data) > 1 else {}
|
|
253
232
|
value = grouped_data.get(key)
|
|
254
|
-
return None if
|
|
233
|
+
return None if is_missing_value(value) else _parse_value(value)
|
|
255
234
|
|
|
256
235
|
|
|
257
236
|
def extract_grouped_data_closest_date(data: str, year: int) -> str:
|
|
@@ -278,13 +257,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
|
|
|
278
257
|
lambda prev, curr: {
|
|
279
258
|
**prev,
|
|
280
259
|
**{curr.split(':')[0]: curr.split(':')[1]}
|
|
281
|
-
} if len(curr) > 0 and not
|
|
260
|
+
} if len(curr) > 0 and not is_missing_value(curr.split(':')[1]) else prev,
|
|
282
261
|
data.split(';'),
|
|
283
262
|
{}
|
|
284
263
|
) if data is not None and isinstance(data, str) and len(data) > 1 else {}
|
|
285
264
|
dist_years = list(data_by_date.keys())
|
|
286
265
|
closest_year = min(dist_years, key=lambda x: abs(int(x) - year)) if len(dist_years) > 0 else None
|
|
287
|
-
return None if closest_year is None else data_by_date.get(closest_year)
|
|
266
|
+
return None if closest_year is None else _parse_value(data_by_date.get(closest_year))
|
|
288
267
|
|
|
289
268
|
|
|
290
269
|
def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
|
{hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth/utils/pivot/pivot_csv.py
RENAMED
|
@@ -2,6 +2,7 @@ import copy
|
|
|
2
2
|
import json
|
|
3
3
|
import re
|
|
4
4
|
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
5
6
|
from hestia_earth.schema import UNIQUENESS_FIELDS, Term, NODE_TYPES
|
|
6
7
|
from hestia_earth.schema.utils.sort import get_sort_key, SORT_CONFIG
|
|
7
8
|
from flatten_json import flatten as flatten_json
|
|
@@ -11,17 +12,6 @@ from ..api import find_term_ids_by_names
|
|
|
11
12
|
from ._shared import EXCLUDE_FIELDS, EXCLUDE_PRIVATE_FIELDS, _with_csv_formatting, _filter_emissions_not_relevant
|
|
12
13
|
|
|
13
14
|
|
|
14
|
-
PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
|
|
15
|
-
try:
|
|
16
|
-
import pandas as pd
|
|
17
|
-
|
|
18
|
-
version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
|
|
19
|
-
if version[0] < 1 or (version[0] == 1 and version[1] < 2):
|
|
20
|
-
raise ImportError(PANDAS_IMPORT_ERROR_MSG)
|
|
21
|
-
except ImportError:
|
|
22
|
-
raise ImportError(PANDAS_IMPORT_ERROR_MSG)
|
|
23
|
-
|
|
24
|
-
|
|
25
15
|
# We only want to pivot array items containing blank nodes
|
|
26
16
|
# Assume these are all fields with uniqueness fields not of type Node
|
|
27
17
|
def _get_blank_node_uniqueness_fields():
|
|
@@ -1,22 +1,12 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
2
|
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
3
4
|
from hestia_earth.schema import NodeType
|
|
4
5
|
|
|
5
6
|
# __package__ = "hestia_earth.utils" # required to run interactively in vscode
|
|
6
7
|
from .tools import flatten
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
|
|
10
|
-
try:
|
|
11
|
-
import pandas as pd
|
|
12
|
-
|
|
13
|
-
version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
|
|
14
|
-
if version[0] < 1 or (version[0] == 1 and version[1] < 2):
|
|
15
|
-
raise ImportError(PANDAS_IMPORT_ERROR_MSG)
|
|
16
|
-
except ImportError:
|
|
17
|
-
raise ImportError(PANDAS_IMPORT_ERROR_MSG)
|
|
18
|
-
|
|
19
|
-
|
|
20
10
|
def _replace_ids(df):
|
|
21
11
|
# in columns, first letter is always lower case
|
|
22
12
|
node_types = [e.value[0].lower() + e.value[1:] for e in NodeType]
|
|
@@ -74,11 +64,6 @@ def format_for_upload(filepath: str):
|
|
|
74
64
|
pandas.DataFrame
|
|
75
65
|
Formatted pandas dataframe
|
|
76
66
|
"""
|
|
77
|
-
try:
|
|
78
|
-
import pandas as pd
|
|
79
|
-
except ImportError:
|
|
80
|
-
raise ImportError("Run `pip install pandas~=1.2.0` to use this functionality")
|
|
81
|
-
|
|
82
67
|
df = pd.read_csv(filepath, index_col=None, na_values="")
|
|
83
68
|
|
|
84
69
|
# replace @id with id for top-level Node
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = '0.16.2'
|
{hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/PKG-INFO
RENAMED
|
@@ -1,16 +1,35 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
2
|
-
Name:
|
|
3
|
-
Version: 0.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hestia_earth_utils
|
|
3
|
+
Version: 0.16.2
|
|
4
4
|
Summary: HESTIA's utils library
|
|
5
5
|
Home-page: https://gitlab.com/hestia-earth/hestia-utils
|
|
6
6
|
Author: HESTIA Team
|
|
7
7
|
Author-email: guillaumeroyer.mail@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
-
Requires-Python: >=3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Requires-Python: >=3.12
|
|
13
11
|
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: hestia-earth-schema>=35.0.1
|
|
13
|
+
Requires-Dist: requests>=2.24.0
|
|
14
|
+
Requires-Dist: urllib3~=1.26.0
|
|
15
|
+
Requires-Dist: python-dateutil>=2.8.1
|
|
16
|
+
Requires-Dist: numpy>=2
|
|
17
|
+
Requires-Dist: flatten_json
|
|
18
|
+
Provides-Extra: pivot-csv
|
|
19
|
+
Requires-Dist: pandas>=2; extra == "pivot-csv"
|
|
20
|
+
Provides-Extra: table
|
|
21
|
+
Requires-Dist: pandas>=2; extra == "table"
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: provides-extra
|
|
30
|
+
Dynamic: requires-dist
|
|
31
|
+
Dynamic: requires-python
|
|
32
|
+
Dynamic: summary
|
|
14
33
|
|
|
15
34
|
# HESTIA Utils
|
|
16
35
|
|
|
@@ -60,5 +79,3 @@ from hestia_earth.utils.lookup import download_lookup
|
|
|
60
79
|
|
|
61
80
|
df = download_lookup('crop.csv')
|
|
62
81
|
```
|
|
63
|
-
|
|
64
|
-
|
{hestia_earth_utils-0.15.16 → hestia_earth_utils-0.16.2}/hestia_earth_utils.egg-info/SOURCES.txt
RENAMED
|
@@ -3,11 +3,11 @@ README.md
|
|
|
3
3
|
setup.py
|
|
4
4
|
bin/hestia-format-upload
|
|
5
5
|
bin/hestia-pivot-csv
|
|
6
|
-
hestia_earth/__init__.py
|
|
7
6
|
hestia_earth/utils/__init__.py
|
|
8
7
|
hestia_earth/utils/api.py
|
|
9
8
|
hestia_earth/utils/blank_node.py
|
|
10
9
|
hestia_earth/utils/calculation_status.py
|
|
10
|
+
hestia_earth/utils/csv_utils.py
|
|
11
11
|
hestia_earth/utils/cycle.py
|
|
12
12
|
hestia_earth/utils/date.py
|
|
13
13
|
hestia_earth/utils/descriptive_stats.py
|
|
@@ -36,6 +36,19 @@ hestia_earth_utils.egg-info/SOURCES.txt
|
|
|
36
36
|
hestia_earth_utils.egg-info/dependency_links.txt
|
|
37
37
|
hestia_earth_utils.egg-info/requires.txt
|
|
38
38
|
hestia_earth_utils.egg-info/top_level.txt
|
|
39
|
-
tests/
|
|
40
|
-
tests/
|
|
41
|
-
tests/
|
|
39
|
+
tests/test_api.py
|
|
40
|
+
tests/test_blank_node.py
|
|
41
|
+
tests/test_calculation_status.py
|
|
42
|
+
tests/test_cycle.py
|
|
43
|
+
tests/test_date.py
|
|
44
|
+
tests/test_descriptive_stats.py
|
|
45
|
+
tests/test_emission.py
|
|
46
|
+
tests/test_lookup.py
|
|
47
|
+
tests/test_lookup_utils.py
|
|
48
|
+
tests/test_model.py
|
|
49
|
+
tests/test_pipeline.py
|
|
50
|
+
tests/test_request.py
|
|
51
|
+
tests/test_stats.py
|
|
52
|
+
tests/test_table.py
|
|
53
|
+
tests/test_term.py
|
|
54
|
+
tests/test_tools.py
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import pathlib
|
|
2
|
-
from setuptools import
|
|
2
|
+
from setuptools import find_namespace_packages, setup
|
|
3
3
|
|
|
4
4
|
from hestia_earth.utils.version import VERSION
|
|
5
5
|
|
|
@@ -23,15 +23,18 @@ setup(
|
|
|
23
23
|
author_email='guillaumeroyer.mail@gmail.com',
|
|
24
24
|
license='MIT',
|
|
25
25
|
classifiers=[
|
|
26
|
-
'
|
|
27
|
-
'Programming Language :: Python :: 3.9',
|
|
26
|
+
'Programming Language :: Python :: 3.12',
|
|
28
27
|
],
|
|
29
|
-
packages=
|
|
28
|
+
packages=find_namespace_packages(include=['hestia_earth.*']),
|
|
29
|
+
python_requires='>=3.12',
|
|
30
30
|
include_package_data=True,
|
|
31
31
|
install_requires=REQUIRES,
|
|
32
|
-
python_requires='>=3.9',
|
|
33
32
|
scripts=[
|
|
34
33
|
'bin/hestia-pivot-csv',
|
|
35
34
|
'bin/hestia-format-upload'
|
|
36
|
-
]
|
|
35
|
+
],
|
|
36
|
+
extras_require={
|
|
37
|
+
'pivot-csv': ['pandas>=2'],
|
|
38
|
+
'table': ['pandas>=2'],
|
|
39
|
+
}
|
|
37
40
|
)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
import os
|
|
3
|
+
import requests
|
|
4
|
+
import json
|
|
5
|
+
from hestia_earth.schema import SchemaType
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from .utils import fixtures_path
|
|
9
|
+
from hestia_earth.utils.request import api_url
|
|
10
|
+
from hestia_earth.utils.api import (
|
|
11
|
+
search,
|
|
12
|
+
find_related,
|
|
13
|
+
download_hestia,
|
|
14
|
+
node_exists,
|
|
15
|
+
find_node,
|
|
16
|
+
find_node_exact,
|
|
17
|
+
find_term_ids_by_names,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
fake_related_response = {'results': [[{'@id': 'related_id'}]]}
|
|
22
|
+
fake_download_response = {'@id': 'id', '@type': 'type'}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FakeFindRelatedSuccess():
|
|
26
|
+
def json():
|
|
27
|
+
return fake_related_response
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class FakeFindRelatedError():
|
|
31
|
+
def json():
|
|
32
|
+
return {}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FakeFindRelatedException():
|
|
36
|
+
def json():
|
|
37
|
+
raise requests.exceptions.RequestException('error')
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class FakeDownloadSuccess():
|
|
41
|
+
def json():
|
|
42
|
+
return fake_download_response
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class FakeDownloadError():
|
|
46
|
+
def json():
|
|
47
|
+
raise requests.exceptions.RequestException('error')
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class FakeNodeExistSuccess():
|
|
51
|
+
def json():
|
|
52
|
+
return fake_download_response
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class FakeNodeExistError():
|
|
56
|
+
def json():
|
|
57
|
+
return {"message": "not-found", "details": {}}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class FakeElasticSearchEmptyResult:
|
|
61
|
+
def json():
|
|
62
|
+
return {"results": []}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@patch('requests.get', return_value=FakeFindRelatedSuccess)
|
|
66
|
+
def test_find_related_success(mock_get):
|
|
67
|
+
res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
|
|
68
|
+
assert res == fake_related_response.get('results')
|
|
69
|
+
mock_get.assert_called_once_with(
|
|
70
|
+
f"{api_url()}/cycles/id/sources?limit=100", headers={'Content-Type': 'application/json'}
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@patch('requests.get', return_value=FakeFindRelatedError)
|
|
75
|
+
def test_find_related_error(*args):
|
|
76
|
+
res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
|
|
77
|
+
assert not res
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@patch('requests.get', return_value=FakeFindRelatedException)
|
|
81
|
+
def test_find_related_exception(*args):
|
|
82
|
+
res = find_related(SchemaType.CYCLE, 'id', SchemaType.SOURCE)
|
|
83
|
+
assert not res
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@patch('requests.get', return_value=FakeDownloadError) # make sure fallback is not enabled
|
|
87
|
+
def test_download_hestia_local_file(*args):
|
|
88
|
+
id = 'sandContent'
|
|
89
|
+
with open(f"{fixtures_path}/Term/{id}.jsonld", encoding='utf-8') as f:
|
|
90
|
+
expected = json.load(f)
|
|
91
|
+
os.environ['DOWNLOAD_FOLDER'] = fixtures_path
|
|
92
|
+
res = download_hestia(id, SchemaType.TERM)
|
|
93
|
+
assert res == expected
|
|
94
|
+
del os.environ['DOWNLOAD_FOLDER']
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@patch('requests.get', return_value=FakeDownloadSuccess)
|
|
98
|
+
def test_download_hestia_success(mock_get):
|
|
99
|
+
res = download_hestia('id', SchemaType.SOURCE)
|
|
100
|
+
assert res == fake_download_response
|
|
101
|
+
mock_get.assert_called_once_with(
|
|
102
|
+
f"{api_url()}/sources/id", headers={'Content-Type': 'application/json'})
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@patch('requests.get', return_value=FakeDownloadError)
|
|
106
|
+
def test_download_hestia_error(*args):
|
|
107
|
+
res = download_hestia('id', SchemaType.SOURCE)
|
|
108
|
+
assert not res
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@patch('requests.get', return_value=FakeNodeExistError) # make sure fallback is not enabled
|
|
112
|
+
def test_node_exists_local_file(*args):
|
|
113
|
+
os.environ['DOWNLOAD_FOLDER'] = fixtures_path
|
|
114
|
+
id = 'sandContent'
|
|
115
|
+
assert node_exists(id, SchemaType.TERM)
|
|
116
|
+
del os.environ['DOWNLOAD_FOLDER']
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@patch('requests.get', return_value=FakeNodeExistSuccess)
|
|
120
|
+
def test_node_exists_true(*args):
|
|
121
|
+
assert node_exists('id', SchemaType.SOURCE)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@patch('requests.get', return_value=FakeNodeExistError)
|
|
125
|
+
def test_node_exists_false(*args):
|
|
126
|
+
assert not node_exists('id', SchemaType.SOURCE)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_search():
|
|
130
|
+
name = 'Wheat'
|
|
131
|
+
res = search(query={
|
|
132
|
+
'bool': {
|
|
133
|
+
'must': [{'match': {'name': name}}]
|
|
134
|
+
}
|
|
135
|
+
}, limit=2)
|
|
136
|
+
assert res[0].get('name').startswith(name)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_find_node():
|
|
140
|
+
name = 'Wheat'
|
|
141
|
+
res = find_node(SchemaType.TERM, {'name': name}, 2)
|
|
142
|
+
assert res[0].get('name').startswith(name)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_find_node_exact():
|
|
146
|
+
name = 'Wheat'
|
|
147
|
+
res = find_node_exact(SchemaType.TERM, {'name': name})
|
|
148
|
+
assert not res
|
|
149
|
+
|
|
150
|
+
name = 'Wheat, grain'
|
|
151
|
+
res = find_node_exact(SchemaType.TERM, {'name': name})
|
|
152
|
+
assert res.get('name') == name
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def test_find_term_ids_by_names():
|
|
156
|
+
names = ["Harris Termite Powder", "Wheat, grain", "Urea (kg N)"]
|
|
157
|
+
res = find_term_ids_by_names(names, 2)
|
|
158
|
+
assert res == {
|
|
159
|
+
"Wheat, grain": "wheatGrain",
|
|
160
|
+
"Harris Termite Powder": "harrisTermitePowder",
|
|
161
|
+
"Urea (kg N)": "ureaKgN",
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@patch("requests.post", return_value=FakeElasticSearchEmptyResult)
|
|
166
|
+
def test_find_term_ids_by_names_error(mock):
|
|
167
|
+
names = ["id_not_found_name"]
|
|
168
|
+
with pytest.raises(Exception, match=names[0]):
|
|
169
|
+
find_term_ids_by_names(names)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# import os
|
|
2
|
+
# import json
|
|
3
|
+
# from pytest import mark
|
|
4
|
+
# from hestia_earth.schema import TermTermType
|
|
5
|
+
|
|
6
|
+
# from tests.utils import fixtures_path
|
|
7
|
+
# from hestia_earth.utils.blank_node import get_node_value, ArrayTreatment, get_blank_nodes_calculation_status
|
|
8
|
+
|
|
9
|
+
# fixtures_folder = os.path.join(fixtures_path, 'blank_node')
|
|
10
|
+
# calculation_status_folder = os.path.join(fixtures_folder, 'calculation_status')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# def test_get_node_value():
|
|
14
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10]}
|
|
15
|
+
# assert get_node_value(blank_node, 'value', default=None) == 10
|
|
16
|
+
|
|
17
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [0]}
|
|
18
|
+
# assert get_node_value(blank_node, 'value', default=None) == 0
|
|
19
|
+
|
|
20
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}}
|
|
21
|
+
# assert get_node_value(blank_node, 'value', default=None) is None
|
|
22
|
+
|
|
23
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, 20]}
|
|
24
|
+
# assert get_node_value(blank_node, 'value', default=None) == 15
|
|
25
|
+
|
|
26
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': True}
|
|
27
|
+
# assert get_node_value(blank_node, 'value', default=None) is True
|
|
28
|
+
|
|
29
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': 10}
|
|
30
|
+
# assert get_node_value(blank_node, 'value', default=None) == 10
|
|
31
|
+
|
|
32
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': None}
|
|
33
|
+
# assert get_node_value(blank_node, 'value', default=None) is None
|
|
34
|
+
|
|
35
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': None}
|
|
36
|
+
# assert get_node_value(blank_node, 'value', default=0) == 0
|
|
37
|
+
|
|
38
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, None, 20]}
|
|
39
|
+
# assert get_node_value(blank_node, 'value', default=None) == 15
|
|
40
|
+
|
|
41
|
+
# blank_node = {'term': {'termType': 'crop', '@id': 'wheatGrain'}, 'value': [10, None, 20]}
|
|
42
|
+
# assert get_node_value(blank_node, 'value', array_treatment=ArrayTreatment.SUM) == 30
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# @mark.parametrize(
|
|
46
|
+
# 'folder,list_key,termType',
|
|
47
|
+
# [
|
|
48
|
+
# ('cycle', 'emissions', TermTermType.EMISSION),
|
|
49
|
+
# ]
|
|
50
|
+
# )
|
|
51
|
+
# def test_get_blank_nodes_calculation_status(folder: str, list_key: str, termType: TermTermType):
|
|
52
|
+
# with open(f"{calculation_status_folder}/{folder}/node.jsonld", encoding='utf-8') as f:
|
|
53
|
+
# node = json.load(f)
|
|
54
|
+
|
|
55
|
+
# with open(f"{calculation_status_folder}/{folder}/{list_key}-{termType.value}.json", encoding='utf-8') as f:
|
|
56
|
+
# expected = json.load(f)
|
|
57
|
+
|
|
58
|
+
# result = get_blank_nodes_calculation_status(node, list_key=list_key, termType=termType)
|
|
59
|
+
# assert result == expected, folder
|