hestia-earth-utils 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/blank_node.py +2 -2
- hestia_earth/utils/lookup.py +41 -74
- hestia_earth/utils/lookup_utils.py +4 -4
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.16.5.dist-info → hestia_earth_utils-0.16.7.dist-info}/METADATA +2 -7
- {hestia_earth_utils-0.16.5.dist-info → hestia_earth_utils-0.16.7.dist-info}/RECORD +10 -11
- hestia_earth/utils/csv_utils.py +0 -84
- {hestia_earth_utils-0.16.5.data → hestia_earth_utils-0.16.7.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.16.5.data → hestia_earth_utils-0.16.7.data}/scripts/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.16.5.dist-info → hestia_earth_utils-0.16.7.dist-info}/WHEEL +0 -0
- {hestia_earth_utils-0.16.5.dist-info → hestia_earth_utils-0.16.7.dist-info}/top_level.txt +0 -0
hestia_earth/utils/blank_node.py
CHANGED
|
@@ -5,7 +5,7 @@ from functools import reduce
|
|
|
5
5
|
from statistics import mode, mean
|
|
6
6
|
from hestia_earth.schema import TermTermType
|
|
7
7
|
|
|
8
|
-
from .lookup import download_lookup, get_table_value
|
|
8
|
+
from .lookup import download_lookup, get_table_value
|
|
9
9
|
from .tools import non_empty_list, non_empty_value, flatten
|
|
10
10
|
from .emission import cycle_emissions_in_system_boundary
|
|
11
11
|
from .model import filter_list_term_type
|
|
@@ -15,7 +15,7 @@ def get_lookup_value(blank_node: dict, column: str):
|
|
|
15
15
|
term = blank_node.get('term', {})
|
|
16
16
|
table_name = f"{term.get('termType')}.csv" if term else None
|
|
17
17
|
value = get_table_value(
|
|
18
|
-
download_lookup(table_name), '
|
|
18
|
+
download_lookup(table_name), 'term.id', term.get('@id'), column
|
|
19
19
|
) if table_name else None
|
|
20
20
|
return value
|
|
21
21
|
|
hestia_earth/utils/lookup.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Any
|
|
3
3
|
import requests
|
|
4
|
-
import
|
|
4
|
+
from io import StringIO
|
|
5
|
+
import pandas as pd
|
|
5
6
|
|
|
6
7
|
from .storage import _load_from_storage
|
|
7
8
|
from .request import request_url, web_url
|
|
8
|
-
from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars
|
|
9
9
|
|
|
10
10
|
_GLOSSARY_FOLDER = 'glossary/lookups'
|
|
11
11
|
_memory = {}
|
|
12
|
-
_INDEX_COL = 'termid'
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
def _memory_wrapper(key: str, func):
|
|
@@ -18,6 +17,18 @@ def _memory_wrapper(key: str, func):
|
|
|
18
17
|
return _memory[key]
|
|
19
18
|
|
|
20
19
|
|
|
20
|
+
def _read_csv(value: str) -> pd.DataFrame:
|
|
21
|
+
return pd.read_csv(value, na_values=['-', ''])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _read_csv_from_string(data: str) -> pd.DataFrame:
|
|
25
|
+
return _read_csv(StringIO(data))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_missing_value(value):
|
|
29
|
+
return pd.isna(value) or value is None or value == '' or value == '-'
|
|
30
|
+
|
|
31
|
+
|
|
21
32
|
def load_lookup(filepath: str, keep_in_memory: bool = False):
|
|
22
33
|
"""
|
|
23
34
|
Import local lookup table as csv file into a `numpy.recarray`.
|
|
@@ -34,7 +45,7 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
|
|
|
34
45
|
numpy.recarray
|
|
35
46
|
The `numpy.recarray` converted from the csv content.
|
|
36
47
|
"""
|
|
37
|
-
def load(): return
|
|
48
|
+
def load(): return _read_csv(filepath)
|
|
38
49
|
return _memory_wrapper(filepath, load) if keep_in_memory else load()
|
|
39
50
|
|
|
40
51
|
|
|
@@ -43,7 +54,8 @@ def _download_lookup_data(filename: str):
|
|
|
43
54
|
|
|
44
55
|
def fallback():
|
|
45
56
|
url = request_url(f"{web_url()}/{filepath}")
|
|
46
|
-
|
|
57
|
+
data = requests.get(url).content.decode('utf-8')
|
|
58
|
+
return data if data and '<html' not in data else None
|
|
47
59
|
|
|
48
60
|
try:
|
|
49
61
|
data = _load_from_storage(filepath, glossary=True)
|
|
@@ -52,20 +64,7 @@ def _download_lookup_data(filename: str):
|
|
|
52
64
|
return fallback()
|
|
53
65
|
|
|
54
66
|
|
|
55
|
-
def
|
|
56
|
-
columns = list(array.dtype.names)
|
|
57
|
-
try:
|
|
58
|
-
return {
|
|
59
|
-
row[_INDEX_COL]: {col: row[col] for col in columns}
|
|
60
|
-
for row in array
|
|
61
|
-
} if _INDEX_COL in columns else array
|
|
62
|
-
except TypeError:
|
|
63
|
-
return {
|
|
64
|
-
array[_INDEX_COL].item(): {col: array[col].item() for col in columns}
|
|
65
|
-
} if _INDEX_COL in columns else array
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def download_lookup(filename: str, keep_in_memory: bool = True, build_index: bool = False):
|
|
67
|
+
def download_lookup(filename: str, keep_in_memory: bool = True):
|
|
69
68
|
"""
|
|
70
69
|
Download lookup table from HESTIA as csv into a `numpy.recarray`.
|
|
71
70
|
|
|
@@ -85,8 +84,7 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
|
|
|
85
84
|
"""
|
|
86
85
|
def load():
|
|
87
86
|
data = _download_lookup_data(filename)
|
|
88
|
-
|
|
89
|
-
return (_build_index(rec) if build_index else rec) if data else None
|
|
87
|
+
return _read_csv_from_string(data) if data else None
|
|
90
88
|
|
|
91
89
|
try:
|
|
92
90
|
return _memory_wrapper(filename, load) if keep_in_memory else load()
|
|
@@ -96,19 +94,9 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
|
|
|
96
94
|
|
|
97
95
|
def column_name(key: str):
|
|
98
96
|
"""
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
Parameters
|
|
102
|
-
----------
|
|
103
|
-
key : str
|
|
104
|
-
The column name.
|
|
105
|
-
|
|
106
|
-
Returns
|
|
107
|
-
-------
|
|
108
|
-
str
|
|
109
|
-
The column name that can be used in `get_table_value`.
|
|
97
|
+
Deprecated. Columns are no longer renamed.
|
|
110
98
|
"""
|
|
111
|
-
return
|
|
99
|
+
return key
|
|
112
100
|
|
|
113
101
|
|
|
114
102
|
def _parse_value(value: str):
|
|
@@ -123,36 +111,24 @@ def _parse_value(value: str):
|
|
|
123
111
|
return value
|
|
124
112
|
|
|
125
113
|
|
|
126
|
-
def _get_single_table_value(
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
data[data[col_match] == col_match_with][col_val][0]
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def _get_multiple_table_values(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
|
|
134
|
-
def reducer(x, values):
|
|
135
|
-
col = values[1]
|
|
136
|
-
value = col_match_with[values[0]]
|
|
137
|
-
return x.get(value) if isinstance(x, dict) else x[x[col] == value]
|
|
114
|
+
def _get_single_table_value(df: pd.DataFrame, col_match: str, col_match_with, col_val):
|
|
115
|
+
filtered_df = df[df[col_match] == col_match_with]
|
|
116
|
+
return None if filtered_df.empty else filtered_df[col_val].iloc[0]
|
|
138
117
|
|
|
139
|
-
return reduce(reducer, enumerate(col_match), data)[col_val][0]
|
|
140
118
|
|
|
141
|
-
|
|
142
|
-
def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val, default_value=''):
|
|
119
|
+
def get_table_value(lookup: pd.DataFrame, col_match: str, col_match_with: str, col_val: Any, default_value=''):
|
|
143
120
|
"""
|
|
144
121
|
Get a value matched by one or more columns from a `numpy.recarray`.
|
|
145
122
|
|
|
146
123
|
Parameters
|
|
147
124
|
----------
|
|
148
|
-
lookup :
|
|
125
|
+
lookup : DataFrame
|
|
149
126
|
The value returned by the `download_lookup` function.
|
|
150
127
|
col_match : str
|
|
151
128
|
Which `column` should be used to find data in. This will restrict the rows to search for.
|
|
152
129
|
Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match_with`.
|
|
153
|
-
col_match_with
|
|
130
|
+
col_match_with: str
|
|
154
131
|
Which column `value` should be used to find data in. This will restrict the rows to search for.
|
|
155
|
-
Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match`.
|
|
156
132
|
col_val: str
|
|
157
133
|
The column which contains the value to look for.
|
|
158
134
|
default_value : Any
|
|
@@ -163,44 +139,35 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
|
|
|
163
139
|
str
|
|
164
140
|
The value found or `None` if no match.
|
|
165
141
|
"""
|
|
166
|
-
single = isinstance(col_match, str) and isinstance(col_match_with, str)
|
|
167
142
|
try:
|
|
168
|
-
value = (
|
|
169
|
-
|
|
170
|
-
_get_multiple_table_values(lookup, col_match, col_match_with, col_val)
|
|
171
|
-
)
|
|
143
|
+
value = _get_single_table_value(lookup, col_match, col_match_with, col_val)
|
|
144
|
+
print(value, type(value))
|
|
172
145
|
return default_value if is_missing_value(value) else _parse_value(value)
|
|
173
146
|
except Exception:
|
|
174
147
|
return None
|
|
175
148
|
|
|
176
149
|
|
|
177
|
-
def find_term_ids_by(lookup:
|
|
150
|
+
def find_term_ids_by(lookup: pd.DataFrame, col_match: str, col_match_with: str):
|
|
178
151
|
"""
|
|
179
152
|
Find `term.id` values where a column matches a specific value.
|
|
180
153
|
|
|
181
154
|
Parameters
|
|
182
155
|
----------
|
|
183
|
-
lookup :
|
|
156
|
+
lookup : DataFrame
|
|
184
157
|
The value returned by the `download_lookup` function.
|
|
185
158
|
col_match : str
|
|
186
159
|
Which `column` should be used to find data in. This will restrict the rows to search for.
|
|
187
160
|
Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match_with`.
|
|
188
|
-
col_match_with
|
|
161
|
+
col_match_with: str
|
|
189
162
|
Which column `value` should be used to find data in. This will restrict the rows to search for.
|
|
190
|
-
Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match`.
|
|
191
163
|
|
|
192
164
|
Returns
|
|
193
165
|
-------
|
|
194
166
|
list[str]
|
|
195
167
|
The list of `term.id` that matched the expected column value.
|
|
196
168
|
"""
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
key
|
|
200
|
-
for key, value in lookup.items()
|
|
201
|
-
if value.get(col_match) == col_match_with
|
|
202
|
-
])
|
|
203
|
-
) if isinstance(lookup, dict) else set(list(lookup[lookup[col_match] == col_match_with].termid))
|
|
169
|
+
filtered_df = lookup[lookup[col_match] == col_match_with]
|
|
170
|
+
term_ids = filtered_df['term.id'].unique().tolist() if 'term.id' in filtered_df.columns else []
|
|
204
171
|
return list(map(str, term_ids))
|
|
205
172
|
|
|
206
173
|
|
|
@@ -265,13 +232,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
|
|
|
265
232
|
return None if closest_year is None else _parse_value(data_by_date.get(closest_year))
|
|
266
233
|
|
|
267
234
|
|
|
268
|
-
def lookup_term_ids(lookup:
|
|
235
|
+
def lookup_term_ids(lookup: pd.DataFrame):
|
|
269
236
|
"""
|
|
270
237
|
Get the `term.id` values from a lookup.
|
|
271
238
|
|
|
272
239
|
Parameters
|
|
273
240
|
----------
|
|
274
|
-
lookup :
|
|
241
|
+
lookup : DataFrame
|
|
275
242
|
The value returned by the `download_lookup` function.
|
|
276
243
|
|
|
277
244
|
Returns
|
|
@@ -279,16 +246,16 @@ def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
|
|
|
279
246
|
list[str]
|
|
280
247
|
The `term.id` values from the lookup.
|
|
281
248
|
"""
|
|
282
|
-
return lookup.
|
|
249
|
+
return list(map(str, lookup['term.id'].tolist())) if 'term.id' in lookup.columns else []
|
|
283
250
|
|
|
284
251
|
|
|
285
|
-
def lookup_columns(lookup:
|
|
252
|
+
def lookup_columns(lookup: pd.DataFrame):
|
|
286
253
|
"""
|
|
287
254
|
Get the columns from a lookup.
|
|
288
255
|
|
|
289
256
|
Parameters
|
|
290
257
|
----------
|
|
291
|
-
lookup :
|
|
258
|
+
lookup : DataFrame
|
|
292
259
|
The value returned by the `download_lookup` function.
|
|
293
260
|
|
|
294
261
|
Returns
|
|
@@ -296,4 +263,4 @@ def lookup_columns(lookup: Union[dict, numpy.recarray]):
|
|
|
296
263
|
list[str]
|
|
297
264
|
The columns from the lookup.
|
|
298
265
|
"""
|
|
299
|
-
return list(
|
|
266
|
+
return list(lookup.columns)
|
|
@@ -2,7 +2,7 @@ from functools import lru_cache
|
|
|
2
2
|
import json
|
|
3
3
|
from hestia_earth.schema import SchemaType
|
|
4
4
|
|
|
5
|
-
from .lookup import _download_lookup_data, download_lookup, get_table_value
|
|
5
|
+
from .lookup import _download_lookup_data, download_lookup, get_table_value
|
|
6
6
|
from .api import download_hestia
|
|
7
7
|
from .tools import non_empty_list, flatten
|
|
8
8
|
|
|
@@ -45,7 +45,7 @@ def _allowed_model_mapping(model: str, term_id: str, column: str):
|
|
|
45
45
|
mapping = _allowed_mapping_data()
|
|
46
46
|
value = mapping.get(term_id, {}).get(model, {}).get(column) if mapping else get_table_value(
|
|
47
47
|
download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}-model-{column}.csv"),
|
|
48
|
-
'
|
|
48
|
+
'term.id', term_id, column
|
|
49
49
|
)
|
|
50
50
|
return (value or _ALLOW_ALL).split(';') if isinstance(value, str) else _ALLOW_ALL
|
|
51
51
|
|
|
@@ -78,7 +78,7 @@ def _allowed_mapping(term_id: str, column: str):
|
|
|
78
78
|
mapping = _allowed_mapping_data()
|
|
79
79
|
value = mapping.get(term_id, {}).get(column) if mapping else get_table_value(
|
|
80
80
|
download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}.csv"),
|
|
81
|
-
'
|
|
81
|
+
'term.id', term_id, column
|
|
82
82
|
)
|
|
83
83
|
return (value or _ALLOW_ALL).split(';') if isinstance(value, str) else _ALLOW_ALL
|
|
84
84
|
|
|
@@ -174,7 +174,7 @@ def is_in_system_boundary(term_id: str) -> bool:
|
|
|
174
174
|
column = 'inHestiaDefaultSystemBoundary'
|
|
175
175
|
value = mapping.get(term_id, {}).get(column) if mapping else get_table_value(
|
|
176
176
|
download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}.csv"),
|
|
177
|
-
'
|
|
177
|
+
'term.id', term_id, column
|
|
178
178
|
)
|
|
179
179
|
# handle numpy bool from table value
|
|
180
180
|
return not (not value)
|
hestia_earth/utils/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '0.16.
|
|
1
|
+
VERSION = '0.16.7'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hestia_earth_utils
|
|
3
|
-
Version: 0.16.
|
|
3
|
+
Version: 0.16.7
|
|
4
4
|
Summary: HESTIA's utils library
|
|
5
5
|
Home-page: https://gitlab.com/hestia-earth/hestia-utils
|
|
6
6
|
Author: HESTIA Team
|
|
@@ -13,12 +13,8 @@ Requires-Dist: hestia-earth-schema>=35.0.1
|
|
|
13
13
|
Requires-Dist: requests>=2.24.0
|
|
14
14
|
Requires-Dist: urllib3~=1.26.0
|
|
15
15
|
Requires-Dist: python-dateutil>=2.8.1
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: pandas>=2
|
|
17
17
|
Requires-Dist: flatten_json
|
|
18
|
-
Provides-Extra: pivot-csv
|
|
19
|
-
Requires-Dist: pandas>=2; extra == "pivot-csv"
|
|
20
|
-
Provides-Extra: table
|
|
21
|
-
Requires-Dist: pandas>=2; extra == "table"
|
|
22
18
|
Dynamic: author
|
|
23
19
|
Dynamic: author-email
|
|
24
20
|
Dynamic: classifier
|
|
@@ -26,7 +22,6 @@ Dynamic: description
|
|
|
26
22
|
Dynamic: description-content-type
|
|
27
23
|
Dynamic: home-page
|
|
28
24
|
Dynamic: license
|
|
29
|
-
Dynamic: provides-extra
|
|
30
25
|
Dynamic: requires-dist
|
|
31
26
|
Dynamic: requires-python
|
|
32
27
|
Dynamic: summary
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
|
|
3
|
-
hestia_earth/utils/blank_node.py,sha256=
|
|
3
|
+
hestia_earth/utils/blank_node.py,sha256=kLjq8U0PYyq_SQ-VHGMll_3XxKdYEnHEwtCCglNT3vg,7350
|
|
4
4
|
hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
|
|
5
|
-
hestia_earth/utils/csv_utils.py,sha256=BK-tci1sALmsxamSR1Y7f9O6ajTTdhggLC2pBEWhYME,2310
|
|
6
5
|
hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
|
|
7
6
|
hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
|
|
8
7
|
hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
|
|
9
8
|
hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
|
|
10
|
-
hestia_earth/utils/lookup.py,sha256=
|
|
11
|
-
hestia_earth/utils/lookup_utils.py,sha256=
|
|
9
|
+
hestia_earth/utils/lookup.py,sha256=Sea1EkwT1K4mb9eNQBkJHoXkvNLSg_N9eeNiUL6pLq0,8028
|
|
10
|
+
hestia_earth/utils/lookup_utils.py,sha256=P3Ae2MqZWvk3f9AObNwk6Fq9AyyX279K4kR9qHX8rKQ,6667
|
|
12
11
|
hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
|
|
13
12
|
hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
|
|
14
13
|
hestia_earth/utils/request.py,sha256=bu7hkWKmFdXl2_Feawiam_x32whlclA9oP0asJyC69k,626
|
|
@@ -16,7 +15,7 @@ hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,3
|
|
|
16
15
|
hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
|
|
17
16
|
hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
|
|
18
17
|
hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
|
|
19
|
-
hestia_earth/utils/version.py,sha256=
|
|
18
|
+
hestia_earth/utils/version.py,sha256=izOjXE-oE9zdUdGeSgNJik6goDuxSRXghKlLPR0OuNE,19
|
|
20
19
|
hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
20
|
hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
|
|
22
21
|
hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
|
|
@@ -26,9 +25,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0K
|
|
|
26
25
|
hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
|
|
27
26
|
hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
|
|
28
27
|
hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
|
|
29
|
-
hestia_earth_utils-0.16.
|
|
30
|
-
hestia_earth_utils-0.16.
|
|
31
|
-
hestia_earth_utils-0.16.
|
|
32
|
-
hestia_earth_utils-0.16.
|
|
33
|
-
hestia_earth_utils-0.16.
|
|
34
|
-
hestia_earth_utils-0.16.
|
|
28
|
+
hestia_earth_utils-0.16.7.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
|
|
29
|
+
hestia_earth_utils-0.16.7.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
|
|
30
|
+
hestia_earth_utils-0.16.7.dist-info/METADATA,sha256=o6sR5_7DeeXBLuKWYMFmg0CWRg3O-Cynh6NVZkI1mC0,1869
|
|
31
|
+
hestia_earth_utils-0.16.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
32
|
+
hestia_earth_utils-0.16.7.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
|
|
33
|
+
hestia_earth_utils-0.16.7.dist-info/RECORD,,
|
hestia_earth/utils/csv_utils.py
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
import io
|
|
2
|
-
import csv
|
|
3
|
-
import re
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
|
-
_MISSING_VALUE = '-'
|
|
7
|
-
_MISSING = -99999
|
|
8
|
-
_DELIMITER = ','
|
|
9
|
-
_QUOTE_CHAR = '"'
|
|
10
|
-
ENCODING = 'ISO-8859-1'
|
|
11
|
-
# default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
|
|
12
|
-
_DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def is_missing_value(value): return value == _MISSING_VALUE or value == _MISSING or value == str(_MISSING)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def _replace_missing_values(value: str): return str(_MISSING) if str(value) == _MISSING_VALUE else value
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _replace_chars(value: str): return re.sub(f'[{re.escape(_DELETE_CHARS)}]', '', value.replace(' ', '_'))
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _text_to_csv(csv_content: str):
|
|
25
|
-
return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _get_columns(csv_content: str):
|
|
29
|
-
try:
|
|
30
|
-
reader = _text_to_csv(csv_content)
|
|
31
|
-
names = next(reader)
|
|
32
|
-
return list(map(_replace_chars, names))
|
|
33
|
-
except StopIteration:
|
|
34
|
-
return []
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _get_rows(csv_content: str):
|
|
38
|
-
string_io = io.StringIO(csv_content.strip())
|
|
39
|
-
try:
|
|
40
|
-
next(string_io)
|
|
41
|
-
except StopIteration:
|
|
42
|
-
return
|
|
43
|
-
|
|
44
|
-
return csv.reader(string_io, delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def _csv_str_to_recarray_chunks_numpy(csv_content: str, chunk_size: int = 5):
|
|
48
|
-
names = _get_columns(csv_content)
|
|
49
|
-
num_cols = len(names)
|
|
50
|
-
|
|
51
|
-
max_size = 1000
|
|
52
|
-
dtype = [(name, f"U{max_size}") for name in names]
|
|
53
|
-
|
|
54
|
-
reader = _get_rows(csv_content)
|
|
55
|
-
|
|
56
|
-
# 4. Process the file in batches
|
|
57
|
-
chunk_rows = []
|
|
58
|
-
for row in reader:
|
|
59
|
-
if not row:
|
|
60
|
-
continue
|
|
61
|
-
if len(row) != num_cols:
|
|
62
|
-
continue
|
|
63
|
-
|
|
64
|
-
# replace missing values
|
|
65
|
-
processed_row = tuple(_replace_missing_values(field) for field in row)
|
|
66
|
-
chunk_rows.append(processed_row)
|
|
67
|
-
|
|
68
|
-
if len(chunk_rows) >= chunk_size:
|
|
69
|
-
yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
|
|
70
|
-
chunk_rows = []
|
|
71
|
-
|
|
72
|
-
if chunk_rows:
|
|
73
|
-
yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def csv_str_to_recarray(csv_content: str) -> np.recarray:
|
|
77
|
-
array_rows = list(_csv_str_to_recarray_chunks_numpy(csv_content))
|
|
78
|
-
return np.hstack(array_rows).view(np.recarray)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def csv_file_to_recarray(filepath: str):
|
|
82
|
-
with open(filepath, 'r', encoding=ENCODING) as f:
|
|
83
|
-
content = f.read()
|
|
84
|
-
return csv_str_to_recarray(content)
|
{hestia_earth_utils-0.16.5.data → hestia_earth_utils-0.16.7.data}/scripts/hestia-format-upload
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|