hestia-earth-utils 0.15.16__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/__init__.py +0 -3
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.15.16.dist-info → hestia_earth_utils-0.16.1.dist-info}/METADATA +17 -11
- {hestia_earth_utils-0.15.16.dist-info → hestia_earth_utils-0.16.1.dist-info}/RECORD +8 -12
- {hestia_earth_utils-0.15.16.dist-info → hestia_earth_utils-0.16.1.dist-info}/WHEEL +1 -1
- {hestia_earth_utils-0.15.16.dist-info → hestia_earth_utils-0.16.1.dist-info}/top_level.txt +0 -1
- hestia_earth/__init__.py +0 -1
- tests/pivot/__init__.py +0 -0
- tests/pivot/test_pivot_csv.py +0 -267
- tests/pivot/test_pivot_json.py +0 -231
- {hestia_earth_utils-0.15.16.data → hestia_earth_utils-0.16.1.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.15.16.data → hestia_earth_utils-0.16.1.data}/scripts/hestia-pivot-csv +0 -0
hestia_earth/utils/__init__.py
CHANGED
hestia_earth/utils/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '0.15.16'
|
|
1
|
+
VERSION = '0.16.1'
|
|
@@ -1,22 +1,30 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
2
|
-
Name:
|
|
3
|
-
Version: 0.15.16
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hestia_earth_utils
|
|
3
|
+
Version: 0.16.1
|
|
4
4
|
Summary: HESTIA's utils library
|
|
5
5
|
Home-page: https://gitlab.com/hestia-earth/hestia-utils
|
|
6
6
|
Author: HESTIA Team
|
|
7
7
|
Author-email: guillaumeroyer.mail@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
-
Requires-Python: >=3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Requires-Python: >=3.12
|
|
13
11
|
Description-Content-Type: text/markdown
|
|
14
|
-
Requires-Dist: hestia-earth
|
|
12
|
+
Requires-Dist: hestia-earth-schema>=35.0.1
|
|
15
13
|
Requires-Dist: requests>=2.24.0
|
|
16
14
|
Requires-Dist: urllib3~=1.26.0
|
|
17
15
|
Requires-Dist: python-dateutil>=2.8.1
|
|
18
16
|
Requires-Dist: numpy<2,>=1.25.0
|
|
19
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: flatten_json
|
|
18
|
+
Dynamic: author
|
|
19
|
+
Dynamic: author-email
|
|
20
|
+
Dynamic: classifier
|
|
21
|
+
Dynamic: description
|
|
22
|
+
Dynamic: description-content-type
|
|
23
|
+
Dynamic: home-page
|
|
24
|
+
Dynamic: license
|
|
25
|
+
Dynamic: requires-dist
|
|
26
|
+
Dynamic: requires-python
|
|
27
|
+
Dynamic: summary
|
|
20
28
|
|
|
21
29
|
# HESTIA Utils
|
|
22
30
|
|
|
@@ -66,5 +74,3 @@ from hestia_earth.utils.lookup import download_lookup
|
|
|
66
74
|
|
|
67
75
|
df = download_lookup('crop.csv')
|
|
68
76
|
```
|
|
69
|
-
|
|
70
|
-
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
hestia_earth/__init__.py,sha256=
|
|
2
|
-
hestia_earth/utils/__init__.py,sha256=qEFeq3yuf3lQKVseALmL8aPM8fpCS54B_5pry00M3hk,76
|
|
1
|
+
hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
2
|
hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
|
|
4
3
|
hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
|
|
5
4
|
hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
|
|
@@ -16,7 +15,7 @@ hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,3
|
|
|
16
15
|
hestia_earth/utils/table.py,sha256=RrTt-KF_QzjKiCpaAueoG6La1FG-Iusxw5NMDpoRBpQ,2861
|
|
17
16
|
hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
|
|
18
17
|
hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
|
|
19
|
-
hestia_earth/utils/version.py,sha256=
|
|
18
|
+
hestia_earth/utils/version.py,sha256=xSEZ3N7McvEb7jAV8u8vNc-uFNjN6zu3mnEj2i-XYyk,19
|
|
20
19
|
hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
20
|
hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
|
|
22
21
|
hestia_earth/utils/pivot/pivot_csv.py,sha256=zaiDcig4I5lVSHPZ-2bXKKBcIRrayA0GUaw0c8H3D-w,12371
|
|
@@ -26,12 +25,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0K
|
|
|
26
25
|
hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
|
|
27
26
|
hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
|
|
28
27
|
hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
|
|
29
|
-
hestia_earth_utils-0.
|
|
30
|
-
hestia_earth_utils-0.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
hestia_earth_utils-0.
|
|
35
|
-
hestia_earth_utils-0.15.16.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
36
|
-
hestia_earth_utils-0.15.16.dist-info/top_level.txt,sha256=1dqA9TqpOLTEgpqa-YBsmbCmmNU1y56AtfFGEceZ2A0,19
|
|
37
|
-
hestia_earth_utils-0.15.16.dist-info/RECORD,,
|
|
28
|
+
hestia_earth_utils-0.16.1.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
|
|
29
|
+
hestia_earth_utils-0.16.1.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
|
|
30
|
+
hestia_earth_utils-0.16.1.dist-info/METADATA,sha256=ofiEuFahZ5b4fyOxvnJ0N40gck62LEBKWsZiSWaSzFU,1876
|
|
31
|
+
hestia_earth_utils-0.16.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
32
|
+
hestia_earth_utils-0.16.1.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
|
|
33
|
+
hestia_earth_utils-0.16.1.dist-info/RECORD,,
|
hestia_earth/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__import__('pkg_resources').declare_namespace(__name__)
|
tests/pivot/__init__.py
DELETED
|
File without changes
|
tests/pivot/test_pivot_csv.py
DELETED
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pandas as pd
|
|
3
|
-
from unittest.mock import patch, call
|
|
4
|
-
|
|
5
|
-
from tests.utils import fixtures_path
|
|
6
|
-
from hestia_earth.utils.pivot.pivot_csv import pivot_csv, pivot_hestia_file
|
|
7
|
-
|
|
8
|
-
class_path = 'hestia_earth.utils.pivot.pivot_csv'
|
|
9
|
-
fixtures_folder = os.path.join(fixtures_path, 'pivot', 'pivot_csv')
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@patch(
|
|
13
|
-
f"{class_path}.find_term_ids_by_names",
|
|
14
|
-
return_value={
|
|
15
|
-
"Full tillage": "fullTillage",
|
|
16
|
-
"Diesel": "diesel",
|
|
17
|
-
"Inorganic Potassium fertiliser, unspecified (kg K2O)": "inorganicPotassiumFertiliserUnspecifiedKgK2O",
|
|
18
|
-
"Inorganic Phosphorus fertiliser, unspecified (kg P2O5)": "inorganicPhosphorusFertiliserUnspecifiedKgP2O5",
|
|
19
|
-
"Urea (kg N)": "ureaKgN",
|
|
20
|
-
"Peanut, in shell": "peanutInShell",
|
|
21
|
-
},
|
|
22
|
-
)
|
|
23
|
-
def test_pivot_csv_cycle(mock):
|
|
24
|
-
filepath = f"{fixtures_folder}/cycle.csv"
|
|
25
|
-
expected = pd.read_csv(
|
|
26
|
-
f"{fixtures_folder}/cycle-pivoted.csv", index_col=None, dtype=object
|
|
27
|
-
)
|
|
28
|
-
df = pivot_csv(filepath)
|
|
29
|
-
assert df.to_csv() == expected.to_csv()
|
|
30
|
-
mock.assert_has_calls(
|
|
31
|
-
[
|
|
32
|
-
call(
|
|
33
|
-
[
|
|
34
|
-
"Diesel",
|
|
35
|
-
"Full tillage",
|
|
36
|
-
"Inorganic Phosphorus fertiliser, unspecified (kg P2O5)",
|
|
37
|
-
"Inorganic Potassium fertiliser, unspecified (kg K2O)",
|
|
38
|
-
"Peanut, in shell",
|
|
39
|
-
"Urea (kg N)",
|
|
40
|
-
]
|
|
41
|
-
)
|
|
42
|
-
]
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@patch(
|
|
47
|
-
f"{class_path}.find_term_ids_by_names",
|
|
48
|
-
return_value={
|
|
49
|
-
"Eutrophication potential, excluding fate": "eutrophicationPotentialExcludingFate",
|
|
50
|
-
"GWP100": "gwp100",
|
|
51
|
-
"N2O, to air, organic fertiliser, direct": "n2OToAirOrganicFertiliserDirect",
|
|
52
|
-
"N2O, to air, inorganic fertiliser, direct": "n2OToAirInorganicFertiliserDirect",
|
|
53
|
-
},
|
|
54
|
-
)
|
|
55
|
-
def test_pivot_csv_impact(mock):
|
|
56
|
-
filepath = f"{fixtures_folder}/impact.csv"
|
|
57
|
-
expected = pd.read_csv(
|
|
58
|
-
f"{fixtures_folder}/impact-pivoted.csv", index_col=None, dtype=object
|
|
59
|
-
)
|
|
60
|
-
df = pivot_csv(filepath)
|
|
61
|
-
assert df.to_csv() == expected.to_csv()
|
|
62
|
-
mock.assert_has_calls(
|
|
63
|
-
[
|
|
64
|
-
call(
|
|
65
|
-
[
|
|
66
|
-
"Eutrophication potential, excluding fate",
|
|
67
|
-
"GWP100",
|
|
68
|
-
"N2O, to air, inorganic fertiliser, direct",
|
|
69
|
-
"N2O, to air, organic fertiliser, direct",
|
|
70
|
-
]
|
|
71
|
-
)
|
|
72
|
-
]
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def test_pivot_csv_multinode_rows():
|
|
77
|
-
filepath = f"{fixtures_folder}/multinode-rows.csv"
|
|
78
|
-
expected = pd.read_csv(
|
|
79
|
-
f"{fixtures_folder}/multinode-rows-pivoted.csv",
|
|
80
|
-
index_col=None,
|
|
81
|
-
dtype=object,
|
|
82
|
-
)
|
|
83
|
-
df = pivot_csv(filepath)
|
|
84
|
-
assert df.to_csv() == expected.to_csv()
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
@patch(
|
|
88
|
-
f"{class_path}.find_term_ids_by_names",
|
|
89
|
-
return_value={"Urea (kg N)": "ureaKgN"},
|
|
90
|
-
)
|
|
91
|
-
def test_pivot_csv_cycle_missing_ids(mock):
|
|
92
|
-
filepath = f"{fixtures_folder}/missing-ids.csv"
|
|
93
|
-
expected = pd.read_csv(
|
|
94
|
-
f"{fixtures_folder}/missing-ids-pivoted.csv",
|
|
95
|
-
index_col=None,
|
|
96
|
-
dtype=object,
|
|
97
|
-
)
|
|
98
|
-
df = pivot_csv(filepath)
|
|
99
|
-
assert df.to_csv() == expected.to_csv()
|
|
100
|
-
mock.assert_has_calls([call(["Urea (kg N)"])])
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
@patch(
|
|
104
|
-
f"{class_path}.find_term_ids_by_names",
|
|
105
|
-
return_value={"Irrigated": "irrigated"},
|
|
106
|
-
)
|
|
107
|
-
def test_pivot_csv_empty_cells(mock):
|
|
108
|
-
filepath = f"{fixtures_folder}/empty-cells.csv"
|
|
109
|
-
expected = pd.read_csv(
|
|
110
|
-
f"{fixtures_folder}/empty-cells-pivoted.csv",
|
|
111
|
-
index_col=None,
|
|
112
|
-
dtype=object,
|
|
113
|
-
)
|
|
114
|
-
df = pivot_csv(filepath)
|
|
115
|
-
assert df.to_csv() == expected.to_csv()
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def test_pivot_csv_preserves_uniqueness_fields():
|
|
119
|
-
filepath = f"{fixtures_folder}/uniqueness-fields-undifferentiating.csv"
|
|
120
|
-
expected = pd.read_csv(
|
|
121
|
-
f"{fixtures_folder}/uniqueness-fields-undifferentiating-pivoted.csv",
|
|
122
|
-
index_col=None,
|
|
123
|
-
dtype=object,
|
|
124
|
-
)
|
|
125
|
-
df = pivot_csv(filepath)
|
|
126
|
-
assert df.to_csv() == expected.to_csv()
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
@patch(
|
|
130
|
-
f"{class_path}.find_term_ids_by_names",
|
|
131
|
-
return_value={
|
|
132
|
-
"Helicopter use, operation unspecified": "helicopterUseOperationUnspecified",
|
|
133
|
-
"Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
|
|
134
|
-
"Small tractor use, operation unspecified": "smallTractorUseOperationUnspecified",
|
|
135
|
-
"Coating seeds": "coatingSeeds",
|
|
136
|
-
"Buttage of vine": "buttageOfVine",
|
|
137
|
-
},
|
|
138
|
-
)
|
|
139
|
-
def test_pivot_csv_uniqueness_fields_differentiating(mock):
|
|
140
|
-
filepath = f"{fixtures_folder}/uniqueness-fields-differentiating.csv"
|
|
141
|
-
expected = pd.read_csv(
|
|
142
|
-
f"{fixtures_folder}/uniqueness-fields-differentiating-pivoted.csv",
|
|
143
|
-
index_col=None,
|
|
144
|
-
dtype=object,
|
|
145
|
-
)
|
|
146
|
-
df = pivot_csv(filepath)
|
|
147
|
-
assert df.to_csv() == expected.to_csv()
|
|
148
|
-
mock.assert_has_calls(
|
|
149
|
-
[
|
|
150
|
-
call(
|
|
151
|
-
[
|
|
152
|
-
"Buttage of vine",
|
|
153
|
-
"Coating seeds",
|
|
154
|
-
"Cooling, with evaporative cooling tower",
|
|
155
|
-
"Helicopter use, operation unspecified",
|
|
156
|
-
"Small tractor use, operation unspecified",
|
|
157
|
-
]
|
|
158
|
-
)
|
|
159
|
-
]
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
@patch(
|
|
164
|
-
f"{class_path}.find_term_ids_by_names",
|
|
165
|
-
return_value={
|
|
166
|
-
"Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
|
|
167
|
-
},
|
|
168
|
-
)
|
|
169
|
-
def test_pivot_csv_uniqueness_fields_non_matching(mock):
|
|
170
|
-
filepath = f"{fixtures_folder}/uniqueness-fields-non-matching.csv"
|
|
171
|
-
expected = pd.read_csv(
|
|
172
|
-
f"{fixtures_folder}/uniqueness-fields-non-matching-pivoted.csv",
|
|
173
|
-
index_col=None,
|
|
174
|
-
dtype=object,
|
|
175
|
-
)
|
|
176
|
-
df = pivot_csv(filepath)
|
|
177
|
-
assert df.to_csv() == expected.to_csv()
|
|
178
|
-
mock.assert_has_calls([call(["Cooling, with evaporative cooling tower"])])
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
@patch(
|
|
182
|
-
f"{class_path}.find_term_ids_by_names",
|
|
183
|
-
return_value={
|
|
184
|
-
"Nitrogen content": "nitrogenContent",
|
|
185
|
-
},
|
|
186
|
-
)
|
|
187
|
-
def test_pivot_csv_properties(mock):
|
|
188
|
-
filepath = f"{fixtures_folder}/properties-exception.csv"
|
|
189
|
-
expected = pd.read_csv(
|
|
190
|
-
f"{fixtures_folder}/properties-exception-pivoted.csv",
|
|
191
|
-
index_col=None,
|
|
192
|
-
dtype=object,
|
|
193
|
-
)
|
|
194
|
-
df = pivot_csv(filepath)
|
|
195
|
-
assert df.to_csv() == expected.to_csv()
|
|
196
|
-
mock.assert_has_calls([call(["Nitrogen content"])])
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def test_pivot_csv_depth():
|
|
200
|
-
filepath = f"{fixtures_folder}/depth-exception.csv"
|
|
201
|
-
expected = pd.read_csv(
|
|
202
|
-
f"{fixtures_folder}/depth-exception-pivoted.csv",
|
|
203
|
-
index_col=None,
|
|
204
|
-
dtype=object,
|
|
205
|
-
)
|
|
206
|
-
df = pivot_csv(filepath)
|
|
207
|
-
assert df.to_csv() == expected.to_csv()
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def test_pivot_csv_shuffled():
|
|
211
|
-
filepath = f"{fixtures_folder}/shuffled.csv"
|
|
212
|
-
expected = pd.read_csv(
|
|
213
|
-
f"{fixtures_folder}/shuffled-pivoted.csv",
|
|
214
|
-
index_col=None,
|
|
215
|
-
dtype=object,
|
|
216
|
-
)
|
|
217
|
-
df = pivot_csv(filepath)
|
|
218
|
-
assert df.to_csv() == expected.to_csv()
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
@patch(
|
|
222
|
-
f"{class_path}.find_term_ids_by_names",
|
|
223
|
-
return_value={"Full tillage": "fullTillage", "Urea (kg N)": "ureaKgN"},
|
|
224
|
-
)
|
|
225
|
-
def test_pivot_csv_cycle_deep(*args):
|
|
226
|
-
filepath = f"{fixtures_folder}/deep.csv"
|
|
227
|
-
expected = pd.read_csv(
|
|
228
|
-
f"{fixtures_folder}/deep-pivoted.csv",
|
|
229
|
-
index_col=None,
|
|
230
|
-
dtype=object,
|
|
231
|
-
)
|
|
232
|
-
df = pivot_csv(filepath)
|
|
233
|
-
assert df.to_csv() == expected.to_csv()
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
def test_pivot_csv_non_node_arrayfields(*args):
|
|
237
|
-
filepath = f"{fixtures_folder}/non-node-arrayfields.csv"
|
|
238
|
-
expected = pd.read_csv(
|
|
239
|
-
f"{fixtures_folder}/non-node-arrayfields-pivoted.csv",
|
|
240
|
-
index_col=None,
|
|
241
|
-
dtype=object,
|
|
242
|
-
)
|
|
243
|
-
df = pivot_csv(filepath)
|
|
244
|
-
assert df.to_csv() == expected.to_csv()
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
@patch(
|
|
248
|
-
f"{class_path}.find_term_ids_by_names",
|
|
249
|
-
return_value={
|
|
250
|
-
"Grinding, with grinder": "grinding",
|
|
251
|
-
"Motor gasoline": "motorGasoline",
|
|
252
|
-
"Orchard density": "orchardDensity",
|
|
253
|
-
},
|
|
254
|
-
)
|
|
255
|
-
def test_pivot_hestia_file(*args):
|
|
256
|
-
filepath = f"{fixtures_folder}/nodes.hestia"
|
|
257
|
-
expected = pd.read_csv(
|
|
258
|
-
f"{fixtures_folder}/nodes.hestia-pivoted.csv",
|
|
259
|
-
index_col=None,
|
|
260
|
-
dtype=object,
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
with open(filepath) as fd:
|
|
264
|
-
hestia_file = fd.read()
|
|
265
|
-
|
|
266
|
-
df = pivot_hestia_file(hestia_file)
|
|
267
|
-
assert df.to_csv() == expected.to_csv()
|
tests/pivot/test_pivot_json.py
DELETED
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import json
|
|
3
|
-
import re
|
|
4
|
-
import numpy as np
|
|
5
|
-
import pandas as pd
|
|
6
|
-
|
|
7
|
-
from tests.utils import fixtures_path
|
|
8
|
-
from hestia_earth.utils.pivot.pivot_json import (
|
|
9
|
-
_with_csv_formatting,
|
|
10
|
-
pivot_nodes,
|
|
11
|
-
pivot_hestia_file,
|
|
12
|
-
)
|
|
13
|
-
from flatten_json import unflatten_list
|
|
14
|
-
from hestia_earth.schema.utils.sort import SORT_CONFIG
|
|
15
|
-
from hestia_earth import schema
|
|
16
|
-
|
|
17
|
-
class_path = 'hestia_earth.utils.pivot.pivot_csv'
|
|
18
|
-
fixtures_folder = os.path.join(fixtures_path, 'pivot', 'pivot_json')
|
|
19
|
-
|
|
20
|
-
node_types = {k: getattr(schema, k)().fields for k in schema.SCHEMA_TYPES}
|
|
21
|
-
name_to_ids_mapping = {
|
|
22
|
-
"Full tillage": "fullTillage",
|
|
23
|
-
"Diesel": "diesel",
|
|
24
|
-
"Motor gasoline": "motorGasoline",
|
|
25
|
-
"Inorganic Potassium fertiliser, unspecified (kg K2O)": "inorganicPotassiumFertiliserUnspecifiedKgK2O",
|
|
26
|
-
"Inorganic Phosphorus fertiliser, unspecified (kg P2O5)": "inorganicPhosphorusFertiliserUnspecifiedKgP2O5",
|
|
27
|
-
"Urea (kg N)": "ureaKgN",
|
|
28
|
-
"Peanut, in shell": "peanutInShell",
|
|
29
|
-
"Eutrophication potential, excluding fate": "eutrophicationPotentialExcludingFate",
|
|
30
|
-
"GWP100": "gwp100",
|
|
31
|
-
"N2O, to air, organic fertiliser, direct": "n2OToAirOrganicFertiliserDirect",
|
|
32
|
-
"N2O, to air, inorganic fertiliser, direct": "n2OToAirInorganicFertiliserDirect",
|
|
33
|
-
"Irrigated": "irrigated",
|
|
34
|
-
"Helicopter use, operation unspecified": "helicopterUseOperationUnspecified",
|
|
35
|
-
"Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
|
|
36
|
-
"Small tractor use, operation unspecified": "smallTractorUseOperationUnspecified",
|
|
37
|
-
"Coating seeds": "coatingSeeds",
|
|
38
|
-
"Buttage of vine": "buttageOfVine",
|
|
39
|
-
"Nitrogen content": "nitrogenContent",
|
|
40
|
-
"Grinding, with grinder": "grinding",
|
|
41
|
-
"Orchard density": "orchardDensity",
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def _get_node_type(col):
|
|
46
|
-
label = col.split(".")[0]
|
|
47
|
-
return label[0].upper() + label[1:]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _add_missing_fields(row, is_input, col, parent_type, prefix=""):
|
|
51
|
-
subnode_col = re.search(r"(.+?\.\d+)\.(.+)", col)
|
|
52
|
-
if not subnode_col:
|
|
53
|
-
return None
|
|
54
|
-
sub_node, deep_col = subnode_col.groups()
|
|
55
|
-
node_type = (
|
|
56
|
-
# We are not handling fields like subnode_type_A.subnode_type_B.0
|
|
57
|
-
# We are always fetching type_A in this scenario.
|
|
58
|
-
SORT_CONFIG.get(parent_type)
|
|
59
|
-
.get(sub_node.split(".")[0])
|
|
60
|
-
.get("type")
|
|
61
|
-
)
|
|
62
|
-
next_prefix = ".".join([el for el in (prefix, sub_node) if el])
|
|
63
|
-
row[f"{next_prefix}.@type"] = node_type
|
|
64
|
-
_add_missing_fields(row, is_input, deep_col, node_type, prefix=next_prefix)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _row_to_dict(row, is_input, parent_type):
|
|
68
|
-
row.dropna(inplace=True)
|
|
69
|
-
if is_input:
|
|
70
|
-
for col in row.index:
|
|
71
|
-
_add_missing_fields(row, is_input, col, parent_type)
|
|
72
|
-
return row.to_dict()
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _df_to_dict(df, is_input):
|
|
76
|
-
df.index = map(lambda col: ".".join(col.split(".")[1:]), df.index)
|
|
77
|
-
df.loc["@type"] = df.name
|
|
78
|
-
dicts = df.apply(_row_to_dict, is_input=is_input, parent_type=df.name)
|
|
79
|
-
return dicts
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def _ensure_id_cols(df, name_to_ids):
|
|
83
|
-
names_df = df.filter(regex=r"\.name", axis=1)
|
|
84
|
-
for name_col in names_df.columns:
|
|
85
|
-
id_col = name_col.replace(".name", ".@id")
|
|
86
|
-
for idx, name in df[name_col].items():
|
|
87
|
-
if id_col not in df:
|
|
88
|
-
df[id_col] = np.nan
|
|
89
|
-
if pd.isna(df.loc[idx, id_col]):
|
|
90
|
-
df.loc[idx, id_col] = name_to_ids[name]
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def _convert_csv_to_nodes(fixture, is_input, name_to_ids):
|
|
94
|
-
"""
|
|
95
|
-
Gets json fixtures or creates them from corresponding csv files.
|
|
96
|
-
Conversion for *-pivoted files is not perfect as we do not detect
|
|
97
|
-
the difference between an empty cell which should be discarded
|
|
98
|
-
(ie. header not used by a row) and a node without a value key
|
|
99
|
-
(the latter are represented in csv as field.nodeId.value = None)
|
|
100
|
-
"""
|
|
101
|
-
filepath = (
|
|
102
|
-
f"{fixtures_path}/pivot/pivot_csv/{fixture}.csv"
|
|
103
|
-
if is_input
|
|
104
|
-
else f"{fixtures_path}/pivot/pivot_csv/{fixture}-pivoted.csv"
|
|
105
|
-
)
|
|
106
|
-
df = pd.read_csv(filepath, index_col=None, dtype=object)
|
|
107
|
-
df.drop(columns="-", errors="ignore", inplace=True)
|
|
108
|
-
df.replace("-", np.nan, inplace=True)
|
|
109
|
-
df.replace(
|
|
110
|
-
["TRUE", "True", "true", "FALSE", "False", "false"],
|
|
111
|
-
[True, True, True, False, False, False],
|
|
112
|
-
inplace=True,
|
|
113
|
-
)
|
|
114
|
-
if is_input:
|
|
115
|
-
df.dropna(how="all", axis=1, inplace=True)
|
|
116
|
-
df.rename(lambda col: col.replace(".id", ".@id"), axis=1, inplace=True)
|
|
117
|
-
if is_input:
|
|
118
|
-
_ensure_id_cols(df, name_to_ids)
|
|
119
|
-
df = df.T.groupby(_get_node_type).apply(_df_to_dict, is_input)
|
|
120
|
-
nodes = [
|
|
121
|
-
node for _node_type, nodes in df.iterrows() for node in nodes if node.get("@id")
|
|
122
|
-
]
|
|
123
|
-
return nodes
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def get_nodes_from_fixture(fixture, name_to_ids={}):
|
|
127
|
-
try:
|
|
128
|
-
with open(f"{fixtures_folder}/{fixture}.json") as file:
|
|
129
|
-
input = json.load(file, object_hook=_with_csv_formatting)["nodes"]
|
|
130
|
-
with open(f"{fixtures_folder}/{fixture}-pivoted.json") as file:
|
|
131
|
-
expected = json.load(file, object_hook=_with_csv_formatting)["nodes"]
|
|
132
|
-
except FileNotFoundError:
|
|
133
|
-
print(f"\n{fixture} not found: attempting to create from csv.\n")
|
|
134
|
-
name_to_ids.update({np.nan: np.nan})
|
|
135
|
-
input = _convert_csv_to_nodes(fixture, True, name_to_ids)
|
|
136
|
-
expected = _convert_csv_to_nodes(fixture, False, name_to_ids)
|
|
137
|
-
|
|
138
|
-
input, expected = (
|
|
139
|
-
[unflatten_list(node, ".") for node in input],
|
|
140
|
-
[unflatten_list(node, ".") for node in expected],
|
|
141
|
-
)
|
|
142
|
-
with open(f"{fixtures_folder}/{fixture}.json", "w") as file:
|
|
143
|
-
file.write(json.dumps({"nodes": input}, sort_keys=True, indent=2))
|
|
144
|
-
with open(
|
|
145
|
-
f"{fixtures_folder}/{fixture}-pivoted.json", "w"
|
|
146
|
-
) as file:
|
|
147
|
-
file.write(json.dumps({"nodes": expected}, sort_keys=True, indent=2))
|
|
148
|
-
|
|
149
|
-
return (input, expected)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def test_pivot_json_cycle():
|
|
153
|
-
input, expected = get_nodes_from_fixture("cycle", name_to_ids_mapping)
|
|
154
|
-
actual = pivot_nodes(input)
|
|
155
|
-
assert expected == actual
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
def test_pivot_json_impact():
|
|
159
|
-
input, expected = get_nodes_from_fixture("impact", name_to_ids_mapping)
|
|
160
|
-
actual = pivot_nodes(input)
|
|
161
|
-
assert expected == actual
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def test_pivot_json_multinode_rows():
|
|
165
|
-
input, expected = get_nodes_from_fixture("multinode-rows")
|
|
166
|
-
actual = pivot_nodes(input)
|
|
167
|
-
assert expected == actual
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def test_pivot_json_preserves_uniqueness_fields():
|
|
171
|
-
input, expected = get_nodes_from_fixture(
|
|
172
|
-
"uniqueness-fields-undifferentiating", name_to_ids_mapping
|
|
173
|
-
)
|
|
174
|
-
actual = pivot_nodes(input)
|
|
175
|
-
assert expected == actual
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def test_pivot_json_uniqueness_fields_differentiating():
|
|
179
|
-
input, expected = get_nodes_from_fixture(
|
|
180
|
-
"uniqueness-fields-differentiating", name_to_ids_mapping
|
|
181
|
-
)
|
|
182
|
-
actual = pivot_nodes(input)
|
|
183
|
-
assert expected == actual
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
# Output differs from CSV pivoter (see https://gitlab.com/hestia-earth/hestia-utils/-/issues/32)
|
|
187
|
-
def test_pivot_json_uniqueness_fields_non_matching():
|
|
188
|
-
input, expected = get_nodes_from_fixture("uniqueness-fields-non-matching", name_to_ids_mapping)
|
|
189
|
-
actual = pivot_nodes(input)
|
|
190
|
-
assert expected == actual
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def test_pivot_json_properties():
|
|
194
|
-
input, expected = get_nodes_from_fixture("properties-exception", name_to_ids_mapping)
|
|
195
|
-
actual = pivot_nodes(input)
|
|
196
|
-
assert expected == actual
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def test_pivot_json_depth():
|
|
200
|
-
input, expected = get_nodes_from_fixture("depth-exception")
|
|
201
|
-
actual = pivot_nodes(input)
|
|
202
|
-
assert expected == actual
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
# Output differs from CSV pivoter (see https://gitlab.com/hestia-earth/hestia-utils/-/issues/32)
|
|
206
|
-
def test_pivot_json_cycle_deep():
|
|
207
|
-
input, expected = get_nodes_from_fixture("deep", name_to_ids_mapping)
|
|
208
|
-
actual = pivot_nodes(input)
|
|
209
|
-
assert expected == actual
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
def test_pivot_json_node_arrayfields_merged():
|
|
213
|
-
input, expected = get_nodes_from_fixture("node-arrayfields-merged")
|
|
214
|
-
actual = pivot_nodes(input)
|
|
215
|
-
|
|
216
|
-
assert expected == actual
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def test_pivot_json_unindexed_node():
|
|
220
|
-
input, expected = get_nodes_from_fixture("unindexed-node")
|
|
221
|
-
actual = pivot_nodes(input)
|
|
222
|
-
|
|
223
|
-
assert expected == actual
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
def test_pivot_hestia_file():
|
|
227
|
-
_input, expected = get_nodes_from_fixture("nodes.hestia", name_to_ids_mapping)
|
|
228
|
-
actual = pivot_hestia_file(
|
|
229
|
-
open(f"{fixtures_folder}/nodes.hestia.json", "r").read()
|
|
230
|
-
)
|
|
231
|
-
assert expected == actual
|
{hestia_earth_utils-0.15.16.data → hestia_earth_utils-0.16.1.data}/scripts/hestia-format-upload
RENAMED
|
File without changes
|
|
File without changes
|