hdx-python-scraper 2.3.1__py3-none-any.whl → 2.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx/scraper/_version.py +2 -2
- hdx/scraper/runner.py +32 -12
- {hdx_python_scraper-2.3.1.dist-info → hdx_python_scraper-2.3.2.dist-info}/METADATA +1 -1
- {hdx_python_scraper-2.3.1.dist-info → hdx_python_scraper-2.3.2.dist-info}/RECORD +6 -6
- {hdx_python_scraper-2.3.1.dist-info → hdx_python_scraper-2.3.2.dist-info}/WHEEL +0 -0
- {hdx_python_scraper-2.3.1.dist-info → hdx_python_scraper-2.3.2.dist-info}/licenses/LICENSE +0 -0
hdx/scraper/_version.py
CHANGED
hdx/scraper/runner.py
CHANGED
|
@@ -1209,32 +1209,52 @@ class Runner:
|
|
|
1209
1209
|
"""
|
|
1210
1210
|
if not names:
|
|
1211
1211
|
names = self.scrapers.keys()
|
|
1212
|
-
|
|
1212
|
+
hapi_results = {}
|
|
1213
1213
|
|
|
1214
1214
|
def add_results(scraper_level, scrap, levels_used):
|
|
1215
|
-
nonlocal
|
|
1215
|
+
nonlocal hapi_results
|
|
1216
1216
|
|
|
1217
1217
|
if scraper_level in levels_used:
|
|
1218
1218
|
return
|
|
1219
1219
|
headers = scrap.headers.get(scraper_level)
|
|
1220
1220
|
if headers is None:
|
|
1221
1221
|
return
|
|
1222
|
+
headings = headers[0]
|
|
1223
|
+
hxltags = headers[1]
|
|
1222
1224
|
values = scrap.get_values(scraper_level)
|
|
1223
1225
|
hapi_dataset_metadata = scrap.get_hapi_dataset_metadata()
|
|
1226
|
+
if not hapi_dataset_metadata:
|
|
1227
|
+
return
|
|
1224
1228
|
hapi_resource_metadata = scrap.get_hapi_resource_metadata()
|
|
1229
|
+
if not hapi_resource_metadata:
|
|
1230
|
+
return
|
|
1225
1231
|
dataset_id = hapi_dataset_metadata["hdx_id"]
|
|
1226
|
-
hapi_metadata =
|
|
1232
|
+
hapi_metadata = hapi_results.get(
|
|
1227
1233
|
dataset_id, copy(hapi_dataset_metadata)
|
|
1228
1234
|
)
|
|
1229
|
-
|
|
1230
|
-
level_results
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1235
|
+
results = hapi_metadata.get("results", {})
|
|
1236
|
+
level_results = results.get(scraper_level)
|
|
1237
|
+
if level_results is None:
|
|
1238
|
+
level_results = {
|
|
1239
|
+
"headers": ([], []),
|
|
1240
|
+
"values": [],
|
|
1241
|
+
"hapi_resource_metadata": hapi_resource_metadata,
|
|
1242
|
+
}
|
|
1243
|
+
results[scraper_level] = level_results
|
|
1244
|
+
lev_headings = level_results["headers"][0]
|
|
1245
|
+
lev_hxltags = level_results["headers"][1]
|
|
1246
|
+
lev_values = level_results["values"]
|
|
1247
|
+
for i, hxltag in enumerate(hxltags):
|
|
1248
|
+
if hxltag in lev_hxltags:
|
|
1249
|
+
index = lev_hxltags.index(hxltag)
|
|
1250
|
+
lev_values[index].update(values[i])
|
|
1251
|
+
else:
|
|
1252
|
+
lev_headings.append(headings[i])
|
|
1253
|
+
lev_hxltags.append(hxltag)
|
|
1254
|
+
lev_values.append(values[i])
|
|
1255
|
+
hapi_metadata["results"] = results
|
|
1236
1256
|
levels_used.add(scraper_level)
|
|
1237
|
-
|
|
1257
|
+
hapi_results[dataset_id] = hapi_metadata
|
|
1238
1258
|
|
|
1239
1259
|
for name in names:
|
|
1240
1260
|
if self.scrapers_to_run and not any(
|
|
@@ -1247,4 +1267,4 @@ class Runner:
|
|
|
1247
1267
|
lvls_used = set()
|
|
1248
1268
|
for scrap_level in scraper.headers:
|
|
1249
1269
|
add_results(scrap_level, scraper, lvls_used)
|
|
1250
|
-
return
|
|
1270
|
+
return hapi_results
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.3.1
|
|
3
|
+
Version: 2.3.2
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
hdx/scraper/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
|
|
2
|
-
hdx/scraper/_version.py,sha256=
|
|
2
|
+
hdx/scraper/_version.py,sha256=aKqtdIqWETcZnGj_9koZ-EQK7itBfKLMIKY7ucdTIMI,411
|
|
3
3
|
hdx/scraper/base_scraper.py,sha256=oo9oMqCUpK8_hPwcTz2PAKabzoyU0BQu5dgWgsFa55Y,15431
|
|
4
|
-
hdx/scraper/runner.py,sha256=
|
|
4
|
+
hdx/scraper/runner.py,sha256=KIEVLSJwEw9fzQxqsN92c50yDG3CRYAVDO7A6Zv_KJY,52262
|
|
5
5
|
hdx/scraper/configurable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
hdx/scraper/configurable/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
|
|
7
7
|
hdx/scraper/configurable/resource_downloader.py,sha256=lCIQpNZtcCTRc3z0FFM2_JxRtoua9GEq2XiKRZ9fqZk,1549
|
|
@@ -19,7 +19,7 @@ hdx/scraper/utilities/reader.py,sha256=HaR0da1my59P1T4sYe15GwX5cf5m4UbMo1r2uR9yv
|
|
|
19
19
|
hdx/scraper/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
|
|
20
20
|
hdx/scraper/utilities/sources.py,sha256=h27PjBADqIhqDwmhzMXt1OjwJWZc2iVnIBwJuAJKHwo,11204
|
|
21
21
|
hdx/scraper/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
|
|
22
|
-
hdx_python_scraper-2.3.
|
|
23
|
-
hdx_python_scraper-2.3.1.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
|
|
24
|
-
hdx_python_scraper-2.3.1.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
25
|
-
hdx_python_scraper-2.3.1.dist-info/RECORD,,
|
|
22
|
+
hdx_python_scraper-2.3.2.dist-info/METADATA,sha256=cDyJQpQAf7U486xjcUYFbyoVm2fPLZPu8mLMG7GWVMU,3318
|
|
23
|
+
hdx_python_scraper-2.3.2.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
|
|
24
|
+
hdx_python_scraper-2.3.2.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
25
|
+
hdx_python_scraper-2.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|