hdx-python-scraper 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx/scraper/_version.py +13 -3
- hdx/scraper/configurable/scraper.py +6 -1
- hdx/scraper/runner.py +13 -7
- hdx/scraper/utilities/reader.py +10 -3
- {hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/METADATA +3 -3
- {hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/RECORD +8 -8
- {hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/WHEEL +0 -0
- {hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/licenses/LICENSE +0 -0
hdx/scraper/_version.py
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
# file generated by setuptools_scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
-
|
|
3
|
+
TYPE_CHECKING = False
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from typing import Tuple, Union
|
|
6
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
|
+
else:
|
|
8
|
+
VERSION_TUPLE = object
|
|
4
9
|
|
|
5
|
-
|
|
6
|
-
|
|
10
|
+
version: str
|
|
11
|
+
__version__: str
|
|
12
|
+
__version_tuple__: VERSION_TUPLE
|
|
13
|
+
version_tuple: VERSION_TUPLE
|
|
14
|
+
|
|
15
|
+
__version__ = version = '2.2.1'
|
|
16
|
+
__version_tuple__ = version_tuple = (2, 2, 1)
|
hdx/scraper/configurable/scraper.py
CHANGED
|
@@ -122,6 +122,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
122
122
|
"input": datasetinfo.get("input", []),
|
|
123
123
|
"transform": datasetinfo.get("transform", {}),
|
|
124
124
|
"population_key": datasetinfo.get("population_key"),
|
|
125
|
+
"list": datasetinfo.get("list", []),
|
|
125
126
|
"process": datasetinfo.get("process", []),
|
|
126
127
|
"input_keep": datasetinfo.get("input_keep", []),
|
|
127
128
|
"input_append": datasetinfo.get("input_append", []),
|
|
@@ -292,6 +293,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
292
293
|
filter = subset["filter"]
|
|
293
294
|
input_ignore_vals = subset.get("input_ignore_vals", [])
|
|
294
295
|
input_transforms = subset.get("transform", {})
|
|
296
|
+
list_cols = subset.get("list")
|
|
295
297
|
sum_cols = subset.get("sum")
|
|
296
298
|
process_cols = subset.get("process")
|
|
297
299
|
input_append = subset.get("input_append", [])
|
|
@@ -304,6 +306,8 @@ class ConfigurableScraper(BaseScraper):
|
|
|
304
306
|
val = eval(input_transform.replace(valcol, "val"))
|
|
305
307
|
if sum_cols or process_cols:
|
|
306
308
|
dict_of_lists_add(valuedict, adm, val)
|
|
309
|
+
elif list_cols and valcol in list_cols:
|
|
310
|
+
dict_of_lists_add(valuedict, adm, val)
|
|
307
311
|
else:
|
|
308
312
|
curval = valuedict.get(adm)
|
|
309
313
|
if valcol in input_append:
|
|
@@ -326,6 +330,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
326
330
|
population_str = "self.population_lookup[adm]"
|
|
327
331
|
else:
|
|
328
332
|
population_str = "self.population_lookup[population_key]"
|
|
333
|
+
subset.get("list")
|
|
329
334
|
process_cols = subset.get("process")
|
|
330
335
|
input_keep = subset.get("input_keep", [])
|
|
331
336
|
sum_cols = subset.get("sum")
|
|
@@ -440,7 +445,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
440
445
|
valcols[i], f"newvaldicts[{i}][adm]"
|
|
441
446
|
)
|
|
442
447
|
formula = formula.replace("#pzbgvjh", population_str)
|
|
443
|
-
for adm in valdicts[0]
|
|
448
|
+
for adm in valdicts[0]:
|
|
444
449
|
try:
|
|
445
450
|
val = eval(formula)
|
|
446
451
|
except (ValueError, TypeError, KeyError):
|
hdx/scraper/runner.py
CHANGED
|
@@ -1147,7 +1147,10 @@ class Runner:
|
|
|
1147
1147
|
def get_hapi_metadata(
|
|
1148
1148
|
self, names: Optional[ListTuple[str]] = None
|
|
1149
1149
|
) -> Dict:
|
|
1150
|
-
"""Get HAPI metadata for all datasets
|
|
1150
|
+
"""Get HAPI metadata for all datasets. A dictionary is returned that
|
|
1151
|
+
maps from dataset ids to a dictionary. The dictionary has keys for
|
|
1152
|
+
dataset metadata and a key resources under which is a dictionary that
|
|
1153
|
+
maps from resource ids to resource metadata.
|
|
1151
1154
|
|
|
1152
1155
|
Args:
|
|
1153
1156
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
@@ -1184,12 +1187,15 @@ class Runner:
|
|
|
1184
1187
|
metadata) for scrapers limiting to those in names if given and limiting
|
|
1185
1188
|
further to those that have been set in the constructor if previously
|
|
1186
1189
|
given. By default, only scrapers marked as having run are returned
|
|
1187
|
-
unless has_run is set to False.
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1190
|
+
unless has_run is set to False.
|
|
1191
|
+
|
|
1192
|
+
A dictionary is returned where key is HDX dataset id and value is a
|
|
1193
|
+
dictionary that has HAPI dataset metadata as well as a results key.
|
|
1194
|
+
The value associated with the results key is a dictionary where each
|
|
1195
|
+
key is an admin level. Each admin level key has a value dictionary with
|
|
1196
|
+
headers, values and HAPI resource metadata. Headers is a tuple of
|
|
1197
|
+
(column headers, hxl hashtags). Values is a list. HAPI resource
|
|
1198
|
+
metadata is a dictionary.
|
|
1193
1199
|
|
|
1194
1200
|
Args:
|
|
1195
1201
|
names (Optional[ListTuple[str]]): Names of scrapers. Defaults to None (all scrapers).
|
hdx/scraper/utilities/reader.py
CHANGED
|
@@ -340,9 +340,16 @@ class Read(Retrieve):
|
|
|
340
340
|
def read_hdx_metadata(
|
|
341
341
|
self, datasetinfo: Dict, do_resource_check: bool = True
|
|
342
342
|
) -> Optional[Resource]:
|
|
343
|
-
"""Read metadata from HDX dataset and add to input dictionary. If url
|
|
344
|
-
supplied, will look through resources for one that matches
|
|
345
|
-
use its url unless do_resource_check is False.
|
|
343
|
+
"""Read metadata from HDX dataset and add to input dictionary. If url
|
|
344
|
+
is not supplied, will look through resources for one that matches
|
|
345
|
+
specified format and use its url unless do_resource_check is False.
|
|
346
|
+
The dataset key of the parameter datasetinfo will usually point to a
|
|
347
|
+
string (single dataset) but where sources vary across HXL tags can be
|
|
348
|
+
a dictionary that maps from HXL tags to datasets with the key
|
|
349
|
+
default_dataset setting a default for HXL tags. For a single dataset,
|
|
350
|
+
the keys hapi_dataset_metadata and hapi_resource_metadata will be
|
|
351
|
+
populated with more detailed dataset and resource information required
|
|
352
|
+
by HAPI.
|
|
346
353
|
|
|
347
354
|
Args:
|
|
348
355
|
datasetinfo (Dict): Dictionary of information about dataset
|
{hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,12 +26,12 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.1.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.1.3
|
|
30
30
|
Requires-Dist: regex
|
|
31
31
|
Provides-Extra: dev
|
|
32
32
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
33
33
|
Provides-Extra: pandas
|
|
34
|
-
Requires-Dist: pandas>=2.
|
|
34
|
+
Requires-Dist: pandas>=2.1.1; extra == 'pandas'
|
|
35
35
|
Provides-Extra: test
|
|
36
36
|
Requires-Dist: pytest; extra == 'test'
|
|
37
37
|
Requires-Dist: pytest-cov; extra == 'test'
|
{hdx_python_scraper-2.2.0.dist-info → hdx_python_scraper-2.2.1.dist-info}/RECORD
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
hdx/scraper/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
|
|
2
|
-
hdx/scraper/_version.py,sha256=
|
|
2
|
+
hdx/scraper/_version.py,sha256=R_Wr7clGXr8a07n6uqFj88MyYFGydFRXYBI10R9k_uw,411
|
|
3
3
|
hdx/scraper/base_scraper.py,sha256=IaUDqnrSxB0kbEQynX-81NEyv9DLxypWKwEDAEr9GWg,14628
|
|
4
|
-
hdx/scraper/runner.py,sha256
|
|
4
|
+
hdx/scraper/runner.py,sha256=-7L-L9WGZdTGl5mWNAPgvpTreU9bvbdxklruGCRzjRs,51217
|
|
5
5
|
hdx/scraper/configurable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
hdx/scraper/configurable/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
|
|
7
7
|
hdx/scraper/configurable/resource_downloader.py,sha256=vK8zNFy7T_Rj1h8Tj676-3B2oYYXFNKsrM9dxz7RZC8,1537
|
|
8
8
|
hdx/scraper/configurable/rowparser.py,sha256=h7a0W2xvVJSAu94nS5CAXvZSZXdwZ-isFHHNaIce0gM,14635
|
|
9
|
-
hdx/scraper/configurable/scraper.py,sha256=
|
|
9
|
+
hdx/scraper/configurable/scraper.py,sha256=TyB7ipTzhVpOC3in0ZBIMwbcTAOR0Ul-W6Np85NnogI,20468
|
|
10
10
|
hdx/scraper/configurable/timeseries.py,sha256=uhnENo7Wsy0-YVjglm7OQkXI72-te61DkepkihbQrP8,2982
|
|
11
11
|
hdx/scraper/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
hdx/scraper/outputs/base.py,sha256=UBVFPANdd7wawifbKkPQWKwVC-Tr7Jg5ax1eLTmWX3M,2566
|
|
@@ -15,11 +15,11 @@ hdx/scraper/outputs/googlesheets.py,sha256=gPjzikxP4wmMBGL5LW50MXUcDq5nwCRMW74G1
|
|
|
15
15
|
hdx/scraper/outputs/json.py,sha256=uw9_yAVpHVPWQ8LtMUZKTH88okyrHQs_SVjT6HJOxZ4,9498
|
|
16
16
|
hdx/scraper/utilities/__init__.py,sha256=iBjD7bc8wEzQhwkcx2mOZwYmu28VHjl5px66quqWJ8E,2491
|
|
17
17
|
hdx/scraper/utilities/fallbacks.py,sha256=08tvqVFuFV_gsvS7jqEiJUr7gqNILKCakDa8xMuIMpI,6186
|
|
18
|
-
hdx/scraper/utilities/reader.py,sha256=
|
|
18
|
+
hdx/scraper/utilities/reader.py,sha256=awm24AUWlweJmJVE1h0iid7xb6njvF7Taf0afbGXIG4,18331
|
|
19
19
|
hdx/scraper/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
|
|
20
20
|
hdx/scraper/utilities/sources.py,sha256=h27PjBADqIhqDwmhzMXt1OjwJWZc2iVnIBwJuAJKHwo,11204
|
|
21
21
|
hdx/scraper/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
|
|
22
|
-
hdx_python_scraper-2.2.
|
|
23
|
-
hdx_python_scraper-2.2.
|
|
24
|
-
hdx_python_scraper-2.2.
|
|
25
|
-
hdx_python_scraper-2.2.
|
|
22
|
+
hdx_python_scraper-2.2.1.dist-info/METADATA,sha256=hnYCmTG7ZlGqfc4QKCHjBKSesZ2q7ooTbdtyAuuhkqs,3289
|
|
23
|
+
hdx_python_scraper-2.2.1.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
|
|
24
|
+
hdx_python_scraper-2.2.1.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
25
|
+
hdx_python_scraper-2.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|