hdx-python-scraper 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdx/scraper/_version.py CHANGED
@@ -1,6 +1,16 @@
1
1
  # file generated by setuptools_scm
2
2
  # don't change, don't track in version control
3
- from __future__ import annotations
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
4
9
 
5
- __version__ = version = '2.2.0' # type: str
6
- __version_tuple__ = version_tuple = (2, 2, 0) # type: tuple[int | str, ...]
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '2.2.1'
16
+ __version_tuple__ = version_tuple = (2, 2, 1)
hdx/scraper/configurable/scraper.py CHANGED
@@ -122,6 +122,7 @@ class ConfigurableScraper(BaseScraper):
122
122
  "input": datasetinfo.get("input", []),
123
123
  "transform": datasetinfo.get("transform", {}),
124
124
  "population_key": datasetinfo.get("population_key"),
125
+ "list": datasetinfo.get("list", []),
125
126
  "process": datasetinfo.get("process", []),
126
127
  "input_keep": datasetinfo.get("input_keep", []),
127
128
  "input_append": datasetinfo.get("input_append", []),
@@ -292,6 +293,7 @@ class ConfigurableScraper(BaseScraper):
292
293
  filter = subset["filter"]
293
294
  input_ignore_vals = subset.get("input_ignore_vals", [])
294
295
  input_transforms = subset.get("transform", {})
296
+ list_cols = subset.get("list")
295
297
  sum_cols = subset.get("sum")
296
298
  process_cols = subset.get("process")
297
299
  input_append = subset.get("input_append", [])
@@ -304,6 +306,8 @@ class ConfigurableScraper(BaseScraper):
304
306
  val = eval(input_transform.replace(valcol, "val"))
305
307
  if sum_cols or process_cols:
306
308
  dict_of_lists_add(valuedict, adm, val)
309
+ elif list_cols and valcol in list_cols:
310
+ dict_of_lists_add(valuedict, adm, val)
307
311
  else:
308
312
  curval = valuedict.get(adm)
309
313
  if valcol in input_append:
@@ -326,6 +330,7 @@ class ConfigurableScraper(BaseScraper):
326
330
  population_str = "self.population_lookup[adm]"
327
331
  else:
328
332
  population_str = "self.population_lookup[population_key]"
333
+ subset.get("list")
329
334
  process_cols = subset.get("process")
330
335
  input_keep = subset.get("input_keep", [])
331
336
  sum_cols = subset.get("sum")
@@ -440,7 +445,7 @@ class ConfigurableScraper(BaseScraper):
440
445
  valcols[i], f"newvaldicts[{i}][adm]"
441
446
  )
442
447
  formula = formula.replace("#pzbgvjh", population_str)
443
- for adm in valdicts[0].keys():
448
+ for adm in valdicts[0]:
444
449
  try:
445
450
  val = eval(formula)
446
451
  except (ValueError, TypeError, KeyError):
hdx/scraper/runner.py CHANGED
@@ -1147,7 +1147,10 @@ class Runner:
1147
1147
  def get_hapi_metadata(
1148
1148
  self, names: Optional[ListTuple[str]] = None
1149
1149
  ) -> Dict:
1150
- """Get HAPI metadata for all datasets
1150
+ """Get HAPI metadata for all datasets. A dictionary is returned that
1151
+ maps from dataset ids to a dictionary. The dictionary has keys for
1152
+ dataset metadata and a key resources under which is a dictionary that
1153
+ maps from resource ids to resource metadata.
1151
1154
 
1152
1155
  Args:
1153
1156
  names (Optional[ListTuple[str]]): Names of scrapers
@@ -1184,12 +1187,15 @@ class Runner:
1184
1187
  metadata) for scrapers limiting to those in names if given and limiting
1185
1188
  further to those that have been set in the constructor if previously
1186
1189
  given. By default, only scrapers marked as having run are returned
1187
- unless has_run is set to False. A dictionary is returned where key is
1188
- HDX dataset id and value is a dictionary that has HAPI dataset metadata
1189
- as well as a results key. The value associated with the results key is
1190
- a dictionary where each key is an admin level. Each admin level key has
1191
- a value dictionary with headers, values and HAPI resource metadata.
1192
- Headers is a tuple of (column headers, hxl hashtags). Values is a list.
1190
+ unless has_run is set to False.
1191
+
1192
+ A dictionary is returned where key is HDX dataset id and value is a
1193
+ dictionary that has HAPI dataset metadata as well as a results key.
1194
+ The value associated with the results key is a dictionary where each
1195
+ key is an admin level. Each admin level key has a value dictionary with
1196
+ headers, values and HAPI resource metadata. Headers is a tuple of
1197
+ (column headers, hxl hashtags). Values is a list. HAPI resource
1198
+ metadata is a dictionary.
1193
1199
 
1194
1200
  Args:
1195
1201
  names (Optional[ListTuple[str]]): Names of scrapers. Defaults to None (all scrapers).
hdx/scraper/utilities/reader.py CHANGED
@@ -340,9 +340,16 @@ class Read(Retrieve):
340
340
  def read_hdx_metadata(
341
341
  self, datasetinfo: Dict, do_resource_check: bool = True
342
342
  ) -> Optional[Resource]:
343
- """Read metadata from HDX dataset and add to input dictionary. If url is not
344
- supplied, will look through resources for one that matches specified format and
345
- use its url unless do_resource_check is False.
343
+ """Read metadata from HDX dataset and add to input dictionary. If url
344
+ is not supplied, will look through resources for one that matches
345
+ specified format and use its url unless do_resource_check is False.
346
+ The dataset key of the parameter datasetinfo will usually point to a
347
+ string (single dataset) but where sources vary across HXL tags can be
348
+ a dictionary that maps from HXL tags to datasets with the key
349
+ default_dataset setting a default for HXL tags. For a single dataset,
350
+ the keys hapi_dataset_metadata and hapi_resource_metadata will be
351
+ populated with more detailed dataset and resource information required
352
+ by HAPI.
346
353
 
347
354
  Args:
348
355
  datasetinfo (Dict): Dictionary of information about dataset
hdx_python_scraper-2.2.0.dist-info/METADATA → hdx_python_scraper-2.2.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hdx-python-scraper
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: HDX Python scraper utilities to assemble data from multiple sources
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -26,12 +26,12 @@ Classifier: Programming Language :: Python :: 3.12
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Requires-Python: >=3.8
28
28
  Requires-Dist: gspread
29
- Requires-Dist: hdx-python-api>=6.1.2
29
+ Requires-Dist: hdx-python-api>=6.1.3
30
30
  Requires-Dist: regex
31
31
  Provides-Extra: dev
32
32
  Requires-Dist: pre-commit; extra == 'dev'
33
33
  Provides-Extra: pandas
34
- Requires-Dist: pandas>=2.0.3; extra == 'pandas'
34
+ Requires-Dist: pandas>=2.1.1; extra == 'pandas'
35
35
  Provides-Extra: test
36
36
  Requires-Dist: pytest; extra == 'test'
37
37
  Requires-Dist: pytest-cov; extra == 'test'
hdx_python_scraper-2.2.0.dist-info/RECORD → hdx_python_scraper-2.2.1.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
1
1
  hdx/scraper/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
2
- hdx/scraper/_version.py,sha256=XTy2WNrumA7Z8HDQ_NiV6KQTscT0Qx-QCHUYar0vV7w,240
2
+ hdx/scraper/_version.py,sha256=R_Wr7clGXr8a07n6uqFj88MyYFGydFRXYBI10R9k_uw,411
3
3
  hdx/scraper/base_scraper.py,sha256=IaUDqnrSxB0kbEQynX-81NEyv9DLxypWKwEDAEr9GWg,14628
4
- hdx/scraper/runner.py,sha256=xqn2awE70l08DgoPIpRAX5ifKF8S272fmp6Bq9HbKh0,50923
4
+ hdx/scraper/runner.py,sha256=-7L-L9WGZdTGl5mWNAPgvpTreU9bvbdxklruGCRzjRs,51217
5
5
  hdx/scraper/configurable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  hdx/scraper/configurable/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
7
7
  hdx/scraper/configurable/resource_downloader.py,sha256=vK8zNFy7T_Rj1h8Tj676-3B2oYYXFNKsrM9dxz7RZC8,1537
8
8
  hdx/scraper/configurable/rowparser.py,sha256=h7a0W2xvVJSAu94nS5CAXvZSZXdwZ-isFHHNaIce0gM,14635
9
- hdx/scraper/configurable/scraper.py,sha256=kVQpVMHEYizSq94PeSP119gi24a1XR_mzL4mtlAvK2M,20217
9
+ hdx/scraper/configurable/scraper.py,sha256=TyB7ipTzhVpOC3in0ZBIMwbcTAOR0Ul-W6Np85NnogI,20468
10
10
  hdx/scraper/configurable/timeseries.py,sha256=uhnENo7Wsy0-YVjglm7OQkXI72-te61DkepkihbQrP8,2982
11
11
  hdx/scraper/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  hdx/scraper/outputs/base.py,sha256=UBVFPANdd7wawifbKkPQWKwVC-Tr7Jg5ax1eLTmWX3M,2566
@@ -15,11 +15,11 @@ hdx/scraper/outputs/googlesheets.py,sha256=gPjzikxP4wmMBGL5LW50MXUcDq5nwCRMW74G1
15
15
  hdx/scraper/outputs/json.py,sha256=uw9_yAVpHVPWQ8LtMUZKTH88okyrHQs_SVjT6HJOxZ4,9498
16
16
  hdx/scraper/utilities/__init__.py,sha256=iBjD7bc8wEzQhwkcx2mOZwYmu28VHjl5px66quqWJ8E,2491
17
17
  hdx/scraper/utilities/fallbacks.py,sha256=08tvqVFuFV_gsvS7jqEiJUr7gqNILKCakDa8xMuIMpI,6186
18
- hdx/scraper/utilities/reader.py,sha256=BrCAxBibIPYp2gnkVacOTiPfeMKLEVULAqXzpaweCUY,17858
18
+ hdx/scraper/utilities/reader.py,sha256=awm24AUWlweJmJVE1h0iid7xb6njvF7Taf0afbGXIG4,18331
19
19
  hdx/scraper/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
20
20
  hdx/scraper/utilities/sources.py,sha256=h27PjBADqIhqDwmhzMXt1OjwJWZc2iVnIBwJuAJKHwo,11204
21
21
  hdx/scraper/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
22
- hdx_python_scraper-2.2.0.dist-info/METADATA,sha256=9xsBUu6DpgDmJZah70hsfj1Y7l9f-GgBtU2THR4UFv4,3289
23
- hdx_python_scraper-2.2.0.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
24
- hdx_python_scraper-2.2.0.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
25
- hdx_python_scraper-2.2.0.dist-info/RECORD,,
22
+ hdx_python_scraper-2.2.1.dist-info/METADATA,sha256=hnYCmTG7ZlGqfc4QKCHjBKSesZ2q7ooTbdtyAuuhkqs,3289
23
+ hdx_python_scraper-2.2.1.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
24
+ hdx_python_scraper-2.2.1.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
25
+ hdx_python_scraper-2.2.1.dist-info/RECORD,,