hdx-python-scraper 2.3.5__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx/scraper/{__init__.py → framework/__init__.py} +0 -0
- hdx/scraper/{_version.py → framework/_version.py} +2 -2
- hdx/scraper/{base_scraper.py → framework/base_scraper.py} +4 -4
- hdx/scraper/{outputs → framework/outputs}/googlesheets.py +1 -1
- hdx/scraper/{runner.py → framework/runner.py} +12 -12
- hdx/scraper/{configurable/scraper.py → framework/scrapers/configurable_scraper.py} +5 -5
- hdx/scraper/{configurable → framework/scrapers}/rowparser.py +58 -23
- hdx/scraper/{utilities → framework/utilities}/reader.py +98 -22
- hdx/scraper/framework/utilities/sector.py +63 -0
- hdx/scraper/framework/utilities/sector_configuration.yaml +138 -0
- hdx/scraper/{utilities → framework/utilities}/sources.py +3 -3
- {hdx_python_scraper-2.3.5.dist-info → hdx_python_scraper-2.5.3.dist-info}/METADATA +6 -5
- hdx_python_scraper-2.5.3.dist-info/RECORD +27 -0
- {hdx_python_scraper-2.3.5.dist-info → hdx_python_scraper-2.5.3.dist-info}/WHEEL +1 -1
- hdx_python_scraper-2.3.5.dist-info/RECORD +0 -25
- /hdx/scraper/{configurable → framework/outputs}/__init__.py +0 -0
- /hdx/scraper/{outputs → framework/outputs}/base.py +0 -0
- /hdx/scraper/{outputs → framework/outputs}/excelfile.py +0 -0
- /hdx/scraper/{outputs → framework/outputs}/json.py +0 -0
- /hdx/scraper/{outputs → framework/scrapers}/__init__.py +0 -0
- /hdx/scraper/{configurable → framework/scrapers}/aggregator.py +0 -0
- /hdx/scraper/{configurable → framework/scrapers}/resource_downloader.py +0 -0
- /hdx/scraper/{configurable → framework/scrapers}/timeseries.py +0 -0
- /hdx/scraper/{utilities → framework/utilities}/__init__.py +0 -0
- /hdx/scraper/{utilities → framework/utilities}/fallbacks.py +0 -0
- /hdx/scraper/{utilities → framework/utilities}/region_lookup.py +0 -0
- /hdx/scraper/{utilities → framework/utilities}/writer.py +0 -0
- {hdx_python_scraper-2.3.5.dist-info → hdx_python_scraper-2.5.3.dist-info}/licenses/LICENSE +0 -0

hdx/scraper/{base_scraper.py → framework/base_scraper.py}

@@ -36,7 +36,7 @@ class BaseScraper(ABC):
         self.reader = datasetinfo.get("reader", name)
         self.setup(headers, source_configuration)
         self.datasetinfo = deepcopy(datasetinfo)
-        self.errors_on_exit = None
+        self.error_handler = None
         self.can_fallback = True
 
     def setup(
@@ -141,9 +141,9 @@ class BaseScraper(ABC):
             "should_overwrite_sources"
         )
         if should_overwrite_sources is not None:
-            self.source_configuration[
-                "should_overwrite_sources"
-            ] = should_overwrite_sources
+            self.source_configuration["should_overwrite_sources"] = (
+                should_overwrite_sources
+            )
         source = self.datasetinfo["source"]
         if isinstance(source, str):
             source = {"default_source": source}
hdx/scraper/{runner.py → framework/runner.py}

@@ -5,18 +5,18 @@ from traceback import format_exc
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 from .base_scraper import BaseScraper
-from .configurable.aggregator import Aggregator
-from .configurable.resource_downloader import ResourceDownloader
-from .configurable.scraper import ConfigurableScraper
-from .configurable.timeseries import TimeSeries
 from .outputs.base import BaseOutput
+from .scrapers.aggregator import Aggregator
+from .scrapers.configurable_scraper import ConfigurableScraper
+from .scrapers.resource_downloader import ResourceDownloader
+from .scrapers.timeseries import TimeSeries
 from .utilities import get_startend_dates_from_time_period
 from .utilities.fallbacks import Fallbacks
 from .utilities.reader import Read
 from .utilities.sources import Sources
 from hdx.location.adminlevel import AdminLevel
 from hdx.utilities.dateparse import now_utc
-from hdx.utilities.errors_onexit import ErrorsOnExit
+from hdx.utilities.error_handler import ErrorHandler
 from hdx.utilities.typehint import ListTuple
 
 logger = logging.getLogger(__name__)
@@ -28,7 +28,7 @@ class Runner:
     Args:
         countryiso3s (ListTuple[str]): List of ISO3 country codes to process
         today (datetime): Value to use for today. Defaults to now_utc().
-        errors_on_exit (ErrorsOnExit): ErrorsOnExit object that logs errors on exit
+        error_handler (ErrorHandler): ErrorHandler object that logs errors on exit
         scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None (all scrapers).
     """
 
@@ -36,12 +36,12 @@ class Runner:
         self,
         countryiso3s: ListTuple[str],
         today: datetime = now_utc(),
-        errors_on_exit: Optional[ErrorsOnExit] = None,
+        error_handler: Optional[ErrorHandler] = None,
        scrapers_to_run: Optional[ListTuple[str]] = None,
     ):
         self.countryiso3s = countryiso3s
         self.today = today
-        self.errors_on_exit = errors_on_exit
+        self.error_handler = error_handler
         if isinstance(scrapers_to_run, tuple):
             scrapers_to_run = list(scrapers_to_run)
         self.scrapers_to_run: Optional[List[str]] = scrapers_to_run
@@ -73,7 +73,7 @@ class Runner:
             and scraper_name not in self.scrapers_to_run
         ):
             self.scrapers_to_run.append(scraper_name)
-        scraper.errors_on_exit = self.errors_on_exit
+        scraper.error_handler = self.error_handler
         return scraper_name
 
     def add_customs(
@@ -142,7 +142,7 @@ class Runner:
             level_name,
             source_configuration,
             self.today,
-            self.errors_on_exit,
+            self.error_handler,
         )
         if scraper_name not in self.scraper_names:
             self.scraper_names.append(scraper_name)
@@ -612,8 +612,8 @@ class Runner:
             if not Fallbacks.exist() or scraper.can_fallback is False:
                 raise
             logger.exception(f"Using fallbacks for {scraper.name}!")
-            if self.errors_on_exit:
-                self.errors_on_exit.add(
+            if self.error_handler:
+                self.error_handler.add(
                     f"Using fallbacks for {scraper.name}! Error: {format_exc()}"
                 )
             for level in scraper.headers.keys():
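
Note: the hunks above replace the old errors-on-exit object with ErrorHandler from hdx-python-utilities throughout Runner and the scrapers. A minimal sketch of how a caller might wire it up, assuming ErrorHandler's usual context-manager usage (the country codes are illustrative):

    from hdx.scraper.framework.runner import Runner
    from hdx.utilities.error_handler import ErrorHandler

    # Errors accumulated during the run are logged when the handler exits
    with ErrorHandler() as error_handler:
        runner = Runner(
            countryiso3s=("AFG", "SDN"),
            error_handler=error_handler,
        )
        # ... add and run scrapers; fallback failures call error_handler.add(...)
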
hdx/scraper/{configurable/scraper.py → framework/scrapers/configurable_scraper.py}

@@ -17,7 +17,7 @@ from hdx.utilities.dateparse import (
 )
 from hdx.utilities.dictandlist import dict_of_lists_add
 from hdx.utilities.downloader import DownloadError
-from hdx.utilities.errors_onexit import ErrorsOnExit
+from hdx.utilities.error_handler import ErrorHandler
 from hdx.utilities.text import (  # noqa: F401
     get_fraction_str,
     get_numeric_if_possible,
@@ -42,7 +42,7 @@ class ConfigurableScraper(BaseScraper):
         level_name (Optional[str]): Customised level_name name. Defaults to None (level).
         source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
         today (datetime): Value to use for today. Defaults to now_utc().
-        errors_on_exit (Optional[ErrorsOnExit]): ErrorsOnExit object that logs errors on exit
+        error_handler (Optional[ErrorHandler]): ErrorHandler object that logs errors on exit
         **kwargs: Variables to use when evaluating template arguments in urls
     """
 
@@ -67,7 +67,7 @@ class ConfigurableScraper(BaseScraper):
         level_name: Optional[str] = None,
         source_configuration: Dict = {},
         today: datetime = now_utc(),
-        errors_on_exit: Optional[ErrorsOnExit] = None,
+        error_handler: Optional[ErrorHandler] = None,
         **kwargs: Any,
     ):
         self.name = name
@@ -83,10 +83,10 @@ class ConfigurableScraper(BaseScraper):
         else:
             self.level_name: str = level_name
         self.countryiso3s = countryiso3s
-        self.adminlevel = adminlevel
+        self.adminlevel: Optional[AdminLevel] = adminlevel
         self.today = today
         self.subsets = self.get_subsets_from_datasetinfo(datasetinfo)
-        self.errors_on_exit = errors_on_exit
+        self.error_handler: Optional[ErrorHandler] = error_handler
         self.variables = kwargs
         self.rowparser = None
         self.datasetinfo = copy.deepcopy(datasetinfo)
hdx/scraper/{configurable → framework/scrapers}/rowparser.py

@@ -185,20 +185,14 @@ class RowParser:
         Returns:
             Iterator[Dict]: Input data with prefilter applied if specified and sorted if specified or deemed necessary
         """
-        rows = []
-        for row in iterator:
-            if self.header_to_hxltag:
-                newrow = {}
-                for header in row:
-                    newrow[self.header_to_hxltag[header]] = row[header]
-                row = newrow
-            if self.stop_row:
-                if all(
-                    row[key] == value for key, value in self.stop_row.items()
-                ):
-                    break
-            for newrow in self.flatten(row):
-                rows.append(newrow)
+        if self.header_to_hxltag:
+            iterator = self.header_to_hxltag_rows(iterator)
+        if self.stop_row:
+            iterator = self.stop_rows(iterator)
+        if self.flatteninfo:
+            iterator = self.flatten_rows(iterator)
+        if self.prefilter:
+            iterator = (row for row in iterator if eval(self.prefilter))
         if not self.sort:
             if self.datecol:
                 for subset in self.subsets:
@@ -212,15 +206,59 @@ class RowParser:
                     )
                     self.sort = {"keys": [self.datecol], "reverse": True}
                     break
-        if self.prefilter:
-            rows = [row for row in rows if eval(self.prefilter)]
         if self.sort:
             keys = self.sort["keys"]
             reverse = self.sort.get("reverse", False)
-            rows = sorted(rows, key=itemgetter(*keys), reverse=reverse)
-        return rows
+            iterator = sorted(iterator, key=itemgetter(*keys), reverse=reverse)
+        return iterator
+
+    def header_to_hxltag_rows(
+        self, iterator: Iterator[Dict]
+    ) -> Generator[Dict, None, None]:
+        """Convert headers to HXL tags in keys
+
+        Args:
+            iterator (Iterator[Dict]): Input data
+
+        Returns:
+            Generator[Dict]: Rows where keys are HXL tags
+        """
+        for row in iterator:
+            newrow = {}
+            for header in row:
+                newrow[self.header_to_hxltag[header]] = row[header]
+            yield newrow
+
+    def stop_rows(
+        self, iterator: Iterator[Dict]
+    ) -> Generator[Dict, None, None]:
+        """Stop processing rows after condition met
+
+        Args:
+            iterator (Iterator[Dict]): Input data
+
+        Returns:
+            Generator[Dict]: Rows up to stop condition
+        """
+        for row in iterator:
+            if all(row[key] == value for key, value in self.stop_row.items()):
+                break
+            yield row
+
+    def flatten_rows(self, iterator: Iterator[Dict]) -> Iterator[Dict]:
+        """Flatten rows
+
+        Args:
+            iterator (Iterator[Dict]): Input data
+
+        Returns:
+            Generator[Dict]: Flattened rows
+        """
+        for row in iterator:
+            for newrow in self.flatten_row(row):
+                yield newrow
 
-    def flatten(self, row: Dict) -> Generator[Dict, None, None]:
+    def flatten_row(self, row: Dict) -> Generator[Dict, None, None]:
         """Flatten a wide spreadsheet format into a long one
 
         Args:
@@ -229,9 +267,6 @@ class RowParser:
         Returns:
             Generator[Dict]: Flattened row(s)
         """
-        if not self.flatteninfo:
-            yield row
-            return
         counters = [-1 for _ in self.flatteninfo]
         while True:
             newrow = copy.deepcopy(row)
@@ -314,7 +349,7 @@ class RowParser:
                 adms[i], exact = Country.get_iso3_country_code_fuzzy(adm)
             elif i == 1:
                 adms[i], exact = self.adminlevel.get_pcode(
-                    adms[0], adm, self.name
+                    adms[0], adm, logname=self.name
                 )
             if adms[i] not in self.adms[i]:
                 adms[i] = None
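
Note: the RowParser refactor above replaces the eagerly built rows list with lazily chained generator stages, so each filter only sees rows that survive the previous stage (a sort still materialises the data). A self-contained sketch of the same composition pattern outside the class (the sample rows and stop condition are illustrative):

    from typing import Dict, Iterator

    def stop_rows(iterator, stop_row):
        # End the stream as soon as a row matches every key/value in stop_row
        for row in iterator:
            if all(row[key] == value for key, value in stop_row.items()):
                break
            yield row

    rows: Iterator[Dict] = iter(
        [{"#adm1+code": "AF01"}, {"#adm1+code": "TOTAL"}, {"#adm1+code": "AF02"}]
    )
    rows = stop_rows(rows, {"#adm1+code": "TOTAL"})
    rows = (row for row in rows if row["#adm1+code"].startswith("AF"))
    print(list(rows))  # [{'#adm1+code': 'AF01'}]
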
hdx/scraper/{utilities → framework/utilities}/reader.py

@@ -1,3 +1,4 @@
+import glob
 import logging
 from datetime import datetime
 from os.path import join
@@ -10,6 +11,7 @@ from slugify import slugify
 
 from . import get_startend_dates_from_time_period, match_template
 from .sources import Sources
+from hdx.api.configuration import Configuration
 from hdx.data.dataset import Dataset
 from hdx.data.resource import Resource
 from hdx.utilities.dateparse import parse_date
@@ -115,6 +117,11 @@ class Read(Retrieve):
             for name in basic_auths:
                 custom_configs[name] = {"basic_auth": basic_auths[name]}
             del kwargs["basic_auths"]
+        bearer_tokens = kwargs.get("bearer_tokens")
+        if bearer_tokens is not None:
+            for name in bearer_tokens:
+                custom_configs[name] = {"bearer_token": bearer_tokens[name]}
+            del kwargs["bearer_tokens"]
         param_auths = kwargs.get("param_auths")
         if param_auths is not None:
             for name in param_auths:
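
Note: bearer_tokens joins basic_auths and param_auths as a way to give individual named readers their own authentication. A hedged sketch, assuming this code sits in Read.create_readers with the signature from earlier releases (the directories, reader name and token are illustrative):

    from hdx.scraper.framework.utilities.reader import Read

    Read.create_readers(
        "fallbacks", "saved", "temp",
        save=False,
        use_saved=False,
        bearer_tokens={"myapi": "abc123token"},  # reader "myapi" sends this token
    )
    reader = Read.get_reader("myapi")
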
@@ -204,15 +211,19 @@ class Read(Retrieve):
         if headers is None:
             headers = 1
             datasetinfo["headers"] = 1
-        kwargs["headers"] = headers
-        if isinstance(headers, list):
-            kwargs["fill_merged_cells"] = True
         format = datasetinfo["format"]
         kwargs["format"] = format
-        if format in ("xls", "xlsx") and not sheet:
-            sheet = 1
+        if format in ("xls", "xlsx"):
+            if not sheet:
+                sheet = 1
+            if isinstance(headers, list):
+                kwargs["fill_merged_cells"] = True
+            elif "fill_merged_cells" not in kwargs:
+                kwargs["fill_merged_cells"] = False
+            kwargs["xlsx2csv"] = datasetinfo.get("xlsx2csv", False)
         if sheet:
             kwargs["sheet"] = sheet
+        kwargs["headers"] = headers
         compression = datasetinfo.get("compression")
         if compression:
             kwargs["compression"] = compression
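
Note: spreadsheet handling is now gated on format: fill_merged_cells is switched on automatically for multi-row (list) headers, defaults off otherwise unless the caller passes it, and the new xlsx2csv key lets a datasetinfo opt in to converting workbooks to CSV before parsing. An illustrative datasetinfo (the dataset and resource names are placeholders):

    datasetinfo = {
        "dataset": "example-dataset",    # placeholder HDX dataset name
        "resource": "Example resource",  # placeholder resource name
        "format": "xlsx",
        "sheet": "Data",
        "headers": [1, 2],               # two header rows: merged cells get filled
        "xlsx2csv": True,                # convert via xlsx2csv before reading
    }
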
@@ -238,11 +249,14 @@ class Read(Retrieve):
             **kwargs,
         )
 
-    def read_dataset(self, dataset_name: str) -> Optional[Dataset]:
+    def read_dataset(
+        self, dataset_name: str, configuration: Optional[Configuration] = None
+    ) -> Optional[Dataset]:
         """Read HDX dataset
 
         Args:
             dataset_name (str): Dataset name
+            configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
 
         Returns:
             Optional[Dataset]: The dataset that was read or None
@@ -252,7 +266,7 @@ class Read(Retrieve):
             logger.info(f"Using saved dataset {dataset_name} in {saved_path}")
             dataset = Dataset.load_from_json(saved_path)
         else:
-            dataset = Dataset.read_from_hdx(dataset_name)
+            dataset = Dataset.read_from_hdx(dataset_name, configuration)
             if self.save:
                 logger.info(f"Saving dataset {dataset_name} in {saved_path}")
                 if dataset is None:
@@ -261,6 +275,56 @@ class Read(Retrieve):
                 dataset.save_to_json(saved_path, follow_urls=True)
         return dataset
 
+    def search_datasets(
+        self,
+        filename: str,
+        query: Optional[str] = "*:*",
+        configuration: Optional[Configuration] = None,
+        page_size: int = 1000,
+        **kwargs: Any,
+    ) -> List[Dataset]:
+        """Read HDX dataset
+
+        Args:
+            filename (str): Filename for saved files. Will be prefixed by underscore and a number.
+            query (Optional[str]): Query (in Solr format). Defaults to '*:*'.
+            configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
+            page_size (int): Size of page to return. Defaults to 1000.
+            **kwargs: See below
+            fq (string): Any filter queries to apply
+            rows (int): Number of matching rows to return. Defaults to all datasets (sys.maxsize).
+            start (int): Offset in the complete result for where the set of returned datasets should begin
+            sort (string): Sorting of results. Defaults to 'relevance asc, metadata_modified desc' if rows<=page_size or 'metadata_modified asc' if rows>page_size.
+            facet (string): Whether to enable faceted results. Default to True.
+            facet.mincount (int): Minimum counts for facet fields should be included in the results
+            facet.limit (int): Maximum number of values the facet fields return (- = unlimited). Defaults to 50.
+            facet.field (List[str]): Fields to facet upon. Default is empty.
+            use_default_schema (bool): Use default package schema instead of custom schema. Defaults to False.
+
+        Returns:
+            List[Dataset]: list of datasets resulting from query
+        """
+
+        saved_path = join(self.saved_dir, filename)
+        if self.use_saved:
+            logger.info(
+                f"Using saved datasets in {filename}_n.json in {self.saved_dir}"
+            )
+            datasets = []
+            for file_path in sorted(glob.glob(f"{saved_path}_*.json")):
+                datasets.append(Dataset.load_from_json(file_path))
+        else:
+            datasets = Dataset.search_in_hdx(
+                query, configuration, page_size, **kwargs
+            )
+            if self.save:
+                for i, dataset in enumerate(datasets):
+                    file_path = f"{saved_path}_{i}.json"
+                    name = dataset["name"]
+                    logger.info(f"Saving dataset {name} in {file_path}")
+                    dataset.save_to_json(file_path, follow_urls=True)
+        return datasets
+
     @staticmethod
     def construct_filename(name: str, format: str):
         """Construct filename from name and format. The filename of the file
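
Note: search_datasets brings the same save/use_saved caching discipline as read_dataset to Solr-style HDX searches, writing each hit to <filename>_<n>.json. A minimal usage sketch, assuming readers have already been created (the query is illustrative):

    reader = Read.get_reader()
    datasets = reader.search_datasets(
        filename="3w_datasets",  # hits cached as 3w_datasets_0.json, ...
        query='vocab_Topics:"who is doing what and where-3w-4w-5w"',
        rows=25,
    )
    for dataset in datasets:
        print(dataset["name"])
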
@@ -438,7 +502,10 @@ class Read(Retrieve):
             return self.hxl_info_file(name, format, url, **kwargs)
 
     def read_hdx_metadata(
-        self, datasetinfo: Dict, do_resource_check: bool = True
+        self,
+        datasetinfo: Dict,
+        do_resource_check: bool = True,
+        configuration: Optional[Configuration] = None,
     ) -> Optional[Resource]:
         """Read metadata from HDX dataset and add to input dictionary. If url
         is not supplied, will look through resources for one that matches
@@ -454,13 +521,14 @@ class Read(Retrieve):
         Args:
             datasetinfo (Dict): Dictionary of information about dataset
             do_resource_check (bool): Whether to check resources. Defaults to False.
+            configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
 
         Returns:
             Optional[Resource]: The resource if a url was not given
         """
         dataset_nameinfo = datasetinfo["dataset"]
         if isinstance(dataset_nameinfo, str):
-            dataset = self.read_dataset(dataset_nameinfo)
+            dataset = self.read_dataset(dataset_nameinfo, configuration)
             resource = None
             url = datasetinfo.get("url")
             resource_name = datasetinfo.get("resource")
@@ -491,24 +559,24 @@ class Read(Retrieve):
             else:
                 url = resource["url"]  # otherwise set the url key in
                 # datasetinfo to the resource url (by setting url here)
-            datasetinfo[
-                "hapi_resource_metadata"
-            ] = self.get_hapi_resource_metadata(resource)
+            datasetinfo["hapi_resource_metadata"] = (
+                self.get_hapi_resource_metadata(resource)
+            )
             datasetinfo["url"] = url
             if "source_date" not in datasetinfo:
-                datasetinfo[
-                    "source_date"
-                ] = get_startend_dates_from_time_period(
-                    dataset, today=self.today
+                datasetinfo["source_date"] = (
+                    get_startend_dates_from_time_period(
+                        dataset, today=self.today
+                    )
                 )
             if "source" not in datasetinfo:
                 datasetinfo["source"] = dataset["dataset_source"]
             if "source_url" not in datasetinfo:
                 datasetinfo["source_url"] = dataset.get_hdx_url()
             Sources.standardise_datasetinfo_source_date(datasetinfo)
-            datasetinfo[
-                "hapi_dataset_metadata"
-            ] = self.get_hapi_dataset_metadata(dataset, datasetinfo)
+            datasetinfo["hapi_dataset_metadata"] = (
+                self.get_hapi_dataset_metadata(dataset, datasetinfo)
+            )
             return resource
 
         if "source_date" not in datasetinfo:
@@ -527,7 +595,7 @@ class Read(Retrieve):
         for hxltag, dataset_name in dataset_nameinfo.items():
             dataset = datasets.get(dataset_name)
             if not dataset:
-                dataset = self.read_dataset(dataset_name)
+                dataset = self.read_dataset(dataset_name, configuration)
                 datasets[dataset_name] = dataset
             if source_date is not None:
                 if hxltag == "default_dataset":

@@ -561,18 +629,22 @@
     def read_hdx(
         self,
         datasetinfo: Dict,
+        configuration: Optional[Configuration] = None,
         **kwargs: Any,
     ) -> Tuple[List[str], Iterator[Dict]]:
         """Read data and metadata from HDX dataset
 
         Args:
             datasetinfo (Dict): Dictionary of information about dataset
+            configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
             **kwargs: Parameters to pass to download_file call
 
         Returns:
             Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
         """
-        resource = self.read_hdx_metadata(datasetinfo)
+        resource = self.read_hdx_metadata(
+            datasetinfo, configuration=configuration
+        )
         filename = kwargs.get("filename")
         if filename:
             del kwargs["filename"]

@@ -593,12 +665,14 @@
     def read(
         self,
         datasetinfo: Dict,
+        configuration: Optional[Configuration] = None,
         **kwargs: Any,
     ) -> Tuple[List[str], Iterator[Dict]]:
         """Read data and metadata from HDX dataset
 
         Args:
             datasetinfo (Dict): Dictionary of information about dataset
+            configuration (Optional[Configuration]): HDX configuration. Defaults to global configuration.
             **kwargs: Parameters to pass to download_file call
 
         Returns:

@@ -607,7 +681,9 @@
         format = datasetinfo["format"]
         if format in ["json", "csv", "xls", "xlsx"]:
             if "dataset" in datasetinfo:
-                headers, iterator = self.read_hdx(datasetinfo, **kwargs)
+                headers, iterator = self.read_hdx(
+                    datasetinfo, configuration, **kwargs
+                )
             else:
                 headers, iterator = self.read_tabular(datasetinfo, **kwargs)
         else:
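
Note: every HDX-touching entry point in Read (read_dataset, search_datasets, read_hdx_metadata, read_hdx, read) now threads an optional Configuration through to hdx-python-api, so one reader can pull datasets from a non-default HDX instance. A hedged sketch; constructing a second Configuration this way and the site parameters are assumptions, not taken from this diff:

    from hdx.api.configuration import Configuration

    # Assumption: a separate Configuration built with standard hdx-python-api arguments
    stage_configuration = Configuration(
        hdx_site="stage", user_agent="my_scraper", hdx_read_only=True
    )
    reader = Read.get_reader()
    resource = reader.read_hdx_metadata(
        datasetinfo, configuration=stage_configuration
    )
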
hdx/scraper/framework/utilities/sector.py

@@ -0,0 +1,63 @@
+"""Populate the sector mapping."""
+
+import logging
+from copy import copy
+from typing import Dict, Optional
+
+from .reader import Read
+from hdx.utilities.loader import load_yaml
+from hdx.utilities.matching import get_code_from_name
+from hdx.utilities.path import script_dir_plus_file
+from hdx.utilities.text import normalise
+
+logger = logging.getLogger(__name__)
+
+
+class Sector:
+    def __init__(
+        self,
+        configuration: Optional[Dict] = None,
+    ):
+        if configuration is None:
+            configuration = load_yaml(
+                script_dir_plus_file("sector_configuration.yaml", Sector)
+            )
+        self._datasetinfo = configuration["sector"]
+        self.data = copy(configuration["sector_map"])
+        self.unmatched = []
+        self.populate()
+
+    def populate(self) -> None:
+        logger.info("Populating sector mapping")
+
+        def parse_sector_values(code: str, name: str):
+            self.data[name] = code
+            self.data[code] = code
+            self.data[normalise(name)] = code
+            self.data[normalise(code)] = code
+
+        reader = Read.get_reader()
+        headers, iterator = reader.read(
+            self._datasetinfo, file_prefix="sector"
+        )
+        for row in iterator:
+            parse_sector_values(
+                code=row["#sector +code +acronym"],
+                name=row["#sector +name +preferred +i_en"],
+            )
+
+        extra_entries = {
+            "Cash": "Cash programming",
+            "Hum": "Humanitarian assistance (unspecified)",
+            "Multi": "Multi-sector (unspecified)",
+            "Intersectoral": "Intersectoral",
+        }
+        for code, name in extra_entries.items():
+            parse_sector_values(code=code, name=name)
+
+    def get_sector_code(self, sector: str) -> str | None:
+        return get_code_from_name(
+            name=sector,
+            code_lookup=self.data,
+            unmatched=self.unmatched,
+        )
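
Note: Sector builds a lookup from the HDX global coordination groups dataset plus the hardcoded synonym map in the YAML below, storing raw and normalised names and codes so fuzzy 3W sector labels resolve to canonical acronyms. A hedged usage sketch, assuming Read.create_readers has already been called so the shared reader is available:

    sector = Sector()  # downloads the coordination groups data via the shared reader
    print(sector.get_sector_code("WASH"))                 # "WSH" via normalisation
    print(sector.get_sector_code("Cluster Coordination")) # "CCM"
    print(sector.unmatched)                               # labels that could not be resolved
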
hdx/scraper/framework/utilities/sector_configuration.yaml

@@ -0,0 +1,138 @@
+sector:
+  dataset: "global-coordination-groups-beta"
+  resource: "Global Coordination Groups (Beta) CSV"
+  format: "csv"
+  headers: 2
+
+sector_map:
+  abna: "SHL"
+  abri: "SHL"
+  abri bna: "SHL"
+  abris: "SHL"
+  abris ame: "SHL"
+  abris bna: "SHL"
+  abris bna cccm: "SHL"
+  abris durgence et nfi: "SHL"
+  abris nfi: "SHL"
+  action contre les mines: "PRO-MIN"
+  aee: "SHL"
+  agriculture: "FSC"
+  agua saneamiento e higiene: "WSH"
+  all: "Intersectoral"
+  alojamiento de emergencia: "SHL"
+  alojamiento de emergencia shelter: "SHL"
+  alojamientos y asentamientos: "SHL"
+  ame: "SHL"
+  ash: "WSH"
+  assainissement: "WSH"
+  camp coordination and camp management: "CCM"
+  camp coordination camp management: "CCM"
+  cash: "Cash"
+  cccm: "CCM"
+  ccs: "CCM"
+  cluster coordination: "CCM"
+  coord services support: "CCM"
+  coordinacion informacion: "CCM"
+  coordination: "CCM"
+  coordination et gestion des camps: "CCM"
+  eah: "WSH"
+  eau: "WSH"
+  eau assainissement et hygiene: "WSH"
+  eau hygiene: "WSH"
+  eau hygiene assainissement: "WSH"
+  eau hygiene et assainissement: "WSH"
+  educacion: "EDU"
+  educacion en emergencias: "EDU"
+  education: "EDU"
+  eha: "WSH"
+  emergency shelter and non food items: "SHL"
+  epah: "WSH"
+  erl: "ERY"
+  esnfi: "SHL"
+  explosive hazards: "PRO-MIN"
+  food: "FSC"
+  food security and agriculture: "FSC"
+  food security and livelihoods: "FSC"
+  food security and nutrition: "FSC"
+  food security livelihood: "FSC"
+  fsl: "FSC"
+  gestion des sites daccueil temporaires: "SHL"
+  gbv: "PRO-GBV"
+  hlp: "PRO-HLP"
+  humanitaire: "Hum"
+  hygiene: "WSH"
+  hygiene assainissement: "WSH"
+  intercluster: "Multi" # From Somalia 3W, hopefully not to be confused with intersectoral
+  logement terre et biens: "PRO-HLP"
+  logistica: "LOG"
+  logistique: "LOG"
+  manejo y gestion de campamentos: "CCM"
+  ms: "Multi"
+  multi secteur: "Multi"
+  multisectoriel: "Multi"
+  nutricion: "NUT"
+  nutrition: "NUT"
+  operatioanl presence water sanitation hygiene: "WSH"
+  operational presence education in emergencies: "EDU"
+  operational presence emergency shelter non food items: "SHL"
+  operational presence food security agriculture: "FSC"
+  operational presence health: "HEA"
+  operational presence nutrition: "NUT"
+  operational presence protection: "PRO"
+  pro cpm: "PRO-CPN"
+  pronna: "PRO-CPN"
+  propg: "PRO"
+  proteccion infantil: "PRO-CPN"
+  protection: "PRO"
+  protection de lenfance: "PRO-CPN"
+  protection de lenfant: "PRO-CPN"
+  protection generale: "PRO"
+  protection logement terre et propriete: "PRO-HLP"
+  protection ltb: "PRO-HLP"
+  protection lutte anti mines: "PRO-MIN"
+  protection pe: "PRO-CPN"
+  protection protection de lenfant: "PRO-CPN"
+  protection violences basees sur le genre: "PRO-GBV"
+  protection vgb: "PRO-GBV"
+  proteccion: "PRO"
+  provbg: "PRO-GBV"
+  psea: "PRO-GBV"
+  rapid response mechanism: "ERY"
+  rcf: "CCM"
+  rcf education: "EDU"
+  rcf food security and livelihoods: "FSC"
+  rcf health and nutrtion: "HEA"
+  rcf protection: "PRO"
+  recuperacion temprana: "ERY"
+  relevement precoce: "ERY"
+  relevement rapide: "ERY"
+  refugee response: "CCM"
+  refugees migrants multi sector: "CCM"
+  reponse aux refugies: "CCM"
+  sa: "FSC"
+  sal: "HEA"
+  salud: "HEA"
+  samv: "FSC"
+  sante: "HEA"
+  securite alimentaire: "FSC"
+  seguridad alimentaria: "FSC"
+  seguridad alimentaria y nutricion: "FSC"
+  services humanitaires communs: "Hum"
+  sexual and reproductive health: "HEA"
+  shelter: "SHL"
+  shelter nfi: "SHL"
+  shelter nfis: "SHL"
+  shelter and nfi: "SHL"
+  shelter and nfis: "SHL"
+  shelter and non food items: "SHL"
+  site management: "CCM"
+  snfi: "SHL"
+  telecommunications: "TEL"
+  telecommunications durgence: "TEL"
+  telecomunicaciones de emergencia: "TEL"
+  vbg: "PRO-GBV"
+  violences basees sur le genre: "PRO-GBV"
+  violence basee sur le genre: "PRO-GBV"
+  violencia basada en genero: "PRO-GBV"
+  wash: "WSH"
+  water sanitation and hygiene: "WSH"
hdx/scraper/{utilities → framework/utilities}/sources.py

@@ -282,9 +282,9 @@ class Sources:
         if no_sources:
             source_configuration["no_sources"] = True
             return source_configuration
-        source_configuration[
-            "should_overwrite_sources"
-        ] = should_overwrite_sources
+        source_configuration["should_overwrite_sources"] = (
+            should_overwrite_sources
+        )
         if suffix_attribute:
             source_configuration["suffix_attribute"] = suffix_attribute
         return source_configuration
{hdx_python_scraper-2.3.5.dist-info → hdx_python_scraper-2.5.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: hdx-python-scraper
-Version: 2.3.5
+Version: 2.5.3
 Summary: HDX Python scraper utilities to assemble data from multiple sources
 Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
 Author-email: Michael Rans <rans@email.com>
@@ -26,13 +26,14 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Requires-Dist: gspread
-Requires-Dist: hdx-python-api>=6.
-Requires-Dist: hdx-python-country>=3.6
+Requires-Dist: hdx-python-api>=6.3.7
+Requires-Dist: hdx-python-country>=3.8.6
+Requires-Dist: hdx-python-utilities>=3.8.2
 Requires-Dist: regex
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == 'dev'
 Provides-Extra: pandas
-Requires-Dist: pandas>=2.
+Requires-Dist: pandas>=2.2.2; extra == 'pandas'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
 Requires-Dist: pytest-cov; extra == 'test'
hdx_python_scraper-2.5.3.dist-info/RECORD

@@ -0,0 +1,27 @@
+hdx/scraper/framework/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
+hdx/scraper/framework/_version.py,sha256=YlFdzLR6C3fl-9jq4_71rr5eVxx1hHLisz6muXMUhiQ,411
+hdx/scraper/framework/base_scraper.py,sha256=vvwljQ5QWr6hpCjOS89RG1pvC955aLoPvm6pSovO75o,15432
+hdx/scraper/framework/runner.py,sha256=GFnZM9HciZFibwwRgDHVk9F_y2n27ctpRwyeD1_ZcKw,53538
+hdx/scraper/framework/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hdx/scraper/framework/outputs/base.py,sha256=UBVFPANdd7wawifbKkPQWKwVC-Tr7Jg5ax1eLTmWX3M,2566
+hdx/scraper/framework/outputs/excelfile.py,sha256=bKBj1aYUJCIXhvpmGXAJ0FLoKwjnj-2E0LlR64RcFdY,2197
+hdx/scraper/framework/outputs/googlesheets.py,sha256=jLAfXz4usmLFrePxRIsMflxKPzSGv9T3jlMpSV-s4II,3087
+hdx/scraper/framework/outputs/json.py,sha256=uw9_yAVpHVPWQ8LtMUZKTH88okyrHQs_SVjT6HJOxZ4,9498
+hdx/scraper/framework/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hdx/scraper/framework/scrapers/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
+hdx/scraper/framework/scrapers/configurable_scraper.py,sha256=PYPtU9XZALNx-2Jr8a8kVVDsT2j9yGgBaw6wXhztQIM,20612
+hdx/scraper/framework/scrapers/resource_downloader.py,sha256=lCIQpNZtcCTRc3z0FFM2_JxRtoua9GEq2XiKRZ9fqZk,1549
+hdx/scraper/framework/scrapers/rowparser.py,sha256=bH05JUqViIVes9T7gWp0D2778BlFiJuNHmdovSFdFoI,15614
+hdx/scraper/framework/scrapers/timeseries.py,sha256=oAby_sGL6NmRoKnDG_fMB952W9zvzujPIsXkbqcXv-o,3027
+hdx/scraper/framework/utilities/__init__.py,sha256=dvbp0qTV-kLvN4Xp0GQf8LjN3IqlytW1eaTmDjlyZy0,2391
+hdx/scraper/framework/utilities/fallbacks.py,sha256=08tvqVFuFV_gsvS7jqEiJUr7gqNILKCakDa8xMuIMpI,6186
+hdx/scraper/framework/utilities/reader.py,sha256=0XQ335Qj0ihafDklsXhDa5GHLux4FThIM4oZt1B5uLo,26814
+hdx/scraper/framework/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
+hdx/scraper/framework/utilities/sector.py,sha256=rl_TceRYc5YRoLccr0ABCM42ZLLtLzezWWWQ5YtbQDE,1947
+hdx/scraper/framework/utilities/sector_configuration.yaml,sha256=LAUR5xfLU5qua5qtc3TcwEei0sD1zoCb_vfAxD7Grb8,3894
+hdx/scraper/framework/utilities/sources.py,sha256=KuhaTvvGzjuw0dbhWpmPFvSq5RWP9cY83nl687O3CSs,11513
+hdx/scraper/framework/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
+hdx_python_scraper-2.5.3.dist-info/METADATA,sha256=eJwqy5OyM3ngW2rUWWqTUpjmnDQy8ChsDLivhoAgypI,3361
+hdx_python_scraper-2.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hdx_python_scraper-2.5.3.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
+hdx_python_scraper-2.5.3.dist-info/RECORD,,
hdx_python_scraper-2.3.5.dist-info/RECORD

@@ -1,25 +0,0 @@
-hdx/scraper/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
-hdx/scraper/_version.py,sha256=-9aYLvgAp04zL8yFAMPjvf6kLKgqW1mLgyuk6XA3LcE,411
-hdx/scraper/base_scraper.py,sha256=oo9oMqCUpK8_hPwcTz2PAKabzoyU0BQu5dgWgsFa55Y,15431
-hdx/scraper/runner.py,sha256=v5ToiTBOvFbkMOcBAoWGmDyO5bhGooTL8pPIt3BIQ8Y,53550
-hdx/scraper/configurable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hdx/scraper/configurable/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
-hdx/scraper/configurable/resource_downloader.py,sha256=lCIQpNZtcCTRc3z0FFM2_JxRtoua9GEq2XiKRZ9fqZk,1549
-hdx/scraper/configurable/rowparser.py,sha256=h7a0W2xvVJSAu94nS5CAXvZSZXdwZ-isFHHNaIce0gM,14635
-hdx/scraper/configurable/scraper.py,sha256=4f4kNbG0HCIfPe1ft93T247s841rk1fP4cIpkFQ6NWU,20594
-hdx/scraper/configurable/timeseries.py,sha256=oAby_sGL6NmRoKnDG_fMB952W9zvzujPIsXkbqcXv-o,3027
-hdx/scraper/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hdx/scraper/outputs/base.py,sha256=UBVFPANdd7wawifbKkPQWKwVC-Tr7Jg5ax1eLTmWX3M,2566
-hdx/scraper/outputs/excelfile.py,sha256=bKBj1aYUJCIXhvpmGXAJ0FLoKwjnj-2E0LlR64RcFdY,2197
-hdx/scraper/outputs/googlesheets.py,sha256=gPjzikxP4wmMBGL5LW50MXUcDq5nwCRMW74G1Ep39QY,3087
-hdx/scraper/outputs/json.py,sha256=uw9_yAVpHVPWQ8LtMUZKTH88okyrHQs_SVjT6HJOxZ4,9498
-hdx/scraper/utilities/__init__.py,sha256=dvbp0qTV-kLvN4Xp0GQf8LjN3IqlytW1eaTmDjlyZy0,2391
-hdx/scraper/utilities/fallbacks.py,sha256=08tvqVFuFV_gsvS7jqEiJUr7gqNILKCakDa8xMuIMpI,6186
-hdx/scraper/utilities/reader.py,sha256=hexLIJW3CdP4DmobqMM-Z2d6pgcCs1zWWBW-stqoeNU,22975
-hdx/scraper/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
-hdx/scraper/utilities/sources.py,sha256=VNhFYSUM2xeDlN6y4Ya9_0BskjPtjwQZmCKnQgpOemQ,11511
-hdx/scraper/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
-hdx_python_scraper-2.3.5.dist-info/METADATA,sha256=jYBTVEB111S1R3Cj8fZByzM4E3nRRKCr31bsCPstjPA,3318
-hdx_python_scraper-2.3.5.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
-hdx_python_scraper-2.3.5.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
-hdx_python_scraper-2.3.5.dist-info/RECORD,,

All remaining files in the list above marked +0 -0 were moved under hdx/scraper/framework without content changes.