hdx-python-scraper 2.2.3__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx/scraper/_version.py +2 -2
- hdx/scraper/base_scraper.py +21 -9
- hdx/scraper/configurable/resource_downloader.py +1 -1
- hdx/scraper/configurable/scraper.py +1 -0
- hdx/scraper/configurable/timeseries.py +3 -1
- hdx/scraper/runner.py +5 -2
- hdx/scraper/utilities/reader.py +142 -40
- {hdx_python_scraper-2.2.3.dist-info → hdx_python_scraper-2.3.0.dist-info}/METADATA +16 -15
- {hdx_python_scraper-2.2.3.dist-info → hdx_python_scraper-2.3.0.dist-info}/RECORD +11 -11
- {hdx_python_scraper-2.2.3.dist-info → hdx_python_scraper-2.3.0.dist-info}/WHEEL +1 -1
- {hdx_python_scraper-2.2.3.dist-info → hdx_python_scraper-2.3.0.dist-info}/licenses/LICENSE +0 -0
hdx/scraper/_version.py
CHANGED
hdx/scraper/base_scraper.py
CHANGED
|
@@ -81,15 +81,12 @@ class BaseScraper(ABC):
|
|
|
81
81
|
self.sources: Dict[str, List] = {level: [] for level in self.headers}
|
|
82
82
|
self.source_configuration = deepcopy(source_configuration)
|
|
83
83
|
|
|
84
|
-
def get_reader(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"""Get reader given name if provided or using name member variable if not.
|
|
88
|
-
Set reader prefix to given prefix or name if not provided.
|
|
84
|
+
def get_reader(self, name: Optional[str] = None):
|
|
85
|
+
"""Get reader given name if provided or using name member variable if
|
|
86
|
+
not.
|
|
89
87
|
|
|
90
88
|
Args:
|
|
91
89
|
name (str): Name of scraper
|
|
92
|
-
prefix (Optional[str]): Prefix to use. Defaults to None (use scraper name).
|
|
93
90
|
|
|
94
91
|
Returns:
|
|
95
92
|
None
|
|
@@ -97,9 +94,6 @@ class BaseScraper(ABC):
|
|
|
97
94
|
if not name:
|
|
98
95
|
name = self.name
|
|
99
96
|
reader = Read.get_reader(name)
|
|
100
|
-
if not prefix:
|
|
101
|
-
prefix = name
|
|
102
|
-
reader.prefix = prefix
|
|
103
97
|
return reader
|
|
104
98
|
|
|
105
99
|
def get_headers(self, level: str) -> Optional[Tuple[Tuple]]:
|
|
@@ -362,6 +356,24 @@ class BaseScraper(ABC):
|
|
|
362
356
|
Returns:
|
|
363
357
|
Optional[Dict]: HAPI resource metadata
|
|
364
358
|
"""
|
|
359
|
+
hapi_resource_metadata = self.datasetinfo.get("hapi_resource_metadata")
|
|
360
|
+
if not hapi_resource_metadata:
|
|
361
|
+
return None
|
|
362
|
+
if "is_hxl" in hapi_resource_metadata:
|
|
363
|
+
return hapi_resource_metadata
|
|
364
|
+
reader = self.get_reader(self.name)
|
|
365
|
+
filename = self.datasetinfo.get("filename")
|
|
366
|
+
hxl_info = reader.hxl_info_hapi_resource_metadata(
|
|
367
|
+
hapi_resource_metadata, filename=filename, file_prefix=self.name
|
|
368
|
+
)
|
|
369
|
+
is_hxl = False
|
|
370
|
+
if hxl_info:
|
|
371
|
+
for sheet in hxl_info.get("sheets", ()):
|
|
372
|
+
if sheet["is_hxlated"]:
|
|
373
|
+
is_hxl = True
|
|
374
|
+
break
|
|
375
|
+
hapi_resource_metadata["is_hxl"] = is_hxl
|
|
376
|
+
|
|
365
377
|
return self.datasetinfo.get("hapi_resource_metadata")
|
|
366
378
|
|
|
367
379
|
def add_population(self) -> None:
|
|
@@ -33,7 +33,7 @@ class ResourceDownloader(BaseScraper):
|
|
|
33
33
|
"""
|
|
34
34
|
reader = self.get_reader("hdx")
|
|
35
35
|
resource = reader.read_hdx_metadata(self.datasetinfo)
|
|
36
|
-
url, path = reader.download_resource(self.name
|
|
36
|
+
url, path = reader.download_resource(resource, file_prefix=self.name)
|
|
37
37
|
logger.info(f"Downloading {url} to {path}")
|
|
38
38
|
copy2(path, join(self.folder, self.datasetinfo["filename"]))
|
|
39
39
|
|
|
@@ -50,7 +50,9 @@ class TimeSeries(BaseScraper):
|
|
|
50
50
|
"output_hxl"
|
|
51
51
|
]
|
|
52
52
|
rows = [headers, hxltags]
|
|
53
|
-
file_headers, iterator = self.get_reader().read(
|
|
53
|
+
file_headers, iterator = self.get_reader(self.name).read(
|
|
54
|
+
self.datasetinfo, file_prefix=self.name
|
|
55
|
+
)
|
|
54
56
|
for inrow in iterator:
|
|
55
57
|
if isinstance(datecol, list):
|
|
56
58
|
dates = [str(inrow[x]) for x in datecol]
|
hdx/scraper/runner.py
CHANGED
|
@@ -1145,7 +1145,9 @@ class Runner:
|
|
|
1145
1145
|
return sorted(source_urls)
|
|
1146
1146
|
|
|
1147
1147
|
def get_hapi_metadata(
|
|
1148
|
-
self,
|
|
1148
|
+
self,
|
|
1149
|
+
names: Optional[ListTuple[str]] = None,
|
|
1150
|
+
has_run: bool = True,
|
|
1149
1151
|
) -> Dict:
|
|
1150
1152
|
"""Get HAPI metadata for all datasets. A dictionary is returned that
|
|
1151
1153
|
maps from dataset ids to a dictionary. The dictionary has keys for
|
|
@@ -1154,6 +1156,7 @@ class Runner:
|
|
|
1154
1156
|
|
|
1155
1157
|
Args:
|
|
1156
1158
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
1159
|
+
has_run (bool): Only get results for scrapers marked as having run. Defaults to True.
|
|
1157
1160
|
|
|
1158
1161
|
Returns:
|
|
1159
1162
|
Dict: HAPI metadata for all datasets
|
|
@@ -1163,7 +1166,7 @@ class Runner:
|
|
|
1163
1166
|
results = {}
|
|
1164
1167
|
for name in names:
|
|
1165
1168
|
scraper = self.get_scraper(name)
|
|
1166
|
-
if not scraper.has_run:
|
|
1169
|
+
if has_run and not scraper.has_run:
|
|
1167
1170
|
continue
|
|
1168
1171
|
hapi_dataset_metadata = scraper.get_hapi_dataset_metadata()
|
|
1169
1172
|
hapi_resource_metadata = scraper.get_hapi_resource_metadata()
|
hdx/scraper/utilities/reader.py
CHANGED
|
@@ -194,7 +194,7 @@ class Read(Retrieve):
|
|
|
194
194
|
|
|
195
195
|
Args:
|
|
196
196
|
datasetinfo (Dict): Dictionary of information about dataset
|
|
197
|
-
**kwargs:
|
|
197
|
+
**kwargs: Parameters to pass to download_file call
|
|
198
198
|
|
|
199
199
|
Returns:
|
|
200
200
|
Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
|
|
@@ -219,6 +219,11 @@ class Read(Retrieve):
|
|
|
219
219
|
url = datasetinfo["url"]
|
|
220
220
|
if isinstance(url, list):
|
|
221
221
|
url = [self.get_url(x, **kwargs) for x in url]
|
|
222
|
+
filename = kwargs.get("filename")
|
|
223
|
+
if not filename:
|
|
224
|
+
filename = datasetinfo.get("filename")
|
|
225
|
+
if filename:
|
|
226
|
+
kwargs["filename"] = filename
|
|
222
227
|
return self.get_tabular_rows(
|
|
223
228
|
url,
|
|
224
229
|
dict_form=True,
|
|
@@ -249,57 +254,67 @@ class Read(Retrieve):
|
|
|
249
254
|
dataset.save_to_json(saved_path, follow_urls=True)
|
|
250
255
|
return dataset
|
|
251
256
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
of the file. The identifier is information to identify what called
|
|
257
|
-
this function and is used to prefix the filename of the file.
|
|
257
|
+
@staticmethod
|
|
258
|
+
def construct_filename(name: str, format: str):
|
|
259
|
+
"""Construct filename from name and format. The filename of the file
|
|
260
|
+
comes from the name and format.
|
|
258
261
|
|
|
259
262
|
Args:
|
|
260
|
-
|
|
261
|
-
|
|
263
|
+
name (str): Name for the download
|
|
264
|
+
format (str): Format of download
|
|
262
265
|
|
|
263
266
|
Returns:
|
|
264
|
-
|
|
267
|
+
str: Filename of file
|
|
265
268
|
"""
|
|
266
|
-
filename =
|
|
267
|
-
file_type = f".{
|
|
269
|
+
filename = name.lower()
|
|
270
|
+
file_type = f".{format}"
|
|
268
271
|
if filename.endswith(file_type):
|
|
269
272
|
filename = filename[: -len(file_type)]
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
+
return f"{slugify(filename, separator='_')}{file_type}"
|
|
274
|
+
|
|
275
|
+
def construct_filename_and_download(
|
|
276
|
+
self, name: str, format: str, url: str, **kwargs: Any
|
|
277
|
+
) -> Tuple[str, str]:
|
|
278
|
+
"""Construct filename, download file and return the url downloaded and
|
|
279
|
+
the path of the file. The filename of the file comes from the name and
|
|
280
|
+
format.
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
name (str): Name for the download
|
|
284
|
+
format (str): Format of download
|
|
285
|
+
url (str): URL of download
|
|
286
|
+
**kwargs: Parameters to pass to download_file call
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
Tuple[str, str]: (URL that was downloaded, path to downloaded file)
|
|
290
|
+
"""
|
|
291
|
+
filename = kwargs.get("filename")
|
|
292
|
+
if not filename:
|
|
293
|
+
kwargs["filename"] = self.construct_filename(name, format)
|
|
294
|
+
url = munge_url(url, InputOptions())
|
|
295
|
+
path = self.download_file(url, **kwargs)
|
|
273
296
|
return url, path
|
|
274
297
|
|
|
275
|
-
def
|
|
276
|
-
self,
|
|
277
|
-
) ->
|
|
278
|
-
"""
|
|
279
|
-
|
|
280
|
-
|
|
298
|
+
def download_resource(
|
|
299
|
+
self, resource: Resource, **kwargs: Any
|
|
300
|
+
) -> Tuple[str, str]:
|
|
301
|
+
"""Download HDX resource as a file and return the url downloaded and
|
|
302
|
+
the path of the file. The filename of the file comes from the name and
|
|
303
|
+
format.
|
|
281
304
|
|
|
282
305
|
Args:
|
|
283
|
-
identifier (str): Information to identify caller
|
|
284
306
|
resource (Resource): HDX resource
|
|
285
|
-
|
|
307
|
+
**kwargs: Parameters to pass to download_file call
|
|
286
308
|
|
|
287
309
|
Returns:
|
|
288
|
-
|
|
310
|
+
Tuple[str, str]: (URL that was downloaded, path to downloaded file)
|
|
289
311
|
"""
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
logger.warning(
|
|
297
|
-
f"Could not process {data_type} for {identifier}. Maybe there are no HXL tags?"
|
|
298
|
-
)
|
|
299
|
-
return None
|
|
300
|
-
except Exception:
|
|
301
|
-
logger.exception(f"Error reading {data_type} for {identifier}!")
|
|
302
|
-
raise
|
|
312
|
+
return self.construct_filename_and_download(
|
|
313
|
+
resource["name"],
|
|
314
|
+
resource.get_file_type(),
|
|
315
|
+
resource["url"],
|
|
316
|
+
**kwargs,
|
|
317
|
+
)
|
|
303
318
|
|
|
304
319
|
def get_hapi_dataset_metadata(self, dataset: Dataset) -> Dict:
|
|
305
320
|
"""Get HAPI dataset metadata from HDX dataset
|
|
@@ -337,6 +352,82 @@ class Read(Retrieve):
|
|
|
337
352
|
"download_url": resource["url"],
|
|
338
353
|
}
|
|
339
354
|
|
|
355
|
+
def read_hxl_resource(
|
|
356
|
+
self, resource: Resource, **kwargs: Any
|
|
357
|
+
) -> Optional[hxl.Dataset]:
|
|
358
|
+
"""Read HDX resource as an HXL dataset.
|
|
359
|
+
|
|
360
|
+
Args:
|
|
361
|
+
resource (Resource): HDX resource
|
|
362
|
+
**kwargs: Parameters to pass to download_file call
|
|
363
|
+
|
|
364
|
+
Returns:
|
|
365
|
+
Optional[hxl.Dataset]: HXL dataset or None
|
|
366
|
+
"""
|
|
367
|
+
url = resource["url"]
|
|
368
|
+
try:
|
|
369
|
+
_, path = self.download_resource(resource, **kwargs)
|
|
370
|
+
data = hxl.data(path, InputOptions(allow_local=True)).cache()
|
|
371
|
+
data.display_tags
|
|
372
|
+
return data
|
|
373
|
+
except hxl.HXLException:
|
|
374
|
+
logger.warning(
|
|
375
|
+
f"Could not process {url}. Maybe there are no HXL tags?"
|
|
376
|
+
)
|
|
377
|
+
return None
|
|
378
|
+
except Exception:
|
|
379
|
+
logger.exception(f"Error reading {url}!")
|
|
380
|
+
raise
|
|
381
|
+
|
|
382
|
+
def hxl_info_file(
|
|
383
|
+
self, name: str, format: str, url: str, **kwargs: Any
|
|
384
|
+
) -> Optional[Dict]:
|
|
385
|
+
"""Get HXL info on file. The filename comes from the name and
|
|
386
|
+
format.
|
|
387
|
+
|
|
388
|
+
Args:
|
|
389
|
+
name (str): Name for the download
|
|
390
|
+
format (str): Format of download
|
|
391
|
+
url (str): URL of download
|
|
392
|
+
**kwargs (Any): Parameters to pass to download_file call
|
|
393
|
+
|
|
394
|
+
Returns:
|
|
395
|
+
Optional[Dict]: Information about file or None
|
|
396
|
+
"""
|
|
397
|
+
try:
|
|
398
|
+
_, path = self.construct_filename_and_download(
|
|
399
|
+
name, format, url, **kwargs
|
|
400
|
+
)
|
|
401
|
+
return hxl.info(path, InputOptions(allow_local=True))
|
|
402
|
+
except hxl.HXLException:
|
|
403
|
+
logger.warning(
|
|
404
|
+
f"Could not process {url}. Maybe there are no HXL tags?"
|
|
405
|
+
)
|
|
406
|
+
return None
|
|
407
|
+
except Exception:
|
|
408
|
+
logger.exception(f"Error reading {url}!")
|
|
409
|
+
raise
|
|
410
|
+
|
|
411
|
+
def hxl_info_hapi_resource_metadata(
|
|
412
|
+
self,
|
|
413
|
+
hapi_resource_metadata: Dict,
|
|
414
|
+
**kwargs: Any,
|
|
415
|
+
) -> Optional[Dict]:
|
|
416
|
+
"""Get HXL info on HAPI resource. The filename comes from the name and
|
|
417
|
+
format.
|
|
418
|
+
|
|
419
|
+
Args:
|
|
420
|
+
hapi_resource_metadata (Dict): HAPI resource metadata
|
|
421
|
+
**kwargs (Any): Parameters to pass to download_file call
|
|
422
|
+
|
|
423
|
+
Returns:
|
|
424
|
+
Optional[Dict]: Information about file or None
|
|
425
|
+
"""
|
|
426
|
+
name = hapi_resource_metadata["name"]
|
|
427
|
+
format = hapi_resource_metadata["format"]
|
|
428
|
+
url = hapi_resource_metadata["download_url"]
|
|
429
|
+
return self.hxl_info_file(name, format, url, **kwargs)
|
|
430
|
+
|
|
340
431
|
def read_hdx_metadata(
|
|
341
432
|
self, datasetinfo: Dict, do_resource_check: bool = True
|
|
342
433
|
) -> Optional[Resource]:
|
|
@@ -452,12 +543,23 @@ class Read(Retrieve):
|
|
|
452
543
|
|
|
453
544
|
Args:
|
|
454
545
|
datasetinfo (Dict): Dictionary of information about dataset
|
|
455
|
-
**kwargs:
|
|
546
|
+
**kwargs: Parameters to pass to download_file call
|
|
456
547
|
|
|
457
548
|
Returns:
|
|
458
549
|
Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
|
|
459
550
|
"""
|
|
460
|
-
self.read_hdx_metadata(datasetinfo)
|
|
551
|
+
resource = self.read_hdx_metadata(datasetinfo)
|
|
552
|
+
filename = kwargs.get("filename")
|
|
553
|
+
if filename:
|
|
554
|
+
del kwargs["filename"]
|
|
555
|
+
datasetinfo["filename"] = filename
|
|
556
|
+
filename = datasetinfo.get("filename")
|
|
557
|
+
if resource and not filename:
|
|
558
|
+
# prefix is added later
|
|
559
|
+
filename = self.construct_filename(
|
|
560
|
+
resource["name"], resource.get_file_type()
|
|
561
|
+
)
|
|
562
|
+
datasetinfo["filename"] = filename
|
|
461
563
|
return self.read_tabular(datasetinfo, **kwargs)
|
|
462
564
|
|
|
463
565
|
def read(
|
|
@@ -469,7 +571,7 @@ class Read(Retrieve):
|
|
|
469
571
|
|
|
470
572
|
Args:
|
|
471
573
|
datasetinfo (Dict): Dictionary of information about dataset
|
|
472
|
-
**kwargs:
|
|
574
|
+
**kwargs: Parameters to pass to download_file call
|
|
473
575
|
|
|
474
576
|
Returns:
|
|
475
577
|
Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,12 +26,13 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.1.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.1.4
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.6.3
|
|
30
31
|
Requires-Dist: regex
|
|
31
32
|
Provides-Extra: dev
|
|
32
33
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
33
34
|
Provides-Extra: pandas
|
|
34
|
-
Requires-Dist: pandas>=2.1.
|
|
35
|
+
Requires-Dist: pandas>=2.1.3; extra == 'pandas'
|
|
35
36
|
Provides-Extra: test
|
|
36
37
|
Requires-Dist: pytest; extra == 'test'
|
|
37
38
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
@@ -43,19 +44,19 @@ Description-Content-Type: text/markdown
|
|
|
43
44
|
[](https://pycqa.github.io/isort/)
|
|
44
45
|
[](https://pypistats.org/packages/hdx-python-scraper)
|
|
45
46
|
|
|
46
|
-
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
47
|
-
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
48
|
-
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
49
|
-
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
50
|
-
and/or Excel and includes the addition of
|
|
51
|
-
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
52
|
-
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
53
|
-
specification and the framework handles the execution of both configurable and custom
|
|
47
|
+
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
48
|
+
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
49
|
+
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
50
|
+
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
51
|
+
and/or Excel and includes the addition of
|
|
52
|
+
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
53
|
+
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
54
|
+
specification and the framework handles the execution of both configurable and custom
|
|
54
55
|
scrapers.
|
|
55
56
|
|
|
56
|
-
For more information, please read the
|
|
57
|
-
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
57
|
+
For more information, please read the
|
|
58
|
+
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
58
59
|
|
|
59
|
-
This library is part of the
|
|
60
|
-
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
60
|
+
This library is part of the
|
|
61
|
+
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
61
62
|
humanitarian related data, please upload your datasets to HDX.
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
hdx/scraper/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
|
|
2
|
-
hdx/scraper/_version.py,sha256=
|
|
3
|
-
hdx/scraper/base_scraper.py,sha256=
|
|
4
|
-
hdx/scraper/runner.py,sha256
|
|
2
|
+
hdx/scraper/_version.py,sha256=ChsIHG8bRc-eXUbXOgv4Fm4DstSKLq9FpsTAsaMeR08,411
|
|
3
|
+
hdx/scraper/base_scraper.py,sha256=OZoC8X3woecKbMxTtjx_aRr027SeJCS2gbtyB20n31o,15079
|
|
4
|
+
hdx/scraper/runner.py,sha256=fojFcfEh3mZXe1dY3Jpis22dr9Zc6VY-0XTMiabuXFE,51366
|
|
5
5
|
hdx/scraper/configurable/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
hdx/scraper/configurable/aggregator.py,sha256=xC7bOF-wrQ17LlvdjSZUnUGuZHlNMH5jlmLSgyz5pe0,14976
|
|
7
|
-
hdx/scraper/configurable/resource_downloader.py,sha256=
|
|
7
|
+
hdx/scraper/configurable/resource_downloader.py,sha256=lCIQpNZtcCTRc3z0FFM2_JxRtoua9GEq2XiKRZ9fqZk,1549
|
|
8
8
|
hdx/scraper/configurable/rowparser.py,sha256=h7a0W2xvVJSAu94nS5CAXvZSZXdwZ-isFHHNaIce0gM,14635
|
|
9
|
-
hdx/scraper/configurable/scraper.py,sha256=
|
|
10
|
-
hdx/scraper/configurable/timeseries.py,sha256=
|
|
9
|
+
hdx/scraper/configurable/scraper.py,sha256=kBkS-bm4zIQ9jbzFcwVoAnyji_9PTV_KKrNJVLTuYa4,20498
|
|
10
|
+
hdx/scraper/configurable/timeseries.py,sha256=lWoQJApml-onTN4l9YnTAYnhj5uuTc-Luk05DIT7O9k,3036
|
|
11
11
|
hdx/scraper/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
hdx/scraper/outputs/base.py,sha256=UBVFPANdd7wawifbKkPQWKwVC-Tr7Jg5ax1eLTmWX3M,2566
|
|
13
13
|
hdx/scraper/outputs/excelfile.py,sha256=bKBj1aYUJCIXhvpmGXAJ0FLoKwjnj-2E0LlR64RcFdY,2197
|
|
@@ -15,11 +15,11 @@ hdx/scraper/outputs/googlesheets.py,sha256=gPjzikxP4wmMBGL5LW50MXUcDq5nwCRMW74G1
|
|
|
15
15
|
hdx/scraper/outputs/json.py,sha256=uw9_yAVpHVPWQ8LtMUZKTH88okyrHQs_SVjT6HJOxZ4,9498
|
|
16
16
|
hdx/scraper/utilities/__init__.py,sha256=iBjD7bc8wEzQhwkcx2mOZwYmu28VHjl5px66quqWJ8E,2491
|
|
17
17
|
hdx/scraper/utilities/fallbacks.py,sha256=08tvqVFuFV_gsvS7jqEiJUr7gqNILKCakDa8xMuIMpI,6186
|
|
18
|
-
hdx/scraper/utilities/reader.py,sha256=
|
|
18
|
+
hdx/scraper/utilities/reader.py,sha256=9cXrk8_NrE4kHIm3wrM3KHgKX6bho_eCyibMDBairiU,21499
|
|
19
19
|
hdx/scraper/utilities/region_lookup.py,sha256=VSfIoBGmhS0lNgwe4kKIhHqP7k0DlJYI2JDdABAAmoM,3917
|
|
20
20
|
hdx/scraper/utilities/sources.py,sha256=h27PjBADqIhqDwmhzMXt1OjwJWZc2iVnIBwJuAJKHwo,11204
|
|
21
21
|
hdx/scraper/utilities/writer.py,sha256=x-3xnOjvZEMUR2Op42eiBbaSmtNM6MY86adnL_Cob9s,16726
|
|
22
|
-
hdx_python_scraper-2.
|
|
23
|
-
hdx_python_scraper-2.
|
|
24
|
-
hdx_python_scraper-2.
|
|
25
|
-
hdx_python_scraper-2.
|
|
22
|
+
hdx_python_scraper-2.3.0.dist-info/METADATA,sha256=E5b13txhk44RjnOSKJu_SkaypNFXxe5YDLUBCWKA7Pk,3318
|
|
23
|
+
hdx_python_scraper-2.3.0.dist-info/WHEEL,sha256=0wCxn4rnLsvRWBK-NC7mK2QMIQ_aZSl7Qvk-8IWl_pY,87
|
|
24
|
+
hdx_python_scraper-2.3.0.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
25
|
+
hdx_python_scraper-2.3.0.dist-info/RECORD,,
|
|
File without changes
|