hdx-python-scraper 2.6.2__py3-none-any.whl → 2.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx/scraper/framework/_version.py +9 -4
- hdx/scraper/framework/base_scraper.py +5 -14
- hdx/scraper/framework/outputs/googlesheets.py +1 -3
- hdx/scraper/framework/outputs/json.py +1 -3
- hdx/scraper/framework/runner.py +18 -50
- hdx/scraper/framework/scrapers/aggregator.py +3 -9
- hdx/scraper/framework/scrapers/configurable_scraper.py +10 -35
- hdx/scraper/framework/scrapers/rowparser.py +3 -9
- hdx/scraper/framework/scrapers/timeseries.py +1 -3
- hdx/scraper/framework/utilities/hapi_admins.py +23 -15
- hdx/scraper/framework/utilities/lookup.py +2 -6
- hdx/scraper/framework/utilities/reader.py +16 -38
- hdx/scraper/framework/utilities/sources.py +3 -9
- hdx/scraper/framework/utilities/writer.py +1 -3
- {hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/METADATA +8 -5
- hdx_python_scraper-2.6.4.dist-info/RECORD +31 -0
- hdx_python_scraper-2.6.2.dist-info/RECORD +0 -31
- {hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/WHEEL +0 -0
- {hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
# file generated by setuptools_scm
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
3
6
|
TYPE_CHECKING = False
|
|
4
7
|
if TYPE_CHECKING:
|
|
5
|
-
from typing import Tuple, Union
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
7
12
|
else:
|
|
8
13
|
VERSION_TUPLE = object
|
|
@@ -12,5 +17,5 @@ __version__: str
|
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
|
13
18
|
version_tuple: VERSION_TUPLE
|
|
14
19
|
|
|
15
|
-
__version__ = version = '2.6.2'
|
|
16
|
-
__version_tuple__ = version_tuple = (2, 6, 2)
|
|
20
|
+
__version__ = version = '2.6.4'
|
|
21
|
+
__version_tuple__ = version_tuple = (2, 6, 4)
|
|
@@ -79,8 +79,7 @@ class BaseScraper(ABC):
|
|
|
79
79
|
None
|
|
80
80
|
"""
|
|
81
81
|
self.values: Dict[str, Tuple] = {
|
|
82
|
-
level: tuple({} for _ in value[0])
|
|
83
|
-
for level, value in self.headers.items()
|
|
82
|
+
level: tuple({} for _ in value[0]) for level, value in self.headers.items()
|
|
84
83
|
}
|
|
85
84
|
self.sources: Dict[str, List] = {level: [] for level in self.headers}
|
|
86
85
|
self.source_configuration = deepcopy(source_configuration)
|
|
@@ -137,9 +136,7 @@ class BaseScraper(ABC):
|
|
|
137
136
|
return
|
|
138
137
|
if self.datasetinfo.get("no_sources", False):
|
|
139
138
|
return
|
|
140
|
-
should_overwrite_sources = self.datasetinfo.get(
|
|
141
|
-
"should_overwrite_sources"
|
|
142
|
-
)
|
|
139
|
+
should_overwrite_sources = self.datasetinfo.get("should_overwrite_sources")
|
|
143
140
|
if should_overwrite_sources is not None:
|
|
144
141
|
self.source_configuration["should_overwrite_sources"] = (
|
|
145
142
|
should_overwrite_sources
|
|
@@ -209,16 +206,12 @@ class BaseScraper(ABC):
|
|
|
209
206
|
)
|
|
210
207
|
|
|
211
208
|
for i, hxltag in enumerate(self.headers[level][1]):
|
|
212
|
-
suffix_attribute = self.source_configuration.get(
|
|
213
|
-
"suffix_attribute"
|
|
214
|
-
)
|
|
209
|
+
suffix_attribute = self.source_configuration.get("suffix_attribute")
|
|
215
210
|
if suffix_attribute:
|
|
216
211
|
add_source(hxltag, suffix_attribute)
|
|
217
212
|
continue
|
|
218
213
|
values = self.get_values(level)[i]
|
|
219
|
-
admin_sources = self.source_configuration.get(
|
|
220
|
-
"admin_sources", False
|
|
221
|
-
)
|
|
214
|
+
admin_sources = self.source_configuration.get("admin_sources", False)
|
|
222
215
|
if not admin_sources:
|
|
223
216
|
raise ValueError("Invalid source configuration!")
|
|
224
217
|
admin_mapping = self.source_configuration.get("admin_mapping")
|
|
@@ -260,9 +253,7 @@ class BaseScraper(ABC):
|
|
|
260
253
|
"""
|
|
261
254
|
if datasetinfo is None:
|
|
262
255
|
datasetinfo = self.datasetinfo
|
|
263
|
-
date = Sources.get_hxltag_source_date(
|
|
264
|
-
datasetinfo, hxltag, fallback=True
|
|
265
|
-
)
|
|
256
|
+
date = Sources.get_hxltag_source_date(datasetinfo, hxltag, fallback=True)
|
|
266
257
|
if key is None:
|
|
267
258
|
key = self.name
|
|
268
259
|
dict_of_lists_add(
|
|
@@ -80,9 +80,7 @@ class GoogleSheets(BaseOutput):
|
|
|
80
80
|
headers = list(values.columns.values)
|
|
81
81
|
rows = [headers]
|
|
82
82
|
if hxltags:
|
|
83
|
-
rows.append(
|
|
84
|
-
[hxltags.get(header, "") for header in headers]
|
|
85
|
-
)
|
|
83
|
+
rows.append([hxltags.get(header, "") for header in headers])
|
|
86
84
|
if limit is not None:
|
|
87
85
|
values = values.head(limit)
|
|
88
86
|
df = values.copy(deep=True)
|
|
@@ -219,9 +219,7 @@ class JsonFile(BaseOutput):
|
|
|
219
219
|
newjson = self.json.get(key)
|
|
220
220
|
filters = tabdetails.get("filters", {})
|
|
221
221
|
hxltags = tabdetails.get("output")
|
|
222
|
-
if (filters or hxltags or remove) and isinstance(
|
|
223
|
-
newjson, list
|
|
224
|
-
):
|
|
222
|
+
if (filters or hxltags or remove) and isinstance(newjson, list):
|
|
225
223
|
rows = []
|
|
226
224
|
for row in newjson:
|
|
227
225
|
ignore_row = False
|
hdx/scraper/framework/runner.py
CHANGED
|
@@ -48,9 +48,7 @@ class Runner:
|
|
|
48
48
|
self.scrapers = {}
|
|
49
49
|
self.scraper_names = []
|
|
50
50
|
|
|
51
|
-
def add_custom(
|
|
52
|
-
self, scraper: BaseScraper, force_add_to_run: bool = False
|
|
53
|
-
) -> str:
|
|
51
|
+
def add_custom(self, scraper: BaseScraper, force_add_to_run: bool = False) -> str:
|
|
54
52
|
"""Add custom scrapers that inherit BaseScraper. If running specific scrapers
|
|
55
53
|
rather than all, and you want to force the inclusion of the scraper in the run
|
|
56
54
|
regardless of the specific scrapers given, the parameter force_add_to_run
|
|
@@ -285,9 +283,7 @@ class Runner:
|
|
|
285
283
|
Returns:
|
|
286
284
|
Optional["Aggregator"]: scraper or None
|
|
287
285
|
"""
|
|
288
|
-
input_headers = self.get_headers(
|
|
289
|
-
names, [input_level], overrides=overrides
|
|
290
|
-
)
|
|
286
|
+
input_headers = self.get_headers(names, [input_level], overrides=overrides)
|
|
291
287
|
input_headers = input_headers.get(input_level)
|
|
292
288
|
if not input_headers:
|
|
293
289
|
return None
|
|
@@ -312,9 +308,7 @@ class Runner:
|
|
|
312
308
|
) = self.get_values_sourcesinfo_by_header(
|
|
313
309
|
input_level, names, overrides, True, use_hxl
|
|
314
310
|
)
|
|
315
|
-
scraper_self.set_input_values_sources(
|
|
316
|
-
input_values, input_sourcesinfo
|
|
317
|
-
)
|
|
311
|
+
scraper_self.set_input_values_sources(input_values, input_sourcesinfo)
|
|
318
312
|
|
|
319
313
|
scraper.pre_run = lambda: get_values_sourcesinfo_by_header(scraper)
|
|
320
314
|
return scraper
|
|
@@ -469,9 +463,7 @@ class Runner:
|
|
|
469
463
|
keys = []
|
|
470
464
|
for datasetinfo in configuration:
|
|
471
465
|
keys.append(
|
|
472
|
-
self.add_resource_downloader(
|
|
473
|
-
datasetinfo, folder, force_add_to_run
|
|
474
|
-
)
|
|
466
|
+
self.add_resource_downloader(datasetinfo, folder, force_add_to_run)
|
|
475
467
|
)
|
|
476
468
|
return keys
|
|
477
469
|
|
|
@@ -552,9 +544,7 @@ class Runner:
|
|
|
552
544
|
for key, value in kwargs.items():
|
|
553
545
|
setattr(scraper, key, value)
|
|
554
546
|
|
|
555
|
-
def add_pre_run(
|
|
556
|
-
self, name: str, fn: Callable[[BaseScraper], None]
|
|
557
|
-
) -> None:
|
|
547
|
+
def add_pre_run(self, name: str, fn: Callable[[BaseScraper], None]) -> None:
|
|
558
548
|
"""Add pre run instance method to scraper instance given scraper name. The
|
|
559
549
|
function should have one parameter. Since it is being added as an instance
|
|
560
550
|
method to the scraper instance, that parameter will be self and hence is of
|
|
@@ -570,9 +560,7 @@ class Runner:
|
|
|
570
560
|
scraper = self.get_scraper_exception(name)
|
|
571
561
|
scraper.pre_run = lambda: fn(scraper)
|
|
572
562
|
|
|
573
|
-
def add_post_run(
|
|
574
|
-
self, name: str, fn: Callable[[BaseScraper], None]
|
|
575
|
-
) -> None:
|
|
563
|
+
def add_post_run(self, name: str, fn: Callable[[BaseScraper], None]) -> None:
|
|
576
564
|
"""Add post run instance method to scraper instance given scraper name. The
|
|
577
565
|
function should have one parameter. Since it is being added as an instance
|
|
578
566
|
method to the scraper instance, that parameter will be self and hence is of
|
|
@@ -617,9 +605,7 @@ class Runner:
|
|
|
617
605
|
f"Using fallbacks for {scraper.name}! Error: {format_exc()}"
|
|
618
606
|
)
|
|
619
607
|
for level in scraper.headers.keys():
|
|
620
|
-
values, sources = Fallbacks.get(
|
|
621
|
-
level, scraper.headers[level]
|
|
622
|
-
)
|
|
608
|
+
values, sources = Fallbacks.get(level, scraper.headers[level])
|
|
623
609
|
scraper.values[level] = values
|
|
624
610
|
scraper.sources[level] = sources
|
|
625
611
|
scraper.add_population()
|
|
@@ -643,9 +629,7 @@ class Runner:
|
|
|
643
629
|
Returns:
|
|
644
630
|
bool: Return True if scraper was run, False if not
|
|
645
631
|
"""
|
|
646
|
-
if self.scrapers_to_run and not any(
|
|
647
|
-
x in name for x in self.scrapers_to_run
|
|
648
|
-
):
|
|
632
|
+
if self.scrapers_to_run and not any(x in name for x in self.scrapers_to_run):
|
|
649
633
|
return False
|
|
650
634
|
logger.info(f"Running {name}")
|
|
651
635
|
return self.run_one(name, force_run)
|
|
@@ -728,9 +712,7 @@ class Runner:
|
|
|
728
712
|
names = self.scrapers.keys()
|
|
729
713
|
results = {}
|
|
730
714
|
|
|
731
|
-
def add_level_results(
|
|
732
|
-
scraper_level, override_level, scrap, levels_used
|
|
733
|
-
):
|
|
715
|
+
def add_level_results(scraper_level, override_level, scrap, levels_used):
|
|
734
716
|
nonlocal results
|
|
735
717
|
|
|
736
718
|
if scraper_level in levels_used:
|
|
@@ -802,9 +784,7 @@ class Runner:
|
|
|
802
784
|
names = self.scrapers.keys()
|
|
803
785
|
results = {}
|
|
804
786
|
|
|
805
|
-
def add_level_results(
|
|
806
|
-
scraper_level, override_level, scrap, levels_used
|
|
807
|
-
):
|
|
787
|
+
def add_level_results(scraper_level, override_level, scrap, levels_used):
|
|
808
788
|
nonlocal results
|
|
809
789
|
|
|
810
790
|
if scraper_level in levels_used:
|
|
@@ -830,10 +810,8 @@ class Runner:
|
|
|
830
810
|
lev_headings = level_results["headers"][0]
|
|
831
811
|
lev_hxltags = level_results["headers"][1]
|
|
832
812
|
lev_values = level_results["values"]
|
|
833
|
-
scraper_should_overwrite_sources = (
|
|
834
|
-
scraper.source_configuration.get(
|
835
|
-
"should_overwrite_sources", should_overwrite_sources
|
|
836
|
-
)
|
|
813
|
+
scraper_should_overwrite_sources = scraper.source_configuration.get(
|
|
814
|
+
"should_overwrite_sources", should_overwrite_sources
|
|
837
815
|
)
|
|
838
816
|
for i, hxltag in enumerate(hxltags):
|
|
839
817
|
if hxltag in lev_hxltags:
|
|
@@ -903,9 +881,7 @@ class Runner:
|
|
|
903
881
|
Returns:
|
|
904
882
|
List[List]: Rows for a given level
|
|
905
883
|
"""
|
|
906
|
-
results = self.get_results(names, [level], overrides=overrides).get(
|
|
907
|
-
level
|
|
908
|
-
)
|
|
884
|
+
results = self.get_results(names, [level], overrides=overrides).get(level)
|
|
909
885
|
rows = []
|
|
910
886
|
if results:
|
|
911
887
|
all_headers = results["headers"]
|
|
@@ -956,9 +932,7 @@ class Runner:
|
|
|
956
932
|
else:
|
|
957
933
|
main_index = 0
|
|
958
934
|
|
|
959
|
-
def add_level_results(
|
|
960
|
-
scraper_level, override_level, scrap, levels_used
|
|
961
|
-
):
|
|
935
|
+
def add_level_results(scraper_level, override_level, scrap, levels_used):
|
|
962
936
|
nonlocal values, sourcesinfo
|
|
963
937
|
|
|
964
938
|
if scraper_level in levels_used:
|
|
@@ -1129,10 +1103,8 @@ class Runner:
|
|
|
1129
1103
|
levels_to_check = levels
|
|
1130
1104
|
else:
|
|
1131
1105
|
levels_to_check = scraper.sources.keys()
|
|
1132
|
-
scraper_should_overwrite_sources = (
|
|
1133
|
-
scraper.source_configuration.get(
|
1134
|
-
"should_overwrite_sources", should_overwrite_sources
|
|
1135
|
-
)
|
|
1106
|
+
scraper_should_overwrite_sources = scraper.source_configuration.get(
|
|
1107
|
+
"should_overwrite_sources", should_overwrite_sources
|
|
1136
1108
|
)
|
|
1137
1109
|
for level in levels_to_check:
|
|
1138
1110
|
Sources.add_sources_overwrite(
|
|
@@ -1145,9 +1117,7 @@ class Runner:
|
|
|
1145
1117
|
add_additional_sources()
|
|
1146
1118
|
return sources
|
|
1147
1119
|
|
|
1148
|
-
def get_source_urls(
|
|
1149
|
-
self, names: Optional[ListTuple[str]] = None
|
|
1150
|
-
) -> List[str]:
|
|
1120
|
+
def get_source_urls(self, names: Optional[ListTuple[str]] = None) -> List[str]:
|
|
1151
1121
|
"""Get source urls for scrapers limiting to those in names if given.
|
|
1152
1122
|
|
|
1153
1123
|
Args:
|
|
@@ -1260,9 +1230,7 @@ class Runner:
|
|
|
1260
1230
|
if not hapi_resource_metadata:
|
|
1261
1231
|
return
|
|
1262
1232
|
dataset_id = hapi_dataset_metadata["hdx_id"]
|
|
1263
|
-
hapi_metadata = hapi_results.get(
|
|
1264
|
-
dataset_id, copy(hapi_dataset_metadata)
|
|
1265
|
-
)
|
|
1233
|
+
hapi_metadata = hapi_results.get(dataset_id, copy(hapi_dataset_metadata))
|
|
1266
1234
|
results = hapi_metadata.get("results", {})
|
|
1267
1235
|
level_results = results.get(scraper_level)
|
|
1268
1236
|
if level_results is None:
|
|
@@ -115,9 +115,7 @@ class Aggregator(BaseScraper):
|
|
|
115
115
|
config_headers_or_hxltags = datasetinfo.get("input")
|
|
116
116
|
if config_headers_or_hxltags:
|
|
117
117
|
exists = True
|
|
118
|
-
for i, config_header_or_hxltag in enumerate(
|
|
119
|
-
config_headers_or_hxltags
|
|
120
|
-
):
|
|
118
|
+
for i, config_header_or_hxltag in enumerate(config_headers_or_hxltags):
|
|
121
119
|
try:
|
|
122
120
|
input_headers[main_index].index(config_header_or_hxltag)
|
|
123
121
|
except ValueError:
|
|
@@ -218,9 +216,7 @@ class Aggregator(BaseScraper):
|
|
|
218
216
|
novals = 0
|
|
219
217
|
for valuestr in valuelist:
|
|
220
218
|
value = ""
|
|
221
|
-
if isinstance(valuestr, int) or isinstance(
|
|
222
|
-
valuestr, float
|
|
223
|
-
):
|
|
219
|
+
if isinstance(valuestr, int) or isinstance(valuestr, float):
|
|
224
220
|
value = valuestr
|
|
225
221
|
else:
|
|
226
222
|
if valuestr:
|
|
@@ -359,9 +355,7 @@ class Aggregator(BaseScraper):
|
|
|
359
355
|
if "source" not in self.datasetinfo:
|
|
360
356
|
self.datasetinfo["source"] = ",".join(sourceinfo["source"])
|
|
361
357
|
if "source_url" not in self.datasetinfo:
|
|
362
|
-
self.datasetinfo["source_url"] = ",".join(
|
|
363
|
-
sourceinfo["source_url"]
|
|
364
|
-
)
|
|
358
|
+
self.datasetinfo["source_url"] = ",".join(sourceinfo["source_url"])
|
|
365
359
|
if "source" not in self.datasetinfo:
|
|
366
360
|
return
|
|
367
361
|
super().add_sources()
|
|
@@ -129,9 +129,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
129
129
|
"input_keep": datasetinfo.get("input_keep", []),
|
|
130
130
|
"input_append": datasetinfo.get("input_append", []),
|
|
131
131
|
"sum": datasetinfo.get("sum"),
|
|
132
|
-
"input_ignore_vals": datasetinfo.get(
|
|
133
|
-
"input_ignore_vals", []
|
|
134
|
-
),
|
|
132
|
+
"input_ignore_vals": datasetinfo.get("input_ignore_vals", []),
|
|
135
133
|
"output": datasetinfo.get("output", []),
|
|
136
134
|
"output_hxl": datasetinfo.get("output_hxl", []),
|
|
137
135
|
}
|
|
@@ -144,10 +142,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
144
142
|
Returns:
|
|
145
143
|
Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
|
|
146
144
|
"""
|
|
147
|
-
if (
|
|
148
|
-
"filename" not in self.datasetinfo
|
|
149
|
-
and "file_prefix" not in self.datasetinfo
|
|
150
|
-
):
|
|
145
|
+
if "filename" not in self.datasetinfo and "file_prefix" not in self.datasetinfo:
|
|
151
146
|
self.datasetinfo["file_prefix"] = self.name
|
|
152
147
|
return self.get_reader().read(self.datasetinfo, **self.variables)
|
|
153
148
|
|
|
@@ -162,9 +157,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
162
157
|
if not date or use_date_from_date_col:
|
|
163
158
|
date = self.rowparser.get_maxdate()
|
|
164
159
|
if date == 0:
|
|
165
|
-
raise ValueError(
|
|
166
|
-
"No date given in datasetinfo or as a column!"
|
|
167
|
-
)
|
|
160
|
+
raise ValueError("No date given in datasetinfo or as a column!")
|
|
168
161
|
if self.rowparser.datetype == "date":
|
|
169
162
|
if not isinstance(date, datetime):
|
|
170
163
|
date = parse_date(date)
|
|
@@ -361,11 +354,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
361
354
|
else:
|
|
362
355
|
input_keep_index = -1
|
|
363
356
|
val = valdicts[j][adm][input_keep_index]
|
|
364
|
-
if (
|
|
365
|
-
val is None
|
|
366
|
-
or val == ""
|
|
367
|
-
or val in input_ignore_vals
|
|
368
|
-
):
|
|
357
|
+
if val is None or val == "" or val in input_ignore_vals:
|
|
369
358
|
val = 0
|
|
370
359
|
else:
|
|
371
360
|
hasvalues = True
|
|
@@ -384,16 +373,12 @@ class ConfigurableScraper(BaseScraper):
|
|
|
384
373
|
for bracketed_str in matches.captures("rec"):
|
|
385
374
|
if any(bracketed_str in x for x in valcols):
|
|
386
375
|
continue
|
|
387
|
-
_, hasvalues_t = text_replacement(
|
|
388
|
-
bracketed_str, adm
|
|
389
|
-
)
|
|
376
|
+
_, hasvalues_t = text_replacement(bracketed_str, adm)
|
|
390
377
|
if not hasvalues_t:
|
|
391
378
|
hasvalues = False
|
|
392
379
|
break
|
|
393
380
|
if hasvalues:
|
|
394
|
-
formula, hasvalues_t = text_replacement(
|
|
395
|
-
process_col, adm
|
|
396
|
-
)
|
|
381
|
+
formula, hasvalues_t = text_replacement(process_col, adm)
|
|
397
382
|
if hasvalues_t:
|
|
398
383
|
formula = formula.replace(
|
|
399
384
|
"#population",
|
|
@@ -431,20 +416,14 @@ class ConfigurableScraper(BaseScraper):
|
|
|
431
416
|
continue
|
|
432
417
|
for j, valdict in enumerate(valdicts):
|
|
433
418
|
val = valdict[adm][i]
|
|
434
|
-
if (
|
|
435
|
-
val is None
|
|
436
|
-
or val == ""
|
|
437
|
-
or val in input_ignore_vals
|
|
438
|
-
):
|
|
419
|
+
if val is None or val == "" or val in input_ignore_vals:
|
|
439
420
|
continue
|
|
440
421
|
newvaldicts[j][adm] = eval(
|
|
441
422
|
f"newvaldicts[j].get(adm, 0.0) + {str(valdict[adm][i])}"
|
|
442
423
|
)
|
|
443
424
|
formula = formula.replace("#population", "#pzbgvjh")
|
|
444
425
|
for i in sorted_len_indices:
|
|
445
|
-
formula = formula.replace(
|
|
446
|
-
valcols[i], f"newvaldicts[{i}][adm]"
|
|
447
|
-
)
|
|
426
|
+
formula = formula.replace(valcols[i], f"newvaldicts[{i}][adm]")
|
|
448
427
|
formula = formula.replace("#pzbgvjh", population_str)
|
|
449
428
|
for adm in valdicts[0]:
|
|
450
429
|
try:
|
|
@@ -470,14 +449,10 @@ class ConfigurableScraper(BaseScraper):
|
|
|
470
449
|
header_to_hxltag = self.use_hxl(None, file_headers, iterator)
|
|
471
450
|
if "source_url" not in self.datasetinfo:
|
|
472
451
|
self.datasetinfo["source_url"] = self.datasetinfo["url"]
|
|
473
|
-
source_date = Sources.standardise_datasetinfo_source_date(
|
|
474
|
-
self.datasetinfo
|
|
475
|
-
)
|
|
452
|
+
source_date = Sources.standardise_datasetinfo_source_date(self.datasetinfo)
|
|
476
453
|
if not source_date or self.datasetinfo.get("force_date_today", False):
|
|
477
454
|
source_date = self.today
|
|
478
|
-
self.datasetinfo["source_date"] = {
|
|
479
|
-
"default_date": {"end": source_date}
|
|
480
|
-
}
|
|
455
|
+
self.datasetinfo["source_date"] = {"default_date": {"end": source_date}}
|
|
481
456
|
self.rowparser = RowParser(
|
|
482
457
|
self.name,
|
|
483
458
|
self.countryiso3s,
|
|
@@ -115,9 +115,7 @@ class RowParser:
|
|
|
115
115
|
self.maxdates = {i: date for i, _ in enumerate(subsets)}
|
|
116
116
|
else:
|
|
117
117
|
if self.datelevel > len(self.admcols):
|
|
118
|
-
raise ValueError(
|
|
119
|
-
"No admin columns specified for required level_type!"
|
|
120
|
-
)
|
|
118
|
+
raise ValueError("No admin columns specified for required level_type!")
|
|
121
119
|
self.maxdates = {
|
|
122
120
|
i: {adm: date for adm in self.adms[self.datelevel]}
|
|
123
121
|
for i, _ in enumerate(subsets)
|
|
@@ -150,9 +148,7 @@ class RowParser:
|
|
|
150
148
|
header = hxltag.display_tag
|
|
151
149
|
else:
|
|
152
150
|
header = hxltag.header
|
|
153
|
-
dict_of_lists_add(
|
|
154
|
-
self.filters, header, row.get("#country+code")
|
|
155
|
-
)
|
|
151
|
+
dict_of_lists_add(self.filters, header, row.get("#country+code"))
|
|
156
152
|
|
|
157
153
|
def get_filter_str_for_eval(self, filter: str) -> str:
|
|
158
154
|
"""Replace filter string variables with columns in row of data
|
|
@@ -229,9 +225,7 @@ class RowParser:
|
|
|
229
225
|
newrow[self.header_to_hxltag[header]] = row[header]
|
|
230
226
|
yield newrow
|
|
231
227
|
|
|
232
|
-
def stop_rows(
|
|
233
|
-
self, iterator: Iterator[Dict]
|
|
234
|
-
) -> Generator[Dict, None, None]:
|
|
228
|
+
def stop_rows(self, iterator: Iterator[Dict]) -> Generator[Dict, None, None]:
|
|
235
229
|
"""Stop processing rows after condition met
|
|
236
230
|
|
|
237
231
|
Args:
|
|
@@ -46,9 +46,7 @@ class TimeSeries(BaseScraper):
|
|
|
46
46
|
datetype = self.datasetinfo["date_type"]
|
|
47
47
|
ignore_future_date = self.datasetinfo.get("ignore_future_date", True)
|
|
48
48
|
headers = [datecol] + self.datasetinfo["output"]
|
|
49
|
-
hxltags = [self.datasetinfo["date_hxl"]] + self.datasetinfo[
|
|
50
|
-
"output_hxl"
|
|
51
|
-
]
|
|
49
|
+
hxltags = [self.datasetinfo["date_hxl"]] + self.datasetinfo["output_hxl"]
|
|
52
50
|
rows = [headers, hxltags]
|
|
53
51
|
file_headers, iterator = self.get_reader().read(
|
|
54
52
|
self.datasetinfo, file_prefix=self.name
|
|
@@ -31,8 +31,20 @@ def complete_admins(
|
|
|
31
31
|
warnings = []
|
|
32
32
|
child = None
|
|
33
33
|
adm_level = len(provider_adm_names)
|
|
34
|
+
|
|
35
|
+
def check_unknown_pcode(adm_code: str, pcode: str) -> str:
|
|
36
|
+
if pcode:
|
|
37
|
+
warnings.append(f"PCode unknown {adm_code}->{pcode} ({warntxt})")
|
|
38
|
+
return pcode
|
|
39
|
+
else:
|
|
40
|
+
warnings.append(f"PCode unknown {adm_code}->''")
|
|
41
|
+
return ""
|
|
42
|
+
|
|
34
43
|
for i, provider_adm_name in reversed(list(enumerate(provider_adm_names))):
|
|
35
44
|
adm_code = adm_codes[i]
|
|
45
|
+
parent = admins[i].pcode_to_parent.get(adm_code)
|
|
46
|
+
if not parent and i > 0:
|
|
47
|
+
parent = adm_codes[i - 1]
|
|
36
48
|
if not provider_adm_name:
|
|
37
49
|
provider_adm_name = ""
|
|
38
50
|
provider_adm_names[i] = ""
|
|
@@ -40,9 +52,6 @@ def complete_admins(
|
|
|
40
52
|
pcode = admins[i + 1].pcode_to_parent.get(child)
|
|
41
53
|
warntxt = "parent"
|
|
42
54
|
elif provider_adm_name:
|
|
43
|
-
parent = admins[i].pcode_to_parent.get(adm_code)
|
|
44
|
-
if not parent and i > 0:
|
|
45
|
-
parent = adm_codes[i - 1]
|
|
46
55
|
pcode, _ = admins[i].get_pcode(
|
|
47
56
|
countryiso3,
|
|
48
57
|
provider_adm_name,
|
|
@@ -54,24 +63,23 @@ def complete_admins(
|
|
|
54
63
|
pcode = None
|
|
55
64
|
if adm_code:
|
|
56
65
|
if adm_code not in admins[i].pcodes:
|
|
57
|
-
if
|
|
58
|
-
|
|
59
|
-
|
|
66
|
+
if admins[i].looks_like_pcode(adm_code):
|
|
67
|
+
adj_adm_code = admins[i].convert_admin_pcode_length(
|
|
68
|
+
countryiso3, adm_code, parent=parent
|
|
60
69
|
)
|
|
61
|
-
|
|
70
|
+
if adj_adm_code:
|
|
71
|
+
warnings.append(f"PCode length {adm_code}->{adj_adm_code}")
|
|
72
|
+
adm_code = adj_adm_code
|
|
73
|
+
else:
|
|
74
|
+
adm_code = check_unknown_pcode(adm_code, pcode)
|
|
62
75
|
else:
|
|
63
|
-
|
|
64
|
-
adm_code = ""
|
|
76
|
+
adm_code = check_unknown_pcode(adm_code, pcode)
|
|
65
77
|
elif pcode and adm_code != pcode:
|
|
66
78
|
if child:
|
|
67
|
-
warnings.append(
|
|
68
|
-
f"PCode mismatch {adm_code}->{pcode} ({warntxt})"
|
|
69
|
-
)
|
|
79
|
+
warnings.append(f"PCode mismatch {adm_code}->{pcode} ({warntxt})")
|
|
70
80
|
adm_code = pcode
|
|
71
81
|
else:
|
|
72
|
-
warnings.append(
|
|
73
|
-
f"PCode mismatch {adm_code} != {provider_adm_name}"
|
|
74
|
-
)
|
|
82
|
+
warnings.append(f"PCode mismatch {adm_code} != {provider_adm_name}")
|
|
75
83
|
elif pcode:
|
|
76
84
|
adm_code = pcode
|
|
77
85
|
else:
|
|
@@ -21,9 +21,7 @@ class Lookup:
|
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
def __init__(self, yaml_config_path: str, classobject: Type):
|
|
24
|
-
configuration = load_yaml(
|
|
25
|
-
script_dir_plus_file(yaml_config_path, classobject)
|
|
26
|
-
)
|
|
24
|
+
configuration = load_yaml(script_dir_plus_file(yaml_config_path, classobject))
|
|
27
25
|
self._configuration = configuration
|
|
28
26
|
initial_lookup = configuration.get("initial_lookup", {})
|
|
29
27
|
self._code_lookup = copy(initial_lookup)
|
|
@@ -91,9 +89,7 @@ class Lookup:
|
|
|
91
89
|
unmatched=self._unmatched,
|
|
92
90
|
)
|
|
93
91
|
|
|
94
|
-
def get_name(
|
|
95
|
-
self, code: str, default: Optional[str] = None
|
|
96
|
-
) -> Optional[str]:
|
|
92
|
+
def get_name(self, code: str, default: Optional[str] = None) -> Optional[str]:
|
|
97
93
|
"""Get name from code
|
|
98
94
|
|
|
99
95
|
Args:
|
|
@@ -194,9 +194,7 @@ class Read(Retrieve):
|
|
|
194
194
|
today=self.today,
|
|
195
195
|
)
|
|
196
196
|
|
|
197
|
-
def setup_tabular(
|
|
198
|
-
self, datasetinfo: Dict, kwargs: Dict
|
|
199
|
-
) -> Union[str, List]:
|
|
197
|
+
def setup_tabular(self, datasetinfo: Dict, kwargs: Dict) -> Union[str, List]:
|
|
200
198
|
"""Setup kwargs for tabular source eg. csv, xls, xlsx from
|
|
201
199
|
datasetinfo and return url.
|
|
202
200
|
|
|
@@ -330,9 +328,7 @@ class Read(Retrieve):
|
|
|
330
328
|
for file_path in sorted(glob.glob(f"{saved_path}_*.json")):
|
|
331
329
|
datasets.append(Dataset.load_from_json(file_path))
|
|
332
330
|
else:
|
|
333
|
-
datasets = Dataset.search_in_hdx(
|
|
334
|
-
query, configuration, page_size, **kwargs
|
|
335
|
-
)
|
|
331
|
+
datasets = Dataset.search_in_hdx(query, configuration, page_size, **kwargs)
|
|
336
332
|
if self.save:
|
|
337
333
|
for i, dataset in enumerate(datasets):
|
|
338
334
|
file_path = f"{saved_path}_{i}.json"
|
|
@@ -382,9 +378,7 @@ class Read(Retrieve):
|
|
|
382
378
|
path = self.download_file(url, **kwargs)
|
|
383
379
|
return url, path
|
|
384
380
|
|
|
385
|
-
def download_resource(
|
|
386
|
-
self, resource: Resource, **kwargs: Any
|
|
387
|
-
) -> Tuple[str, str]:
|
|
381
|
+
def download_resource(self, resource: Resource, **kwargs: Any) -> Tuple[str, str]:
|
|
388
382
|
"""Download HDX resource os a file and return the url downloaded and
|
|
389
383
|
the path of the file. The filename of the file comes from the name and
|
|
390
384
|
format.
|
|
@@ -471,9 +465,7 @@ class Read(Retrieve):
|
|
|
471
465
|
data.display_tags
|
|
472
466
|
return data
|
|
473
467
|
except hxl.HXLException:
|
|
474
|
-
logger.warning(
|
|
475
|
-
f"Could not process {url}. Maybe there are no HXL tags?"
|
|
476
|
-
)
|
|
468
|
+
logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
|
|
477
469
|
return None
|
|
478
470
|
except Exception:
|
|
479
471
|
logger.exception(f"Error reading {url}!")
|
|
@@ -495,14 +487,10 @@ class Read(Retrieve):
|
|
|
495
487
|
Optional[Dict]: Information about file or None
|
|
496
488
|
"""
|
|
497
489
|
try:
|
|
498
|
-
_, path = self.construct_filename_and_download(
|
|
499
|
-
name, format, url, **kwargs
|
|
500
|
-
)
|
|
490
|
+
_, path = self.construct_filename_and_download(name, format, url, **kwargs)
|
|
501
491
|
return hxl.info(path, InputOptions(allow_local=True))
|
|
502
492
|
except hxl.HXLException:
|
|
503
|
-
logger.warning(
|
|
504
|
-
f"Could not process {url}. Maybe there are no HXL tags?"
|
|
505
|
-
)
|
|
493
|
+
logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
|
|
506
494
|
return None
|
|
507
495
|
except Exception:
|
|
508
496
|
logger.exception(f"Error reading {url}!")
|
|
@@ -586,23 +574,21 @@ class Read(Retrieve):
|
|
|
586
574
|
else:
|
|
587
575
|
url = resource["url"] # otherwise set the url key in
|
|
588
576
|
# datasetinfo to the resource url (by setting url here)
|
|
589
|
-
datasetinfo["hapi_resource_metadata"] = (
|
|
590
|
-
self.get_hapi_resource_metadata(resource)
|
577
|
+
datasetinfo["hapi_resource_metadata"] = self.get_hapi_resource_metadata(
|
|
578
|
+
resource
|
|
591
579
|
)
|
|
592
580
|
datasetinfo["url"] = url
|
|
593
581
|
if "source_date" not in datasetinfo:
|
|
594
|
-
datasetinfo["source_date"] = (
|
|
595
|
-
get_startend_dates_from_time_period(
|
596
|
-
dataset, today=self.today
|
|
597
|
-
)
|
|
582
|
+
datasetinfo["source_date"] = get_startend_dates_from_time_period(
|
|
583
|
+
dataset, today=self.today
|
|
598
584
|
)
|
|
599
585
|
if "source" not in datasetinfo:
|
|
600
586
|
datasetinfo["source"] = dataset["dataset_source"]
|
|
601
587
|
if "source_url" not in datasetinfo:
|
|
602
588
|
datasetinfo["source_url"] = dataset.get_hdx_url()
|
|
603
589
|
Sources.standardise_datasetinfo_source_date(datasetinfo)
|
|
604
|
-
datasetinfo["hapi_dataset_metadata"] = (
|
|
605
|
-
self.get_hapi_dataset_metadata(dataset, datasetinfo)
|
590
|
+
datasetinfo["hapi_dataset_metadata"] = self.get_hapi_dataset_metadata(
|
|
591
|
+
dataset, datasetinfo
|
|
606
592
|
)
|
|
607
593
|
return resource
|
|
608
594
|
|
|
@@ -669,18 +655,14 @@ class Read(Retrieve):
|
|
|
669
655
|
Returns:
|
|
670
656
|
Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
|
|
671
657
|
"""
|
|
672
|
-
resource = self.read_hdx_metadata(
|
|
673
|
-
datasetinfo, configuration=configuration
|
|
674
|
-
)
|
|
658
|
+
resource = self.read_hdx_metadata(datasetinfo, configuration=configuration)
|
|
675
659
|
filename = kwargs.get("filename")
|
|
676
660
|
if filename:
|
|
677
661
|
del kwargs["filename"]
|
|
678
662
|
datasetinfo["filename"] = filename
|
|
679
663
|
filename = datasetinfo.get("filename")
|
|
680
664
|
if resource and not filename:
|
|
681
|
-
filename = self.construct_filename(
|
|
682
|
-
resource["name"], resource.get_format()
|
|
683
|
-
)
|
|
665
|
+
filename = self.construct_filename(resource["name"], resource.get_format())
|
|
684
666
|
file_prefix = kwargs.get("file_prefix")
|
|
685
667
|
if not file_prefix:
|
|
686
668
|
file_prefix = datasetinfo.get("file_prefix")
|
|
@@ -708,13 +690,9 @@ class Read(Retrieve):
|
|
|
708
690
|
format = datasetinfo["format"]
|
|
709
691
|
if format in ["json", "csv", "xls", "xlsx"]:
|
|
710
692
|
if "dataset" in datasetinfo:
|
|
711
|
-
headers, iterator = self.read_hdx(
|
|
712
|
-
datasetinfo, configuration, **kwargs
|
|
713
|
-
)
|
|
693
|
+
headers, iterator = self.read_hdx(datasetinfo, configuration, **kwargs)
|
|
714
694
|
else:
|
|
715
695
|
headers, iterator = self.read_tabular(datasetinfo, **kwargs)
|
|
716
696
|
else:
|
|
717
|
-
raise ValueError(
|
|
718
|
-
f"Invalid format {format} for {datasetinfo['name']}!"
|
|
719
|
-
)
|
|
697
|
+
raise ValueError(f"Invalid format {format} for {datasetinfo['name']}!")
|
|
720
698
|
return headers, iterator
|
|
@@ -91,9 +91,7 @@ class Sources:
|
|
|
91
91
|
else:
|
|
92
92
|
if isinstance(value, dict):
|
|
93
93
|
for startend, date in value.items():
|
|
94
|
-
set_source_date(
|
|
95
|
-
date, hxltag=key, startend=startend
|
|
96
|
-
)
|
|
94
|
+
set_source_date(date, hxltag=key, startend=startend)
|
|
97
95
|
else:
|
|
98
96
|
set_source_date(value, hxltag=key)
|
|
99
97
|
else:
|
|
@@ -214,9 +212,7 @@ class Sources:
|
|
|
214
212
|
index = hxltags.index(hxltag)
|
|
215
213
|
sources[index] = source
|
|
216
214
|
else:
|
|
217
|
-
logger.warning(
|
|
218
|
-
f"Keeping existing source information for {hxltag}!"
|
|
219
|
-
)
|
|
215
|
+
logger.warning(f"Keeping existing source information for {hxltag}!")
|
|
220
216
|
else:
|
|
221
217
|
hxltags.append(hxltag)
|
|
222
218
|
sources.append(source)
|
|
@@ -282,9 +278,7 @@ class Sources:
|
|
|
282
278
|
if no_sources:
|
|
283
279
|
source_configuration["no_sources"] = True
|
|
284
280
|
return source_configuration
|
|
285
|
-
source_configuration["should_overwrite_sources"] = (
|
|
286
|
-
should_overwrite_sources
|
|
287
|
-
)
|
|
281
|
+
source_configuration["should_overwrite_sources"] = should_overwrite_sources
|
|
288
282
|
if suffix_attribute:
|
|
289
283
|
source_configuration["suffix_attribute"] = suffix_attribute
|
|
290
284
|
return source_configuration
|
|
@@ -309,9 +309,7 @@ class Writer:
|
|
|
309
309
|
|
|
310
310
|
fns.append(region_fn)
|
|
311
311
|
|
|
312
|
-
rows = self.runner.get_rows(
|
|
313
|
-
level, countries, headers, fns, names=names
|
|
314
|
-
)
|
|
312
|
+
rows = self.runner.get_rows(level, countries, headers, fns, names=names)
|
|
315
313
|
if rows:
|
|
316
314
|
self.update(tab, rows)
|
|
317
315
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.6.2
|
|
3
|
+
Version: 2.6.4
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,15 +26,18 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.3.
|
|
30
|
-
Requires-Dist: hdx-python-country>=3.
|
|
31
|
-
Requires-Dist: hdx-python-utilities>=3.8.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.3.9
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.9.2
|
|
31
|
+
Requires-Dist: hdx-python-utilities>=3.8.6
|
|
32
32
|
Requires-Dist: regex
|
|
33
33
|
Provides-Extra: dev
|
|
34
34
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
35
|
+
Provides-Extra: docs
|
|
36
|
+
Requires-Dist: mkapi; extra == 'docs'
|
|
35
37
|
Provides-Extra: pandas
|
|
36
|
-
Requires-Dist: pandas>=2.2.
|
|
38
|
+
Requires-Dist: pandas>=2.2.3; extra == 'pandas'
|
|
37
39
|
Provides-Extra: test
|
|
40
|
+
Requires-Dist: pandas>=2.2.3; extra == 'test'
|
|
38
41
|
Requires-Dist: pytest; extra == 'test'
|
|
39
42
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
40
43
|
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
hdx/scraper/framework/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
|
|
2
|
+
hdx/scraper/framework/_version.py,sha256=a5nalDjLY2yvq7ieXFfR076fN3sJh2mCxFSXqRSIcE0,511
|
|
3
|
+
hdx/scraper/framework/base_scraper.py,sha256=bv9FguvOD40nulgC16zmOsxyg3iAPUDn_zM1V-MVvSY,15292
|
|
4
|
+
hdx/scraper/framework/runner.py,sha256=M6YqiZvOvCewlGn2E0ksslkK7ZHRiWGnRVwQjus805c,53087
|
|
5
|
+
hdx/scraper/framework/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
hdx/scraper/framework/outputs/base.py,sha256=VASnjmw8yM_-c0-G5Ku2gDTuQiYcFxkE27i7jWJfg4c,2563
|
|
7
|
+
hdx/scraper/framework/outputs/excelfile.py,sha256=dkyaI3nKUTn_tpVTbb7NB8F3sKzZQ-7U8l825EdzQ18,2196
|
|
8
|
+
hdx/scraper/framework/outputs/googlesheets.py,sha256=--mri4hhWslfshcVExlobnHgt87aaAtGrmzW2RAk4Ic,3040
|
|
9
|
+
hdx/scraper/framework/outputs/json.py,sha256=NPOMfrG0brIPf3B7NENi-6LdCDbso-K-nPMAVMVa7JU,9455
|
|
10
|
+
hdx/scraper/framework/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
hdx/scraper/framework/scrapers/aggregator.py,sha256=pPKu8QR0_GWRhpSqPBA6bd0KKq-WgN6AEtv9_8gj9d8,14858
|
|
12
|
+
hdx/scraper/framework/scrapers/configurable_scraper.py,sha256=OW_Y5ESVb91hWMtpvZQAqXxP-VyPt9Af5IGvZk0xSuE,19994
|
|
13
|
+
hdx/scraper/framework/scrapers/resource_downloader.py,sha256=ZuSc5L4X4LWcWKniHS5BDmMuM97H8kWCzB1H_PNceGc,1548
|
|
14
|
+
hdx/scraper/framework/scrapers/rowparser.py,sha256=_xImgr6gXXfLRcLu1xEcXmSr6OCCsctkJXfKuzkw97w,15515
|
|
15
|
+
hdx/scraper/framework/scrapers/timeseries.py,sha256=FYk5-MoOnvAa6ym5UWNUWHLpKmJNgHBk1La_nHap18c,3004
|
|
16
|
+
hdx/scraper/framework/utilities/__init__.py,sha256=-zOJzat-fbv427FBIKDnWLs2QStXTBZahiNy_-pgPOc,2390
|
|
17
|
+
hdx/scraper/framework/utilities/fallbacks.py,sha256=t8oKE3_3I6fX4-kzvAdRIhdjg-9vWBGE6shd2_EvC4c,6184
|
|
18
|
+
hdx/scraper/framework/utilities/hapi_admins.py,sha256=XuZXAnkooCLg4tSKZfPqY4bK0rzBALejPxac7IFdwW4,4093
|
|
19
|
+
hdx/scraper/framework/utilities/lookup.py,sha256=WnZa3lY4matfAIsr-GnxurmYndBVbtzbcM9Twm7-4Ho,3483
|
|
20
|
+
hdx/scraper/framework/utilities/org_type.py,sha256=euQyRV01yA8kJ3nMFvZxnTRLnvCuxgV1ZZQx8gEOB8Y,183
|
|
21
|
+
hdx/scraper/framework/utilities/org_type_configuration.yaml,sha256=tTordLPgnE90FSJzbVJPEnE06KyhlQBsPlIu1IAw3iw,1841
|
|
22
|
+
hdx/scraper/framework/utilities/reader.py,sha256=VYi92sAxqZD0nFn9q8OSEEcxXtELNB6DNCF838ES0x4,27415
|
|
23
|
+
hdx/scraper/framework/utilities/region_lookup.py,sha256=82tl1A2GLcxhiTqd1etTpxE5T6anbM-9dHih2ZlN00o,3916
|
|
24
|
+
hdx/scraper/framework/utilities/sector.py,sha256=XGysivvPhTqQfK6z1y96sDJATk3zx7sS_qGqCa4PbaI,177
|
|
25
|
+
hdx/scraper/framework/utilities/sector_configuration.yaml,sha256=VKddsahminPOc3QKKieb1DvaYXkdPdhT5cPAL9_HjDw,4940
|
|
26
|
+
hdx/scraper/framework/utilities/sources.py,sha256=0aW0IbH8nsViDixjD-fIh3gO86vwklYkPU8cXxmJkz4,11379
|
|
27
|
+
hdx/scraper/framework/utilities/writer.py,sha256=yJQ_HcJj-l6DJW5Fl8nr1f3wLIJjogxmF22IU4ysj4c,16673
|
|
28
|
+
hdx_python_scraper-2.6.4.dist-info/METADATA,sha256=fnetPHzz1YtmMbyuOzA97cYlh2GBNowGWiii_adRU-k,3466
|
|
29
|
+
hdx_python_scraper-2.6.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
30
|
+
hdx_python_scraper-2.6.4.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
31
|
+
hdx_python_scraper-2.6.4.dist-info/RECORD,,
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
hdx/scraper/framework/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
|
|
2
|
-
hdx/scraper/framework/_version.py,sha256=U2b7313v-bM1h69WtyleJ6hXm9RX-9buCsEK4Qgb1fg,411
|
|
3
|
-
hdx/scraper/framework/base_scraper.py,sha256=J7AHhDFBehENragRvpZnV8Qi7IcfFql9U_UU1svNr5o,15424
|
|
4
|
-
hdx/scraper/framework/runner.py,sha256=an0c_tz46PCnyyUk0dgDV8xfkb7F3LHMUIhPyPSU8sM,53499
|
|
5
|
-
hdx/scraper/framework/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
hdx/scraper/framework/outputs/base.py,sha256=VASnjmw8yM_-c0-G5Ku2gDTuQiYcFxkE27i7jWJfg4c,2563
|
|
7
|
-
hdx/scraper/framework/outputs/excelfile.py,sha256=dkyaI3nKUTn_tpVTbb7NB8F3sKzZQ-7U8l825EdzQ18,2196
|
|
8
|
-
hdx/scraper/framework/outputs/googlesheets.py,sha256=gw9VM2UM3D6N7saUWaXiU2H_ihZn40b6J227I7t-SQs,3086
|
|
9
|
-
hdx/scraper/framework/outputs/json.py,sha256=nINV-P5gTGCrtq-zEWYT_Si4ggqhJvUWHCQHxTHUh64,9493
|
|
10
|
-
hdx/scraper/framework/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
hdx/scraper/framework/scrapers/aggregator.py,sha256=zC6kmk6hC7xZ-j72R75VEppJKk6kIF5RE18KYBoavX0,14972
|
|
12
|
-
hdx/scraper/framework/scrapers/configurable_scraper.py,sha256=Q9AeThIHZwWQetCejrWKOK3RH1Fh1RqXhbU4qDbq2d4,20608
|
|
13
|
-
hdx/scraper/framework/scrapers/resource_downloader.py,sha256=ZuSc5L4X4LWcWKniHS5BDmMuM97H8kWCzB1H_PNceGc,1548
|
|
14
|
-
hdx/scraper/framework/scrapers/rowparser.py,sha256=j_FxNYzhSB9QmBAzqNhuNVCjlJP4cJKS1l4qLzXG5qE,15613
|
|
15
|
-
hdx/scraper/framework/scrapers/timeseries.py,sha256=w0Ejg4nXaLSuq32zx7vlULMZuDZaTqRO94Dm-VKA9co,3026
|
|
16
|
-
hdx/scraper/framework/utilities/__init__.py,sha256=-zOJzat-fbv427FBIKDnWLs2QStXTBZahiNy_-pgPOc,2390
|
|
17
|
-
hdx/scraper/framework/utilities/fallbacks.py,sha256=t8oKE3_3I6fX4-kzvAdRIhdjg-9vWBGE6shd2_EvC4c,6184
|
|
18
|
-
hdx/scraper/framework/utilities/hapi_admins.py,sha256=k_VZtDuSDL3h_0RwZqu9x_fckRnvfQ62KPv5Q-IMVEo,3664
|
|
19
|
-
hdx/scraper/framework/utilities/lookup.py,sha256=4IkO35eBN5xz2H5y688C3L80zEvTjrHQgYiM4dFVpPo,3519
|
|
20
|
-
hdx/scraper/framework/utilities/org_type.py,sha256=euQyRV01yA8kJ3nMFvZxnTRLnvCuxgV1ZZQx8gEOB8Y,183
|
|
21
|
-
hdx/scraper/framework/utilities/org_type_configuration.yaml,sha256=tTordLPgnE90FSJzbVJPEnE06KyhlQBsPlIu1IAw3iw,1841
|
|
22
|
-
hdx/scraper/framework/utilities/reader.py,sha256=wM878LqmsuDfsbIGmDmrESRfSfRhJS2POF-dnQ1Ix58,27735
|
|
23
|
-
hdx/scraper/framework/utilities/region_lookup.py,sha256=82tl1A2GLcxhiTqd1etTpxE5T6anbM-9dHih2ZlN00o,3916
|
|
24
|
-
hdx/scraper/framework/utilities/sector.py,sha256=XGysivvPhTqQfK6z1y96sDJATk3zx7sS_qGqCa4PbaI,177
|
|
25
|
-
hdx/scraper/framework/utilities/sector_configuration.yaml,sha256=VKddsahminPOc3QKKieb1DvaYXkdPdhT5cPAL9_HjDw,4940
|
|
26
|
-
hdx/scraper/framework/utilities/sources.py,sha256=3miKn_iruWpfpBA-7R9jFt6_EdfX1zvW4PvjifOCd7s,11503
|
|
27
|
-
hdx/scraper/framework/utilities/writer.py,sha256=YjOhVo3Ks0I5WH7oyM2Q7fO6ImGabYZ2CBhbYw_A0Kk,16695
|
|
28
|
-
hdx_python_scraper-2.6.2.dist-info/METADATA,sha256=KXHMz2OhUVSO1K02K7_bAdY91Nt0sagGcYcIbjr8iKU,3361
|
|
29
|
-
hdx_python_scraper-2.6.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
30
|
-
hdx_python_scraper-2.6.2.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
|
|
31
|
-
hdx_python_scraper-2.6.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|