hdx-python-scraper 2.3.0__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/pre-commit-config.yaml +1 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/PKG-INFO +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/main.md +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/pyproject.toml +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/requirements.txt +14 -14
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/_version.py +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/base_scraper.py +17 -7
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/scraper.py +13 -13
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/timeseries.py +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/runner.py +35 -15
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/__init__.py +5 -5
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/reader.py +18 -7
- hdx_python_scraper-2.3.2/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +1 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_closures.py +2 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_readers.py +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_runner.py +66 -34
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_custom.py +94 -50
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/black.toml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.gitignore +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/CONTRIBUTING.md +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/LICENSE +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/README.md +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/aggregator.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/resource_downloader.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/rowparser.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/base.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/json.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/sources.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/writer.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/config/project_configuration.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_enrolment_download-countries-enrollment-data-uis-feb-22.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/affected_targeted_reached.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/conftest.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_enrolment.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_output.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_global.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_national.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_sources.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_utils.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/unhcr_myanmar_idps.py +0 -0
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hdx-python-scraper
-Version: 2.3.0
+Version: 2.3.2
 Summary: HDX Python scraper utilities to assemble data from multiple sources
 Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
 Author-email: Michael Rans <rans@email.com>
@@ -26,7 +26,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Requires-Dist: gspread
-Requires-Dist: hdx-python-api>=6.
+Requires-Dist: hdx-python-api>=6.2.0
 Requires-Dist: hdx-python-country>=3.6.3
 Requires-Dist: regex
 Provides-Extra: dev
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/main.md
RENAMED

@@ -438,7 +438,7 @@ configured:

 The economicindex configurable scraper reads the dataset
 “covid-19-economic-exposure-index” on HDX, taking from it dataset source,
-reference period and using the url of the dataset in HDX as the source url. (In HDX data
+time period and using the url of the dataset in HDX as the source url. (In HDX data
 explorers, these are used by the DATA links.) The scraper framework finds the first
 resource that is of format `xlsx`, reads the “economic exposure” sheet and looks for the
 headers in row 1 (by default). Note that it is possible to specify a specific resource
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/requirements.txt
RENAMED

@@ -6,7 +6,7 @@
 #
 annotated-types==0.6.0
     # via pydantic
-attrs==23.
+attrs==23.2.0
     # via
     #   frictionless
     #   jsonlines
@@ -29,13 +29,13 @@ click==8.1.7
     # via typer
 colorama==0.4.6
     # via typer
-coverage[toml]==7.
+coverage[toml]==7.4.0
     # via pytest-cov
 cryptography==41.0.7
     # via pyopenssl
 defopt==6.4.0
     # via hdx-python-api
-distlib==0.3.
+distlib==0.3.8
     # via virtualenv
 dnspython==2.4.2
     # via email-validator
@@ -57,17 +57,17 @@ google-auth==2.25.2
     # via
     #   google-auth-oauthlib
     #   gspread
-google-auth-oauthlib==1.
+google-auth-oauthlib==1.2.0
     # via gspread
-gspread==5.12.
+gspread==5.12.4
     # via hdx-python-scraper (pyproject.toml)
-hdx-python-api==6.
+hdx-python-api==6.2.0
     # via hdx-python-scraper (pyproject.toml)
 hdx-python-country==3.6.3
     # via
     #   hdx-python-api
     #   hdx-python-scraper (pyproject.toml)
-hdx-python-utilities==3.6.
+hdx-python-utilities==3.6.3
     # via
     #   hdx-python-api
     #   hdx-python-country
@@ -119,7 +119,7 @@ nodeenv==1.8.0
     # via pre-commit
 num2words==0.5.13
     # via quantulum3
-numpy==1.26.
+numpy==1.26.3
     # via pandas
 oauthlib==3.2.2
     # via requests-oauthlib
@@ -153,11 +153,11 @@ pyasn1-modules==0.3.0
     # via google-auth
 pycparser==2.21
     # via cffi
-pydantic==2.5.
+pydantic==2.5.3
     # via
     #   frictionless
     #   inflect
-pydantic-core==2.14.
+pydantic-core==2.14.6
     # via pydantic
 pygments==2.17.2
     # via rich
@@ -169,7 +169,7 @@ pyphonetics==0.5.3
     # via hdx-python-country
 pyrsistent==0.20.0
     # via jsonschema
-pytest==7.4.
+pytest==7.4.4
     # via
     #   hdx-python-scraper (pyproject.toml)
     #   pytest-cov
@@ -198,7 +198,7 @@ quantulum3==0.9.0
     # via hdx-python-api
 ratelimit==2.2.1
     # via hdx-python-utilities
-regex==2023.
+regex==2023.12.25
     # via hdx-python-scraper (pyproject.toml)
 requests==2.31.0
     # via
@@ -238,7 +238,7 @@ sphinxcontrib-napoleon==0.7
     # via defopt
 stringcase==1.2.0
     # via frictionless
-structlog==23.
+structlog==23.3.0
     # via libhxl
 tableschema-to-template==0.0.13
     # via hdx-python-utilities
@@ -255,7 +255,7 @@ typing-extensions==4.9.0
     #   pydantic
     #   pydantic-core
     #   typer
-tzdata==2023.
+tzdata==2023.4
     # via pandas
 unidecode==1.3.7
     # via
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/base_scraper.py
RENAMED

@@ -16,6 +16,7 @@ class BaseScraper(ABC):
         datasetinfo (Dict): Information about dataset
         headers (Dict[str, Tuple]): Headers to be output at each level_name
         source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
+        reader (str): Reader to use. Defaults to "" (datasetinfo reader falling back on name).
     """

     population_lookup = {}
@@ -26,15 +27,20 @@ class BaseScraper(ABC):
         datasetinfo: Dict,
         headers: Dict[str, Tuple],
         source_configuration: Dict = {},
+        reader: str = "",
     ) -> None:
-        self.setup(name, headers, source_configuration)
+        self.name = name
+        if reader:
+            self.reader = reader
+        else:
+            self.reader = datasetinfo.get("reader", name)
+        self.setup(headers, source_configuration)
         self.datasetinfo = deepcopy(datasetinfo)
         self.errors_on_exit = None
         self.can_fallback = True

     def setup(
         self,
-        name: str,
         headers: Dict[str, Tuple],
         source_configuration: Dict = {},
     ) -> None:
@@ -42,14 +48,12 @@ class BaseScraper(ABC):
         {"national": (("School Closure",), ("#impact+type",)), ...},

         Args:
-            name (str): Name of scraper
             headers (Dict[str, Tuple]): Headers to be output at each level_name
             source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).

         Returns:
             None
         """
-        self.name = name
         self.headers = headers
         self.initialise_values_sources(source_configuration)
         self.has_run = False
@@ -92,7 +96,7 @@ class BaseScraper(ABC):
             None
         """
         if not name:
-            name = self.name
+            name = self.reader
         reader = Read.get_reader(name)
         return reader

@@ -361,10 +365,16 @@ class BaseScraper(ABC):
             return None
         if "is_hxl" in hapi_resource_metadata:
             return hapi_resource_metadata
-        reader = self.get_reader(
+        reader = self.get_reader()
         filename = self.datasetinfo.get("filename")
+        file_prefix = self.datasetinfo.get("file_prefix", self.name)
+        if filename:
+            kwargs = {"filename": filename}
+        else:
+            kwargs = {"file_prefix": file_prefix}
         hxl_info = reader.hxl_info_hapi_resource_metadata(
-            hapi_resource_metadata, filename=filename
+            hapi_resource_metadata,
+            **kwargs,
         )
         is_hxl = False
         if hxl_info:
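The hunks above let a custom scraper name the reader it wants instead of always using a reader keyed on the scraper's own name; resolution order is the explicit reader argument, then a "reader" key in datasetinfo, then the scraper name. A minimal sketch of a subclass using this, assuming readers have already been configured via Read elsewhere; the scraper name, headers and datasetinfo are invented for illustration:

    # Sketch only: "myscraper", the headers and the datasetinfo are made up.
    from hdx.scraper.base_scraper import BaseScraper

    class MyScraper(BaseScraper):
        def __init__(self, datasetinfo: dict):
            super().__init__(
                "myscraper",
                datasetinfo,
                {"national": (("Value",), ("#value",))},
                reader="hdx",  # wins over datasetinfo.get("reader", "myscraper")
            )

        def run(self) -> None:
            # get_reader() now falls back on self.reader rather than self.name,
            # so this read is routed through the reader registered as "hdx"
            headers, iterator = self.get_reader().read(self.datasetinfo)

        def add_sources(self) -> None:
            pass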
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/scraper.py
RENAMED

@@ -70,6 +70,8 @@ class ConfigurableScraper(BaseScraper):
         errors_on_exit: Optional[ErrorsOnExit] = None,
         **kwargs: Any,
     ):
+        self.name = name
+        self.reader = datasetinfo.get("reader", name)
         self.level = level
         datelevel = datasetinfo.get("date_level")
         if datelevel is None:
@@ -98,11 +100,11 @@ class ConfigurableScraper(BaseScraper):
         use_hxl = self.datasetinfo.get("use_hxl", False)
         if use_hxl:
             try:
-                file_headers, iterator = self.get_iterator(name)
+                file_headers, iterator = self.get_iterator()
                 self.use_hxl(headers, file_headers, iterator)
             except DownloadError:
                 self.can_fallback = False
-        self.setup(name, headers, source_configuration)
+        self.setup(headers, source_configuration)

     @staticmethod
     def get_subsets_from_datasetinfo(datasetinfo: Dict) -> List[Dict]:
@@ -136,20 +138,18 @@ class ConfigurableScraper(BaseScraper):
         ]
         return subsets

-    def get_iterator(self, name: str) -> Tuple[List[str], Iterator[Dict]]:
-        """Get the iterator from the preconfigured reader for
-
-        Args:
-            name (str): Name of scraper
+    def get_iterator(self) -> Tuple[List[str], Iterator[Dict]]:
+        """Get the iterator from the preconfigured reader for this scraper

         Returns:
             Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
         """
-        return self.get_reader(name).read(
-            self.datasetinfo,
-            file_prefix=name,
-            **self.variables,
-        )
+        if (
+            "filename" not in self.datasetinfo
+            and "file_prefix" not in self.datasetinfo
+        ):
+            self.datasetinfo["file_prefix"] = self.name
+        return self.get_reader().read(self.datasetinfo, **self.variables)

     def add_sources(self) -> None:
         """Add source for each HXL hashtag
@@ -466,7 +466,7 @@ class ConfigurableScraper(BaseScraper):
         Returns:
             None
         """
-        file_headers, iterator = self.get_iterator(self.name)
+        file_headers, iterator = self.get_iterator()
         header_to_hxltag = self.use_hxl(None, file_headers, iterator)
         if "source_url" not in self.datasetinfo:
             self.datasetinfo["source_url"] = self.datasetinfo["url"]
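Net effect of the get_iterator rework above: the scraper name is injected once as a default "file_prefix" instead of being threaded through every call, and an explicit "filename" or "file_prefix" in the configuration suppresses that default. A standalone sketch of just that defaulting rule (not the library's actual method; the dict values are hypothetical):

    # Restatement of the 2.3.2 file_prefix defaulting in isolation.
    def default_file_prefix(datasetinfo: dict, name: str) -> dict:
        # The scraper name becomes the download file prefix unless the
        # configuration already pins a filename or its own prefix
        if "filename" not in datasetinfo and "file_prefix" not in datasetinfo:
            datasetinfo["file_prefix"] = name
        return datasetinfo

    assert default_file_prefix({}, "casualties") == {"file_prefix": "casualties"}
    # An explicit filename suppresses the prefix entirely
    assert default_file_prefix({"filename": "data.csv"}, "casualties") == {
        "filename": "data.csv"
    }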
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/timeseries.py
RENAMED

@@ -50,7 +50,7 @@ class TimeSeries(BaseScraper):
             "output_hxl"
         ]
         rows = [headers, hxltags]
-        file_headers, iterator = self.get_reader(self.name).read(
+        file_headers, iterator = self.get_reader().read(
             self.datasetinfo, file_prefix=self.name
         )
         for inrow in iterator:
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/runner.py
RENAMED

@@ -10,7 +10,7 @@ from .configurable.resource_downloader import ResourceDownloader
 from .configurable.scraper import ConfigurableScraper
 from .configurable.timeseries import TimeSeries
 from .outputs.base import BaseOutput
-from .utilities import get_startend_dates_from_reference_period
+from .utilities import get_startend_dates_from_time_period
 from .utilities.fallbacks import Fallbacks
 from .utilities.reader import Read
 from .utilities.sources import Sources
@@ -29,7 +29,7 @@ class Runner:
         countryiso3s (ListTuple[str]): List of ISO3 country codes to process
         today (datetime): Value to use for today. Defaults to now_utc().
         errors_on_exit (ErrorsOnExit): ErrorsOnExit object that logs errors on exit
-        scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None.
+        scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None (all scrapers).
     """

     def __init__(
@@ -1061,7 +1061,7 @@ class Runner:
         if dataset_name:
             dataset = reader.read_dataset(dataset_name)
             if date is None:
-                date = get_startend_dates_from_reference_period(
+                date = get_startend_dates_from_time_period(
                     dataset, today=self.today
                 )
         if source_name is None:
@@ -1209,32 +1209,52 @@
         """
         if not names:
             names = self.scrapers.keys()
-        results = {}
+        hapi_results = {}

         def add_results(scraper_level, scrap, levels_used):
-            nonlocal results
+            nonlocal hapi_results

             if scraper_level in levels_used:
                 return
             headers = scrap.headers.get(scraper_level)
             if headers is None:
                 return
+            headings = headers[0]
+            hxltags = headers[1]
             values = scrap.get_values(scraper_level)
             hapi_dataset_metadata = scrap.get_hapi_dataset_metadata()
+            if not hapi_dataset_metadata:
+                return
             hapi_resource_metadata = scrap.get_hapi_resource_metadata()
+            if not hapi_resource_metadata:
+                return
             dataset_id = hapi_dataset_metadata["hdx_id"]
-            hapi_metadata = results.get(
+            hapi_metadata = hapi_results.get(
                 dataset_id, copy(hapi_dataset_metadata)
             )
-
-            level_results
-
-
-
-
-
+            results = hapi_metadata.get("results", {})
+            level_results = results.get(scraper_level)
+            if level_results is None:
+                level_results = {
+                    "headers": ([], []),
+                    "values": [],
+                    "hapi_resource_metadata": hapi_resource_metadata,
+                }
+                results[scraper_level] = level_results
+            lev_headings = level_results["headers"][0]
+            lev_hxltags = level_results["headers"][1]
+            lev_values = level_results["values"]
+            for i, hxltag in enumerate(hxltags):
+                if hxltag in lev_hxltags:
+                    index = lev_hxltags.index(hxltag)
+                    lev_values[index].update(values[i])
+                else:
+                    lev_headings.append(headings[i])
+                    lev_hxltags.append(hxltag)
+                    lev_values.append(values[i])
+            hapi_metadata["results"] = results
             levels_used.add(scraper_level)
-            results[dataset_id] = hapi_metadata
+            hapi_results[dataset_id] = hapi_metadata

         for name in names:
             if self.scrapers_to_run and not any(
@@ -1247,4 +1267,4 @@
             lvls_used = set()
             for scrap_level in scraper.headers:
                 add_results(scrap_level, scraper, lvls_used)
-        return results
+        return hapi_results
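The reworked closure above accumulates results per HDX dataset id and then per admin level, merging columns by HXL hashtag when several scrapers feed the same dataset rather than overwriting earlier output. A rough sketch of the returned structure, with every id, heading, hxltag and value invented for illustration:

    # Hypothetical shape of the mapping the method now returns.
    hapi_results = {
        "c3f001fa-0000-0000-0000-000000000000": {  # dataset hdx_id
            # ...the copied hapi dataset metadata fields sit alongside...
            "results": {
                "national": {
                    "headers": (["Population"], ["#population"]),
                    "values": [{"AFG": 38041754}],  # one dict per column
                    "hapi_resource_metadata": {"hdx_id": "11111111-aaaa"},
                },
            },
        },
    }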
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/__init__.py
RENAMED

@@ -57,22 +57,22 @@ def get_rowval(row: Dict, valcol: str) -> Any:
     return result


-def get_startend_dates_from_reference_period(
+def get_startend_dates_from_time_period(
     dataset: Dataset, today: Optional[datetime] = None
 ) -> Optional[Dict]:
-    """Return the reference period in form required for source_date
+    """Return the time period in form required for source_date

     Args:
         dataset (Dataset): Dataset object
         today (Optional[datetime]): Date to use for today. Defaults to None (datetime.utcnow)

     Returns:
-        Optional[Dict]: Reference period in form required for source_date
+        Optional[Dict]: Time period in form required for source_date
     """
     if today is None:
-        date_info = dataset.get_reference_period()
+        date_info = dataset.get_time_period()
     else:
-        date_info = dataset.get_reference_period(today=today)
+        date_info = dataset.get_time_period(today=today)
     startdate = date_info.get("startdate")
     enddate = date_info.get("enddate")
     if enddate is None:
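Only the names change here, tracking the hdx-python-api 6.2.0 rename of "reference period" to "time period"; the helper still returns a dict in the form expected for source_date. A hedged usage sketch ("mydataset" is a placeholder, and Dataset.read_from_hdx needs a configured HDX connection before it returns anything):

    from datetime import datetime, timezone

    from hdx.data.dataset import Dataset
    from hdx.scraper.utilities import get_startend_dates_from_time_period

    dataset = Dataset.read_from_hdx("mydataset")  # placeholder dataset name
    source_date = get_startend_dates_from_time_period(
        dataset, today=datetime(2024, 1, 15, tzinfo=timezone.utc)
    )
    # source_date is then usable directly as datasetinfo["source_date"]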
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/reader.py
RENAMED

@@ -8,7 +8,7 @@ import hxl
 from hxl.input import InputOptions, munge_url
 from slugify import slugify

-from . import get_startend_dates_from_reference_period, match_template
+from . import get_startend_dates_from_time_period, match_template
 from .sources import Sources
 from hdx.data.dataset import Dataset
 from hdx.data.resource import Resource
@@ -224,6 +224,13 @@ class Read(Retrieve):
         filename = datasetinfo.get("filename")
         if filename:
             kwargs["filename"] = filename
+        if filename:
+            # remove file_prefix if filename provided
+            kwargs.pop("file_prefix", None)
+        elif "file_prefix" not in kwargs:
+            file_prefix = datasetinfo.get("file_prefix")
+            if file_prefix:
+                kwargs["file_prefix"] = file_prefix
         return self.get_tabular_rows(
             url,
             dict_form=True,
@@ -311,7 +318,7 @@ class Read(Retrieve):
         """
         return self.construct_filename_and_download(
             resource["name"],
-            resource.get_file_type(),
+            resource.get_format(),
             resource["url"],
             **kwargs,
         )
@@ -331,7 +338,7 @@ class Read(Retrieve):
             "title": dataset["title"],
             "hdx_provider_stub": dataset["organization"]["name"],
             "hdx_provider_name": dataset["organization"]["title"],
-            "reference_period": dataset.get_reference_period(today=self.today),
+            "reference_period": dataset.get_time_period(today=self.today),
         }

     @staticmethod
@@ -477,7 +484,7 @@ class Read(Retrieve):
         if "source_date" not in datasetinfo:
             datasetinfo[
                 "source_date"
-            ] = get_startend_dates_from_reference_period(
+            ] = get_startend_dates_from_time_period(
                 dataset, today=self.today
             )
         if "source" not in datasetinfo:
@@ -510,7 +517,7 @@ class Read(Retrieve):
             key = "default_date"
         else:
             key = hxltag
-        source_date[key] = get_startend_dates_from_reference_period(
+        source_date[key] = get_startend_dates_from_time_period(
             dataset, today=self.today
         )
         if source is not None:
@@ -555,10 +562,14 @@ class Read(Retrieve):
             datasetinfo["filename"] = filename
         filename = datasetinfo.get("filename")
         if resource and not filename:
-            # prefix is added later
             filename = self.construct_filename(
-                resource["name"], resource.get_file_type()
+                resource["name"], resource.get_format()
             )
+            file_prefix = kwargs.get("file_prefix")
+            if not file_prefix:
+                file_prefix = datasetinfo.get("file_prefix")
+            if file_prefix:
+                filename = f"{file_prefix}_{filename}"
         datasetinfo["filename"] = filename
         return self.read_tabular(datasetinfo, **kwargs)
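Taken together, the reader changes fix a precedence for naming saved files: an explicit "filename" wins and strips any "file_prefix"; otherwise a prefix from kwargs, falling back on datasetinfo, is prepended to the constructed filename. A condensed standalone restatement of that rule (not the library's actual code path, which is spread across read_tabular and the dataset-reading method above):

    def resolve_saved_name(datasetinfo: dict, kwargs: dict, constructed: str) -> str:
        filename = datasetinfo.get("filename")
        if filename:
            kwargs.pop("file_prefix", None)  # filename beats any prefix
            return filename
        file_prefix = kwargs.get("file_prefix") or datasetinfo.get("file_prefix")
        if file_prefix:
            return f"{file_prefix}_{constructed}"
        return constructed

    assert resolve_saved_name({"filename": "a.csv"}, {"file_prefix": "x"}, "b.csv") == "a.csv"
    assert resolve_saved_name({}, {"file_prefix": "x"}, "b.csv") == "x_b.csv"
    assert resolve_saved_name({}, {}, "b.csv") == "b.csv"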
hdx_python_scraper-2.3.2/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json
ADDED

@@ -0,0 +1 @@
+{"owner_org": "ac91832d-2477-4e1f-8520-9a591a7c3d69", "maintainer": "f240651d-52d6-4a3c-b229-b5dd0443d642", "relationships_as_object": [], "package_creator": "nafissah", "private": false, "dataset_date": "[2016-09-01T00:00:00 TO 2016-09-01T23:59:59]", "num_tags": 3, "solr_additions": "{\"countries\": [\"Benin\", \"Burkina Faso\", \"Cameroon\", \"Chad\", \"Gambia\", \"Mali\", \"Niger\", \"Nigeria\", \"Senegal\"]}", "id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "metadata_created": "2017-03-10T09:30:34.278280", "archived": false, "methodology_other": "Countries data aggregation. ", "metadata_modified": "2019-09-11T16:15:43.873688", "title": "Sahel : Humanitarian Needs Overview", "tags": [{"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "humanitarian needs overview - hno", "id": "bfd4300a-b35f-4c3d-a14b-05e0f040ad29", "name": "humanitarian needs overview - hno"}, {"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "hxl", "id": "a0fbb23a-6aad-4ccc-8062-e9ef9f20e5d2", "name": "hxl"}, {"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "people in need - pin", "id": "47dd0671-78c8-4359-9f96-5cfe4b86b48c", "name": "people in need - pin"}], "state": "active", "relationships_as_subject": [], "methodology": "Other", "version": null, "is_requestdata_type": false, "creator_user_id": "7711391a-7647-4432-a71b-294e7f901a2c", "type": "dataset", "has_showcases": true, "due_date": "2018-03-10T10:08:37", "dataset_preview": "first_resource", "num_resources": 5, "dataset_source": "Multiple organisations", "subnational": "1", "last_modified": "2017-03-10T10:08:37.690970", "groups": [{"display_name": "Benin", "description": "", "title": "Benin", "image_display_url": "", "id": "ben", "name": "ben"}, {"display_name": "Burkina Faso", "description": "", "title": "Burkina Faso", "image_display_url": "", "id": "bfa", "name": "bfa"}, {"display_name": "Cameroon", "description": "", "title": "Cameroon", "image_display_url": "", "id": "cmr", "name": "cmr"}, {"display_name": "Chad", "description": "", "title": "Chad", "image_display_url": "", "id": "tcd", "name": "tcd"}, {"display_name": "Gambia", "description": "", "title": "Gambia", "image_display_url": "", "id": "gmb", "name": "gmb"}, {"display_name": "Mali", "description": "", "title": "Mali", "image_display_url": "", "id": "mli", "name": "mli"}, {"display_name": "Niger", "description": "", "title": "Niger", "image_display_url": "", "id": "ner", "name": "ner"}, {"display_name": "Nigeria", "description": "", "title": "Nigeria", "image_display_url": "", "id": "nga", "name": "nga"}, {"display_name": "Senegal", "description": "", "title": "Senegal", "image_display_url": "", "id": "sen", "name": "sen"}], "license_id": "cc-by", "has_quickcharts": true, "has_geodata": false, "overdue_date": "2018-05-09T10:08:37", "total_res_downloads": 3083, "qa_completed": true, "name": "sahel-humanitarian-needs-overview", "isopen": true, "url": null, "notes": "This dataset is produced by the United Nations for the Coordination of Humanitarian Affairs (OCHA) in collaboration with humanitarian partners. \r\nIt covers the period from January to December 2017 and was issued on December 2016.", "license_title": "Creative Commons Attribution International", "batch": "7c91002c-9c43-4c56-ac78-02a20ba9575e", "license_url": "http://www.opendefinition.org/licenses/cc-by", "pageviews_last_14_days": 4, "organization": {"description": "OCHA Regional Office for West and Central Africa (ROWCA).\r\n\r\nDans les pays de l'Afrique de l\u2019Ouest et du Centre, l'ins\u00e9curit\u00e9 alimentaire et la malnutrition s\u2019aggravent avec l'impact des catastrophes naturelles, le changement climatique, l\u2019\u00e9volution d\u00e9mographique, l'urbanisation mal g\u00e9r\u00e9e, les \u00e9pid\u00e9mies et les conflits violents.", "title": "OCHA West and Central Africa (ROWCA)", "created": "2014-09-26T15:15:48.616313", "approval_status": "approved", "is_organization": true, "state": "active", "image_url": "", "type": "organization", "id": "ac91832d-2477-4e1f-8520-9a591a7c3d69", "name": "ocha-rowca"}, "data_update_frequency": "365", "is_fresh": false, "update_status": "needs_update", "x_resource_grouping": [], "resources": [{"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3", "size": 1291, "metadata_modified": "2017-03-10T09:43:15.837967", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Nutrition ", "format": "CSV", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "mimetype_inner": null, "url_type": "upload", "originalHash": "1719647142", "mimetype": null, "cache_url": null, "name": "HNO -2017 -Sahel-nutrition.csv", "created": "2017-03-10T09:43:15.837967", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "tracking_summary[recent]": "0", "last_modified": "2017-03-10T10:08:37.690970", "tracking_summary[total]": "0", "position": 0, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "487e329d-3959-49bf-8e12-2675be7008ae", "size": 2306, "metadata_modified": "2017-03-10T09:46:28.411331", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "state": "active", "hash": "", "description": "HNO 2017 sahel - Lake Chad - Nutrition", "format": "CSV", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "HNO 2017 Sahel-Lake Chad Nutrition as of 25_11_2016.csv", "created": "2017-03-10T09:46:28.411331", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "last_modified": "2017-03-10T10:08:37.690970", "position": 1, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "d648d73f-8e66-461f-864f-0d66b19bcfa2", "size": 10348, "metadata_modified": "2017-03-10T10:01:38.716006", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Displacement per country", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "1345568791", "mimetype": null, "cache_url": null, "name": "HNO-2017-Sahel- Displacement_per_country.xlsx", "created": "2017-03-10T10:01:38.716006", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 2, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "d9248be4-7bfb-4a81-a7aa-c035dcb737a2", "size": 10275, "metadata_modified": "2017-03-10T10:03:01.842877", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - People in need", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "2017-Sahel- People in need.xlsx", "created": "2017-03-10T10:03:01.842877", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 3, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "798b6a50-75da-4c8c-8034-4a4439630a3a", "size": 9430, "metadata_modified": "2017-03-10T10:04:40.479364", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Food Insecurity", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "HNO-2017-Sahel-Food Insecurity.xlsx", "created": "2017-03-10T10:04:40.479364", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 4, "resource_type": "file.upload"}]}
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_closures.py
RENAMED

@@ -19,6 +19,7 @@ class EducationClosures(BaseScraper):
                     ("#status+country+closed",),
                 ),
             },
+            reader="hdx",  # Just to test passing a specific reader to use
         )
         self.today = today
         self.countryiso3s = countryiso3s
@@ -37,7 +38,7 @@ class EducationClosures(BaseScraper):

     def run(self) -> None:
         closures_headers, closures_iterator = self.get_reader().read(
-            self.datasetinfo, file_prefix=
+            self.datasetinfo, file_prefix="education_closures"
         )
         closures = self.get_values("national")[0]
         closed_countries = self.get_values("regional")[0]
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_readers.py
RENAMED

@@ -40,7 +40,7 @@ class TestReaders:
                 "url": "https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596",
             }
         )
-        resource.set_file_type("csv")
+        resource.set_format("csv")
         dataset.add_update_resource(resource)
         return dataset

@@ -105,7 +105,7 @@ class TestReaders:
                 "url": "https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596",
             }
         )
-        resource.set_file_type("csv")
+        resource.set_format("csv")
         data = reader.read_hxl_resource(
             resource, file_prefix="whowhatwhere_afg"
         )