hdx-python-scraper 2.2.3__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/coveragerc +1 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/CONTRIBUTING.md +4 -4
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/PKG-INFO +16 -15
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/README.md +12 -12
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/main.md +4 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/pyproject.toml +4 -3
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/requirements.txt +47 -40
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/_version.py +2 -2
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/base_scraper.py +21 -9
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/resource_downloader.py +1 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/scraper.py +1 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/timeseries.py +3 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/runner.py +5 -2
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/reader.py +142 -40
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/config/project_configuration.yaml +10 -4
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/additional-json.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf2_global_download-full-pfmb-allocations.csv +7054 -7054
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf_global_download-full-pfmb-allocations.csv +7054 -7054
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +1 -1
- hdx_python_scraper-2.3.0/tests/fixtures/input/education_closures_school_closures.csv +4 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ethiopia-pin-targeted-reached-by-location-and-cluster.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_eth_ethiopia_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/fallbacks.json +17 -17
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/global-school-closures-covid19.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/download-hno-2017-sahel-nutrition.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/hno_2017_sahel_nutrition.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idmc-internally-displaced-persons-idps.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/ipc_somalia_download-som-food-insecurity-oct-dec2022-projection.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +2 -2
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/kenya-pin-targeted-reached-by-location-and-cluster.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_ken_kenya_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +1 -1
- hdx_python_scraper-2.3.0/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +36 -0
- hdx_python_scraper-2.3.0/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +1 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sadd-countries-to-include.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sahel-humanitarian-needs-overview.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-acute-food-insecurity-country-data.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-internally-displaced-persons-idps.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-pin-targeted-reached-by-location-and-cluster.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_som_somalia_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/total-covid-19-tests-performed-by-country.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ukraine-who-does-what-where-3w.json +1 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/unocha-office-locations.json +1 -1
- hdx_python_scraper-2.2.3/tests/fixtures/who_national2_who-covid-19-global-data.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -1
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/who_national_who-covid-19-global-data.csv +0 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/affected_targeted_reached.py +1 -3
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/conftest.py +9 -4
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/education_closures.py +1 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/education_enrolment.py +1 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_readers.py +75 -6
- hdx_python_scraper-2.2.3/tests/hdx/scraper/test_runner_get_results.py → hdx_python_scraper-2.3.0/tests/hdx/scraper/test_runner.py +56 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_custom.py +4 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_global.py +2 -2
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_national.py +24 -2
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_utils.py +6 -1
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/unhcr_myanmar_idps.py +2 -2
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/black.toml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/pre-commit-config.yaml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.gitignore +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/LICENSE +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/__init__.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/aggregator.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/rowparser.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/base.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/json.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/sources.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/writer.py +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/covidtests_data-owid-covid-data.xlsx +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/education_closures_download-covid-impact-education.csv → /hdx_python_scraper-2.3.0/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/education_enrolment_download-countries-enrollment-data-uis-feb-22.xlsx +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/download-hno-2017-sahel-people-in-need.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idps_download-displacement-data.csv +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/idps_somalia_download-som-unhcr-prmn-displacement-dataset.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/regions_download-tbl-regcov-2020-ocha.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/hdx_resource_downloader_xlsx_ukr_border_crossings_090622.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ukraine-border-crossings.json +0 -0
- /hdx_python_scraper-2.2.3/tests/fixtures/who_national3_who-covid-19-global-data.csv → /hdx_python_scraper-2.3.0/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_output.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
- {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_sources.py +0 -0
|
@@ -20,7 +20,7 @@ you make a git commit:
|
|
|
20
20
|
|
|
21
21
|
pre-commit install
|
|
22
22
|
|
|
23
|
-
The configuration file for this project is in a
|
|
23
|
+
The configuration file for this project is in a
|
|
24
24
|
non-standard location. Thus, you will need to edit your
|
|
25
25
|
`.git/hooks/pre-commit` file to reflect this. Change
|
|
26
26
|
the line that begins with `ARGS` to:
|
|
@@ -29,7 +29,7 @@ the line that begins with `ARGS` to:
|
|
|
29
29
|
|
|
30
30
|
With pre-commit, all code is formatted according to
|
|
31
31
|
[black](https://github.com/psf/black) and
|
|
32
|
-
[ruff]("https://github.com/charliermarsh/ruff") guidelines.
|
|
32
|
+
[ruff](https://github.com/charliermarsh/ruff) guidelines.
|
|
33
33
|
|
|
34
34
|
To check if your changes pass pre-commit without committing, run:
|
|
35
35
|
|
|
@@ -46,8 +46,8 @@ Follow the example set out already in ``api.rst`` as you write the documentation
|
|
|
46
46
|
## Packages
|
|
47
47
|
|
|
48
48
|
[pip-tools](https://github.com/jazzband/pip-tools) is used for
|
|
49
|
-
package management. If you’ve introduced a new package to the
|
|
50
|
-
source code (i.e.anywhere in `src/`), please add it to the
|
|
49
|
+
package management. If you’ve introduced a new package to the
|
|
50
|
+
source code (i.e. anywhere in `src/`), please add it to the
|
|
51
51
|
`project.dependencies` section of
|
|
52
52
|
`pyproject.toml` with any known version constraints.
|
|
53
53
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.2.3
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,12 +26,13 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.1.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.1.4
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.6.3
|
|
30
31
|
Requires-Dist: regex
|
|
31
32
|
Provides-Extra: dev
|
|
32
33
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
33
34
|
Provides-Extra: pandas
|
|
34
|
-
Requires-Dist: pandas>=2.1.
|
|
35
|
+
Requires-Dist: pandas>=2.1.3; extra == 'pandas'
|
|
35
36
|
Provides-Extra: test
|
|
36
37
|
Requires-Dist: pytest; extra == 'test'
|
|
37
38
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
@@ -43,19 +44,19 @@ Description-Content-Type: text/markdown
|
|
|
43
44
|
[](https://pycqa.github.io/isort/)
|
|
44
45
|
[](https://pypistats.org/packages/hdx-python-scraper)
|
|
45
46
|
|
|
46
|
-
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
47
|
-
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
48
|
-
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
49
|
-
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
50
|
-
and/or Excel and includes the addition of
|
|
51
|
-
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
52
|
-
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
53
|
-
specification and the framework handles the execution of both configurable and custom
|
|
47
|
+
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
48
|
+
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
49
|
+
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
50
|
+
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
51
|
+
and/or Excel and includes the addition of
|
|
52
|
+
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
53
|
+
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
54
|
+
specification and the framework handles the execution of both configurable and custom
|
|
54
55
|
scrapers.
|
|
55
56
|
|
|
56
|
-
For more information, please read the
|
|
57
|
-
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
57
|
+
For more information, please read the
|
|
58
|
+
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
58
59
|
|
|
59
|
-
This library is part of the
|
|
60
|
-
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
60
|
+
This library is part of the
|
|
61
|
+
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
61
62
|
humanitarian related data, please upload your datasets to HDX.
|
|
@@ -4,19 +4,19 @@
|
|
|
4
4
|
[](https://pycqa.github.io/isort/)
|
|
5
5
|
[](https://pypistats.org/packages/hdx-python-scraper)
|
|
6
6
|
|
|
7
|
-
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
8
|
-
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
9
|
-
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
10
|
-
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
11
|
-
and/or Excel and includes the addition of
|
|
12
|
-
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
13
|
-
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
14
|
-
specification and the framework handles the execution of both configurable and custom
|
|
7
|
+
The HDX Python Scraper Library is designed to enable you to easily develop code that
|
|
8
|
+
assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
|
|
9
|
+
uses a YAML file that specifies for each source what needs to be read and allows some
|
|
10
|
+
transformations to be performed on the data. The output is written to JSON, Google sheets
|
|
11
|
+
and/or Excel and includes the addition of
|
|
12
|
+
[Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
|
|
13
|
+
the YAML file. Custom Python scrapers can also be written that conform to a defined
|
|
14
|
+
specification and the framework handles the execution of both configurable and custom
|
|
15
15
|
scrapers.
|
|
16
16
|
|
|
17
|
-
For more information, please read the
|
|
18
|
-
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
17
|
+
For more information, please read the
|
|
18
|
+
[documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
|
|
19
19
|
|
|
20
|
-
This library is part of the
|
|
21
|
-
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
20
|
+
This library is part of the
|
|
21
|
+
[Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
|
|
22
22
|
humanitarian related data, please upload your datasets to HDX.
|
|
@@ -26,6 +26,10 @@ install with:
|
|
|
26
26
|
pip install hdx-python-scraper[pandas]
|
|
27
27
|
|
|
28
28
|
## Breaking Changes
|
|
29
|
+
From 2.3.0, resource name is used when available instead of creating name from
|
|
30
|
+
url so tests that use saved data from the Read class may break. file_type
|
|
31
|
+
parameters in various Read methods renamed to format.
|
|
32
|
+
|
|
29
33
|
From 2.1.2, Python 3.7 no longer supported
|
|
30
34
|
|
|
31
35
|
From 2.0.1, all functions in outputs.update_tabs are methods in the new Writer class
|
|
@@ -34,7 +34,8 @@ classifiers = [
|
|
|
34
34
|
requires-python = ">=3.8"
|
|
35
35
|
|
|
36
36
|
dependencies = [
|
|
37
|
-
"hdx-python-api>=6.1.
|
|
37
|
+
"hdx-python-api>=6.1.4",
|
|
38
|
+
"hdx-python-country>=3.6.3",
|
|
38
39
|
"gspread",
|
|
39
40
|
"regex",
|
|
40
41
|
]
|
|
@@ -48,7 +49,7 @@ content-type = "text/markdown"
|
|
|
48
49
|
Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
|
|
49
50
|
|
|
50
51
|
[project.optional-dependencies]
|
|
51
|
-
pandas = ["pandas>=2.1.
|
|
52
|
+
pandas = ["pandas>=2.1.3"]
|
|
52
53
|
test = ["pytest", "pytest-cov"]
|
|
53
54
|
dev = ["pre-commit"]
|
|
54
55
|
|
|
@@ -87,7 +88,7 @@ test = """
|
|
|
87
88
|
"""
|
|
88
89
|
|
|
89
90
|
[[tool.hatch.envs.test.matrix]]
|
|
90
|
-
python = ["3.
|
|
91
|
+
python = ["3.12"]
|
|
91
92
|
|
|
92
93
|
[tool.hatch.envs.lint]
|
|
93
94
|
detached = true
|
|
@@ -4,16 +4,16 @@
|
|
|
4
4
|
#
|
|
5
5
|
# pip-compile --all-extras --output-file=requirements.txt --resolver=backtracking pyproject.toml
|
|
6
6
|
#
|
|
7
|
-
annotated-types==0.
|
|
7
|
+
annotated-types==0.6.0
|
|
8
8
|
# via pydantic
|
|
9
9
|
attrs==23.1.0
|
|
10
10
|
# via
|
|
11
11
|
# frictionless
|
|
12
12
|
# jsonlines
|
|
13
13
|
# jsonschema
|
|
14
|
-
cachetools==5.3.
|
|
14
|
+
cachetools==5.3.2
|
|
15
15
|
# via google-auth
|
|
16
|
-
certifi==2023.
|
|
16
|
+
certifi==2023.11.17
|
|
17
17
|
# via requests
|
|
18
18
|
cffi==1.16.0
|
|
19
19
|
# via cryptography
|
|
@@ -21,7 +21,7 @@ cfgv==3.4.0
|
|
|
21
21
|
# via pre-commit
|
|
22
22
|
chardet==5.2.0
|
|
23
23
|
# via frictionless
|
|
24
|
-
charset-normalizer==3.3.
|
|
24
|
+
charset-normalizer==3.3.2
|
|
25
25
|
# via requests
|
|
26
26
|
ckanapi==4.7
|
|
27
27
|
# via hdx-python-api
|
|
@@ -31,7 +31,7 @@ colorama==0.4.6
|
|
|
31
31
|
# via typer
|
|
32
32
|
coverage[toml]==7.3.2
|
|
33
33
|
# via pytest-cov
|
|
34
|
-
cryptography==41.0.
|
|
34
|
+
cryptography==41.0.7
|
|
35
35
|
# via pyopenssl
|
|
36
36
|
defopt==6.4.0
|
|
37
37
|
# via hdx-python-api
|
|
@@ -45,33 +45,37 @@ docopt==0.6.2
|
|
|
45
45
|
# num2words
|
|
46
46
|
docutils==0.20.1
|
|
47
47
|
# via defopt
|
|
48
|
-
email-validator==2.
|
|
48
|
+
email-validator==2.1.0.post1
|
|
49
49
|
# via hdx-python-api
|
|
50
50
|
et-xmlfile==1.1.0
|
|
51
51
|
# via openpyxl
|
|
52
|
-
filelock==3.
|
|
52
|
+
filelock==3.13.1
|
|
53
53
|
# via virtualenv
|
|
54
54
|
frictionless==5.16.0
|
|
55
55
|
# via hdx-python-utilities
|
|
56
|
-
google-auth==2.
|
|
56
|
+
google-auth==2.25.2
|
|
57
57
|
# via
|
|
58
58
|
# google-auth-oauthlib
|
|
59
59
|
# gspread
|
|
60
60
|
google-auth-oauthlib==1.1.0
|
|
61
61
|
# via gspread
|
|
62
|
-
gspread==5.
|
|
62
|
+
gspread==5.12.2
|
|
63
63
|
# via hdx-python-scraper (pyproject.toml)
|
|
64
|
-
hdx-python-api==6.1.
|
|
64
|
+
hdx-python-api==6.1.4
|
|
65
65
|
# via hdx-python-scraper (pyproject.toml)
|
|
66
|
-
hdx-python-country==3.
|
|
67
|
-
# via
|
|
66
|
+
hdx-python-country==3.6.3
|
|
67
|
+
# via
|
|
68
|
+
# hdx-python-api
|
|
69
|
+
# hdx-python-scraper (pyproject.toml)
|
|
68
70
|
hdx-python-utilities==3.6.2
|
|
69
|
-
# via
|
|
70
|
-
|
|
71
|
+
# via
|
|
72
|
+
# hdx-python-api
|
|
73
|
+
# hdx-python-country
|
|
74
|
+
humanize==4.9.0
|
|
71
75
|
# via frictionless
|
|
72
|
-
identify==2.5.
|
|
76
|
+
identify==2.5.33
|
|
73
77
|
# via pre-commit
|
|
74
|
-
idna==3.
|
|
78
|
+
idna==3.6
|
|
75
79
|
# via
|
|
76
80
|
# email-validator
|
|
77
81
|
# requests
|
|
@@ -94,14 +98,16 @@ jsonschema==4.17.3
|
|
|
94
98
|
# frictionless
|
|
95
99
|
# tableschema-to-template
|
|
96
100
|
libhxl==5.1
|
|
97
|
-
# via
|
|
101
|
+
# via
|
|
102
|
+
# hdx-python-api
|
|
103
|
+
# hdx-python-country
|
|
98
104
|
loguru==0.7.2
|
|
99
105
|
# via hdx-python-utilities
|
|
100
|
-
makefun==1.15.
|
|
106
|
+
makefun==1.15.2
|
|
101
107
|
# via hdx-python-api
|
|
102
108
|
markdown-it-py==3.0.0
|
|
103
109
|
# via rich
|
|
104
|
-
marko==2.0.
|
|
110
|
+
marko==2.0.2
|
|
105
111
|
# via frictionless
|
|
106
112
|
markupsafe==2.1.3
|
|
107
113
|
# via jinja2
|
|
@@ -111,9 +117,9 @@ ndg-httpsclient==0.5.1
|
|
|
111
117
|
# via hdx-python-api
|
|
112
118
|
nodeenv==1.8.0
|
|
113
119
|
# via pre-commit
|
|
114
|
-
num2words==0.5.
|
|
120
|
+
num2words==0.5.13
|
|
115
121
|
# via quantulum3
|
|
116
|
-
numpy==1.26.
|
|
122
|
+
numpy==1.26.2
|
|
117
123
|
# via pandas
|
|
118
124
|
oauthlib==3.2.2
|
|
119
125
|
# via requests-oauthlib
|
|
@@ -121,11 +127,11 @@ openpyxl==3.1.2
|
|
|
121
127
|
# via hdx-python-utilities
|
|
122
128
|
packaging==23.2
|
|
123
129
|
# via pytest
|
|
124
|
-
pandas==2.1.
|
|
130
|
+
pandas==2.1.4
|
|
125
131
|
# via hdx-python-scraper (pyproject.toml)
|
|
126
132
|
petl==1.7.14
|
|
127
133
|
# via frictionless
|
|
128
|
-
platformdirs==
|
|
134
|
+
platformdirs==4.1.0
|
|
129
135
|
# via virtualenv
|
|
130
136
|
pluggy==1.3.0
|
|
131
137
|
# via pytest
|
|
@@ -135,9 +141,9 @@ ply==3.11
|
|
|
135
141
|
# libhxl
|
|
136
142
|
pockets==0.9.1
|
|
137
143
|
# via sphinxcontrib-napoleon
|
|
138
|
-
pre-commit==3.
|
|
144
|
+
pre-commit==3.6.0
|
|
139
145
|
# via hdx-python-scraper (pyproject.toml)
|
|
140
|
-
pyasn1==0.5.
|
|
146
|
+
pyasn1==0.5.1
|
|
141
147
|
# via
|
|
142
148
|
# hdx-python-api
|
|
143
149
|
# ndg-httpsclient
|
|
@@ -147,23 +153,23 @@ pyasn1-modules==0.3.0
|
|
|
147
153
|
# via google-auth
|
|
148
154
|
pycparser==2.21
|
|
149
155
|
# via cffi
|
|
150
|
-
pydantic==2.
|
|
156
|
+
pydantic==2.5.2
|
|
151
157
|
# via
|
|
152
158
|
# frictionless
|
|
153
159
|
# inflect
|
|
154
|
-
pydantic-core==2.
|
|
160
|
+
pydantic-core==2.14.5
|
|
155
161
|
# via pydantic
|
|
156
|
-
pygments==2.
|
|
162
|
+
pygments==2.17.2
|
|
157
163
|
# via rich
|
|
158
|
-
pyopenssl==23.
|
|
164
|
+
pyopenssl==23.3.0
|
|
159
165
|
# via
|
|
160
166
|
# hdx-python-api
|
|
161
167
|
# ndg-httpsclient
|
|
162
168
|
pyphonetics==0.5.3
|
|
163
169
|
# via hdx-python-country
|
|
164
|
-
pyrsistent==0.
|
|
170
|
+
pyrsistent==0.20.0
|
|
165
171
|
# via jsonschema
|
|
166
|
-
pytest==7.4.
|
|
172
|
+
pytest==7.4.3
|
|
167
173
|
# via
|
|
168
174
|
# hdx-python-scraper (pyproject.toml)
|
|
169
175
|
# pytest-cov
|
|
@@ -198,6 +204,7 @@ requests==2.31.0
|
|
|
198
204
|
# via
|
|
199
205
|
# ckanapi
|
|
200
206
|
# frictionless
|
|
207
|
+
# hdx-python-api
|
|
201
208
|
# libhxl
|
|
202
209
|
# requests-file
|
|
203
210
|
# requests-oauthlib
|
|
@@ -207,15 +214,15 @@ requests-oauthlib==1.3.1
|
|
|
207
214
|
# via google-auth-oauthlib
|
|
208
215
|
rfc3986==2.0.0
|
|
209
216
|
# via frictionless
|
|
210
|
-
rich==13.
|
|
217
|
+
rich==13.7.0
|
|
211
218
|
# via typer
|
|
212
219
|
rsa==4.9
|
|
213
220
|
# via google-auth
|
|
214
|
-
ruamel-yaml==0.
|
|
221
|
+
ruamel-yaml==0.18.5
|
|
215
222
|
# via hdx-python-utilities
|
|
216
223
|
ruamel-yaml-clib==0.2.8
|
|
217
224
|
# via ruamel-yaml
|
|
218
|
-
shellingham==1.5.
|
|
225
|
+
shellingham==1.5.4
|
|
219
226
|
# via typer
|
|
220
227
|
simpleeval==0.9.13
|
|
221
228
|
# via frictionless
|
|
@@ -231,7 +238,7 @@ sphinxcontrib-napoleon==0.7
|
|
|
231
238
|
# via defopt
|
|
232
239
|
stringcase==1.2.0
|
|
233
240
|
# via frictionless
|
|
234
|
-
structlog==23.
|
|
241
|
+
structlog==23.2.0
|
|
235
242
|
# via libhxl
|
|
236
243
|
tableschema-to-template==0.0.13
|
|
237
244
|
# via hdx-python-utilities
|
|
@@ -241,7 +248,7 @@ text-unidecode==1.3
|
|
|
241
248
|
# via python-slugify
|
|
242
249
|
typer[all]==0.9.0
|
|
243
250
|
# via frictionless
|
|
244
|
-
typing-extensions==4.
|
|
251
|
+
typing-extensions==4.9.0
|
|
245
252
|
# via
|
|
246
253
|
# frictionless
|
|
247
254
|
# inflect
|
|
@@ -254,21 +261,21 @@ unidecode==1.3.7
|
|
|
254
261
|
# via
|
|
255
262
|
# libhxl
|
|
256
263
|
# pyphonetics
|
|
257
|
-
urllib3==2.0
|
|
264
|
+
urllib3==2.1.0
|
|
258
265
|
# via
|
|
259
266
|
# libhxl
|
|
260
267
|
# requests
|
|
261
268
|
validators==0.22.0
|
|
262
269
|
# via frictionless
|
|
263
|
-
virtualenv==20.
|
|
270
|
+
virtualenv==20.25.0
|
|
264
271
|
# via pre-commit
|
|
265
|
-
wheel==0.
|
|
272
|
+
wheel==0.42.0
|
|
266
273
|
# via libhxl
|
|
267
274
|
xlrd==2.0.1
|
|
268
275
|
# via hdx-python-utilities
|
|
269
276
|
xlrd3==1.1.0
|
|
270
277
|
# via libhxl
|
|
271
|
-
xlsxwriter==3.1.
|
|
278
|
+
xlsxwriter==3.1.9
|
|
272
279
|
# via tableschema-to-template
|
|
273
280
|
xlwt==1.3.0
|
|
274
281
|
# via hdx-python-utilities
|
|
@@ -81,15 +81,12 @@ class BaseScraper(ABC):
|
|
|
81
81
|
self.sources: Dict[str, List] = {level: [] for level in self.headers}
|
|
82
82
|
self.source_configuration = deepcopy(source_configuration)
|
|
83
83
|
|
|
84
|
-
def get_reader(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"""Get reader given name if provided or using name member variable if not.
|
|
88
|
-
Set reader prefix to given prefix or name if not provided.
|
|
84
|
+
def get_reader(self, name: Optional[str] = None):
|
|
85
|
+
"""Get reader given name if provided or using name member variable if
|
|
86
|
+
not.
|
|
89
87
|
|
|
90
88
|
Args:
|
|
91
89
|
name (str): Name of scraper
|
|
92
|
-
prefix (Optional[str]): Prefix to use. Defaults to None (use scraper name).
|
|
93
90
|
|
|
94
91
|
Returns:
|
|
95
92
|
None
|
|
@@ -97,9 +94,6 @@ class BaseScraper(ABC):
|
|
|
97
94
|
if not name:
|
|
98
95
|
name = self.name
|
|
99
96
|
reader = Read.get_reader(name)
|
|
100
|
-
if not prefix:
|
|
101
|
-
prefix = name
|
|
102
|
-
reader.prefix = prefix
|
|
103
97
|
return reader
|
|
104
98
|
|
|
105
99
|
def get_headers(self, level: str) -> Optional[Tuple[Tuple]]:
|
|
@@ -362,6 +356,24 @@ class BaseScraper(ABC):
|
|
|
362
356
|
Returns:
|
|
363
357
|
Optional[Dict]: HAPI resource metadata
|
|
364
358
|
"""
|
|
359
|
+
hapi_resource_metadata = self.datasetinfo.get("hapi_resource_metadata")
|
|
360
|
+
if not hapi_resource_metadata:
|
|
361
|
+
return None
|
|
362
|
+
if "is_hxl" in hapi_resource_metadata:
|
|
363
|
+
return hapi_resource_metadata
|
|
364
|
+
reader = self.get_reader(self.name)
|
|
365
|
+
filename = self.datasetinfo.get("filename")
|
|
366
|
+
hxl_info = reader.hxl_info_hapi_resource_metadata(
|
|
367
|
+
hapi_resource_metadata, filename=filename, file_prefix=self.name
|
|
368
|
+
)
|
|
369
|
+
is_hxl = False
|
|
370
|
+
if hxl_info:
|
|
371
|
+
for sheet in hxl_info.get("sheets", ()):
|
|
372
|
+
if sheet["is_hxlated"]:
|
|
373
|
+
is_hxl = True
|
|
374
|
+
break
|
|
375
|
+
hapi_resource_metadata["is_hxl"] = is_hxl
|
|
376
|
+
|
|
365
377
|
return self.datasetinfo.get("hapi_resource_metadata")
|
|
366
378
|
|
|
367
379
|
def add_population(self) -> None:
|
|
@@ -33,7 +33,7 @@ class ResourceDownloader(BaseScraper):
|
|
|
33
33
|
"""
|
|
34
34
|
reader = self.get_reader("hdx")
|
|
35
35
|
resource = reader.read_hdx_metadata(self.datasetinfo)
|
|
36
|
-
url, path = reader.download_resource(self.name
|
|
36
|
+
url, path = reader.download_resource(resource, file_prefix=self.name)
|
|
37
37
|
logger.info(f"Downloading {url} to {path}")
|
|
38
38
|
copy2(path, join(self.folder, self.datasetinfo["filename"]))
|
|
39
39
|
|
{hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/timeseries.py
RENAMED
|
@@ -50,7 +50,9 @@ class TimeSeries(BaseScraper):
|
|
|
50
50
|
"output_hxl"
|
|
51
51
|
]
|
|
52
52
|
rows = [headers, hxltags]
|
|
53
|
-
file_headers, iterator = self.get_reader().read(
|
|
53
|
+
file_headers, iterator = self.get_reader(self.name).read(
|
|
54
|
+
self.datasetinfo, file_prefix=self.name
|
|
55
|
+
)
|
|
54
56
|
for inrow in iterator:
|
|
55
57
|
if isinstance(datecol, list):
|
|
56
58
|
dates = [str(inrow[x]) for x in datecol]
|
|
@@ -1145,7 +1145,9 @@ class Runner:
|
|
|
1145
1145
|
return sorted(source_urls)
|
|
1146
1146
|
|
|
1147
1147
|
def get_hapi_metadata(
|
|
1148
|
-
self,
|
|
1148
|
+
self,
|
|
1149
|
+
names: Optional[ListTuple[str]] = None,
|
|
1150
|
+
has_run: bool = True,
|
|
1149
1151
|
) -> Dict:
|
|
1150
1152
|
"""Get HAPI metadata for all datasets. A dictionary is returned that
|
|
1151
1153
|
maps from dataset ids to a dictionary. The dictionary has keys for
|
|
@@ -1154,6 +1156,7 @@ class Runner:
|
|
|
1154
1156
|
|
|
1155
1157
|
Args:
|
|
1156
1158
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
1159
|
+
has_run (bool): Only get results for scrapers marked as having run. Defaults to True.
|
|
1157
1160
|
|
|
1158
1161
|
Returns:
|
|
1159
1162
|
Dict: HAPI metadata for all datasets
|
|
@@ -1163,7 +1166,7 @@ class Runner:
|
|
|
1163
1166
|
results = {}
|
|
1164
1167
|
for name in names:
|
|
1165
1168
|
scraper = self.get_scraper(name)
|
|
1166
|
-
if not scraper.has_run:
|
|
1169
|
+
if has_run and not scraper.has_run:
|
|
1167
1170
|
continue
|
|
1168
1171
|
hapi_dataset_metadata = scraper.get_hapi_dataset_metadata()
|
|
1169
1172
|
hapi_resource_metadata = scraper.get_hapi_resource_metadata()
|