hdx-python-scraper 2.6.0__tar.gz → 2.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/PKG-INFO +2 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/pyproject.toml +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/requirements.txt +4 -4
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/_version.py +2 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/base_scraper.py +8 -8
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/base.py +3 -3
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/excelfile.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/googlesheets.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/json.py +5 -5
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/runner.py +39 -39
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/aggregator.py +4 -4
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/configurable_scraper.py +4 -4
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/resource_downloader.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/rowparser.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/timeseries.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/__init__.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/fallbacks.py +2 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/hapi_admins.py +42 -6
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/lookup.py +49 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/org_type.py +2 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/reader.py +22 -22
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/region_lookup.py +1 -1
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/sector.py +2 -2
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/sector_configuration.yaml +1 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/sources.py +10 -10
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/writer.py +31 -31
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_hapi_admins.py +45 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.config/pre-commit-config.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/.gitignore +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/CONTRIBUTING.md +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/LICENSE +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/README.md +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/documentation/main.md +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/scrapers/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/utilities/org_type_configuration.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/config/project_configuration.yaml +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/download-global-pcode-lengths.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/download-global-pcodes-adm-1-2.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/global-coordination-groups-beta.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/org_type_organization_types_beta_csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/organization-types-beta.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/sector_global_coordination_groups_beta_csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/conftest.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/custom/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/custom/affected_targeted_reached.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/custom/education_closures.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/custom/education_enrolment.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/custom/test_custom.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/outputs/test_output.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_aggregation.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_appenddata.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_global.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_national.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_subnational.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/test_timeseries.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/scrapers/unhcr_myanmar_idps.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/test_runner.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_lookup.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_readers.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_sources.py +0 -0
- {hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/tests/hdx/scraper/framework/utilities/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.6.0
|
|
3
|
+
Version: 2.6.2
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,7 +26,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.3.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.3.8
|
|
30
30
|
Requires-Dist: hdx-python-country>=3.8.8
|
|
31
31
|
Requires-Dist: hdx-python-utilities>=3.8.3
|
|
32
32
|
Requires-Dist: regex
|
|
@@ -52,7 +52,7 @@ google-auth-oauthlib==1.2.1
|
|
|
52
52
|
# via gspread
|
|
53
53
|
gspread==6.1.4
|
|
54
54
|
# via hdx-python-scraper (pyproject.toml)
|
|
55
|
-
hdx-python-api==6.3.
|
|
55
|
+
hdx-python-api==6.3.8
|
|
56
56
|
# via hdx-python-scraper (pyproject.toml)
|
|
57
57
|
hdx-python-country==3.8.8
|
|
58
58
|
# via
|
|
@@ -63,7 +63,7 @@ hdx-python-utilities==3.8.3
|
|
|
63
63
|
# hdx-python-scraper (pyproject.toml)
|
|
64
64
|
# hdx-python-api
|
|
65
65
|
# hdx-python-country
|
|
66
|
-
humanize==4.
|
|
66
|
+
humanize==4.12.0
|
|
67
67
|
# via frictionless
|
|
68
68
|
identify==2.6.7
|
|
69
69
|
# via pre-commit
|
|
@@ -218,7 +218,7 @@ shellingham==1.5.4
|
|
|
218
218
|
# via typer
|
|
219
219
|
simpleeval==1.0.3
|
|
220
220
|
# via frictionless
|
|
221
|
-
simplejson==3.
|
|
221
|
+
simplejson==3.20.1
|
|
222
222
|
# via ckanapi
|
|
223
223
|
six==1.17.0
|
|
224
224
|
# via
|
|
@@ -240,7 +240,7 @@ tenacity==9.0.0
|
|
|
240
240
|
# via hdx-python-country
|
|
241
241
|
text-unidecode==1.3
|
|
242
242
|
# via python-slugify
|
|
243
|
-
typeguard==4.4.
|
|
243
|
+
typeguard==4.4.2
|
|
244
244
|
# via inflect
|
|
245
245
|
typer==0.15.1
|
|
246
246
|
# via frictionless
|
{hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/base_scraper.py
RENAMED
|
@@ -15,8 +15,8 @@ class BaseScraper(ABC):
|
|
|
15
15
|
name (str): Name of scraper
|
|
16
16
|
datasetinfo (Dict): Information about dataset
|
|
17
17
|
headers (Dict[str, Tuple]): Headers to be output at each level_name
|
|
18
|
-
source_configuration (Dict): Configuration for sources.
|
|
19
|
-
reader (str): Reader to use.
|
|
18
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
19
|
+
reader (str): Reader to use. Default is "" (datasetinfo reader falling back on name).
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
population_lookup = {}
|
|
@@ -49,7 +49,7 @@ class BaseScraper(ABC):
|
|
|
49
49
|
|
|
50
50
|
Args:
|
|
51
51
|
headers (Dict[str, Tuple]): Headers to be output at each level_name
|
|
52
|
-
source_configuration (Dict): Configuration for sources.
|
|
52
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
53
53
|
|
|
54
54
|
Returns:
|
|
55
55
|
None
|
|
@@ -73,7 +73,7 @@ class BaseScraper(ABC):
|
|
|
73
73
|
{"national": [("#food-prices", "2022-07-15", "WFP", "https://data.humdata.org/dataset/global-wfp-food-prices"), ...]
|
|
74
74
|
|
|
75
75
|
Args:
|
|
76
|
-
source_configuration (Dict): Configuration for sources.
|
|
76
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
77
77
|
|
|
78
78
|
Returns:
|
|
79
79
|
None
|
|
@@ -252,8 +252,8 @@ class BaseScraper(ABC):
|
|
|
252
252
|
|
|
253
253
|
Args:
|
|
254
254
|
hxltag (str): HXL hashtag to use for source
|
|
255
|
-
datasetinfo (Optional[Dict]): Information about dataset.
|
|
256
|
-
key (Optional[str]): Key under which to add source.
|
|
255
|
+
datasetinfo (Optional[Dict]): Information about dataset. Default is None (use self.datasetinfo).
|
|
256
|
+
key (Optional[str]): Key under which to add source. Default is None (use scraper name).
|
|
257
257
|
|
|
258
258
|
Returns:
|
|
259
259
|
None
|
|
@@ -288,8 +288,8 @@ class BaseScraper(ABC):
|
|
|
288
288
|
|
|
289
289
|
Args:
|
|
290
290
|
hxltags (ListTuple[str]): HXL hashtags to use for sources
|
|
291
|
-
datasetinfo (Optional[Dict]): Information about dataset.
|
|
292
|
-
key (Optional[str]): Key under which to add source.
|
|
291
|
+
datasetinfo (Optional[Dict]): Information about dataset. Default is None (use self.datasetinfo).
|
|
292
|
+
key (Optional[str]): Key under which to add source. Default is None (use scraper name).
|
|
293
293
|
suffix_attributes (Optional[ListTuple]): List of suffix attributes to append to HXL hashtags eg. iso3 codes
|
|
294
294
|
|
|
295
295
|
Returns:
|
{hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/base.py
RENAMED
|
@@ -29,7 +29,7 @@ class BaseOutput:
|
|
|
29
29
|
Args:
|
|
30
30
|
tabname (str): Tab to update
|
|
31
31
|
values (Union[List, DataFrame]): Values in a list of lists or a DataFrame
|
|
32
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
32
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
33
33
|
**kwargs (Any): Keyword arguments
|
|
34
34
|
|
|
35
35
|
Returns:
|
|
@@ -57,7 +57,7 @@ class BaseOutput:
|
|
|
57
57
|
Args:
|
|
58
58
|
key (str): Key in JSON to update
|
|
59
59
|
df (DataFrame): Dataframe containing rows
|
|
60
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
60
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
61
61
|
|
|
62
62
|
Returns:
|
|
63
63
|
None
|
|
@@ -77,7 +77,7 @@ class BaseOutput:
|
|
|
77
77
|
key (str): Key to update
|
|
78
78
|
countryiso (str): Country to use as subkey
|
|
79
79
|
rows (List[Dict]): List of dictionaries
|
|
80
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
80
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
81
81
|
|
|
82
82
|
Returns:
|
|
83
83
|
None
|
{hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/excelfile.py
RENAMED
|
@@ -43,7 +43,7 @@ class ExcelFile(BaseOutput):
|
|
|
43
43
|
Args:
|
|
44
44
|
tabname (str): Tab to update
|
|
45
45
|
values (Union[List, DataFrame]): Values in a list of lists or a DataFrame
|
|
46
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
46
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
47
47
|
|
|
48
48
|
Returns:
|
|
49
49
|
None
|
|
@@ -60,7 +60,7 @@ class GoogleSheets(BaseOutput):
|
|
|
60
60
|
Args:
|
|
61
61
|
tabname (str): Tab to update
|
|
62
62
|
values (Union[List, DataFrame]): Values in a list of lists or a DataFrame
|
|
63
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
63
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
64
64
|
limit (Optional[int]): Maximum number of rows to output
|
|
65
65
|
|
|
66
66
|
Returns:
|
{hdx_python_scraper-2.6.0 → hdx_python_scraper-2.6.2}/src/hdx/scraper/framework/outputs/json.py
RENAMED
|
@@ -53,7 +53,7 @@ class JsonFile(BaseOutput):
|
|
|
53
53
|
Args:
|
|
54
54
|
key (str): Key in JSON to update
|
|
55
55
|
df (DataFrame): Dataframe containing rows
|
|
56
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
56
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
57
57
|
|
|
58
58
|
Returns:
|
|
59
59
|
None
|
|
@@ -75,7 +75,7 @@ class JsonFile(BaseOutput):
|
|
|
75
75
|
key (str): Key in JSON to update
|
|
76
76
|
countryiso (str): Country to use as subkey
|
|
77
77
|
rows (List[Dict]): List of dictionaries
|
|
78
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
78
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
79
79
|
|
|
80
80
|
Returns:
|
|
81
81
|
None
|
|
@@ -121,7 +121,7 @@ class JsonFile(BaseOutput):
|
|
|
121
121
|
Args:
|
|
122
122
|
key (str): Key in JSON to update
|
|
123
123
|
df (DataFrame): Dataframe containing rows
|
|
124
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
124
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
125
125
|
|
|
126
126
|
Returns:
|
|
127
127
|
None
|
|
@@ -147,7 +147,7 @@ class JsonFile(BaseOutput):
|
|
|
147
147
|
Args:
|
|
148
148
|
tabname (str): Tab to update
|
|
149
149
|
values (Union[List, DataFrame]): Values in a list of lists or a DataFrame
|
|
150
|
-
hxltags (Optional[Dict]): HXL tag mapping.
|
|
150
|
+
hxltags (Optional[Dict]): HXL tag mapping. Default is None.
|
|
151
151
|
|
|
152
152
|
Returns:
|
|
153
153
|
None
|
|
@@ -187,7 +187,7 @@ class JsonFile(BaseOutput):
|
|
|
187
187
|
"""Save JSON file and any additional subsets of that JSON defined in the additional configuration
|
|
188
188
|
|
|
189
189
|
Args:
|
|
190
|
-
folder (Optional[str]): Folder to save to.
|
|
190
|
+
folder (Optional[str]): Folder to save to. Default is None.
|
|
191
191
|
**kwargs: Variables to use when evaluating template arguments
|
|
192
192
|
|
|
193
193
|
Returns:
|
|
@@ -27,9 +27,9 @@ class Runner:
|
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
29
|
countryiso3s (ListTuple[str]): List of ISO3 country codes to process
|
|
30
|
-
today (datetime): Value to use for today.
|
|
30
|
+
today (datetime): Value to use for today. Default is now_utc().
|
|
31
31
|
error_handler (ErrorHandler): ErrorHandler object that logs errors on exit
|
|
32
|
-
scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run.
|
|
32
|
+
scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Default is None (all scrapers).
|
|
33
33
|
"""
|
|
34
34
|
|
|
35
35
|
def __init__(
|
|
@@ -117,12 +117,12 @@ class Runner:
|
|
|
117
117
|
name (str): Name of scraper
|
|
118
118
|
datasetinfo (Dict): Information about dataset
|
|
119
119
|
level (str): Can be national, subnational or single
|
|
120
|
-
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country.
|
|
121
|
-
level_name (Optional[str]): Customised level_name name.
|
|
122
|
-
source_configuration (Dict): Configuration for sources.
|
|
120
|
+
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country. Default is None.
|
|
121
|
+
level_name (Optional[str]): Customised level_name name. Default is None (level_name).
|
|
122
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
123
123
|
suffix (Optional[str]): Suffix to add to the scraper name
|
|
124
124
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
125
|
-
countryiso3s (Optional[List[str]]): Override list of country iso3s.
|
|
125
|
+
countryiso3s (Optional[List[str]]): Override list of country iso3s. Default is None.
|
|
126
126
|
|
|
127
127
|
Returns:
|
|
128
128
|
str: scraper name (including suffix if set)
|
|
@@ -173,12 +173,12 @@ class Runner:
|
|
|
173
173
|
Args:
|
|
174
174
|
configuration (Dict): Mapping from scraper name to information about datasets
|
|
175
175
|
level (str): Can be national, subnational or single
|
|
176
|
-
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country.
|
|
177
|
-
level_name (Optional[str]): Customised level_name name.
|
|
178
|
-
source_configuration (Dict): Configuration for sources.
|
|
176
|
+
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country. Default is None.
|
|
177
|
+
level_name (Optional[str]): Customised level_name name. Default is None (level_name).
|
|
178
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
179
179
|
suffix (Optional[str]): Suffix to add to the scraper name
|
|
180
180
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
181
|
-
countryiso3s (Optional[List[str]]): Override list of country iso3s.
|
|
181
|
+
countryiso3s (Optional[List[str]]): Override list of country iso3s. Default is None.
|
|
182
182
|
|
|
183
183
|
Returns:
|
|
184
184
|
List[str]: scraper names (including suffix if set)
|
|
@@ -277,10 +277,10 @@ class Runner:
|
|
|
277
277
|
input_level (str): Input level to aggregate like national or subnational
|
|
278
278
|
output_level (str): Output level of aggregated data like regional
|
|
279
279
|
adm_aggregation (Union[Dict, List]): Mapping from input admins to aggregated output admins
|
|
280
|
-
source_configuration (Dict): Configuration for sources.
|
|
281
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
282
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
283
|
-
aggregation_scrapers (List["Aggregator"]): Other aggregations needed.
|
|
280
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
281
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None.
|
|
282
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
283
|
+
aggregation_scrapers (List["Aggregator"]): Other aggregations needed. Default is [].
|
|
284
284
|
|
|
285
285
|
Returns:
|
|
286
286
|
Optional["Aggregator"]: scraper or None
|
|
@@ -347,10 +347,10 @@ class Runner:
|
|
|
347
347
|
input_level (str): Input level to aggregate like national or subnational
|
|
348
348
|
output_level (str): Output level of aggregated data like regional
|
|
349
349
|
adm_aggregation (Union[Dict, List]): Mapping from input admins to aggregated output admins
|
|
350
|
-
source_configuration (Dict): Configuration for sources.
|
|
351
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
352
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
353
|
-
aggregation_scrapers (List["Aggregator"]): Other aggregations needed.
|
|
350
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
351
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None.
|
|
352
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
353
|
+
aggregation_scrapers (List["Aggregator"]): Other aggregations needed. Default is [].
|
|
354
354
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
355
355
|
|
|
356
356
|
Returns:
|
|
@@ -398,9 +398,9 @@ class Runner:
|
|
|
398
398
|
input_level (str): Input level to aggregate like national or subnational
|
|
399
399
|
output_level (str): Output level of aggregated data like regional
|
|
400
400
|
adm_aggregation (Union[Dict, ListTuple]): Mapping from input admins to aggregated output admins
|
|
401
|
-
source_configuration (Dict): Configuration for sources.
|
|
401
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
402
402
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
403
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
403
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
404
404
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
405
405
|
|
|
406
406
|
Returns:
|
|
@@ -437,7 +437,7 @@ class Runner:
|
|
|
437
437
|
|
|
438
438
|
Args:
|
|
439
439
|
datasetinfo (Dict): Information about dataset
|
|
440
|
-
folder (str): Folder to which to download.
|
|
440
|
+
folder (str): Folder to which to download. Default is "".
|
|
441
441
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
442
442
|
|
|
443
443
|
Returns:
|
|
@@ -460,7 +460,7 @@ class Runner:
|
|
|
460
460
|
|
|
461
461
|
Args:
|
|
462
462
|
configuration (Dict): Mapping from scraper name to information about datasets
|
|
463
|
-
folder (str): Folder to which to download.
|
|
463
|
+
folder (str): Folder to which to download. Default is "".
|
|
464
464
|
force_add_to_run (bool): Whether to force include the scraper in the next run
|
|
465
465
|
|
|
466
466
|
Returns:
|
|
@@ -662,9 +662,9 @@ class Runner:
|
|
|
662
662
|
are run first. If scraper run fails and fallbacks have been set up, use them.
|
|
663
663
|
|
|
664
664
|
Args:
|
|
665
|
-
what_to_run (Optional[ListTuple[str]]): Run only these scrapers.
|
|
665
|
+
what_to_run (Optional[ListTuple[str]]): Run only these scrapers. Default is None (run all).
|
|
666
666
|
force_run (bool): Force run even if any scraper marked as already run
|
|
667
|
-
prioritise_scrapers (Optional[ListTuple[str]]): Scrapers to run first.
|
|
667
|
+
prioritise_scrapers (Optional[ListTuple[str]]): Scrapers to run first. Default is None.
|
|
668
668
|
|
|
669
669
|
Returns:
|
|
670
670
|
None
|
|
@@ -718,7 +718,7 @@ class Runner:
|
|
|
718
718
|
levels (Optional[ListTuple[str]]): Levels to get like national, subnational or single
|
|
719
719
|
headers (Optional[ListTuple[str]]): Headers to get
|
|
720
720
|
hxltags (Optional[ListTuple[str]]): HXL hashtags to get
|
|
721
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
721
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
722
722
|
|
|
723
723
|
|
|
724
724
|
Returns:
|
|
@@ -789,11 +789,11 @@ class Runner:
|
|
|
789
789
|
lists.
|
|
790
790
|
|
|
791
791
|
Args:
|
|
792
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
792
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None (all scrapers).
|
|
793
793
|
levels (Optional[ListTuple[str]]): Levels to get like national, subnational or single
|
|
794
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
795
|
-
has_run (bool): Only get results for scrapers marked as having run.
|
|
796
|
-
should_overwrite_sources (Optional[bool]): Whether to overwrite sources.
|
|
794
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
795
|
+
has_run (bool): Only get results for scrapers marked as having run. Default is True.
|
|
796
|
+
should_overwrite_sources (Optional[bool]): Whether to overwrite sources. Default is None (class default).
|
|
797
797
|
|
|
798
798
|
Returns:
|
|
799
799
|
Dict[str, Dict]: Results dictionary that maps each level to headers, values, sources, fallbacks.
|
|
@@ -897,8 +897,8 @@ class Runner:
|
|
|
897
897
|
adms (ListTuple[str]): Admin units
|
|
898
898
|
headers (ListTuple[ListTuple]): Additional headers in the form (list of headers, list of HXL hashtags)
|
|
899
899
|
row_fns (ListTuple[Callable[[str], str]]): Functions to populate additional columns
|
|
900
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
901
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
900
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None (all scrapers).
|
|
901
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
902
902
|
|
|
903
903
|
Returns:
|
|
904
904
|
List[List]: Rows for a given level
|
|
@@ -938,10 +938,10 @@ class Runner:
|
|
|
938
938
|
|
|
939
939
|
Args:
|
|
940
940
|
level (str): Level to get like national, subnational or single
|
|
941
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
942
|
-
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings.
|
|
943
|
-
has_run (bool): Only get results for scrapers marked as having run.
|
|
944
|
-
use_hxl (bool): Whether keys should be HXL hashtags or column headers.
|
|
941
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None (all scrapers).
|
|
942
|
+
overrides (Dict[str, Dict]): Dictionary mapping scrapers to level mappings. Default is {}.
|
|
943
|
+
has_run (bool): Only get results for scrapers marked as having run. Default is True.
|
|
944
|
+
use_hxl (bool): Whether keys should be HXL hashtags or column headers. Default is True.
|
|
945
945
|
|
|
946
946
|
Returns:
|
|
947
947
|
Tuple[Dict, Dict]: Tuple of (headers to values, headers to sources)
|
|
@@ -1059,7 +1059,7 @@ class Runner:
|
|
|
1059
1059
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
1060
1060
|
levels (Optional[ListTuple[str]]): Levels to get like national, subnational or single
|
|
1061
1061
|
additional_sources (ListTuple[Dict]): Additional sources to add
|
|
1062
|
-
should_overwrite_sources (Optional[bool]): Whether to overwrite sources.
|
|
1062
|
+
should_overwrite_sources (Optional[bool]): Whether to overwrite sources. Default is None (class default).
|
|
1063
1063
|
|
|
1064
1064
|
Returns:
|
|
1065
1065
|
List[Tuple]: Sources in form (indicator, date, source, source_url)
|
|
@@ -1178,7 +1178,7 @@ class Runner:
|
|
|
1178
1178
|
|
|
1179
1179
|
Args:
|
|
1180
1180
|
names (Optional[ListTuple[str]]): Names of scrapers
|
|
1181
|
-
has_run (bool): Only get results for scrapers marked as having run.
|
|
1181
|
+
has_run (bool): Only get results for scrapers marked as having run. Default is True.
|
|
1182
1182
|
|
|
1183
1183
|
Returns:
|
|
1184
1184
|
Dict: HAPI metadata for all datasets
|
|
@@ -1232,8 +1232,8 @@ class Runner:
|
|
|
1232
1232
|
metadata is a dictionary.
|
|
1233
1233
|
|
|
1234
1234
|
Args:
|
|
1235
|
-
names (Optional[ListTuple[str]]): Names of scrapers.
|
|
1236
|
-
has_run (bool): Only get results for scrapers marked as having run.
|
|
1235
|
+
names (Optional[ListTuple[str]]): Names of scrapers. Default is None (all scrapers).
|
|
1236
|
+
has_run (bool): Only get results for scrapers marked as having run. Default is True.
|
|
1237
1237
|
|
|
1238
1238
|
Returns:
|
|
1239
1239
|
Dict: Headers and values per admin level and HAPI metadata for all datasets
|
|
@@ -36,8 +36,8 @@ class Aggregator(BaseScraper):
|
|
|
36
36
|
adm_aggregation (Union[Dict, ListTuple]): Mapping from input admins to aggregated output admins
|
|
37
37
|
headers (Dict[str, Tuple]): Column headers and HXL hashtags
|
|
38
38
|
use_hxl (bool): Whether to map from headers or from HXL tags
|
|
39
|
-
source_configuration (Dict): Configuration for sources.
|
|
40
|
-
aggregation_scrapers (List["Aggregator"]): Other aggregations needed.
|
|
39
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
40
|
+
aggregation_scrapers (List["Aggregator"]): Other aggregations needed. Default is [].
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
43
|
def __init__(
|
|
@@ -95,8 +95,8 @@ class Aggregator(BaseScraper):
|
|
|
95
95
|
adm_aggregation (Union[Dict, ListTuple]): Mapping from input admins to aggregated output admins
|
|
96
96
|
input_headers (Tuple[ListTuple, ListTuple]): Column headers and HXL hashtags
|
|
97
97
|
runner(Runner): Runner object
|
|
98
|
-
source_configuration (Dict): Configuration for sources.
|
|
99
|
-
aggregation_scrapers (List["Aggregator"]): Other aggregations needed.
|
|
98
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
99
|
+
aggregation_scrapers (List["Aggregator"]): Other aggregations needed. Default is [].
|
|
100
100
|
|
|
101
101
|
Returns:
|
|
102
102
|
Optional["Aggregator"]: The aggregation scraper or None if it couldn't be created
|
|
@@ -38,10 +38,10 @@ class ConfigurableScraper(BaseScraper):
|
|
|
38
38
|
datasetinfo (Dict): Information about dataset
|
|
39
39
|
level (str): Can be national, subnational or single
|
|
40
40
|
countryiso3s (List[str]): List of ISO3 country codes to process
|
|
41
|
-
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country.
|
|
42
|
-
level_name (Optional[str]): Customised level_name name.
|
|
43
|
-
source_configuration (Dict): Configuration for sources.
|
|
44
|
-
today (datetime): Value to use for today.
|
|
41
|
+
adminlevel (Optional[AdminLevel]): AdminLevel object from HDX Python Country. Default is None.
|
|
42
|
+
level_name (Optional[str]): Customised level name. Default is None (level).
|
|
43
|
+
source_configuration (Dict): Configuration for sources. Default is empty dict (use defaults).
|
|
44
|
+
today (datetime): Value to use for today. Default is now_utc().
|
|
45
45
|
error_handler (Optional[ErrorHandler]): ErrorHandler object that logs errors on exit
|
|
46
46
|
**kwargs: Variables to use when evaluating template arguments in urls
|
|
47
47
|
"""
|
|
@@ -16,7 +16,7 @@ class ResourceDownloader(BaseScraper):
|
|
|
16
16
|
|
|
17
17
|
Args:
|
|
18
18
|
datasetinfo (Dict): Information about dataset
|
|
19
|
-
folder (str): Folder to which to download.
|
|
19
|
+
folder (str): Folder to which to download. Default is "".
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
def __init__(self, datasetinfo, folder):
|
|
@@ -30,7 +30,7 @@ class RowParser:
|
|
|
30
30
|
headers (List[str]): Row headers
|
|
31
31
|
header_to_hxltag (Optional[Dict[str, str]]): Mapping from headers to HXL hashtags or None
|
|
32
32
|
subsets (List[Dict]): List of subset definitions
|
|
33
|
-
maxdateonly (bool): Whether to only take the most recent date.
|
|
33
|
+
maxdateonly (bool): Whether to only take the most recent date. Default is True.
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
36
|
def __init__(
|
|
@@ -19,7 +19,7 @@ class TimeSeries(BaseScraper):
|
|
|
19
19
|
name (str): Name of scraper
|
|
20
20
|
datasetinfo (Dict): Information about dataset
|
|
21
21
|
outputs (Dict[str, BaseOutput]): Mapping from names to output objects
|
|
22
|
-
today (datetime): Value to use for today.
|
|
22
|
+
today (datetime): Value to use for today. Default is now_utc().
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
def __init__(
|
|
@@ -64,7 +64,7 @@ def get_startend_dates_from_time_period(
|
|
|
64
64
|
|
|
65
65
|
Args:
|
|
66
66
|
dataset (Dataset): Dataset object
|
|
67
|
-
today (Optional[datetime]): Date to use for today.
|
|
67
|
+
today (Optional[datetime]): Date to use for today. Default is None (datetime.utcnow).
|
|
68
68
|
|
|
69
69
|
Returns:
|
|
70
70
|
Optional[Dict]: Time period in form required for source_date
|
|
@@ -52,7 +52,7 @@ class Fallbacks:
|
|
|
52
52
|
Args:
|
|
53
53
|
fallbacks_path (str): Path to JSON fallbacks file
|
|
54
54
|
levels_mapping (Dict[str,str]): Map keys from file to levels. Defaults in description.
|
|
55
|
-
sources_key (str): Key to use for sources.
|
|
55
|
+
sources_key (str): Key to use for sources. Default is "sources".
|
|
56
56
|
admin_name_mapping: HXL hashtags for different admin levels. Defaults in description.
|
|
57
57
|
|
|
58
58
|
Returns:
|
|
@@ -86,7 +86,7 @@ class Fallbacks:
|
|
|
86
86
|
Args:
|
|
87
87
|
fallbacks_path (str): Path to JSON fallbacks file
|
|
88
88
|
levels_mapping (Dict[str,str]): Map keys from file to levels. Defaults in description.
|
|
89
|
-
sources_key (str): Key to use for sources.
|
|
89
|
+
sources_key (str): Key to use for sources. Default is "sources".
|
|
90
90
|
admin_name_mapping: HXL hashtags for different admin levels. Defaults in description.
|
|
91
91
|
|
|
92
92
|
Returns:
|
|
@@ -9,21 +9,45 @@ def complete_admins(
|
|
|
9
9
|
provider_adm_names: List,
|
|
10
10
|
adm_codes: List,
|
|
11
11
|
adm_names: List,
|
|
12
|
+
fuzzy_match: bool = True,
|
|
12
13
|
) -> Tuple[int, List[str]]:
|
|
14
|
+
"""Use information from adm_codes to populate adm_names and from
|
|
15
|
+
provider_adm_names to populate adm_codes with output of the admin level
|
|
16
|
+
and warnings for unknown and mismatched p-codes. All provided lists
|
|
17
|
+
should be of the same length.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
admins (List[AdminLevel]): List of AdminLevel objects
|
|
21
|
+
countryiso3 (str): Country ISO3 code
|
|
22
|
+
provider_adm_names (List): List of provider adm names
|
|
23
|
+
adm_codes (List): List of adm codes
|
|
24
|
+
adm_names (List): List of adm names
|
|
25
|
+
fuzzy_match (bool): Whether to use fuzzy matching. Default is True.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
Tuple[int, List[str]]: Admin level and warnings
|
|
29
|
+
"""
|
|
30
|
+
|
|
13
31
|
warnings = []
|
|
14
|
-
|
|
32
|
+
child = None
|
|
15
33
|
adm_level = len(provider_adm_names)
|
|
16
34
|
for i, provider_adm_name in reversed(list(enumerate(provider_adm_names))):
|
|
17
35
|
adm_code = adm_codes[i]
|
|
18
36
|
if not provider_adm_name:
|
|
19
37
|
provider_adm_name = ""
|
|
20
38
|
provider_adm_names[i] = ""
|
|
21
|
-
if
|
|
22
|
-
pcode = admins[i + 1].pcode_to_parent.get(
|
|
39
|
+
if child:
|
|
40
|
+
pcode = admins[i + 1].pcode_to_parent.get(child)
|
|
23
41
|
warntxt = "parent"
|
|
24
42
|
elif provider_adm_name:
|
|
43
|
+
parent = admins[i].pcode_to_parent.get(adm_code)
|
|
44
|
+
if not parent and i > 0:
|
|
45
|
+
parent = adm_codes[i - 1]
|
|
25
46
|
pcode, _ = admins[i].get_pcode(
|
|
26
|
-
countryiso3,
|
|
47
|
+
countryiso3,
|
|
48
|
+
provider_adm_name,
|
|
49
|
+
parent=parent,
|
|
50
|
+
fuzzy_match=fuzzy_match,
|
|
27
51
|
)
|
|
28
52
|
warntxt = f"provider_adm{i + 1}_name"
|
|
29
53
|
else:
|
|
@@ -39,7 +63,7 @@ def complete_admins(
|
|
|
39
63
|
warnings.append(f"PCode unknown {adm_code}->''")
|
|
40
64
|
adm_code = ""
|
|
41
65
|
elif pcode and adm_code != pcode:
|
|
42
|
-
if
|
|
66
|
+
if child:
|
|
43
67
|
warnings.append(
|
|
44
68
|
f"PCode mismatch {adm_code}->{pcode} ({warntxt})"
|
|
45
69
|
)
|
|
@@ -55,7 +79,7 @@ def complete_admins(
|
|
|
55
79
|
adm_codes[i] = adm_code
|
|
56
80
|
if adm_code:
|
|
57
81
|
adm_names[i] = admins[i].pcode_to_name.get(adm_code, "")
|
|
58
|
-
|
|
82
|
+
child = adm_code
|
|
59
83
|
else:
|
|
60
84
|
adm_names[i] = ""
|
|
61
85
|
if provider_adm_name == "":
|
|
@@ -69,6 +93,18 @@ def pad_admins(
|
|
|
69
93
|
adm_names: List[str],
|
|
70
94
|
adm_level: int = 2,
|
|
71
95
|
) -> None:
|
|
96
|
+
"""Pad lists to size given in adm_level adding as many "" as needed.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
provider_adm_names (List): List of provider adm names
|
|
100
|
+
adm_codes (List): List of adm codes
|
|
101
|
+
adm_names (List): List of adm names
|
|
102
|
+
adm_level (int): Admin level to which to pad. Default is 2.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
None
|
|
106
|
+
"""
|
|
107
|
+
|
|
72
108
|
for i in range(len(provider_adm_names), adm_level):
|
|
73
109
|
provider_adm_names.append("")
|
|
74
110
|
adm_codes.append("")
|