hdx-python-scraper 2.4.1__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.config/pre-commit-config.yaml +3 -3
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/CONTRIBUTING.md +2 -2
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/PKG-INFO +5 -5
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/documentation/main.md +4 -2
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/documentation/pydoc-markdown.yaml +11 -14
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/pyproject.toml +10 -4
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/requirements.txt +64 -77
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/_version.py +2 -2
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/base_scraper.py +1 -1
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/runner.py +12 -12
- hdx_python_scraper-2.4.1/src/hdx/scraper/configurable/scraper.py → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers/configurable_scraper.py +5 -5
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework}/conftest.py +4 -4
- hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/custom/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/custom}/affected_targeted_reached.py +2 -2
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/custom}/education_closures.py +1 -1
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/custom}/education_enrolment.py +1 -1
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_custom.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/custom/test_custom.py +5 -5
- hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/outputs}/test_output.py +4 -4
- hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_aggregation.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_aggregation.py +5 -5
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_appenddata.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_appenddata.py +7 -7
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_global.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_global.py +4 -4
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_multipleurls.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +4 -4
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_national.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_national.py +17 -17
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_regionaltoplevel.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +7 -7
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_resource_downloaders.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +3 -3
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_subnational.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_subnational.py +6 -6
- hdx_python_scraper-2.4.1/tests/hdx/scraper/test_scrapers_timeseries.py → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers/test_timeseries.py +4 -4
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/scrapers}/unhcr_myanmar_idps.py +2 -2
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework}/test_runner.py +2 -2
- hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/utilities}/test_readers.py +1 -1
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/utilities}/test_regionlookup.py +1 -1
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/utilities}/test_sources.py +5 -3
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework/utilities}/test_utils.py +1 -1
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/.gitignore +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/LICENSE +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/README.md +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/configurable → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/outputs}/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/outputs/base.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/outputs/json.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/outputs → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers}/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/configurable → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers}/aggregator.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/configurable → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers}/resource_downloader.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/configurable → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers}/rowparser.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper/configurable → hdx_python_scraper-2.5.1/src/hdx/scraper/framework/scrapers}/timeseries.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/reader.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/sources.py +0 -0
- {hdx_python_scraper-2.4.1/src/hdx/scraper → hdx_python_scraper-2.5.1/src/hdx/scraper/framework}/utilities/writer.py +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/config/project_configuration.yaml +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.4.1 → hdx_python_scraper-2.5.1}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.4.1/tests/hdx/scraper → hdx_python_scraper-2.5.1/tests/hdx/scraper/framework}/__init__.py +0 -0
|
@@ -2,14 +2,14 @@ default_language_version:
|
|
|
2
2
|
python: python3.12
|
|
3
3
|
repos:
|
|
4
4
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
5
|
-
rev:
|
|
5
|
+
rev: v5.0.0
|
|
6
6
|
hooks:
|
|
7
7
|
- id: trailing-whitespace
|
|
8
8
|
- id: end-of-file-fixer
|
|
9
9
|
exclude: test_scraper_.*\.json
|
|
10
10
|
- id: check-ast
|
|
11
11
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
12
|
-
rev: v0.
|
|
12
|
+
rev: v0.8.6
|
|
13
13
|
hooks:
|
|
14
14
|
# Run the linter.
|
|
15
15
|
- id: ruff
|
|
@@ -18,7 +18,7 @@ repos:
|
|
|
18
18
|
- id: ruff-format
|
|
19
19
|
args: [--config, .config/ruff.toml]
|
|
20
20
|
- repo: https://github.com/astral-sh/uv-pre-commit
|
|
21
|
-
rev: 0.
|
|
21
|
+
rev: 0.5.15
|
|
22
22
|
hooks:
|
|
23
23
|
# Run the pip compile
|
|
24
24
|
- id: pip-compile
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
## Environment
|
|
4
4
|
|
|
5
|
-
Development is currently done using Python 3.
|
|
5
|
+
Development is currently done using Python 3.12. We recommend using a virtual
|
|
6
6
|
environment such as ``venv``:
|
|
7
7
|
|
|
8
|
-
python3.
|
|
8
|
+
python3.12 -m venv venv
|
|
9
9
|
source venv/bin/activate
|
|
10
10
|
|
|
11
11
|
In your virtual environment, please install all packages for
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,9 +26,9 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.3.
|
|
30
|
-
Requires-Dist: hdx-python-country>=3.
|
|
31
|
-
Requires-Dist: hdx-python-utilities>=3.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.3.6
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.8.6
|
|
31
|
+
Requires-Dist: hdx-python-utilities>=3.8.0
|
|
32
32
|
Requires-Dist: regex
|
|
33
33
|
Provides-Extra: dev
|
|
34
34
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
@@ -26,6 +26,8 @@ install with:
|
|
|
26
26
|
pip install hdx-python-scraper[pandas]
|
|
27
27
|
|
|
28
28
|
## Breaking Changes
|
|
29
|
+
From 2.5.0, package names have changed to avoid namespace clashes (`hdx.scraper` is now `hdx.scraper.framework`).
|
|
30
|
+
|
|
29
31
|
From 2.3.0, resource name is used when available instead of creating name from
|
|
30
32
|
url so tests that use saved data from the Read class may break. file_type
|
|
31
33
|
parameters in various Read methods renamed to format.
|
|
@@ -335,7 +337,7 @@ default is:
|
|
|
335
337
|
### Custom Scrapers
|
|
336
338
|
|
|
337
339
|
It is possible to define custom scrapers written in Python which must inherit
|
|
338
|
-
[BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/base_scraper.py),
|
|
340
|
+
[BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/framework/base_scraper.py),
|
|
339
341
|
calling its constructor and providing a `run` method. Other methods where a default
|
|
340
342
|
implementation has been provided can be overridden such as `add_sources` and
|
|
341
343
|
`add_population`. There are also two hooks for running steps at particular points.
|
|
@@ -392,7 +394,7 @@ from regions to values. In this case, since national and regional each have only
|
|
|
392
394
|
header and HXL hashtag, there is only one dictionary to populate for each.
|
|
393
395
|
|
|
394
396
|
An example of a custom scraper can be seen
|
|
395
|
-
[here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/education_closures.py).
|
|
397
|
+
[here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/framework/custom/education_closures.py).
|
|
396
398
|
|
|
397
399
|
An example of overriding `add_sources` to customise the source information that is
|
|
398
400
|
output is as follows:
|
|
@@ -3,7 +3,7 @@ loaders:
|
|
|
3
3
|
search_path:
|
|
4
4
|
- ../src
|
|
5
5
|
packages:
|
|
6
|
-
- hdx.scraper
|
|
6
|
+
- hdx.scraper.framework
|
|
7
7
|
renderer:
|
|
8
8
|
type: mkdocs
|
|
9
9
|
output_directory: docs
|
|
@@ -21,21 +21,18 @@ renderer:
|
|
|
21
21
|
source: main.md
|
|
22
22
|
- title: API Documentation
|
|
23
23
|
children:
|
|
24
|
-
- title:
|
|
24
|
+
- title: Runner
|
|
25
25
|
contents:
|
|
26
|
-
- hdx.scraper.
|
|
27
|
-
- title:
|
|
26
|
+
- hdx.scraper.framework.runner.Runner.*
|
|
27
|
+
- title: Scraper Base Class
|
|
28
28
|
contents:
|
|
29
|
-
- hdx.scraper.
|
|
30
|
-
- title:
|
|
29
|
+
- hdx.scraper.framework.base_scraper.BaseScraper.*
|
|
30
|
+
- title: Scrapers
|
|
31
31
|
contents:
|
|
32
|
-
- hdx.scraper.
|
|
33
|
-
- title: Fallbacks
|
|
34
|
-
contents:
|
|
35
|
-
- hdx.scraper.utilities.fallbacks.Fallbacks
|
|
36
|
-
- title: Run Scrapers
|
|
37
|
-
contents:
|
|
38
|
-
- hdx.scraper.runner.Runner.*
|
|
32
|
+
- hdx.scraper.framework.scrapers.*
|
|
39
33
|
- title: Outputs
|
|
40
34
|
contents:
|
|
41
|
-
- hdx.scraper.outputs.*
|
|
35
|
+
- hdx.scraper.framework.outputs.*
|
|
36
|
+
- title: Utilities
|
|
37
|
+
contents:
|
|
38
|
+
- hdx.scraper.framework.utilities.*
|
|
@@ -34,9 +34,9 @@ classifiers = [
|
|
|
34
34
|
requires-python = ">=3.8"
|
|
35
35
|
|
|
36
36
|
dependencies = [
|
|
37
|
-
"hdx-python-api>=6.3.
|
|
38
|
-
"hdx-python-country>=3.
|
|
39
|
-
"hdx-python-utilities>=3.
|
|
37
|
+
"hdx-python-api>=6.3.6",
|
|
38
|
+
"hdx-python-country>=3.8.6",
|
|
39
|
+
"hdx-python-utilities>=3.8.0",
|
|
40
40
|
"gspread",
|
|
41
41
|
"regex",
|
|
42
42
|
]
|
|
@@ -65,7 +65,10 @@ dev = ["pre-commit"]
|
|
|
65
65
|
packages = ["src/hdx"]
|
|
66
66
|
|
|
67
67
|
[tool.hatch.build.hooks.vcs]
|
|
68
|
-
version-file = "src/hdx/scraper/_version.py"
|
|
68
|
+
version-file = "src/hdx/scraper/framework/_version.py"
|
|
69
|
+
|
|
70
|
+
[tool.hatch.metadata]
|
|
71
|
+
allow-direct-references = true
|
|
69
72
|
|
|
70
73
|
# Versioning
|
|
71
74
|
|
|
@@ -91,6 +94,9 @@ run = """
|
|
|
91
94
|
--cov-report=lcov --cov-report=term-missing
|
|
92
95
|
"""
|
|
93
96
|
|
|
97
|
+
[tool.hatch.envs.hatch-static-analysis]
|
|
98
|
+
dependencies = ["ruff==0.8.6"]
|
|
99
|
+
|
|
94
100
|
[tool.hatch.envs.hatch-static-analysis.scripts]
|
|
95
101
|
format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
|
|
96
102
|
format-fix = ["ruff format --config .config/ruff.toml {args:.}",]
|
|
@@ -2,37 +2,33 @@
|
|
|
2
2
|
# uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt
|
|
3
3
|
annotated-types==0.7.0
|
|
4
4
|
# via pydantic
|
|
5
|
-
attrs==
|
|
5
|
+
attrs==24.3.0
|
|
6
6
|
# via
|
|
7
7
|
# frictionless
|
|
8
8
|
# jsonlines
|
|
9
9
|
# jsonschema
|
|
10
10
|
# referencing
|
|
11
|
-
cachetools==5.
|
|
11
|
+
cachetools==5.5.0
|
|
12
12
|
# via google-auth
|
|
13
|
-
certifi==2024.
|
|
13
|
+
certifi==2024.12.14
|
|
14
14
|
# via requests
|
|
15
|
-
cffi==1.16.0
|
|
16
|
-
# via cryptography
|
|
17
15
|
cfgv==3.4.0
|
|
18
16
|
# via pre-commit
|
|
19
17
|
chardet==5.2.0
|
|
20
18
|
# via frictionless
|
|
21
|
-
charset-normalizer==3.
|
|
19
|
+
charset-normalizer==3.4.1
|
|
22
20
|
# via requests
|
|
23
21
|
ckanapi==4.8
|
|
24
22
|
# via hdx-python-api
|
|
25
|
-
click==8.1.
|
|
23
|
+
click==8.1.8
|
|
26
24
|
# via typer
|
|
27
|
-
coverage==7.6.
|
|
25
|
+
coverage==7.6.10
|
|
28
26
|
# via pytest-cov
|
|
29
|
-
cryptography==43.0.0
|
|
30
|
-
# via pyopenssl
|
|
31
27
|
defopt==6.4.0
|
|
32
28
|
# via hdx-python-api
|
|
33
|
-
distlib==0.3.
|
|
29
|
+
distlib==0.3.9
|
|
34
30
|
# via virtualenv
|
|
35
|
-
dnspython==2.
|
|
31
|
+
dnspython==2.7.0
|
|
36
32
|
# via email-validator
|
|
37
33
|
docopt==0.6.2
|
|
38
34
|
# via
|
|
@@ -42,96 +38,94 @@ docutils==0.21.2
|
|
|
42
38
|
# via defopt
|
|
43
39
|
email-validator==2.2.0
|
|
44
40
|
# via hdx-python-api
|
|
45
|
-
et-xmlfile==
|
|
41
|
+
et-xmlfile==2.0.0
|
|
46
42
|
# via openpyxl
|
|
47
|
-
filelock==3.
|
|
43
|
+
filelock==3.16.1
|
|
48
44
|
# via virtualenv
|
|
49
|
-
frictionless==5.
|
|
45
|
+
frictionless==5.18.0
|
|
50
46
|
# via hdx-python-utilities
|
|
51
|
-
google-auth==2.
|
|
47
|
+
google-auth==2.37.0
|
|
52
48
|
# via
|
|
53
49
|
# google-auth-oauthlib
|
|
54
50
|
# gspread
|
|
55
51
|
google-auth-oauthlib==1.2.1
|
|
56
52
|
# via gspread
|
|
57
|
-
gspread==6.1.
|
|
53
|
+
gspread==6.1.4
|
|
58
54
|
# via hdx-python-scraper (pyproject.toml)
|
|
59
|
-
hdx-python-api==6.3.
|
|
55
|
+
hdx-python-api==6.3.6
|
|
60
56
|
# via hdx-python-scraper (pyproject.toml)
|
|
61
|
-
hdx-python-country==3.
|
|
57
|
+
hdx-python-country==3.8.6
|
|
62
58
|
# via
|
|
63
59
|
# hdx-python-scraper (pyproject.toml)
|
|
64
60
|
# hdx-python-api
|
|
65
|
-
hdx-python-utilities==3.
|
|
61
|
+
hdx-python-utilities==3.8.0
|
|
66
62
|
# via
|
|
67
63
|
# hdx-python-scraper (pyproject.toml)
|
|
68
64
|
# hdx-python-api
|
|
69
65
|
# hdx-python-country
|
|
70
|
-
humanize==4.
|
|
66
|
+
humanize==4.11.0
|
|
71
67
|
# via frictionless
|
|
72
|
-
identify==2.6.
|
|
68
|
+
identify==2.6.5
|
|
73
69
|
# via pre-commit
|
|
74
|
-
idna==3.
|
|
70
|
+
idna==3.10
|
|
75
71
|
# via
|
|
76
72
|
# email-validator
|
|
77
73
|
# requests
|
|
78
74
|
ijson==3.3.0
|
|
79
75
|
# via hdx-python-utilities
|
|
80
|
-
inflect==7.
|
|
76
|
+
inflect==7.5.0
|
|
81
77
|
# via quantulum3
|
|
82
78
|
iniconfig==2.0.0
|
|
83
79
|
# via pytest
|
|
84
|
-
isodate==0.
|
|
80
|
+
isodate==0.7.2
|
|
85
81
|
# via frictionless
|
|
86
|
-
jinja2==3.1.
|
|
82
|
+
jinja2==3.1.5
|
|
87
83
|
# via frictionless
|
|
88
84
|
jsonlines==4.0.0
|
|
89
85
|
# via hdx-python-utilities
|
|
90
|
-
jsonpath-ng==1.
|
|
86
|
+
jsonpath-ng==1.7.0
|
|
91
87
|
# via libhxl
|
|
92
88
|
jsonschema==4.23.0
|
|
93
89
|
# via
|
|
94
90
|
# frictionless
|
|
95
91
|
# tableschema-to-template
|
|
96
|
-
jsonschema-specifications==
|
|
92
|
+
jsonschema-specifications==2024.10.1
|
|
97
93
|
# via jsonschema
|
|
98
|
-
libhxl==5.2.
|
|
94
|
+
libhxl==5.2.2
|
|
99
95
|
# via
|
|
100
96
|
# hdx-python-api
|
|
101
97
|
# hdx-python-country
|
|
102
|
-
loguru==0.7.
|
|
98
|
+
loguru==0.7.3
|
|
103
99
|
# via hdx-python-utilities
|
|
104
|
-
makefun==1.15.
|
|
100
|
+
makefun==1.15.6
|
|
105
101
|
# via hdx-python-api
|
|
106
102
|
markdown-it-py==3.0.0
|
|
107
103
|
# via rich
|
|
108
104
|
marko==2.1.2
|
|
109
105
|
# via frictionless
|
|
110
|
-
markupsafe==
|
|
106
|
+
markupsafe==3.0.2
|
|
111
107
|
# via jinja2
|
|
112
108
|
mdurl==0.1.2
|
|
113
109
|
# via markdown-it-py
|
|
114
|
-
more-itertools==10.
|
|
110
|
+
more-itertools==10.5.0
|
|
115
111
|
# via inflect
|
|
116
|
-
ndg-httpsclient==0.5.1
|
|
117
|
-
# via hdx-python-api
|
|
118
112
|
nodeenv==1.9.1
|
|
119
113
|
# via pre-commit
|
|
120
|
-
num2words==0.5.
|
|
114
|
+
num2words==0.5.14
|
|
121
115
|
# via quantulum3
|
|
122
|
-
numpy==2.
|
|
116
|
+
numpy==2.2.1
|
|
123
117
|
# via pandas
|
|
124
118
|
oauthlib==3.2.2
|
|
125
119
|
# via requests-oauthlib
|
|
126
120
|
openpyxl==3.1.5
|
|
127
121
|
# via hdx-python-utilities
|
|
128
|
-
packaging==24.
|
|
122
|
+
packaging==24.2
|
|
129
123
|
# via pytest
|
|
130
|
-
pandas==2.2.
|
|
124
|
+
pandas==2.2.3
|
|
131
125
|
# via hdx-python-scraper (pyproject.toml)
|
|
132
126
|
petl==1.7.15
|
|
133
127
|
# via frictionless
|
|
134
|
-
platformdirs==4.
|
|
128
|
+
platformdirs==4.3.6
|
|
135
129
|
# via virtualenv
|
|
136
130
|
pluggy==1.5.0
|
|
137
131
|
# via pytest
|
|
@@ -141,35 +135,27 @@ ply==3.11
|
|
|
141
135
|
# libhxl
|
|
142
136
|
pockets==0.9.1
|
|
143
137
|
# via sphinxcontrib-napoleon
|
|
144
|
-
pre-commit==
|
|
138
|
+
pre-commit==4.0.1
|
|
145
139
|
# via hdx-python-scraper (pyproject.toml)
|
|
146
|
-
pyasn1==0.6.
|
|
140
|
+
pyasn1==0.6.1
|
|
147
141
|
# via
|
|
148
|
-
# hdx-python-api
|
|
149
|
-
# ndg-httpsclient
|
|
150
142
|
# pyasn1-modules
|
|
151
143
|
# rsa
|
|
152
|
-
pyasn1-modules==0.4.
|
|
144
|
+
pyasn1-modules==0.4.1
|
|
153
145
|
# via google-auth
|
|
154
|
-
|
|
155
|
-
# via cffi
|
|
156
|
-
pydantic==2.8.2
|
|
146
|
+
pydantic==2.10.5
|
|
157
147
|
# via frictionless
|
|
158
|
-
pydantic-core==2.
|
|
148
|
+
pydantic-core==2.27.2
|
|
159
149
|
# via pydantic
|
|
160
|
-
pygments==2.
|
|
150
|
+
pygments==2.19.1
|
|
161
151
|
# via rich
|
|
162
|
-
pyopenssl==24.2.1
|
|
163
|
-
# via
|
|
164
|
-
# hdx-python-api
|
|
165
|
-
# ndg-httpsclient
|
|
166
152
|
pyphonetics==0.5.3
|
|
167
|
-
# via hdx-python-
|
|
168
|
-
pytest==8.3.
|
|
153
|
+
# via hdx-python-utilities
|
|
154
|
+
pytest==8.3.4
|
|
169
155
|
# via
|
|
170
156
|
# hdx-python-scraper (pyproject.toml)
|
|
171
157
|
# pytest-cov
|
|
172
|
-
pytest-cov==
|
|
158
|
+
pytest-cov==6.0.0
|
|
173
159
|
# via hdx-python-scraper (pyproject.toml)
|
|
174
160
|
python-dateutil==2.9.0.post0
|
|
175
161
|
# via
|
|
@@ -183,9 +169,9 @@ python-slugify==8.0.4
|
|
|
183
169
|
# via
|
|
184
170
|
# ckanapi
|
|
185
171
|
# frictionless
|
|
186
|
-
pytz==2024.
|
|
172
|
+
pytz==2024.2
|
|
187
173
|
# via pandas
|
|
188
|
-
pyyaml==6.0.
|
|
174
|
+
pyyaml==6.0.2
|
|
189
175
|
# via
|
|
190
176
|
# frictionless
|
|
191
177
|
# pre-commit
|
|
@@ -198,7 +184,7 @@ referencing==0.35.1
|
|
|
198
184
|
# via
|
|
199
185
|
# jsonschema
|
|
200
186
|
# jsonschema-specifications
|
|
201
|
-
regex==2024.
|
|
187
|
+
regex==2024.11.6
|
|
202
188
|
# via hdx-python-scraper (pyproject.toml)
|
|
203
189
|
requests==2.32.3
|
|
204
190
|
# via
|
|
@@ -214,30 +200,29 @@ requests-oauthlib==2.0.0
|
|
|
214
200
|
# via google-auth-oauthlib
|
|
215
201
|
rfc3986==2.0.0
|
|
216
202
|
# via frictionless
|
|
217
|
-
rich==13.
|
|
203
|
+
rich==13.9.4
|
|
218
204
|
# via typer
|
|
219
|
-
rpds-py==0.
|
|
205
|
+
rpds-py==0.22.3
|
|
220
206
|
# via
|
|
221
207
|
# jsonschema
|
|
222
208
|
# referencing
|
|
223
209
|
rsa==4.9
|
|
224
210
|
# via google-auth
|
|
225
|
-
ruamel-yaml==0.18.
|
|
211
|
+
ruamel-yaml==0.18.10
|
|
226
212
|
# via hdx-python-utilities
|
|
227
|
-
ruamel-yaml-clib==0.2.
|
|
213
|
+
ruamel-yaml-clib==0.2.12
|
|
228
214
|
# via ruamel-yaml
|
|
229
|
-
setuptools==
|
|
215
|
+
setuptools==75.8.0
|
|
230
216
|
# via ckanapi
|
|
231
217
|
shellingham==1.5.4
|
|
232
218
|
# via typer
|
|
233
|
-
simpleeval==0.
|
|
219
|
+
simpleeval==1.0.3
|
|
234
220
|
# via frictionless
|
|
235
|
-
simplejson==3.19.
|
|
221
|
+
simplejson==3.19.3
|
|
236
222
|
# via ckanapi
|
|
237
|
-
six==1.
|
|
223
|
+
six==1.17.0
|
|
238
224
|
# via
|
|
239
225
|
# ckanapi
|
|
240
|
-
# isodate
|
|
241
226
|
# pockets
|
|
242
227
|
# python-dateutil
|
|
243
228
|
# sphinxcontrib-napoleon
|
|
@@ -251,11 +236,13 @@ tableschema-to-template==0.0.13
|
|
|
251
236
|
# via hdx-python-utilities
|
|
252
237
|
tabulate==0.9.0
|
|
253
238
|
# via frictionless
|
|
239
|
+
tenacity==9.0.0
|
|
240
|
+
# via hdx-python-country
|
|
254
241
|
text-unidecode==1.3
|
|
255
242
|
# via python-slugify
|
|
256
|
-
typeguard==4.
|
|
243
|
+
typeguard==4.4.1
|
|
257
244
|
# via inflect
|
|
258
|
-
typer==0.
|
|
245
|
+
typer==0.15.1
|
|
259
246
|
# via frictionless
|
|
260
247
|
typing-extensions==4.12.2
|
|
261
248
|
# via
|
|
@@ -264,27 +251,27 @@ typing-extensions==4.12.2
|
|
|
264
251
|
# pydantic-core
|
|
265
252
|
# typeguard
|
|
266
253
|
# typer
|
|
267
|
-
tzdata==2024.
|
|
254
|
+
tzdata==2024.2
|
|
268
255
|
# via pandas
|
|
269
256
|
unidecode==1.3.8
|
|
270
257
|
# via
|
|
271
258
|
# libhxl
|
|
272
259
|
# pyphonetics
|
|
273
|
-
urllib3==2.
|
|
260
|
+
urllib3==2.3.0
|
|
274
261
|
# via
|
|
275
262
|
# libhxl
|
|
276
263
|
# requests
|
|
277
|
-
validators==0.
|
|
264
|
+
validators==0.34.0
|
|
278
265
|
# via frictionless
|
|
279
|
-
virtualenv==20.
|
|
266
|
+
virtualenv==20.28.1
|
|
280
267
|
# via pre-commit
|
|
281
|
-
wheel==0.
|
|
268
|
+
wheel==0.45.1
|
|
282
269
|
# via libhxl
|
|
283
270
|
xlrd==2.0.1
|
|
284
271
|
# via hdx-python-utilities
|
|
285
272
|
xlrd3==1.1.0
|
|
286
273
|
# via libhxl
|
|
287
|
-
xlsx2csv==0.8.
|
|
274
|
+
xlsx2csv==0.8.4
|
|
288
275
|
# via hdx-python-utilities
|
|
289
276
|
xlsxwriter==3.2.0
|
|
290
277
|
# via tableschema-to-template
|
|
File without changes
|
|
@@ -36,7 +36,7 @@ class BaseScraper(ABC):
|
|
|
36
36
|
self.reader = datasetinfo.get("reader", name)
|
|
37
37
|
self.setup(headers, source_configuration)
|
|
38
38
|
self.datasetinfo = deepcopy(datasetinfo)
|
|
39
|
-
self.
|
|
39
|
+
self.error_handler = None
|
|
40
40
|
self.can_fallback = True
|
|
41
41
|
|
|
42
42
|
def setup(
|
|
@@ -5,18 +5,18 @@ from traceback import format_exc
|
|
|
5
5
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
6
6
|
|
|
7
7
|
from .base_scraper import BaseScraper
|
|
8
|
-
from .configurable.aggregator import Aggregator
|
|
9
|
-
from .configurable.resource_downloader import ResourceDownloader
|
|
10
|
-
from .configurable.scraper import ConfigurableScraper
|
|
11
|
-
from .configurable.timeseries import TimeSeries
|
|
12
8
|
from .outputs.base import BaseOutput
|
|
9
|
+
from .scrapers.aggregator import Aggregator
|
|
10
|
+
from .scrapers.configurable_scraper import ConfigurableScraper
|
|
11
|
+
from .scrapers.resource_downloader import ResourceDownloader
|
|
12
|
+
from .scrapers.timeseries import TimeSeries
|
|
13
13
|
from .utilities import get_startend_dates_from_time_period
|
|
14
14
|
from .utilities.fallbacks import Fallbacks
|
|
15
15
|
from .utilities.reader import Read
|
|
16
16
|
from .utilities.sources import Sources
|
|
17
17
|
from hdx.location.adminlevel import AdminLevel
|
|
18
18
|
from hdx.utilities.dateparse import now_utc
|
|
19
|
-
from hdx.utilities.
|
|
19
|
+
from hdx.utilities.error_handler import ErrorHandler
|
|
20
20
|
from hdx.utilities.typehint import ListTuple
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
@@ -28,7 +28,7 @@ class Runner:
|
|
|
28
28
|
Args:
|
|
29
29
|
countryiso3s (ListTuple[str]): List of ISO3 country codes to process
|
|
30
30
|
today (datetime): Value to use for today. Defaults to now_utc().
|
|
31
|
-
|
|
31
|
+
error_handler (ErrorHandler): ErrorHandler object that logs errors on exit
|
|
32
32
|
scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None (all scrapers).
|
|
33
33
|
"""
|
|
34
34
|
|
|
@@ -36,12 +36,12 @@ class Runner:
|
|
|
36
36
|
self,
|
|
37
37
|
countryiso3s: ListTuple[str],
|
|
38
38
|
today: datetime = now_utc(),
|
|
39
|
-
|
|
39
|
+
error_handler: Optional[ErrorHandler] = None,
|
|
40
40
|
scrapers_to_run: Optional[ListTuple[str]] = None,
|
|
41
41
|
):
|
|
42
42
|
self.countryiso3s = countryiso3s
|
|
43
43
|
self.today = today
|
|
44
|
-
self.
|
|
44
|
+
self.error_handler = error_handler
|
|
45
45
|
if isinstance(scrapers_to_run, tuple):
|
|
46
46
|
scrapers_to_run = list(scrapers_to_run)
|
|
47
47
|
self.scrapers_to_run: Optional[List[str]] = scrapers_to_run
|
|
@@ -73,7 +73,7 @@ class Runner:
|
|
|
73
73
|
and scraper_name not in self.scrapers_to_run
|
|
74
74
|
):
|
|
75
75
|
self.scrapers_to_run.append(scraper_name)
|
|
76
|
-
scraper.
|
|
76
|
+
scraper.error_handler = self.error_handler
|
|
77
77
|
return scraper_name
|
|
78
78
|
|
|
79
79
|
def add_customs(
|
|
@@ -142,7 +142,7 @@ class Runner:
|
|
|
142
142
|
level_name,
|
|
143
143
|
source_configuration,
|
|
144
144
|
self.today,
|
|
145
|
-
self.
|
|
145
|
+
self.error_handler,
|
|
146
146
|
)
|
|
147
147
|
if scraper_name not in self.scraper_names:
|
|
148
148
|
self.scraper_names.append(scraper_name)
|
|
@@ -612,8 +612,8 @@ class Runner:
|
|
|
612
612
|
if not Fallbacks.exist() or scraper.can_fallback is False:
|
|
613
613
|
raise
|
|
614
614
|
logger.exception(f"Using fallbacks for {scraper.name}!")
|
|
615
|
-
if self.
|
|
616
|
-
self.
|
|
615
|
+
if self.error_handler:
|
|
616
|
+
self.error_handler.add(
|
|
617
617
|
f"Using fallbacks for {scraper.name}! Error: {format_exc()}"
|
|
618
618
|
)
|
|
619
619
|
for level in scraper.headers.keys():
|
|
@@ -17,7 +17,7 @@ from hdx.utilities.dateparse import (
|
|
|
17
17
|
)
|
|
18
18
|
from hdx.utilities.dictandlist import dict_of_lists_add
|
|
19
19
|
from hdx.utilities.downloader import DownloadError
|
|
20
|
-
from hdx.utilities.
|
|
20
|
+
from hdx.utilities.error_handler import ErrorHandler
|
|
21
21
|
from hdx.utilities.text import ( # noqa: F401
|
|
22
22
|
get_fraction_str,
|
|
23
23
|
get_numeric_if_possible,
|
|
@@ -42,7 +42,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
42
42
|
level_name (Optional[str]): Customised level_name name. Defaults to None (level).
|
|
43
43
|
source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
|
|
44
44
|
today (datetime): Value to use for today. Defaults to now_utc().
|
|
45
|
-
|
|
45
|
+
error_handler (Optional[ErrorHandler]): ErrorHandler object that logs errors on exit
|
|
46
46
|
**kwargs: Variables to use when evaluating template arguments in urls
|
|
47
47
|
"""
|
|
48
48
|
|
|
@@ -67,7 +67,7 @@ class ConfigurableScraper(BaseScraper):
|
|
|
67
67
|
level_name: Optional[str] = None,
|
|
68
68
|
source_configuration: Dict = {},
|
|
69
69
|
today: datetime = now_utc(),
|
|
70
|
-
|
|
70
|
+
error_handler: Optional[ErrorHandler] = None,
|
|
71
71
|
**kwargs: Any,
|
|
72
72
|
):
|
|
73
73
|
self.name = name
|
|
@@ -83,10 +83,10 @@ class ConfigurableScraper(BaseScraper):
|
|
|
83
83
|
else:
|
|
84
84
|
self.level_name: str = level_name
|
|
85
85
|
self.countryiso3s = countryiso3s
|
|
86
|
-
self.adminlevel = adminlevel
|
|
86
|
+
self.adminlevel: Optional[AdminLevel] = adminlevel
|
|
87
87
|
self.today = today
|
|
88
88
|
self.subsets = self.get_subsets_from_datasetinfo(datasetinfo)
|
|
89
|
-
self.
|
|
89
|
+
self.error_handler: Optional[ErrorHandler] = error_handler
|
|
90
90
|
self.variables = kwargs
|
|
91
91
|
self.rowparser = None
|
|
92
92
|
self.datasetinfo = copy.deepcopy(datasetinfo)
|
|
@@ -8,10 +8,10 @@ from . import bool_assert
|
|
|
8
8
|
from hdx.api.configuration import Configuration
|
|
9
9
|
from hdx.api.locations import Locations
|
|
10
10
|
from hdx.location.country import Country
|
|
11
|
-
from hdx.scraper.base_scraper import BaseScraper
|
|
12
|
-
from hdx.scraper.utilities import string_params_to_dict
|
|
13
|
-
from hdx.scraper.utilities.fallbacks import Fallbacks
|
|
14
|
-
from hdx.scraper.utilities.reader import Read
|
|
11
|
+
from hdx.scraper.framework.base_scraper import BaseScraper
|
|
12
|
+
from hdx.scraper.framework.utilities import string_params_to_dict
|
|
13
|
+
from hdx.scraper.framework.utilities.fallbacks import Fallbacks
|
|
14
|
+
from hdx.scraper.framework.utilities.reader import Read
|
|
15
15
|
from hdx.utilities.dateparse import parse_date
|
|
16
16
|
|
|
17
17
|
|
|
File without changes
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
-
from hdx.scraper.base_scraper import BaseScraper
|
|
4
|
-
from hdx.scraper.utilities.sources import Sources
|
|
3
|
+
from hdx.scraper.framework.base_scraper import BaseScraper
|
|
4
|
+
from hdx.scraper.framework.utilities.sources import Sources
|
|
5
5
|
from hdx.utilities.dateparse import default_date
|
|
6
6
|
from hdx.utilities.dictandlist import dict_of_lists_add
|
|
7
7
|
from hdx.utilities.text import number_format
|