hdx-python-scraper 2.3.5__tar.gz → 2.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.config/pre-commit-config.yaml +7 -6
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.config/ruff.toml +6 -4
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.github/workflows/publish.yaml +4 -3
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.github/workflows/run-python-tests.yaml +6 -5
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/CONTRIBUTING.md +4 -5
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/PKG-INFO +6 -5
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/documentation/.readthedocs.yaml +1 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/documentation/main.md +29 -2
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/documentation/pydoc-markdown.yaml +12 -15
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/pyproject.toml +21 -18
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/requirements.txt +107 -117
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/_version.py +2 -2
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/base_scraper.py +4 -4
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/outputs/googlesheets.py +1 -1
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/runner.py +12 -12
- hdx_python_scraper-2.3.5/src/hdx/scraper/configurable/scraper.py → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers/configurable_scraper.py +5 -5
- {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers}/rowparser.py +58 -23
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/reader.py +98 -22
- hdx_python_scraper-2.5.3/src/hdx/scraper/framework/utilities/sector.py +63 -0
- hdx_python_scraper-2.5.3/src/hdx/scraper/framework/utilities/sector_configuration.yaml +138 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/sources.py +3 -3
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/config/project_configuration.yaml +2 -1
- hdx_python_scraper-2.5.3/tests/fixtures/input/global-coordination-groups-beta.json +1 -0
- hdx_python_scraper-2.5.3/tests/fixtures/input/sector_global_coordination_groups_beta_csv.csv +17 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework}/conftest.py +7 -4
- hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/custom/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/custom}/affected_targeted_reached.py +2 -2
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/custom}/education_closures.py +1 -1
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/custom}/education_enrolment.py +1 -1
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_custom.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/custom/test_custom.py +8 -8
- hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/outputs}/test_output.py +6 -6
- hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_aggregation.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_aggregation.py +5 -5
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_appenddata.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_appenddata.py +7 -7
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_global.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_global.py +4 -4
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_multipleurls.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +4 -4
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_national.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_national.py +17 -17
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_regionaltoplevel.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +7 -7
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_resource_downloaders.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +3 -3
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_subnational.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_subnational.py +9 -8
- hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_timeseries.py → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers/test_timeseries.py +4 -4
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/scrapers}/unhcr_myanmar_idps.py +2 -2
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework}/test_runner.py +2 -2
- hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities}/test_readers.py +72 -3
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities}/test_regionlookup.py +1 -1
- hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities/test_sector.py +11 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities}/test_sources.py +5 -3
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework/utilities}/test_utils.py +1 -1
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.gitignore +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/LICENSE +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/README.md +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/outputs}/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/outputs/base.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/outputs/json.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper/outputs → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers}/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers}/aggregator.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers}/resource_downloader.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.3/src/hdx/scraper/framework/scrapers}/timeseries.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.3/src/hdx/scraper/framework}/utilities/writer.py +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.3/tests/hdx/scraper/framework}/__init__.py +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
default_language_version:
|
|
2
|
-
python: python3.
|
|
2
|
+
python: python3.12
|
|
3
3
|
repos:
|
|
4
4
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
5
|
-
rev:
|
|
5
|
+
rev: v5.0.0
|
|
6
6
|
hooks:
|
|
7
7
|
- id: trailing-whitespace
|
|
8
8
|
- id: end-of-file-fixer
|
|
9
9
|
exclude: test_scraper_.*\.json
|
|
10
10
|
- id: check-ast
|
|
11
11
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
12
|
-
rev: v0.
|
|
12
|
+
rev: v0.8.6
|
|
13
13
|
hooks:
|
|
14
14
|
# Run the linter.
|
|
15
15
|
- id: ruff
|
|
@@ -17,10 +17,11 @@ repos:
|
|
|
17
17
|
# Run the formatter.
|
|
18
18
|
- id: ruff-format
|
|
19
19
|
args: [--config, .config/ruff.toml]
|
|
20
|
-
- repo: https://github.com/
|
|
21
|
-
rev:
|
|
20
|
+
- repo: https://github.com/astral-sh/uv-pre-commit
|
|
21
|
+
rev: 0.5.15
|
|
22
22
|
hooks:
|
|
23
|
+
# Run the pip compile
|
|
23
24
|
- id: pip-compile
|
|
24
25
|
name: pip-compile requirements.txt
|
|
25
26
|
files: pyproject.toml
|
|
26
|
-
args: [pyproject.toml, --resolver=backtracking, --all-extras, --upgrade, -q, -o, requirements.txt]
|
|
27
|
+
args: [ pyproject.toml, --resolver=backtracking, --all-extras, --upgrade, -q, -o, requirements.txt ]
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
line-length = 79
|
|
2
2
|
exclude = ["_version.py"]
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
]
|
|
3
|
+
|
|
4
|
+
[lint]
|
|
6
5
|
# List of rules: https://docs.astral.sh/ruff/rules/
|
|
7
6
|
select = [
|
|
8
7
|
"E", # pycodestyle - default
|
|
9
8
|
"F", # pyflakes - default
|
|
10
9
|
"I" # isort
|
|
11
10
|
]
|
|
11
|
+
ignore = [
|
|
12
|
+
"E501" # Line too long
|
|
13
|
+
]
|
|
12
14
|
|
|
13
|
-
[isort]
|
|
15
|
+
[lint.isort]
|
|
14
16
|
known-local-folder = ["hdx"]
|
|
@@ -9,19 +9,20 @@ jobs:
|
|
|
9
9
|
runs-on: ubuntu-latest
|
|
10
10
|
|
|
11
11
|
steps:
|
|
12
|
-
- uses: actions/checkout@
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
13
|
- name: Get history and tags for versioning to work
|
|
14
14
|
run: |
|
|
15
15
|
git fetch --prune --unshallow
|
|
16
16
|
git fetch --depth=1 origin +refs/tags/*:refs/tags/*
|
|
17
17
|
- name: Set up Python
|
|
18
|
-
uses: actions/setup-python@
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
19
|
with:
|
|
20
20
|
python-version: '3.x'
|
|
21
21
|
- name: Install dependencies
|
|
22
22
|
run: |
|
|
23
23
|
python -m pip install --upgrade pip
|
|
24
|
-
|
|
24
|
+
- name: Install Hatch
|
|
25
|
+
uses: pypa/hatch@install
|
|
25
26
|
- name: Build with hatch
|
|
26
27
|
run: |
|
|
27
28
|
hatch build
|
{hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.3}/.github/workflows/run-python-tests.yaml
RENAMED
|
@@ -18,24 +18,25 @@ jobs:
|
|
|
18
18
|
runs-on: ubuntu-latest
|
|
19
19
|
|
|
20
20
|
steps:
|
|
21
|
-
- uses: actions/checkout@
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
22
|
- name: Set up Python
|
|
23
|
-
uses: actions/setup-python@
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
24
|
with:
|
|
25
25
|
python-version: '3.x'
|
|
26
26
|
- name: Install dependencies
|
|
27
27
|
run: |
|
|
28
28
|
python -m pip install --upgrade pip
|
|
29
|
-
|
|
29
|
+
- name: Install Hatch
|
|
30
|
+
uses: pypa/hatch@install
|
|
30
31
|
- name: Test with hatch/pytest
|
|
31
32
|
env:
|
|
32
33
|
GSHEET_AUTH: ${{ secrets.GSHEET_AUTH }}
|
|
33
34
|
run: |
|
|
34
|
-
hatch
|
|
35
|
+
hatch test
|
|
35
36
|
- name: Check styling
|
|
36
37
|
if: always()
|
|
37
38
|
run: |
|
|
38
|
-
hatch
|
|
39
|
+
hatch fmt --check
|
|
39
40
|
- name: Publish Unit Test Results
|
|
40
41
|
uses: EnricoMi/publish-unit-test-result-action@v2
|
|
41
42
|
if: always()
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
## Environment
|
|
4
4
|
|
|
5
|
-
Development is currently done using Python 3.
|
|
5
|
+
Development is currently done using Python 3.12. We recommend using a virtual
|
|
6
6
|
environment such as ``venv``:
|
|
7
7
|
|
|
8
|
-
python3.
|
|
8
|
+
python3.12 -m venv venv
|
|
9
9
|
source venv/bin/activate
|
|
10
10
|
|
|
11
11
|
In your virtual environment, please install all packages for
|
|
@@ -21,14 +21,13 @@ you make a git commit:
|
|
|
21
21
|
pre-commit install
|
|
22
22
|
|
|
23
23
|
The configuration file for this project is in a
|
|
24
|
-
non-
|
|
24
|
+
non-standard location. Thus, you will need to edit your
|
|
25
25
|
`.git/hooks/pre-commit` file to reflect this. Change
|
|
26
26
|
the line that begins with `ARGS` to:
|
|
27
27
|
|
|
28
28
|
ARGS=(hook-impl --config=.config/pre-commit-config.yaml --hook-type=pre-commit)
|
|
29
29
|
|
|
30
30
|
With pre-commit, all code is formatted according to
|
|
31
|
-
[black]("https://github.com/psf/black") and
|
|
32
31
|
[ruff](https://github.com/charliermarsh/ruff) guidelines.
|
|
33
32
|
|
|
34
33
|
To check if your changes pass pre-commit without committing, run:
|
|
@@ -41,7 +40,7 @@ To run the tests and view coverage, execute:
|
|
|
41
40
|
|
|
42
41
|
pytest -c .config/pytest.ini --cov hdx --cov-config .config/coveragerc
|
|
43
42
|
|
|
44
|
-
Follow the example set out already in ``
|
|
43
|
+
Follow the example set out already in ``documentation/main.md`` as you write the documentation.
|
|
45
44
|
|
|
46
45
|
## Packages
|
|
47
46
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.3
|
|
3
|
+
Version: 2.5.3
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,13 +26,14 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.
|
|
30
|
-
Requires-Dist: hdx-python-country>=3.6
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.3.7
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.8.6
|
|
31
|
+
Requires-Dist: hdx-python-utilities>=3.8.2
|
|
31
32
|
Requires-Dist: regex
|
|
32
33
|
Provides-Extra: dev
|
|
33
34
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
34
35
|
Provides-Extra: pandas
|
|
35
|
-
Requires-Dist: pandas>=2.
|
|
36
|
+
Requires-Dist: pandas>=2.2.2; extra == 'pandas'
|
|
36
37
|
Provides-Extra: test
|
|
37
38
|
Requires-Dist: pytest; extra == 'test'
|
|
38
39
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
@@ -26,6 +26,8 @@ install with:
|
|
|
26
26
|
pip install hdx-python-scraper[pandas]
|
|
27
27
|
|
|
28
28
|
## Breaking Changes
|
|
29
|
+
From 2.5.0, package names have changed to avoid namespace clashes: modules formerly under `hdx.scraper` now live under `hdx.scraper.framework`.
|
|
30
|
+
|
|
29
31
|
From 2.3.0, resource name is used when available instead of creating name from
|
|
30
32
|
url so tests that use saved data from the Read class may break. file_type
|
|
31
33
|
parameters in various Read methods renamed to format.
|
|
@@ -81,6 +83,7 @@ The library is set up broadly as follows:
|
|
|
81
83
|
hdx_auth=configuration.get_api_key(),
|
|
82
84
|
header_auths=header_auths,
|
|
83
85
|
basic_auths=basic_auths,
|
|
86
|
+
bearer_tokens=bearer_tokens,
|
|
84
87
|
param_auths=param_auths,
|
|
85
88
|
today=today,
|
|
86
89
|
)
|
|
@@ -335,7 +338,7 @@ default is:
|
|
|
335
338
|
### Custom Scrapers
|
|
336
339
|
|
|
337
340
|
It is possible to define custom scrapers written in Python which must inherit
|
|
338
|
-
[BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/base_scraper.py),
|
|
341
|
+
[BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/framework/base_scraper.py),
|
|
339
342
|
calling its constructor and providing a `run` method. Other methods where a default
|
|
340
343
|
implementation has been provided can be overridden such as `add_sources` and
|
|
341
344
|
`add_population`. There are also two hooks for running steps at particular points.
|
|
@@ -392,7 +395,7 @@ from regions to values. In this case, since national and regional each have only
|
|
|
392
395
|
header and HXL hashtag, there is only one dictionary to populate for each.
|
|
393
396
|
|
|
394
397
|
An example of a custom scraper can be seen
|
|
395
|
-
[here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/education_closures.py).
|
|
398
|
+
[here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/framework/custom/education_closures.py).
|
|
396
399
|
|
|
397
400
|
An example of overriding `add_sources` to customise the source information that is
|
|
398
401
|
output is as follows:
|
|
@@ -1089,6 +1092,30 @@ same HXL hashtags.
|
|
|
1089
1092
|
use_hxl: True
|
|
1090
1093
|
should_overwrite_sources: True
|
|
1091
1094
|
|
|
1095
|
+
The xlsx2csv option below converts the xlsx to csv before processing.
|
|
1096
|
+
|
|
1097
|
+
idps_somalia:
|
|
1098
|
+
dataset: "somalia-internally-displaced-persons-idps"
|
|
1099
|
+
format: "xlsx"
|
|
1100
|
+
xlsx2csv: True
|
|
1101
|
+
filter_cols:
|
|
1102
|
+
- "Reason"
|
|
1103
|
+
- "Year"
|
|
1104
|
+
prefilter: "'drought' in Reason.lower() and int(Year) in (self.today.year - 1, self.today.year)"
|
|
1105
|
+
admin:
|
|
1106
|
+
- value: "SOM"
|
|
1107
|
+
- "Current (Arrival) District"
|
|
1108
|
+
input:
|
|
1109
|
+
- "Number of Individuals"
|
|
1110
|
+
sum:
|
|
1111
|
+
- formula: "int(Number of Individuals)"
|
|
1112
|
+
mustbepopulated: True
|
|
1113
|
+
output:
|
|
1114
|
+
- "IDPs"
|
|
1115
|
+
output_hxl:
|
|
1116
|
+
- "#affected+idps+ind"
|
|
1117
|
+
source_date_format: "%Y-%m-%d"
|
|
1118
|
+
|
|
1092
1119
|
## Population Data
|
|
1093
1120
|
|
|
1094
1121
|
Population data is treated as a special class of data. By default, configurable and
|
|
@@ -3,13 +3,13 @@ loaders:
|
|
|
3
3
|
search_path:
|
|
4
4
|
- ../src
|
|
5
5
|
packages:
|
|
6
|
-
- hdx.scraper
|
|
6
|
+
- hdx.scraper.framework
|
|
7
7
|
renderer:
|
|
8
8
|
type: mkdocs
|
|
9
9
|
output_directory: docs
|
|
10
10
|
mkdocs_config:
|
|
11
11
|
site_name: HDX Python Scraper
|
|
12
|
-
theme:
|
|
12
|
+
theme: material
|
|
13
13
|
repo_url: "https://github.com/OCHA-DAP/hdx-python-scraper"
|
|
14
14
|
markdown:
|
|
15
15
|
source_linker:
|
|
@@ -21,21 +21,18 @@ renderer:
|
|
|
21
21
|
source: main.md
|
|
22
22
|
- title: API Documentation
|
|
23
23
|
children:
|
|
24
|
-
- title:
|
|
24
|
+
- title: Runner
|
|
25
25
|
contents:
|
|
26
|
-
- hdx.scraper.
|
|
27
|
-
- title:
|
|
26
|
+
- hdx.scraper.framework.runner.Runner.*
|
|
27
|
+
- title: Scraper Base Class
|
|
28
28
|
contents:
|
|
29
|
-
- hdx.scraper.
|
|
30
|
-
- title:
|
|
29
|
+
- hdx.scraper.framework.base_scraper.BaseScraper.*
|
|
30
|
+
- title: Scrapers
|
|
31
31
|
contents:
|
|
32
|
-
- hdx.scraper.
|
|
33
|
-
- title: Fallbacks
|
|
34
|
-
contents:
|
|
35
|
-
- hdx.scraper.utilities.fallbacks.Fallbacks
|
|
36
|
-
- title: Run Scrapers
|
|
37
|
-
contents:
|
|
38
|
-
- hdx.scraper.runner.Runner.*
|
|
32
|
+
- hdx.scraper.framework.scrapers.*
|
|
39
33
|
- title: Outputs
|
|
40
34
|
contents:
|
|
41
|
-
- hdx.scraper.outputs.*
|
|
35
|
+
- hdx.scraper.framework.outputs.*
|
|
36
|
+
- title: Utilities
|
|
37
|
+
contents:
|
|
38
|
+
- hdx.scraper.framework.utilities.*
|
|
@@ -34,8 +34,9 @@ classifiers = [
|
|
|
34
34
|
requires-python = ">=3.8"
|
|
35
35
|
|
|
36
36
|
dependencies = [
|
|
37
|
-
"hdx-python-api>=6.
|
|
38
|
-
"hdx-python-country>=3.6
|
|
37
|
+
"hdx-python-api>=6.3.7",
|
|
38
|
+
"hdx-python-country>=3.8.6",
|
|
39
|
+
"hdx-python-utilities>=3.8.2",
|
|
39
40
|
"gspread",
|
|
40
41
|
"regex",
|
|
41
42
|
]
|
|
@@ -49,7 +50,7 @@ content-type = "text/markdown"
|
|
|
49
50
|
Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
|
|
50
51
|
|
|
51
52
|
[project.optional-dependencies]
|
|
52
|
-
pandas = ["pandas>=2.
|
|
53
|
+
pandas = ["pandas>=2.2.2"]
|
|
53
54
|
test = ["pytest", "pytest-cov"]
|
|
54
55
|
dev = ["pre-commit"]
|
|
55
56
|
|
|
@@ -64,7 +65,10 @@ dev = ["pre-commit"]
|
|
|
64
65
|
packages = ["src/hdx"]
|
|
65
66
|
|
|
66
67
|
[tool.hatch.build.hooks.vcs]
|
|
67
|
-
version-file = "src/hdx/scraper/_version.py"
|
|
68
|
+
version-file = "src/hdx/scraper/framework/_version.py"
|
|
69
|
+
|
|
70
|
+
[tool.hatch.metadata]
|
|
71
|
+
allow-direct-references = true
|
|
68
72
|
|
|
69
73
|
# Versioning
|
|
70
74
|
|
|
@@ -77,25 +81,24 @@ version_scheme = "python-simplified-semver"
|
|
|
77
81
|
|
|
78
82
|
# Tests
|
|
79
83
|
|
|
80
|
-
[tool.hatch.envs.test]
|
|
84
|
+
[tool.hatch.envs.hatch-test]
|
|
81
85
|
features = ["pandas", "test"]
|
|
82
86
|
|
|
83
|
-
[tool.hatch.envs.test.
|
|
84
|
-
|
|
87
|
+
[[tool.hatch.envs.hatch-test.matrix]]
|
|
88
|
+
python = ["3.12"]
|
|
89
|
+
|
|
90
|
+
[tool.hatch.envs.hatch-test.scripts]
|
|
91
|
+
run = """
|
|
85
92
|
pytest -c .config/pytest.ini --rootdir=. --junitxml=test-results.xml \
|
|
86
93
|
--cov --cov-config=.config/coveragerc --no-cov-on-fail \
|
|
87
94
|
--cov-report=lcov --cov-report=term-missing
|
|
88
95
|
"""
|
|
89
96
|
|
|
90
|
-
[
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
[tool.hatch.envs.lint]
|
|
94
|
-
detached = true
|
|
95
|
-
dependencies = ["ruff"]
|
|
97
|
+
[tool.hatch.envs.hatch-static-analysis]
|
|
98
|
+
dependencies = ["ruff==0.8.6"]
|
|
96
99
|
|
|
97
|
-
[tool.hatch.envs.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
]
|
|
100
|
+
[tool.hatch.envs.hatch-static-analysis.scripts]
|
|
101
|
+
format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
|
|
102
|
+
format-fix = ["ruff format --config .config/ruff.toml {args:.}",]
|
|
103
|
+
lint-check = ["ruff check --config .config/ruff.toml {args:.}",]
|
|
104
|
+
lint-fix = ["ruff check --config .config/ruff.toml --fix {args:.}",]
|