hdx-python-scraper 2.7.0__tar.gz → 2.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdx_python_scraper-2.7.2/.github/workflows/publish.yaml +34 -0
- hdx_python_scraper-2.7.2/.github/workflows/run-python-tests.yaml +52 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/.pre-commit-config.yaml +8 -9
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/PKG-INFO +5 -10
- hdx_python_scraper-2.7.2/pyproject.toml +150 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/_version.py +2 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/excelfile.py +2 -1
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/json.py +5 -4
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/runner.py +3 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/resource_downloader.py +4 -4
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/fallbacks.py +2 -1
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/lookup.py +2 -1
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/reader.py +11 -11
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/config/project_configuration.yaml +325 -325
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/download-global-pcode-lengths.csv +0 -1
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/download-global-pcodes-adm-1-2.csv +0 -1
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/conftest.py +8 -9
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/test_custom.py +5 -6
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/outputs/test_output.py +9 -10
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_aggregation.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_appenddata.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_global.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_national.py +2 -3
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +2 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_subnational.py +4 -5
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/test_runner.py +1 -2
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_sources.py +2 -2
- hdx_python_scraper-2.7.2/uv.lock +2738 -0
- hdx_python_scraper-2.7.0/.coveragerc +0 -17
- hdx_python_scraper-2.7.0/.github/workflows/publish.yaml +0 -37
- hdx_python_scraper-2.7.0/.github/workflows/run-python-tests.yaml +0 -55
- hdx_python_scraper-2.7.0/CONTRIBUTING.md +0 -60
- hdx_python_scraper-2.7.0/hatch.toml +0 -37
- hdx_python_scraper-2.7.0/pyproject.toml +0 -56
- hdx_python_scraper-2.7.0/pytest.ini +0 -4
- hdx_python_scraper-2.7.0/requirements.txt +0 -236
- hdx_python_scraper-2.7.0/ruff.toml +0 -10
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/.gitignore +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/LICENSE +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/README.md +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/index.md +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/mkdocs.yaml +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/base_scraper.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/base.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/aggregator.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/configurable_scraper.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/rowparser.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/timeseries.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/hapi_admins.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/org_type.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/org_type_configuration.yaml +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sector.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sector_configuration.yaml +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sources.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/writer.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/global-coordination-groups-beta.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/org_type_organization_types_beta_csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/organization-types-beta.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sector_global_coordination_groups_beta_csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/affected_targeted_reached.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/education_closures.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/education_enrolment.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_timeseries.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/unhcr_myanmar_idps.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_hapi_admins.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_lookup.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_readers.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_utils.py +0 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
|
|
11
|
+
environment:
|
|
12
|
+
name: pypi
|
|
13
|
+
url: https://pypi.org/p/hdx-python-scraper
|
|
14
|
+
|
|
15
|
+
permissions:
|
|
16
|
+
id-token: write
|
|
17
|
+
contents: read
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v6
|
|
21
|
+
|
|
22
|
+
- name: Get history and tags for versioning to work
|
|
23
|
+
run: |
|
|
24
|
+
git fetch --prune --unshallow
|
|
25
|
+
git fetch --depth=1 origin +refs/tags/*:refs/tags/*
|
|
26
|
+
|
|
27
|
+
- name: Install uv
|
|
28
|
+
uses: astral-sh/setup-uv@v7
|
|
29
|
+
|
|
30
|
+
- name: Build with uv
|
|
31
|
+
run: uv build
|
|
32
|
+
|
|
33
|
+
- name: Publish distribution 📦 to PyPI
|
|
34
|
+
run: uv publish
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: Run tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
push:
|
|
6
|
+
branches-ignore: [gh-pages, "dependabot/**"]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches-ignore: [gh-pages]
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
checks: write
|
|
16
|
+
pull-requests: write
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v6
|
|
20
|
+
|
|
21
|
+
- name: Install uv
|
|
22
|
+
uses: astral-sh/setup-uv@v7
|
|
23
|
+
with:
|
|
24
|
+
enable-cache: true
|
|
25
|
+
python-version: "3.13"
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --frozen
|
|
29
|
+
|
|
30
|
+
- name: Check styling
|
|
31
|
+
run: |
|
|
32
|
+
uv run ruff format --check
|
|
33
|
+
uv run ruff check
|
|
34
|
+
|
|
35
|
+
- name: Test with pytest
|
|
36
|
+
env:
|
|
37
|
+
HDX_KEY_TEST: ${{ secrets.HDX_BOT_SCRAPERS_API_TOKEN }}
|
|
38
|
+
GSHEET_AUTH: ${{ secrets.HDX_PIPELINE_GSHEET_AUTH }}
|
|
39
|
+
run: uv run pytest
|
|
40
|
+
|
|
41
|
+
- name: Publish Unit Test Results
|
|
42
|
+
uses: EnricoMi/publish-unit-test-result-action@v2
|
|
43
|
+
if: always()
|
|
44
|
+
with:
|
|
45
|
+
files: test-results.xml
|
|
46
|
+
|
|
47
|
+
- name: Publish in Coveralls
|
|
48
|
+
uses: coverallsapp/github-action@v2
|
|
49
|
+
if: always()
|
|
50
|
+
with:
|
|
51
|
+
flag-name: tests
|
|
52
|
+
format: lcov
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
default_language_version:
|
|
2
|
-
|
|
2
|
+
python: python3.13
|
|
3
|
+
|
|
3
4
|
repos:
|
|
4
5
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
5
6
|
rev: v6.0.0
|
|
@@ -8,20 +9,18 @@ repos:
|
|
|
8
9
|
- id: end-of-file-fixer
|
|
9
10
|
exclude: test_scraper_.*\.json
|
|
10
11
|
- id: check-ast
|
|
12
|
+
|
|
11
13
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
12
|
-
rev: v0.14.
|
|
14
|
+
rev: v0.14.14
|
|
13
15
|
hooks:
|
|
14
16
|
# Run the linter.
|
|
15
17
|
- id: ruff-check
|
|
16
18
|
args: [ --fix ]
|
|
17
19
|
# Run the formatter.
|
|
18
20
|
- id: ruff-format
|
|
21
|
+
|
|
19
22
|
- repo: https://github.com/astral-sh/uv-pre-commit
|
|
20
|
-
rev: 0.9.
|
|
23
|
+
rev: 0.9.25
|
|
21
24
|
hooks:
|
|
22
|
-
#
|
|
23
|
-
- id:
|
|
24
|
-
name: pip-compile requirements.txt
|
|
25
|
-
files: pyproject.toml
|
|
26
|
-
args: [ pyproject.toml, --resolver=backtracking, --upgrade, -q,
|
|
27
|
-
-o, requirements.txt ]
|
|
25
|
+
# Ensure the lockfile is up-to-date with pyproject.toml
|
|
26
|
+
- id: uv-lock
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.7.0
|
|
3
|
+
Version: 2.7.2
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -26,20 +26,15 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
27
|
Requires-Python: >=3.10
|
|
28
28
|
Requires-Dist: gspread
|
|
29
|
-
Requires-Dist: hdx-python-api>=6.6.
|
|
30
|
-
Requires-Dist: hdx-python-country>=4.
|
|
31
|
-
Requires-Dist: hdx-python-utilities>=4.0.
|
|
29
|
+
Requires-Dist: hdx-python-api>=6.6.4
|
|
30
|
+
Requires-Dist: hdx-python-country>=4.1.1
|
|
31
|
+
Requires-Dist: hdx-python-utilities>=4.0.4
|
|
32
|
+
Requires-Dist: libhxl
|
|
32
33
|
Requires-Dist: regex
|
|
33
|
-
Provides-Extra: dev
|
|
34
|
-
Requires-Dist: pre-commit; extra == 'dev'
|
|
35
34
|
Provides-Extra: docs
|
|
36
35
|
Requires-Dist: mkapi; extra == 'docs'
|
|
37
36
|
Provides-Extra: pandas
|
|
38
37
|
Requires-Dist: pandas>=2.2.3; extra == 'pandas'
|
|
39
|
-
Provides-Extra: test
|
|
40
|
-
Requires-Dist: pandas>=2.2.3; extra == 'test'
|
|
41
|
-
Requires-Dist: pytest; extra == 'test'
|
|
42
|
-
Requires-Dist: pytest-cov; extra == 'test'
|
|
43
38
|
Description-Content-Type: text/markdown
|
|
44
39
|
|
|
45
40
|
[](https://github.com/OCHA-DAP/hdx-python-scraper/actions/workflows/run-python-tests.yaml)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#########################
|
|
2
|
+
# Project Configuration #
|
|
3
|
+
#########################
|
|
4
|
+
|
|
5
|
+
[build-system]
|
|
6
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
7
|
+
build-backend = "hatchling.build"
|
|
8
|
+
|
|
9
|
+
[project]
|
|
10
|
+
name = "hdx-python-scraper"
|
|
11
|
+
description = "HDX Python scraper utilities to assemble data from multiple sources"
|
|
12
|
+
authors = [{name = "Michael Rans", email = "rans@email.com"}]
|
|
13
|
+
license = {text = "MIT"}
|
|
14
|
+
keywords = ["HDX", "scrapers", "data assembly", "data transformation", "tabular data"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 5 - Production/Stable",
|
|
17
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
18
|
+
"Programming Language :: Python",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Natural Language :: English",
|
|
29
|
+
"Operating System :: POSIX :: Linux",
|
|
30
|
+
"Operating System :: Unix",
|
|
31
|
+
"Operating System :: MacOS",
|
|
32
|
+
"Operating System :: Microsoft :: Windows",
|
|
33
|
+
]
|
|
34
|
+
readme = "README.md"
|
|
35
|
+
dynamic = ["version"]
|
|
36
|
+
requires-python = ">=3.10"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
dependencies = [
|
|
40
|
+
"hdx-python-api>=6.6.4",
|
|
41
|
+
"hdx-python-country>=4.1.1",
|
|
42
|
+
"hdx-python-utilities>=4.0.4",
|
|
43
|
+
"libhxl",
|
|
44
|
+
"gspread",
|
|
45
|
+
"regex",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
pandas = ["pandas>=2.2.3"]
|
|
50
|
+
docs = ["mkapi"]
|
|
51
|
+
|
|
52
|
+
[dependency-groups]
|
|
53
|
+
dev = [
|
|
54
|
+
"pandas>=2.2.3",
|
|
55
|
+
"pytest",
|
|
56
|
+
"pytest-cov",
|
|
57
|
+
"pre-commit",
|
|
58
|
+
"ruff==0.14.14",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
[project.urls]
|
|
62
|
+
Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
|
|
63
|
+
|
|
64
|
+
# ----------------------------------------------------------------------------
|
|
65
|
+
# Hatchling (Build & Versioning)
|
|
66
|
+
# ----------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
[tool.hatch.version]
|
|
69
|
+
source = "vcs"
|
|
70
|
+
|
|
71
|
+
[tool.hatch.version.raw-options]
|
|
72
|
+
local_scheme = "no-local-version"
|
|
73
|
+
version_scheme = "python-simplified-semver"
|
|
74
|
+
|
|
75
|
+
[tool.hatch.build.hooks.vcs]
|
|
76
|
+
version-file = "src/hdx/scraper/framework/_version.py"
|
|
77
|
+
|
|
78
|
+
[tool.hatch.build.targets.wheel]
|
|
79
|
+
packages = ["src/hdx"]
|
|
80
|
+
|
|
81
|
+
[tool.hatch.metadata]
|
|
82
|
+
allow-direct-references = true
|
|
83
|
+
|
|
84
|
+
# ----------------------------------------------------------------------------
|
|
85
|
+
# Ruff (Linting & Formatting)
|
|
86
|
+
# ----------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
[tool.ruff]
|
|
89
|
+
target-version = "py310"
|
|
90
|
+
src = ["src"]
|
|
91
|
+
exclude = ["_version.py"]
|
|
92
|
+
|
|
93
|
+
[tool.ruff.lint]
|
|
94
|
+
# Defaults are E (pycodestyle) and F (pyflakes). We extend them:
|
|
95
|
+
extend-select = [
|
|
96
|
+
"I", # isort
|
|
97
|
+
"UP", # pyupgrade
|
|
98
|
+
]
|
|
99
|
+
ignore = [
|
|
100
|
+
"E501", # Line too long
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
[tool.ruff.lint.isort]
|
|
104
|
+
known-local-folder = ["hdx.scraper.framework"]
|
|
105
|
+
known-third-party = [
|
|
106
|
+
"hdx.api",
|
|
107
|
+
"hdx.data",
|
|
108
|
+
"hdx.facades",
|
|
109
|
+
"hdx.location",
|
|
110
|
+
"hdx.utilities",
|
|
111
|
+
]
|
|
112
|
+
|
|
113
|
+
# ----------------------------------------------------------------------------
|
|
114
|
+
# Pytest (Testing)
|
|
115
|
+
# ----------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
[tool.pytest.ini_options]
|
|
118
|
+
pythonpath = "src"
|
|
119
|
+
log_cli = true
|
|
120
|
+
addopts = """
|
|
121
|
+
--color=yes
|
|
122
|
+
--rootdir=.
|
|
123
|
+
--junitxml=test-results.xml
|
|
124
|
+
--cov
|
|
125
|
+
--no-cov-on-fail
|
|
126
|
+
--cov-report=lcov
|
|
127
|
+
--cov-report=term-missing
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
# ----------------------------------------------------------------------------
|
|
131
|
+
# Coverage (Reporting)
|
|
132
|
+
# ----------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
[tool.coverage.run]
|
|
135
|
+
source = ["src"]
|
|
136
|
+
omit = ["*/_version.py"]
|
|
137
|
+
|
|
138
|
+
[tool.coverage.report]
|
|
139
|
+
exclude_also = [
|
|
140
|
+
"from ._version",
|
|
141
|
+
"def __repr__",
|
|
142
|
+
"if self.debug:",
|
|
143
|
+
"if settings.DEBUG",
|
|
144
|
+
"raise AssertionError",
|
|
145
|
+
"raise NotImplementedError",
|
|
146
|
+
"if 0:",
|
|
147
|
+
"if __name__ == .__main__.:",
|
|
148
|
+
"if TYPE_CHECKING:",
|
|
149
|
+
"@(abc\\.)?abstractmethod",
|
|
150
|
+
]
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '2.7.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (2, 7, 0)
|
|
31
|
+
__version__ = version = '2.7.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (2, 7, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/excelfile.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from openpyxl import Workbook
|
|
4
5
|
|
|
@@ -24,7 +25,7 @@ class ExcelFile(BaseOutput):
|
|
|
24
25
|
"""
|
|
25
26
|
|
|
26
27
|
def __init__(
|
|
27
|
-
self, excel_path: str, tabs: dict[str, str], updatetabs: list[str]
|
|
28
|
+
self, excel_path: Path | str, tabs: dict[str, str], updatetabs: list[str]
|
|
28
29
|
) -> None:
|
|
29
30
|
super().__init__(updatetabs)
|
|
30
31
|
self.workbook = Workbook()
|
{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/json.py
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from pathlib import Path
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
5
|
from hdx.utilities.dictandlist import dict_of_lists_add
|
|
@@ -184,7 +184,7 @@ class JsonFile(BaseOutput):
|
|
|
184
184
|
newrow[hxl_row[key]] = row[key]
|
|
185
185
|
self.add_data_row(name, newrow)
|
|
186
186
|
|
|
187
|
-
def save(self, folder: str | None = None, **kwargs: Any) -> list[str]:
|
|
187
|
+
def save(self, folder: Path | str | None = None, **kwargs: Any) -> list[Path]:
|
|
188
188
|
"""Save JSON file and any addition subsets of that JSON defined in the additional configuration
|
|
189
189
|
|
|
190
190
|
Args:
|
|
@@ -197,7 +197,8 @@ class JsonFile(BaseOutput):
|
|
|
197
197
|
filepaths = []
|
|
198
198
|
filepath = self.configuration["output"]
|
|
199
199
|
if folder:
|
|
200
|
-
|
|
200
|
+
folder = Path(folder)
|
|
201
|
+
filepath = folder / filepath
|
|
201
202
|
logger.info(f"Writing JSON to {filepath}")
|
|
202
203
|
save_json(self.json, filepath)
|
|
203
204
|
filepaths.append(filepath)
|
|
@@ -262,7 +263,7 @@ class JsonFile(BaseOutput):
|
|
|
262
263
|
continue
|
|
263
264
|
filedetailspath = filedetails["filepath"]
|
|
264
265
|
if folder:
|
|
265
|
-
filedetailspath =
|
|
266
|
+
filedetailspath = folder / filedetailspath
|
|
266
267
|
logger.info(f"Writing JSON to {filedetailspath}")
|
|
267
268
|
save_json(json, filedetailspath)
|
|
268
269
|
filepaths.append(filedetailspath)
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from collections.abc import Callable, Sequence
|
|
3
3
|
from copy import copy
|
|
4
4
|
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
from traceback import format_exc
|
|
6
7
|
from typing import Any, Optional
|
|
7
8
|
|
|
@@ -422,7 +423,7 @@ class Runner:
|
|
|
422
423
|
def add_resource_downloader(
|
|
423
424
|
self,
|
|
424
425
|
datasetinfo: dict,
|
|
425
|
-
folder: str = "",
|
|
426
|
+
folder: Path | str = "",
|
|
426
427
|
force_add_to_run: bool = False,
|
|
427
428
|
) -> str:
|
|
428
429
|
"""Add resource downloader to the run. If running specific scrapers rather than
|
|
@@ -445,7 +446,7 @@ class Runner:
|
|
|
445
446
|
def add_resource_downloaders(
|
|
446
447
|
self,
|
|
447
448
|
configuration: dict,
|
|
448
|
-
folder: str = "",
|
|
449
|
+
folder: Path | str = "",
|
|
449
450
|
force_add_to_run: bool = False,
|
|
450
451
|
) -> list[str]:
|
|
451
452
|
"""Add multiple resource downloaders to the run. If running specific scrapers
|
|
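{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/resource_downloader.py
RENAMED
|
@@ -1,5 +1,5 @@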
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from pathlib import Path
|
|
3
3
|
from shutil import copy2
|
|
4
4
|
|
|
5
5
|
from slugify import slugify
|
|
@@ -19,11 +19,11 @@ class ResourceDownloader(BaseScraper):
|
|
|
19
19
|
folder: Folder to which to download. Default is "".
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, datasetinfo, folder):
|
|
22
|
+
def __init__(self, datasetinfo: dict, folder: Path | str):
|
|
23
23
|
# ResourceDownloader only outputs to sources
|
|
24
24
|
name = f"resource_downloader_{slugify(datasetinfo['hxltag'].lower(), separator='_')}"
|
|
25
25
|
super().__init__(name, datasetinfo, {})
|
|
26
|
-
self.folder = folder
|
|
26
|
+
self.folder = Path(folder)
|
|
27
27
|
|
|
28
28
|
def run(self) -> None:
|
|
29
29
|
"""Runs one resource downloader given dataset information
|
|
@@ -35,7 +35,7 @@ class ResourceDownloader(BaseScraper):
|
|
|
35
35
|
resource = reader.read_hdx_metadata(self.datasetinfo)
|
|
36
36
|
url, path = reader.download_resource(resource, file_prefix=self.name)
|
|
37
37
|
logger.info(f"Downloading {url} to {path}")
|
|
38
|
-
copy2(path,
|
|
38
|
+
copy2(path, self.folder / self.datasetinfo["filename"])
|
|
39
39
|
|
|
40
40
|
def add_sources(self) -> None:
|
|
41
41
|
"""Add source for resource download
|
|
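{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/fallbacks.py
RENAMED
|
@@ -1,4 +1,5 @@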
|
|
|
1
1
|
import logging
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from hdx.utilities.loader import LoadError, load_json
|
|
4
5
|
|
|
@@ -25,7 +26,7 @@ class Fallbacks:
|
|
|
25
26
|
@classmethod
|
|
26
27
|
def add(
|
|
27
28
|
cls,
|
|
28
|
-
fallbacks_path: str,
|
|
29
|
+
fallbacks_path: Path | str,
|
|
29
30
|
levels_mapping: dict[str, str] = default_levels_mapping,
|
|
30
31
|
sources_key: str = "sources",
|
|
31
32
|
admin_name_mapping: dict[str, str] = default_admin_name_mapping,
|
{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/lookup.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from copy import copy
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
from hdx.utilities.loader import load_yaml
|
|
5
6
|
from hdx.utilities.matching import get_code_from_name
|
|
@@ -20,7 +21,7 @@ class Lookup:
|
|
|
20
21
|
classobject: Child class
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
|
-
def __init__(self, yaml_config_path: str, classobject: type):
|
|
24
|
+
def __init__(self, yaml_config_path: Path | str, classobject: type):
|
|
24
25
|
configuration = load_yaml(script_dir_plus_file(yaml_config_path, classobject))
|
|
25
26
|
self._configuration = configuration
|
|
26
27
|
initial_lookup = configuration.get("initial_lookup", {})
|
{hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/reader.py
RENAMED
|
@@ -2,7 +2,7 @@ import glob
|
|
|
2
2
|
import logging
|
|
3
3
|
from collections.abc import Iterator, Sequence
|
|
4
4
|
from datetime import datetime
|
|
5
|
-
from
|
|
5
|
+
from pathlib import Path
|
|
6
6
|
from typing import Any
|
|
7
7
|
from urllib.parse import parse_qsl
|
|
8
8
|
|
|
@@ -41,9 +41,9 @@ class Read(Retrieve):
|
|
|
41
41
|
def __init__(
|
|
42
42
|
self,
|
|
43
43
|
downloader: Download,
|
|
44
|
-
fallback_dir: str,
|
|
45
|
-
saved_dir: str,
|
|
46
|
-
temp_dir: str,
|
|
44
|
+
fallback_dir: Path | str,
|
|
45
|
+
saved_dir: Path | str,
|
|
46
|
+
temp_dir: Path | str,
|
|
47
47
|
save: bool = False,
|
|
48
48
|
use_saved: bool = False,
|
|
49
49
|
prefix: str = "",
|
|
@@ -65,9 +65,9 @@ class Read(Retrieve):
|
|
|
65
65
|
@classmethod
|
|
66
66
|
def create_readers(
|
|
67
67
|
cls,
|
|
68
|
-
fallback_dir: str,
|
|
69
|
-
saved_dir: str,
|
|
70
|
-
temp_dir: str,
|
|
68
|
+
fallback_dir: Path | str,
|
|
69
|
+
saved_dir: Path | str,
|
|
70
|
+
temp_dir: Path | str,
|
|
71
71
|
save: bool = False,
|
|
72
72
|
use_saved: bool = False,
|
|
73
73
|
ignore: Sequence[str] = tuple(),
|
|
@@ -275,7 +275,7 @@ class Read(Retrieve):
|
|
|
275
275
|
Returns:
|
|
276
276
|
The dataset that was read or None
|
|
277
277
|
"""
|
|
278
|
-
saved_path =
|
|
278
|
+
saved_path = self.saved_dir / f"{dataset_name}.json"
|
|
279
279
|
if self.use_saved:
|
|
280
280
|
logger.info(f"Using saved dataset {dataset_name} in {saved_path}")
|
|
281
281
|
dataset = Dataset.load_from_json(saved_path)
|
|
@@ -319,7 +319,7 @@ class Read(Retrieve):
|
|
|
319
319
|
list of datasets resulting from query
|
|
320
320
|
"""
|
|
321
321
|
|
|
322
|
-
saved_path =
|
|
322
|
+
saved_path = self.saved_dir / filename
|
|
323
323
|
if self.use_saved:
|
|
324
324
|
logger.info(
|
|
325
325
|
f"Using saved datasets in {filename}_n.json in {self.saved_dir}"
|
|
@@ -461,7 +461,7 @@ class Read(Retrieve):
|
|
|
461
461
|
url = resource["url"]
|
|
462
462
|
try:
|
|
463
463
|
_, path = self.download_resource(resource, **kwargs)
|
|
464
|
-
data = hxl.data(path, InputOptions(allow_local=True)).cache()
|
|
464
|
+
data = hxl.data(str(path), InputOptions(allow_local=True)).cache()
|
|
465
465
|
data.display_tags
|
|
466
466
|
return data
|
|
467
467
|
except hxl.HXLException:
|
|
@@ -488,7 +488,7 @@ class Read(Retrieve):
|
|
|
488
488
|
"""
|
|
489
489
|
try:
|
|
490
490
|
_, path = self.construct_filename_and_download(name, format, url, **kwargs)
|
|
491
|
-
return hxl.info(path, InputOptions(allow_local=True))
|
|
491
|
+
return hxl.info(str(path), InputOptions(allow_local=True))
|
|
492
492
|
except hxl.HXLException:
|
|
493
493
|
logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
|
|
494
494
|
return None
|