hdx-python-scraper 2.3.8__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/PKG-INFO +3 -3
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/pyproject.toml +2 -2
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/requirements.txt +22 -23
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/_version.py +2 -2
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/rowparser.py +57 -22
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/reader.py +9 -6
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_readers.py +4 -4
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/pre-commit-config.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.gitignore +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/CONTRIBUTING.md +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/LICENSE +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/README.md +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/main.md +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/base_scraper.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/__init__.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/aggregator.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/resource_downloader.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/scraper.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/timeseries.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/base.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/json.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/runner.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/__init__.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/sources.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/writer.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/config/project_configuration.yaml +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/affected_targeted_reached.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/conftest.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_closures.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_enrolment.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_output.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_runner.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_custom.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_global.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_national.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_sources.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_utils.py +0 -0
- {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/unhcr_myanmar_idps.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: hdx-python-scraper
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: HDX Python scraper utilities to assemble data from multiple sources
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -27,8 +27,8 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Requires-Dist: gspread
|
|
29
29
|
Requires-Dist: hdx-python-api>=6.3.1
|
|
30
|
-
Requires-Dist: hdx-python-country>=3.7.
|
|
31
|
-
Requires-Dist: hdx-python-utilities>=3.7.
|
|
30
|
+
Requires-Dist: hdx-python-country>=3.7.6
|
|
31
|
+
Requires-Dist: hdx-python-utilities>=3.7.2
|
|
32
32
|
Requires-Dist: regex
|
|
33
33
|
Provides-Extra: dev
|
|
34
34
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
@@ -10,7 +10,7 @@ attrs==23.2.0
|
|
|
10
10
|
# referencing
|
|
11
11
|
cachetools==5.3.3
|
|
12
12
|
# via google-auth
|
|
13
|
-
certifi==2024.
|
|
13
|
+
certifi==2024.7.4
|
|
14
14
|
# via requests
|
|
15
15
|
cffi==1.16.0
|
|
16
16
|
# via cryptography
|
|
@@ -24,7 +24,7 @@ ckanapi==4.8
|
|
|
24
24
|
# via hdx-python-api
|
|
25
25
|
click==8.1.7
|
|
26
26
|
# via typer
|
|
27
|
-
coverage==7.
|
|
27
|
+
coverage==7.6.0
|
|
28
28
|
# via pytest-cov
|
|
29
29
|
cryptography==42.0.8
|
|
30
30
|
# via pyopenssl
|
|
@@ -40,36 +40,36 @@ docopt==0.6.2
|
|
|
40
40
|
# num2words
|
|
41
41
|
docutils==0.21.2
|
|
42
42
|
# via defopt
|
|
43
|
-
email-validator==2.
|
|
43
|
+
email-validator==2.2.0
|
|
44
44
|
# via hdx-python-api
|
|
45
45
|
et-xmlfile==1.1.0
|
|
46
46
|
# via openpyxl
|
|
47
|
-
filelock==3.15.
|
|
47
|
+
filelock==3.15.4
|
|
48
48
|
# via virtualenv
|
|
49
49
|
frictionless==5.17.0
|
|
50
50
|
# via hdx-python-utilities
|
|
51
|
-
google-auth==2.
|
|
51
|
+
google-auth==2.32.0
|
|
52
52
|
# via
|
|
53
53
|
# google-auth-oauthlib
|
|
54
54
|
# gspread
|
|
55
|
-
google-auth-oauthlib==1.2.
|
|
55
|
+
google-auth-oauthlib==1.2.1
|
|
56
56
|
# via gspread
|
|
57
57
|
gspread==6.1.2
|
|
58
58
|
# via hdx-python-scraper (pyproject.toml)
|
|
59
59
|
hdx-python-api==6.3.1
|
|
60
60
|
# via hdx-python-scraper (pyproject.toml)
|
|
61
|
-
hdx-python-country==3.7.
|
|
61
|
+
hdx-python-country==3.7.6
|
|
62
62
|
# via
|
|
63
63
|
# hdx-python-scraper (pyproject.toml)
|
|
64
64
|
# hdx-python-api
|
|
65
|
-
hdx-python-utilities==3.7.
|
|
65
|
+
hdx-python-utilities==3.7.2
|
|
66
66
|
# via
|
|
67
67
|
# hdx-python-scraper (pyproject.toml)
|
|
68
68
|
# hdx-python-api
|
|
69
69
|
# hdx-python-country
|
|
70
|
-
humanize==4.
|
|
70
|
+
humanize==4.10.0
|
|
71
71
|
# via frictionless
|
|
72
|
-
identify==2.
|
|
72
|
+
identify==2.6.0
|
|
73
73
|
# via pre-commit
|
|
74
74
|
idna==3.7
|
|
75
75
|
# via
|
|
@@ -77,7 +77,7 @@ idna==3.7
|
|
|
77
77
|
# requests
|
|
78
78
|
ijson==3.3.0
|
|
79
79
|
# via hdx-python-utilities
|
|
80
|
-
inflect==7.
|
|
80
|
+
inflect==7.3.1
|
|
81
81
|
# via quantulum3
|
|
82
82
|
iniconfig==2.0.0
|
|
83
83
|
# via pytest
|
|
@@ -89,7 +89,7 @@ jsonlines==4.0.0
|
|
|
89
89
|
# via hdx-python-utilities
|
|
90
90
|
jsonpath-ng==1.6.1
|
|
91
91
|
# via libhxl
|
|
92
|
-
jsonschema==4.
|
|
92
|
+
jsonschema==4.23.0
|
|
93
93
|
# via
|
|
94
94
|
# frictionless
|
|
95
95
|
# tableschema-to-template
|
|
@@ -101,11 +101,11 @@ libhxl==5.2.1
|
|
|
101
101
|
# hdx-python-country
|
|
102
102
|
loguru==0.7.2
|
|
103
103
|
# via hdx-python-utilities
|
|
104
|
-
makefun==1.15.
|
|
104
|
+
makefun==1.15.4
|
|
105
105
|
# via hdx-python-api
|
|
106
106
|
markdown-it-py==3.0.0
|
|
107
107
|
# via rich
|
|
108
|
-
marko==2.1.
|
|
108
|
+
marko==2.1.2
|
|
109
109
|
# via frictionless
|
|
110
110
|
markupsafe==2.1.5
|
|
111
111
|
# via jinja2
|
|
@@ -123,7 +123,7 @@ numpy==2.0.0
|
|
|
123
123
|
# via pandas
|
|
124
124
|
oauthlib==3.2.2
|
|
125
125
|
# via requests-oauthlib
|
|
126
|
-
openpyxl==3.1.
|
|
126
|
+
openpyxl==3.1.5
|
|
127
127
|
# via hdx-python-utilities
|
|
128
128
|
packaging==24.1
|
|
129
129
|
# via pytest
|
|
@@ -153,9 +153,9 @@ pyasn1-modules==0.4.0
|
|
|
153
153
|
# via google-auth
|
|
154
154
|
pycparser==2.22
|
|
155
155
|
# via cffi
|
|
156
|
-
pydantic==2.
|
|
156
|
+
pydantic==2.8.2
|
|
157
157
|
# via frictionless
|
|
158
|
-
pydantic-core==2.
|
|
158
|
+
pydantic-core==2.20.1
|
|
159
159
|
# via pydantic
|
|
160
160
|
pygments==2.18.0
|
|
161
161
|
# via rich
|
|
@@ -190,7 +190,7 @@ pyyaml==6.0.1
|
|
|
190
190
|
# frictionless
|
|
191
191
|
# pre-commit
|
|
192
192
|
# tableschema-to-template
|
|
193
|
-
quantulum3==0.9.
|
|
193
|
+
quantulum3==0.9.2
|
|
194
194
|
# via hdx-python-api
|
|
195
195
|
ratelimit==2.2.1
|
|
196
196
|
# via hdx-python-utilities
|
|
@@ -216,7 +216,7 @@ rfc3986==2.0.0
|
|
|
216
216
|
# via frictionless
|
|
217
217
|
rich==13.7.1
|
|
218
218
|
# via typer
|
|
219
|
-
rpds-py==0.
|
|
219
|
+
rpds-py==0.19.0
|
|
220
220
|
# via
|
|
221
221
|
# jsonschema
|
|
222
222
|
# referencing
|
|
@@ -226,7 +226,7 @@ ruamel-yaml==0.18.6
|
|
|
226
226
|
# via hdx-python-utilities
|
|
227
227
|
ruamel-yaml-clib==0.2.8
|
|
228
228
|
# via ruamel-yaml
|
|
229
|
-
setuptools==70.
|
|
229
|
+
setuptools==70.3.0
|
|
230
230
|
# via ckanapi
|
|
231
231
|
shellingham==1.5.4
|
|
232
232
|
# via typer
|
|
@@ -260,7 +260,6 @@ typer==0.12.3
|
|
|
260
260
|
typing-extensions==4.12.2
|
|
261
261
|
# via
|
|
262
262
|
# frictionless
|
|
263
|
-
# inflect
|
|
264
263
|
# pydantic
|
|
265
264
|
# pydantic-core
|
|
266
265
|
# typeguard
|
|
@@ -275,9 +274,9 @@ urllib3==2.2.2
|
|
|
275
274
|
# via
|
|
276
275
|
# libhxl
|
|
277
276
|
# requests
|
|
278
|
-
validators==0.
|
|
277
|
+
validators==0.32.0
|
|
279
278
|
# via frictionless
|
|
280
|
-
virtualenv==20.26.
|
|
279
|
+
virtualenv==20.26.3
|
|
281
280
|
# via pre-commit
|
|
282
281
|
wheel==0.43.0
|
|
283
282
|
# via libhxl
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/rowparser.py
RENAMED
|
@@ -185,20 +185,14 @@ class RowParser:
|
|
|
185
185
|
Returns:
|
|
186
186
|
Iterator[Dict]: Input data with prefilter applied if specified and sorted if specified or deemed necessary
|
|
187
187
|
"""
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
if self.
|
|
196
|
-
if all(
|
|
197
|
-
row[key] == value for key, value in self.stop_row.items()
|
|
198
|
-
):
|
|
199
|
-
break
|
|
200
|
-
for newrow in self.flatten(row):
|
|
201
|
-
rows.append(newrow)
|
|
188
|
+
if self.header_to_hxltag:
|
|
189
|
+
iterator = self.header_to_hxltag_rows(iterator)
|
|
190
|
+
if self.stop_row:
|
|
191
|
+
iterator = self.stop_rows(iterator)
|
|
192
|
+
if self.flatteninfo:
|
|
193
|
+
iterator = self.flatten_rows(iterator)
|
|
194
|
+
if self.prefilter:
|
|
195
|
+
iterator = (row for row in iterator if eval(self.prefilter))
|
|
202
196
|
if not self.sort:
|
|
203
197
|
if self.datecol:
|
|
204
198
|
for subset in self.subsets:
|
|
@@ -212,15 +206,59 @@ class RowParser:
|
|
|
212
206
|
)
|
|
213
207
|
self.sort = {"keys": [self.datecol], "reverse": True}
|
|
214
208
|
break
|
|
215
|
-
if self.prefilter:
|
|
216
|
-
rows = [row for row in rows if eval(self.prefilter)]
|
|
217
209
|
if self.sort:
|
|
218
210
|
keys = self.sort["keys"]
|
|
219
211
|
reverse = self.sort.get("reverse", False)
|
|
220
|
-
|
|
221
|
-
return
|
|
212
|
+
iterator = sorted(iterator, key=itemgetter(*keys), reverse=reverse)
|
|
213
|
+
return iterator
|
|
214
|
+
|
|
215
|
+
def header_to_hxltag_rows(
|
|
216
|
+
self, iterator: Iterator[Dict]
|
|
217
|
+
) -> Generator[Dict, None, None]:
|
|
218
|
+
"""Convert headers to HXL tags in keys
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
iterator (Iterator[Dict]): Input data
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
Generator[Dict]: Rows where keys are HXL tags
|
|
225
|
+
"""
|
|
226
|
+
for row in iterator:
|
|
227
|
+
newrow = {}
|
|
228
|
+
for header in row:
|
|
229
|
+
newrow[self.header_to_hxltag[header]] = row[header]
|
|
230
|
+
yield newrow
|
|
231
|
+
|
|
232
|
+
def stop_rows(
|
|
233
|
+
self, iterator: Iterator[Dict]
|
|
234
|
+
) -> Generator[Dict, None, None]:
|
|
235
|
+
"""Stop processing rows after condition met
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
iterator (Iterator[Dict]): Input data
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
Generator[Dict]: Rows up to stop condition
|
|
242
|
+
"""
|
|
243
|
+
for row in iterator:
|
|
244
|
+
if all(row[key] == value for key, value in self.stop_row.items()):
|
|
245
|
+
break
|
|
246
|
+
yield row
|
|
247
|
+
|
|
248
|
+
def flatten_rows(self, iterator: Iterator[Dict]) -> Iterator[Dict]:
|
|
249
|
+
"""Flatten rows
|
|
250
|
+
|
|
251
|
+
Args:
|
|
252
|
+
iterator (Iterator[Dict]): Input data
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
Generator[Dict]: Flattened rows
|
|
256
|
+
"""
|
|
257
|
+
for row in iterator:
|
|
258
|
+
for newrow in self.flatten_row(row):
|
|
259
|
+
yield newrow
|
|
222
260
|
|
|
223
|
-
def
|
|
261
|
+
def flatten_row(self, row: Dict) -> Generator[Dict, None, None]:
|
|
224
262
|
"""Flatten a wide spreadsheet format into a long one
|
|
225
263
|
|
|
226
264
|
Args:
|
|
@@ -229,9 +267,6 @@ class RowParser:
|
|
|
229
267
|
Returns:
|
|
230
268
|
Generator[Dict]: Flattened row(s)
|
|
231
269
|
"""
|
|
232
|
-
if not self.flatteninfo:
|
|
233
|
-
yield row
|
|
234
|
-
return
|
|
235
270
|
counters = [-1 for _ in self.flatteninfo]
|
|
236
271
|
while True:
|
|
237
272
|
newrow = copy.deepcopy(row)
|
|
@@ -206,15 +206,18 @@ class Read(Retrieve):
|
|
|
206
206
|
if headers is None:
|
|
207
207
|
headers = 1
|
|
208
208
|
datasetinfo["headers"] = 1
|
|
209
|
-
kwargs["headers"] = headers
|
|
210
|
-
if isinstance(headers, list):
|
|
211
|
-
kwargs["fill_merged_cells"] = True
|
|
212
209
|
format = datasetinfo["format"]
|
|
213
210
|
kwargs["format"] = format
|
|
214
|
-
if
|
|
215
|
-
|
|
211
|
+
if format in ("xls", "xlsx"):
|
|
212
|
+
if not sheet:
|
|
213
|
+
sheet = 1
|
|
214
|
+
if isinstance(headers, list):
|
|
215
|
+
kwargs["fill_merged_cells"] = True
|
|
216
|
+
elif "fill_merged_cells" not in kwargs:
|
|
217
|
+
kwargs["fill_merged_cells"] = False
|
|
216
218
|
if sheet:
|
|
217
219
|
kwargs["sheet"] = sheet
|
|
220
|
+
kwargs["headers"] = headers
|
|
218
221
|
compression = datasetinfo.get("compression")
|
|
219
222
|
if compression:
|
|
220
223
|
kwargs["compression"] = compression
|
|
@@ -302,7 +305,7 @@ class Read(Retrieve):
|
|
|
302
305
|
f"Using saved datasets in {filename}_n.json in {self.saved_dir}"
|
|
303
306
|
)
|
|
304
307
|
datasets = []
|
|
305
|
-
for file_path in glob.glob(f"{saved_path}_*.json"):
|
|
308
|
+
for file_path in sorted(glob.glob(f"{saved_path}_*.json")):
|
|
306
309
|
datasets.append(Dataset.load_from_json(file_path))
|
|
307
310
|
else:
|
|
308
311
|
datasets = Dataset.search_in_hdx(
|
|
@@ -30,7 +30,7 @@ class TestReaders:
|
|
|
30
30
|
assert getattr(clone_reader, property) == value
|
|
31
31
|
|
|
32
32
|
def test_read_dataset(self, configuration, monkeypatch):
|
|
33
|
-
def
|
|
33
|
+
def read_from_hdx(dataset_name, _):
|
|
34
34
|
if dataset_name == "None":
|
|
35
35
|
return None
|
|
36
36
|
dataset = Dataset({"name": dataset_name})
|
|
@@ -58,7 +58,7 @@ class TestReaders:
|
|
|
58
58
|
today=parse_date("2021-02-01"),
|
|
59
59
|
) as reader:
|
|
60
60
|
monkeypatch.setattr(
|
|
61
|
-
Dataset, "read_from_hdx",
|
|
61
|
+
Dataset, "read_from_hdx", read_from_hdx
|
|
62
62
|
)
|
|
63
63
|
dataset_name = "None"
|
|
64
64
|
dataset = reader.read_dataset(dataset_name)
|
|
@@ -89,7 +89,7 @@ class TestReaders:
|
|
|
89
89
|
def test_search_datasets(self, configuration, monkeypatch):
|
|
90
90
|
filename = "TestDataset"
|
|
91
91
|
|
|
92
|
-
def
|
|
92
|
+
def search_in_hdx(*args, **kwargs):
|
|
93
93
|
datasets = []
|
|
94
94
|
for i in range(2):
|
|
95
95
|
dataset = Dataset({"name": f"{filename}_{i}"})
|
|
@@ -117,7 +117,7 @@ class TestReaders:
|
|
|
117
117
|
today=parse_date("2021-02-01"),
|
|
118
118
|
) as reader:
|
|
119
119
|
monkeypatch.setattr(
|
|
120
|
-
Dataset, "search_in_hdx",
|
|
120
|
+
Dataset, "search_in_hdx", search_in_hdx
|
|
121
121
|
)
|
|
122
122
|
datasets = reader.search_datasets(filename)
|
|
123
123
|
assert len(datasets) == 2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.github/workflows/run-python-tests.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/__init__.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/aggregator.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/scraper.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/timeseries.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/googlesheets.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/fallbacks.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/region_lookup.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/config/project_configuration.yaml
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/additional-json.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_other.json
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_population.json
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/affected_targeted_reached.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_closures.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_enrolment.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_regionlookup.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_aggregation.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_appenddata.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_custom.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_global.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_national.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_subnational.py
RENAMED
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_timeseries.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/unhcr_myanmar_idps.py
RENAMED
|
File without changes
|