hdx-python-scraper 2.2.3__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/coveragerc +1 -1
  2. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/CONTRIBUTING.md +4 -4
  3. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/PKG-INFO +16 -15
  4. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/README.md +12 -12
  5. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/main.md +4 -0
  6. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/pyproject.toml +4 -3
  7. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/requirements.txt +47 -40
  8. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/_version.py +2 -2
  9. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/base_scraper.py +21 -9
  10. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/resource_downloader.py +1 -1
  11. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/scraper.py +1 -0
  12. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/timeseries.py +3 -1
  13. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/runner.py +5 -2
  14. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/reader.py +142 -40
  15. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/config/project_configuration.yaml +10 -4
  16. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/additional-json.json +1 -1
  17. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf2_global_download-full-pfmb-allocations.csv +7054 -7054
  18. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf_global_download-full-pfmb-allocations.csv +7054 -7054
  19. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +1 -1
  20. hdx_python_scraper-2.3.0/tests/fixtures/input/education_closures_school_closures.csv +4 -0
  21. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ethiopia-pin-targeted-reached-by-location-and-cluster.json +1 -1
  22. hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_eth_ethiopia_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +1 -1
  23. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/fallbacks.json +17 -17
  24. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/global-school-closures-covid19.json +1 -1
  25. hdx_python_scraper-2.2.3/tests/fixtures/download-hno-2017-sahel-nutrition.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/hno_2017_sahel_nutrition.csv +1 -1
  26. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idmc-internally-displaced-persons-idps.json +1 -1
  27. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +1 -1
  28. hdx_python_scraper-2.2.3/tests/fixtures/ipc_somalia_download-som-food-insecurity-oct-dec2022-projection.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +2 -2
  29. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/kenya-pin-targeted-reached-by-location-and-cluster.json +1 -1
  30. hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_ken_kenya_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +1 -1
  31. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +1 -1
  32. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +1 -1
  33. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +1 -1
  34. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +1 -1
  35. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +1 -1
  36. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +1 -1
  37. hdx_python_scraper-2.3.0/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +36 -0
  38. hdx_python_scraper-2.3.0/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +1 -0
  39. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sadd-countries-to-include.csv +1 -1
  40. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sahel-humanitarian-needs-overview.json +1 -1
  41. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-acute-food-insecurity-country-data.json +1 -1
  42. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-internally-displaced-persons-idps.json +1 -1
  43. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-pin-targeted-reached-by-location-and-cluster.json +1 -1
  44. hdx_python_scraper-2.2.3/tests/fixtures/affected_targeted_reached_affected_targeted_reached_som_somalia_drought_affected_targeted_reached_by_cluster.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +1 -1
  45. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/total-covid-19-tests-performed-by-country.json +1 -1
  46. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ukraine-who-does-what-where-3w.json +1 -1
  47. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/unocha-office-locations.json +1 -1
  48. hdx_python_scraper-2.2.3/tests/fixtures/who_national2_who-covid-19-global-data.csv → hdx_python_scraper-2.3.0/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -1
  49. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/who_national_who-covid-19-global-data.csv +0 -1
  50. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/affected_targeted_reached.py +1 -3
  51. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/conftest.py +9 -4
  52. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/education_closures.py +1 -1
  53. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/education_enrolment.py +1 -1
  54. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_readers.py +75 -6
  55. hdx_python_scraper-2.2.3/tests/hdx/scraper/test_runner_get_results.py → hdx_python_scraper-2.3.0/tests/hdx/scraper/test_runner.py +56 -1
  56. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_custom.py +4 -1
  57. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_global.py +2 -2
  58. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_national.py +24 -2
  59. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_utils.py +6 -1
  60. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/unhcr_myanmar_idps.py +2 -2
  61. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/black.toml +0 -0
  62. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/pre-commit-config.yaml +0 -0
  63. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/pytest.ini +0 -0
  64. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.config/ruff.toml +0 -0
  65. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.github/workflows/publish.yaml +0 -0
  66. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.github/workflows/run-python-tests.yaml +0 -0
  67. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/.gitignore +0 -0
  68. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/LICENSE +0 -0
  69. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/.readthedocs.yaml +0 -0
  70. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/documentation/pydoc-markdown.yaml +0 -0
  71. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/__init__.py +0 -0
  72. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/__init__.py +0 -0
  73. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/aggregator.py +0 -0
  74. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/configurable/rowparser.py +0 -0
  75. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/__init__.py +0 -0
  76. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/base.py +0 -0
  77. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/excelfile.py +0 -0
  78. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/googlesheets.py +0 -0
  79. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/outputs/json.py +0 -0
  80. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/__init__.py +0 -0
  81. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/fallbacks.py +0 -0
  82. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/region_lookup.py +0 -0
  83. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/sources.py +0 -0
  84. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/src/hdx/scraper/utilities/writer.py +0 -0
  85. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
  86. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  87. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  88. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cbpf-allocations-and-contributions.json +0 -0
  89. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cbpf2-allocations-and-contributions.json +0 -0
  90. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf-covid-19-allocations.json +0 -0
  91. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/cerf2-covid-19-allocations.json +0 -0
  92. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/covidtests_data-owid-covid-data.xlsx +0 -0
  93. /hdx_python_scraper-2.2.3/tests/fixtures/education_closures_download-covid-impact-education.csv → /hdx_python_scraper-2.3.0/tests/fixtures/input/education_closures_broken.xls +0 -0
  94. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/education_enrolment_download-countries-enrollment-data-uis-feb-22.xlsx +0 -0
  95. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ethiopia-drought-related-key-figures.json +0 -0
  96. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  97. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  98. /hdx_python_scraper-2.2.3/tests/fixtures/download-hno-2017-sahel-people-in-need.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
  99. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/idps_download-displacement-data.csv +0 -0
  100. /hdx_python_scraper-2.2.3/tests/fixtures/idps_somalia_download-som-unhcr-prmn-displacement-dataset.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
  101. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/kenya-drought-related-key-figures.json +0 -0
  102. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  103. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  104. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population.json +0 -0
  105. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  106. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  107. /hdx_python_scraper-2.2.3/tests/fixtures/regions_download-tbl-regcov-2020-ocha.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
  108. /hdx_python_scraper-2.2.3/tests/fixtures/hdx_resource_downloader_xlsx_ukr_border_crossings_090622.xlsx → /hdx_python_scraper-2.3.0/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
  109. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
  110. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/somalia-drought-related-key-figures.json +0 -0
  111. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  112. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/ukraine-border-crossings.json +0 -0
  113. /hdx_python_scraper-2.2.3/tests/fixtures/who_national3_who-covid-19-global-data.csv → /hdx_python_scraper-2.3.0/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
  114. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/whowhatwhere_afg_3w_data.csv +0 -0
  115. {hdx_python_scraper-2.2.3/tests/fixtures → hdx_python_scraper-2.3.0/tests/fixtures/input}/whowhatwhere_notags_3w_data.csv +0 -0
  116. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_output.xlsx +0 -0
  117. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_all.json +0 -0
  118. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_other.json +0 -0
  119. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/fixtures/test_scraper_population.json +0 -0
  120. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/__init__.py +0 -0
  121. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_output.py +0 -0
  122. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_regionlookup.py +0 -0
  123. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
  124. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
  125. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
  126. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
  127. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
  128. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
  129. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
  130. {hdx_python_scraper-2.2.3 → hdx_python_scraper-2.3.0}/tests/hdx/scraper/test_sources.py +0 -0
@@ -14,4 +14,4 @@ exclude_also =
14
14
  if 0:
15
15
  if __name__ == .__main__.:
16
16
  if TYPE_CHECKING:
17
- @(abc\.)?abstractmethod
17
+ @(abc\.)?abstractmethod
@@ -20,7 +20,7 @@ you make a git commit:
20
20
 
21
21
  pre-commit install
22
22
 
23
- The configuration file for this project is in a
23
+ The configuration file for this project is in a
24
24
  non-standard location. Thus, you will need to edit your
25
25
  `.git/hooks/pre-commit` file to reflect this. Change
26
26
  the line that begins with `ARGS` to:
@@ -29,7 +29,7 @@ the line that begins with `ARGS` to:
29
29
 
30
30
  With pre-commit, all code is formatted according to
31
31
  [black](https://github.com/psf/black) and
32
- [ruff](https://github.com/charliermarsh/ruff) guidelines.
32
+ [ruff](https://github.com/charliermarsh/ruff) guidelines.
33
33
 
34
34
  To check if your changes pass pre-commit without committing, run:
35
35
 
@@ -46,8 +46,8 @@ Follow the example set out already in ``api.rst`` as you write the documentation
46
46
  ## Packages
47
47
 
48
48
  [pip-tools](https://github.com/jazzband/pip-tools) is used for
49
- package management. If you’ve introduced a new package to the
50
- source code (i.e. anywhere in `src/`), please add it to the
49
+ package management. If you’ve introduced a new package to the
50
+ source code (i.e. anywhere in `src/`), please add it to the
51
51
  `project.dependencies` section of
52
52
  `pyproject.toml` with any known version constraints.
53
53
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hdx-python-scraper
3
- Version: 2.2.3
3
+ Version: 2.3.0
4
4
  Summary: HDX Python scraper utilities to assemble data from multiple sources
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -26,12 +26,13 @@ Classifier: Programming Language :: Python :: 3.12
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Requires-Python: >=3.8
28
28
  Requires-Dist: gspread
29
- Requires-Dist: hdx-python-api>=6.1.3
29
+ Requires-Dist: hdx-python-api>=6.1.4
30
+ Requires-Dist: hdx-python-country>=3.6.3
30
31
  Requires-Dist: regex
31
32
  Provides-Extra: dev
32
33
  Requires-Dist: pre-commit; extra == 'dev'
33
34
  Provides-Extra: pandas
34
- Requires-Dist: pandas>=2.1.1; extra == 'pandas'
35
+ Requires-Dist: pandas>=2.1.3; extra == 'pandas'
35
36
  Provides-Extra: test
36
37
  Requires-Dist: pytest; extra == 'test'
37
38
  Requires-Dist: pytest-cov; extra == 'test'
@@ -43,19 +44,19 @@ Description-Content-Type: text/markdown
43
44
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
44
45
  [![Downloads](https://img.shields.io/pypi/dm/hdx-python-scraper.svg)](https://pypistats.org/packages/hdx-python-scraper)
45
46
 
46
- The HDX Python Scraper Library is designed to enable you to easily develop code that
47
- assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
48
- uses a YAML file that specifies for each source what needs to be read and allows some
49
- transformations to be performed on the data. The output is written to JSON, Google sheets
50
- and/or Excel and includes the addition of
51
- [Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
52
- the YAML file. Custom Python scrapers can also be written that conform to a defined
53
- specification and the framework handles the execution of both configurable and custom
47
+ The HDX Python Scraper Library is designed to enable you to easily develop code that
48
+ assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
49
+ uses a YAML file that specifies for each source what needs to be read and allows some
50
+ transformations to be performed on the data. The output is written to JSON, Google sheets
51
+ and/or Excel and includes the addition of
52
+ [Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
53
+ the YAML file. Custom Python scrapers can also be written that conform to a defined
54
+ specification and the framework handles the execution of both configurable and custom
54
55
  scrapers.
55
56
 
56
- For more information, please read the
57
- [documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
57
+ For more information, please read the
58
+ [documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
58
59
 
59
- This library is part of the
60
- [Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
60
+ This library is part of the
61
+ [Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
61
62
  humanitarian related data, please upload your datasets to HDX.
@@ -4,19 +4,19 @@
4
4
  [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
5
5
  [![Downloads](https://img.shields.io/pypi/dm/hdx-python-scraper.svg)](https://pypistats.org/packages/hdx-python-scraper)
6
6
 
7
- The HDX Python Scraper Library is designed to enable you to easily develop code that
8
- assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
9
- uses a YAML file that specifies for each source what needs to be read and allows some
10
- transformations to be performed on the data. The output is written to JSON, Google sheets
11
- and/or Excel and includes the addition of
12
- [Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
13
- the YAML file. Custom Python scrapers can also be written that conform to a defined
14
- specification and the framework handles the execution of both configurable and custom
7
+ The HDX Python Scraper Library is designed to enable you to easily develop code that
8
+ assembles data from one or more tabular sources that can be csv, xls, xlsx or JSON. It
9
+ uses a YAML file that specifies for each source what needs to be read and allows some
10
+ transformations to be performed on the data. The output is written to JSON, Google sheets
11
+ and/or Excel and includes the addition of
12
+ [Humanitarian Exchange Language (HXL)](https://hxlstandard.org/) hashtags specified in
13
+ the YAML file. Custom Python scrapers can also be written that conform to a defined
14
+ specification and the framework handles the execution of both configurable and custom
15
15
  scrapers.
16
16
 
17
- For more information, please read the
18
- [documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
17
+ For more information, please read the
18
+ [documentation](https://hdx-python-scraper.readthedocs.io/en/latest/).
19
19
 
20
- This library is part of the
21
- [Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
20
+ This library is part of the
21
+ [Humanitarian Data Exchange](https://data.humdata.org/) (HDX) project. If you have
22
22
  humanitarian related data, please upload your datasets to HDX.
@@ -26,6 +26,10 @@ install with:
26
26
  pip install hdx-python-scraper[pandas]
27
27
 
28
28
  ## Breaking Changes
29
+ From 2.3.0, resource name is used when available instead of creating name from
30
+ url so tests that use saved data from the Read class may break. file_type
31
+ parameters in various Read methods renamed to format.
32
+
29
33
  From 2.1.2, Python 3.7 no longer supported
30
34
 
31
35
  From 2.0.1, all functions in outputs.update_tabs are methods in the new Writer class
@@ -34,7 +34,8 @@ classifiers = [
34
34
  requires-python = ">=3.8"
35
35
 
36
36
  dependencies = [
37
- "hdx-python-api>=6.1.3",
37
+ "hdx-python-api>=6.1.4",
38
+ "hdx-python-country>=3.6.3",
38
39
  "gspread",
39
40
  "regex",
40
41
  ]
@@ -48,7 +49,7 @@ content-type = "text/markdown"
48
49
  Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
49
50
 
50
51
  [project.optional-dependencies]
51
- pandas = ["pandas>=2.1.1"]
52
+ pandas = ["pandas>=2.1.3"]
52
53
  test = ["pytest", "pytest-cov"]
53
54
  dev = ["pre-commit"]
54
55
 
@@ -87,7 +88,7 @@ test = """
87
88
  """
88
89
 
89
90
  [[tool.hatch.envs.test.matrix]]
90
- python = ["3.11"]
91
+ python = ["3.12"]
91
92
 
92
93
  [tool.hatch.envs.lint]
93
94
  detached = true
@@ -4,16 +4,16 @@
4
4
  #
5
5
  # pip-compile --all-extras --output-file=requirements.txt --resolver=backtracking pyproject.toml
6
6
  #
7
- annotated-types==0.5.0
7
+ annotated-types==0.6.0
8
8
  # via pydantic
9
9
  attrs==23.1.0
10
10
  # via
11
11
  # frictionless
12
12
  # jsonlines
13
13
  # jsonschema
14
- cachetools==5.3.1
14
+ cachetools==5.3.2
15
15
  # via google-auth
16
- certifi==2023.7.22
16
+ certifi==2023.11.17
17
17
  # via requests
18
18
  cffi==1.16.0
19
19
  # via cryptography
@@ -21,7 +21,7 @@ cfgv==3.4.0
21
21
  # via pre-commit
22
22
  chardet==5.2.0
23
23
  # via frictionless
24
- charset-normalizer==3.3.0
24
+ charset-normalizer==3.3.2
25
25
  # via requests
26
26
  ckanapi==4.7
27
27
  # via hdx-python-api
@@ -31,7 +31,7 @@ colorama==0.4.6
31
31
  # via typer
32
32
  coverage[toml]==7.3.2
33
33
  # via pytest-cov
34
- cryptography==41.0.4
34
+ cryptography==41.0.7
35
35
  # via pyopenssl
36
36
  defopt==6.4.0
37
37
  # via hdx-python-api
@@ -45,33 +45,37 @@ docopt==0.6.2
45
45
  # num2words
46
46
  docutils==0.20.1
47
47
  # via defopt
48
- email-validator==2.0.0.post2
48
+ email-validator==2.1.0.post1
49
49
  # via hdx-python-api
50
50
  et-xmlfile==1.1.0
51
51
  # via openpyxl
52
- filelock==3.12.4
52
+ filelock==3.13.1
53
53
  # via virtualenv
54
54
  frictionless==5.16.0
55
55
  # via hdx-python-utilities
56
- google-auth==2.23.2
56
+ google-auth==2.25.2
57
57
  # via
58
58
  # google-auth-oauthlib
59
59
  # gspread
60
60
  google-auth-oauthlib==1.1.0
61
61
  # via gspread
62
- gspread==5.11.3
62
+ gspread==5.12.2
63
63
  # via hdx-python-scraper (pyproject.toml)
64
- hdx-python-api==6.1.3
64
+ hdx-python-api==6.1.4
65
65
  # via hdx-python-scraper (pyproject.toml)
66
- hdx-python-country==3.5.7
67
- # via hdx-python-api
66
+ hdx-python-country==3.6.3
67
+ # via
68
+ # hdx-python-api
69
+ # hdx-python-scraper (pyproject.toml)
68
70
  hdx-python-utilities==3.6.2
69
- # via hdx-python-country
70
- humanize==4.8.0
71
+ # via
72
+ # hdx-python-api
73
+ # hdx-python-country
74
+ humanize==4.9.0
71
75
  # via frictionless
72
- identify==2.5.30
76
+ identify==2.5.33
73
77
  # via pre-commit
74
- idna==3.4
78
+ idna==3.6
75
79
  # via
76
80
  # email-validator
77
81
  # requests
@@ -94,14 +98,16 @@ jsonschema==4.17.3
94
98
  # frictionless
95
99
  # tableschema-to-template
96
100
  libhxl==5.1
97
- # via hdx-python-country
101
+ # via
102
+ # hdx-python-api
103
+ # hdx-python-country
98
104
  loguru==0.7.2
99
105
  # via hdx-python-utilities
100
- makefun==1.15.1
106
+ makefun==1.15.2
101
107
  # via hdx-python-api
102
108
  markdown-it-py==3.0.0
103
109
  # via rich
104
- marko==2.0.0
110
+ marko==2.0.2
105
111
  # via frictionless
106
112
  markupsafe==2.1.3
107
113
  # via jinja2
@@ -111,9 +117,9 @@ ndg-httpsclient==0.5.1
111
117
  # via hdx-python-api
112
118
  nodeenv==1.8.0
113
119
  # via pre-commit
114
- num2words==0.5.12
120
+ num2words==0.5.13
115
121
  # via quantulum3
116
- numpy==1.26.0
122
+ numpy==1.26.2
117
123
  # via pandas
118
124
  oauthlib==3.2.2
119
125
  # via requests-oauthlib
@@ -121,11 +127,11 @@ openpyxl==3.1.2
121
127
  # via hdx-python-utilities
122
128
  packaging==23.2
123
129
  # via pytest
124
- pandas==2.1.1
130
+ pandas==2.1.4
125
131
  # via hdx-python-scraper (pyproject.toml)
126
132
  petl==1.7.14
127
133
  # via frictionless
128
- platformdirs==3.11.0
134
+ platformdirs==4.1.0
129
135
  # via virtualenv
130
136
  pluggy==1.3.0
131
137
  # via pytest
@@ -135,9 +141,9 @@ ply==3.11
135
141
  # libhxl
136
142
  pockets==0.9.1
137
143
  # via sphinxcontrib-napoleon
138
- pre-commit==3.4.0
144
+ pre-commit==3.6.0
139
145
  # via hdx-python-scraper (pyproject.toml)
140
- pyasn1==0.5.0
146
+ pyasn1==0.5.1
141
147
  # via
142
148
  # hdx-python-api
143
149
  # ndg-httpsclient
@@ -147,23 +153,23 @@ pyasn1-modules==0.3.0
147
153
  # via google-auth
148
154
  pycparser==2.21
149
155
  # via cffi
150
- pydantic==2.4.2
156
+ pydantic==2.5.2
151
157
  # via
152
158
  # frictionless
153
159
  # inflect
154
- pydantic-core==2.10.1
160
+ pydantic-core==2.14.5
155
161
  # via pydantic
156
- pygments==2.16.1
162
+ pygments==2.17.2
157
163
  # via rich
158
- pyopenssl==23.2.0
164
+ pyopenssl==23.3.0
159
165
  # via
160
166
  # hdx-python-api
161
167
  # ndg-httpsclient
162
168
  pyphonetics==0.5.3
163
169
  # via hdx-python-country
164
- pyrsistent==0.19.3
170
+ pyrsistent==0.20.0
165
171
  # via jsonschema
166
- pytest==7.4.2
172
+ pytest==7.4.3
167
173
  # via
168
174
  # hdx-python-scraper (pyproject.toml)
169
175
  # pytest-cov
@@ -198,6 +204,7 @@ requests==2.31.0
198
204
  # via
199
205
  # ckanapi
200
206
  # frictionless
207
+ # hdx-python-api
201
208
  # libhxl
202
209
  # requests-file
203
210
  # requests-oauthlib
@@ -207,15 +214,15 @@ requests-oauthlib==1.3.1
207
214
  # via google-auth-oauthlib
208
215
  rfc3986==2.0.0
209
216
  # via frictionless
210
- rich==13.6.0
217
+ rich==13.7.0
211
218
  # via typer
212
219
  rsa==4.9
213
220
  # via google-auth
214
- ruamel-yaml==0.17.35
221
+ ruamel-yaml==0.18.5
215
222
  # via hdx-python-utilities
216
223
  ruamel-yaml-clib==0.2.8
217
224
  # via ruamel-yaml
218
- shellingham==1.5.3
225
+ shellingham==1.5.4
219
226
  # via typer
220
227
  simpleeval==0.9.13
221
228
  # via frictionless
@@ -231,7 +238,7 @@ sphinxcontrib-napoleon==0.7
231
238
  # via defopt
232
239
  stringcase==1.2.0
233
240
  # via frictionless
234
- structlog==23.1.0
241
+ structlog==23.2.0
235
242
  # via libhxl
236
243
  tableschema-to-template==0.0.13
237
244
  # via hdx-python-utilities
@@ -241,7 +248,7 @@ text-unidecode==1.3
241
248
  # via python-slugify
242
249
  typer[all]==0.9.0
243
250
  # via frictionless
244
- typing-extensions==4.8.0
251
+ typing-extensions==4.9.0
245
252
  # via
246
253
  # frictionless
247
254
  # inflect
@@ -254,21 +261,21 @@ unidecode==1.3.7
254
261
  # via
255
262
  # libhxl
256
263
  # pyphonetics
257
- urllib3==2.0.6
264
+ urllib3==2.1.0
258
265
  # via
259
266
  # libhxl
260
267
  # requests
261
268
  validators==0.22.0
262
269
  # via frictionless
263
- virtualenv==20.24.5
270
+ virtualenv==20.25.0
264
271
  # via pre-commit
265
- wheel==0.41.2
272
+ wheel==0.42.0
266
273
  # via libhxl
267
274
  xlrd==2.0.1
268
275
  # via hdx-python-utilities
269
276
  xlrd3==1.1.0
270
277
  # via libhxl
271
- xlsxwriter==3.1.6
278
+ xlsxwriter==3.1.9
272
279
  # via tableschema-to-template
273
280
  xlwt==1.3.0
274
281
  # via hdx-python-utilities
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '2.2.3'
16
- __version_tuple__ = version_tuple = (2, 2, 3)
15
+ __version__ = version = '2.3.0'
16
+ __version_tuple__ = version_tuple = (2, 3, 0)
@@ -81,15 +81,12 @@ class BaseScraper(ABC):
81
81
  self.sources: Dict[str, List] = {level: [] for level in self.headers}
82
82
  self.source_configuration = deepcopy(source_configuration)
83
83
 
84
- def get_reader(
85
- self, name: Optional[str] = None, prefix: Optional[str] = None
86
- ):
87
- """Get reader given name if provided or using name member variable if not.
88
- Set reader prefix to given prefix or name if not provided.
84
+ def get_reader(self, name: Optional[str] = None):
85
+ """Get reader given name if provided or using name member variable if
86
+ not.
89
87
 
90
88
  Args:
91
89
  name (str): Name of scraper
92
- prefix (Optional[str]): Prefix to use. Defaults to None (use scraper name).
93
90
 
94
91
  Returns:
95
92
  None
@@ -97,9 +94,6 @@ class BaseScraper(ABC):
97
94
  if not name:
98
95
  name = self.name
99
96
  reader = Read.get_reader(name)
100
- if not prefix:
101
- prefix = name
102
- reader.prefix = prefix
103
97
  return reader
104
98
 
105
99
  def get_headers(self, level: str) -> Optional[Tuple[Tuple]]:
@@ -362,6 +356,24 @@ class BaseScraper(ABC):
362
356
  Returns:
363
357
  Optional[Dict]: HAPI resource metadata
364
358
  """
359
+ hapi_resource_metadata = self.datasetinfo.get("hapi_resource_metadata")
360
+ if not hapi_resource_metadata:
361
+ return None
362
+ if "is_hxl" in hapi_resource_metadata:
363
+ return hapi_resource_metadata
364
+ reader = self.get_reader(self.name)
365
+ filename = self.datasetinfo.get("filename")
366
+ hxl_info = reader.hxl_info_hapi_resource_metadata(
367
+ hapi_resource_metadata, filename=filename, file_prefix=self.name
368
+ )
369
+ is_hxl = False
370
+ if hxl_info:
371
+ for sheet in hxl_info.get("sheets", ()):
372
+ if sheet["is_hxlated"]:
373
+ is_hxl = True
374
+ break
375
+ hapi_resource_metadata["is_hxl"] = is_hxl
376
+
365
377
  return self.datasetinfo.get("hapi_resource_metadata")
366
378
 
367
379
  def add_population(self) -> None:
@@ -33,7 +33,7 @@ class ResourceDownloader(BaseScraper):
33
33
  """
34
34
  reader = self.get_reader("hdx")
35
35
  resource = reader.read_hdx_metadata(self.datasetinfo)
36
- url, path = reader.download_resource(self.name, resource)
36
+ url, path = reader.download_resource(resource, file_prefix=self.name)
37
37
  logger.info(f"Downloading {url} to {path}")
38
38
  copy2(path, join(self.folder, self.datasetinfo["filename"]))
39
39
 
@@ -147,6 +147,7 @@ class ConfigurableScraper(BaseScraper):
147
147
  """
148
148
  return self.get_reader(name).read(
149
149
  self.datasetinfo,
150
+ file_prefix=name,
150
151
  **self.variables,
151
152
  )
152
153
 
@@ -50,7 +50,9 @@ class TimeSeries(BaseScraper):
50
50
  "output_hxl"
51
51
  ]
52
52
  rows = [headers, hxltags]
53
- file_headers, iterator = self.get_reader().read(self.datasetinfo)
53
+ file_headers, iterator = self.get_reader(self.name).read(
54
+ self.datasetinfo, file_prefix=self.name
55
+ )
54
56
  for inrow in iterator:
55
57
  if isinstance(datecol, list):
56
58
  dates = [str(inrow[x]) for x in datecol]
@@ -1145,7 +1145,9 @@ class Runner:
1145
1145
  return sorted(source_urls)
1146
1146
 
1147
1147
  def get_hapi_metadata(
1148
- self, names: Optional[ListTuple[str]] = None
1148
+ self,
1149
+ names: Optional[ListTuple[str]] = None,
1150
+ has_run: bool = True,
1149
1151
  ) -> Dict:
1150
1152
  """Get HAPI metadata for all datasets. A dictionary is returned that
1151
1153
  maps from dataset ids to a dictionary. The dictionary has keys for
@@ -1154,6 +1156,7 @@ class Runner:
1154
1156
 
1155
1157
  Args:
1156
1158
  names (Optional[ListTuple[str]]): Names of scrapers
1159
+ has_run (bool): Only get results for scrapers marked as having run. Defaults to True.
1157
1160
 
1158
1161
  Returns:
1159
1162
  Dict: HAPI metadata for all datasets
@@ -1163,7 +1166,7 @@ class Runner:
1163
1166
  results = {}
1164
1167
  for name in names:
1165
1168
  scraper = self.get_scraper(name)
1166
- if not scraper.has_run:
1169
+ if has_run and not scraper.has_run:
1167
1170
  continue
1168
1171
  hapi_dataset_metadata = scraper.get_hapi_dataset_metadata()
1169
1172
  hapi_resource_metadata = scraper.get_hapi_resource_metadata()