hdx-python-scraper 2.7.0__tar.gz → 2.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. hdx_python_scraper-2.7.2/.github/workflows/publish.yaml +34 -0
  2. hdx_python_scraper-2.7.2/.github/workflows/run-python-tests.yaml +52 -0
  3. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/.pre-commit-config.yaml +8 -9
  4. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/PKG-INFO +5 -10
  5. hdx_python_scraper-2.7.2/pyproject.toml +150 -0
  6. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/_version.py +2 -2
  7. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/excelfile.py +2 -1
  8. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/json.py +5 -4
  9. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/runner.py +3 -2
  10. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/resource_downloader.py +4 -4
  11. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/fallbacks.py +2 -1
  12. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/lookup.py +2 -1
  13. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/reader.py +11 -11
  14. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/config/project_configuration.yaml +325 -325
  15. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/download-global-pcode-lengths.csv +0 -1
  16. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/download-global-pcodes-adm-1-2.csv +0 -1
  17. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/conftest.py +8 -9
  18. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/test_custom.py +5 -6
  19. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/outputs/test_output.py +9 -10
  20. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_aggregation.py +1 -2
  21. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_appenddata.py +1 -2
  22. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_global.py +1 -2
  23. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +1 -2
  24. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_national.py +2 -3
  25. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +1 -2
  26. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +2 -2
  27. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_subnational.py +4 -5
  28. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/test_runner.py +1 -2
  29. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_sources.py +2 -2
  30. hdx_python_scraper-2.7.2/uv.lock +2738 -0
  31. hdx_python_scraper-2.7.0/.coveragerc +0 -17
  32. hdx_python_scraper-2.7.0/.github/workflows/publish.yaml +0 -37
  33. hdx_python_scraper-2.7.0/.github/workflows/run-python-tests.yaml +0 -55
  34. hdx_python_scraper-2.7.0/CONTRIBUTING.md +0 -60
  35. hdx_python_scraper-2.7.0/hatch.toml +0 -37
  36. hdx_python_scraper-2.7.0/pyproject.toml +0 -56
  37. hdx_python_scraper-2.7.0/pytest.ini +0 -4
  38. hdx_python_scraper-2.7.0/requirements.txt +0 -236
  39. hdx_python_scraper-2.7.0/ruff.toml +0 -10
  40. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/.gitignore +0 -0
  41. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/LICENSE +0 -0
  42. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/README.md +0 -0
  43. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/.readthedocs.yaml +0 -0
  44. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/index.md +0 -0
  45. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/documentation/mkdocs.yaml +0 -0
  46. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/__init__.py +0 -0
  47. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/base_scraper.py +0 -0
  48. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/__init__.py +0 -0
  49. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/base.py +0 -0
  50. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/outputs/googlesheets.py +0 -0
  51. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/__init__.py +0 -0
  52. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/aggregator.py +0 -0
  53. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/configurable_scraper.py +0 -0
  54. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/rowparser.py +0 -0
  55. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/scrapers/timeseries.py +0 -0
  56. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/__init__.py +0 -0
  57. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/hapi_admins.py +0 -0
  58. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/org_type.py +0 -0
  59. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/org_type_configuration.yaml +0 -0
  60. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/region_lookup.py +0 -0
  61. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sector.py +0 -0
  62. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sector_configuration.yaml +0 -0
  63. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/sources.py +0 -0
  64. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/src/hdx/scraper/framework/utilities/writer.py +0 -0
  65. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
  66. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/additional-json.json +0 -0
  67. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  68. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  69. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
  70. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
  71. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
  72. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
  73. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
  74. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
  75. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
  76. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
  77. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
  78. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
  79. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
  80. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
  81. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  82. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  83. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/fallbacks.json +0 -0
  84. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  85. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  86. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/global-coordination-groups-beta.json +0 -0
  87. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
  88. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
  89. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
  90. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
  91. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
  92. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
  93. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
  94. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
  95. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
  96. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
  97. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
  98. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  99. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  100. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  101. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  102. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  103. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  104. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/org_type_organization_types_beta_csv.csv +0 -0
  105. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/organization-types-beta.json +0 -0
  106. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  107. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  108. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
  109. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
  110. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population.json +0 -0
  111. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  112. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  113. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
  114. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
  115. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
  116. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
  117. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
  118. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
  119. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/sector_global_coordination_groups_beta_csv.csv +0 -0
  120. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
  121. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
  122. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
  123. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  124. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  125. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  126. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
  127. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
  128. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
  129. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
  130. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
  131. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
  132. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
  133. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
  134. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
  135. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_output.xlsx +0 -0
  136. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_all.json +0 -0
  137. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_other.json +0 -0
  138. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/fixtures/test_scraper_population.json +0 -0
  139. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/__init__.py +0 -0
  140. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/__init__.py +0 -0
  141. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/affected_targeted_reached.py +0 -0
  142. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/education_closures.py +0 -0
  143. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/custom/education_enrolment.py +0 -0
  144. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
  145. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
  146. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/test_timeseries.py +0 -0
  147. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/scrapers/unhcr_myanmar_idps.py +0 -0
  148. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
  149. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_hapi_admins.py +0 -0
  150. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_lookup.py +0 -0
  151. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_readers.py +0 -0
  152. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_regionlookup.py +0 -0
  153. {hdx_python_scraper-2.7.0 → hdx_python_scraper-2.7.2}/tests/hdx/scraper/framework/utilities/test_utils.py +0 -0
@@ -0,0 +1,34 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+
11
+ environment:
12
+ name: pypi
13
+ url: https://pypi.org/p/hdx-python-scraper
14
+
15
+ permissions:
16
+ id-token: write
17
+ contents: read
18
+
19
+ steps:
20
+ - uses: actions/checkout@v6
21
+
22
+ - name: Get history and tags for versioning to work
23
+ run: |
24
+ git fetch --prune --unshallow
25
+ git fetch --depth=1 origin +refs/tags/*:refs/tags/*
26
+
27
+ - name: Install uv
28
+ uses: astral-sh/setup-uv@v7
29
+
30
+ - name: Build with uv
31
+ run: uv build
32
+
33
+ - name: Publish distribution 📦 to PyPI
34
+ run: uv publish
@@ -0,0 +1,52 @@
1
+ name: Run tests
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ push:
6
+ branches-ignore: [gh-pages, "dependabot/**"]
7
+ pull_request:
8
+ branches-ignore: [gh-pages]
9
+
10
+ jobs:
11
+ build:
12
+ runs-on: ubuntu-latest
13
+ permissions:
14
+ contents: read
15
+ checks: write
16
+ pull-requests: write
17
+
18
+ steps:
19
+ - uses: actions/checkout@v6
20
+
21
+ - name: Install uv
22
+ uses: astral-sh/setup-uv@v7
23
+ with:
24
+ enable-cache: true
25
+ python-version: "3.13"
26
+
27
+ - name: Install dependencies
28
+ run: uv sync --frozen
29
+
30
+ - name: Check styling
31
+ run: |
32
+ uv run ruff format --check
33
+ uv run ruff check
34
+
35
+ - name: Test with pytest
36
+ env:
37
+ HDX_KEY_TEST: ${{ secrets.HDX_BOT_SCRAPERS_API_TOKEN }}
38
+ GSHEET_AUTH: ${{ secrets.HDX_PIPELINE_GSHEET_AUTH }}
39
+ run: uv run pytest
40
+
41
+ - name: Publish Unit Test Results
42
+ uses: EnricoMi/publish-unit-test-result-action@v2
43
+ if: always()
44
+ with:
45
+ files: test-results.xml
46
+
47
+ - name: Publish in Coveralls
48
+ uses: coverallsapp/github-action@v2
49
+ if: always()
50
+ with:
51
+ flag-name: tests
52
+ format: lcov
@@ -1,5 +1,6 @@
1
1
  default_language_version:
2
- python: python3.13
2
+ python: python3.13
3
+
3
4
  repos:
4
5
  - repo: https://github.com/pre-commit/pre-commit-hooks
5
6
  rev: v6.0.0
@@ -8,20 +9,18 @@ repos:
8
9
  - id: end-of-file-fixer
9
10
  exclude: test_scraper_.*\.json
10
11
  - id: check-ast
12
+
11
13
  - repo: https://github.com/astral-sh/ruff-pre-commit
12
- rev: v0.14.10
14
+ rev: v0.14.14
13
15
  hooks:
14
16
  # Run the linter.
15
17
  - id: ruff-check
16
18
  args: [ --fix ]
17
19
  # Run the formatter.
18
20
  - id: ruff-format
21
+
19
22
  - repo: https://github.com/astral-sh/uv-pre-commit
20
- rev: 0.9.22
23
+ rev: 0.9.25
21
24
  hooks:
22
- # Run the pip compile
23
- - id: pip-compile
24
- name: pip-compile requirements.txt
25
- files: pyproject.toml
26
- args: [ pyproject.toml, --resolver=backtracking, --upgrade, -q,
27
- -o, requirements.txt ]
25
+ # Ensure the lockfile is up-to-date with pyproject.toml
26
+ - id: uv-lock
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hdx-python-scraper
3
- Version: 2.7.0
3
+ Version: 2.7.2
4
4
  Summary: HDX Python scraper utilities to assemble data from multiple sources
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -26,20 +26,15 @@ Classifier: Programming Language :: Python :: 3.12
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Requires-Python: >=3.10
28
28
  Requires-Dist: gspread
29
- Requires-Dist: hdx-python-api>=6.6.0
30
- Requires-Dist: hdx-python-country>=4.0.0
31
- Requires-Dist: hdx-python-utilities>=4.0.0
29
+ Requires-Dist: hdx-python-api>=6.6.4
30
+ Requires-Dist: hdx-python-country>=4.1.1
31
+ Requires-Dist: hdx-python-utilities>=4.0.4
32
+ Requires-Dist: libhxl
32
33
  Requires-Dist: regex
33
- Provides-Extra: dev
34
- Requires-Dist: pre-commit; extra == 'dev'
35
34
  Provides-Extra: docs
36
35
  Requires-Dist: mkapi; extra == 'docs'
37
36
  Provides-Extra: pandas
38
37
  Requires-Dist: pandas>=2.2.3; extra == 'pandas'
39
- Provides-Extra: test
40
- Requires-Dist: pandas>=2.2.3; extra == 'test'
41
- Requires-Dist: pytest; extra == 'test'
42
- Requires-Dist: pytest-cov; extra == 'test'
43
38
  Description-Content-Type: text/markdown
44
39
 
45
40
  [![Build Status](https://github.com/OCHA-DAP/hdx-python-scraper/actions/workflows/run-python-tests.yaml/badge.svg)](https://github.com/OCHA-DAP/hdx-python-scraper/actions/workflows/run-python-tests.yaml)
@@ -0,0 +1,150 @@
1
+ #########################
2
+ # Project Configuration #
3
+ #########################
4
+
5
+ [build-system]
6
+ requires = ["hatchling", "hatch-vcs"]
7
+ build-backend = "hatchling.build"
8
+
9
+ [project]
10
+ name = "hdx-python-scraper"
11
+ description = "HDX Python scraper utilities to assemble data from multiple sources"
12
+ authors = [{name = "Michael Rans", email = "rans@email.com"}]
13
+ license = {text = "MIT"}
14
+ keywords = ["HDX", "scrapers", "data assembly", "data transformation", "tabular data"]
15
+ classifiers = [
16
+ "Development Status :: 5 - Production/Stable",
17
+ "Topic :: Software Development :: Libraries :: Python Modules",
18
+ "Programming Language :: Python",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3 :: Only",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Intended Audience :: Developers",
27
+ "License :: OSI Approved :: MIT License",
28
+ "Natural Language :: English",
29
+ "Operating System :: POSIX :: Linux",
30
+ "Operating System :: Unix",
31
+ "Operating System :: MacOS",
32
+ "Operating System :: Microsoft :: Windows",
33
+ ]
34
+ readme = "README.md"
35
+ dynamic = ["version"]
36
+ requires-python = ">=3.10"
37
+
38
+
39
+ dependencies = [
40
+ "hdx-python-api>=6.6.4",
41
+ "hdx-python-country>=4.1.1",
42
+ "hdx-python-utilities>=4.0.4",
43
+ "libhxl",
44
+ "gspread",
45
+ "regex",
46
+ ]
47
+
48
+ [project.optional-dependencies]
49
+ pandas = ["pandas>=2.2.3"]
50
+ docs = ["mkapi"]
51
+
52
+ [dependency-groups]
53
+ dev = [
54
+ "pandas>=2.2.3",
55
+ "pytest",
56
+ "pytest-cov",
57
+ "pre-commit",
58
+ "ruff==0.14.14",
59
+ ]
60
+
61
+ [project.urls]
62
+ Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
63
+
64
+ # ----------------------------------------------------------------------------
65
+ # Hatchling (Build & Versioning)
66
+ # ----------------------------------------------------------------------------
67
+
68
+ [tool.hatch.version]
69
+ source = "vcs"
70
+
71
+ [tool.hatch.version.raw-options]
72
+ local_scheme = "no-local-version"
73
+ version_scheme = "python-simplified-semver"
74
+
75
+ [tool.hatch.build.hooks.vcs]
76
+ version-file = "src/hdx/scraper/framework/_version.py"
77
+
78
+ [tool.hatch.build.targets.wheel]
79
+ packages = ["src/hdx"]
80
+
81
+ [tool.hatch.metadata]
82
+ allow-direct-references = true
83
+
84
+ # ----------------------------------------------------------------------------
85
+ # Ruff (Linting & Formatting)
86
+ # ----------------------------------------------------------------------------
87
+
88
+ [tool.ruff]
89
+ target-version = "py310"
90
+ src = ["src"]
91
+ exclude = ["_version.py"]
92
+
93
+ [tool.ruff.lint]
94
+ # Defaults are E (pycodestyle) and F (pyflakes). We extend them:
95
+ extend-select = [
96
+ "I", # isort
97
+ "UP", # pyupgrade
98
+ ]
99
+ ignore = [
100
+ "E501", # Line too long
101
+ ]
102
+
103
+ [tool.ruff.lint.isort]
104
+ known-local-folder = ["hdx.scraper.framework"]
105
+ known-third-party = [
106
+ "hdx.api",
107
+ "hdx.data",
108
+ "hdx.facades",
109
+ "hdx.location",
110
+ "hdx.utilities",
111
+ ]
112
+
113
+ # ----------------------------------------------------------------------------
114
+ # Pytest (Testing)
115
+ # ----------------------------------------------------------------------------
116
+
117
+ [tool.pytest.ini_options]
118
+ pythonpath = "src"
119
+ log_cli = true
120
+ addopts = """
121
+ --color=yes
122
+ --rootdir=.
123
+ --junitxml=test-results.xml
124
+ --cov
125
+ --no-cov-on-fail
126
+ --cov-report=lcov
127
+ --cov-report=term-missing
128
+ """
129
+
130
+ # ----------------------------------------------------------------------------
131
+ # Coverage (Reporting)
132
+ # ----------------------------------------------------------------------------
133
+
134
+ [tool.coverage.run]
135
+ source = ["src"]
136
+ omit = ["*/_version.py"]
137
+
138
+ [tool.coverage.report]
139
+ exclude_also = [
140
+ "from ._version",
141
+ "def __repr__",
142
+ "if self.debug:",
143
+ "if settings.DEBUG",
144
+ "raise AssertionError",
145
+ "raise NotImplementedError",
146
+ "if 0:",
147
+ "if __name__ == .__main__.:",
148
+ "if TYPE_CHECKING:",
149
+ "@(abc\\.)?abstractmethod",
150
+ ]
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '2.7.0'
32
- __version_tuple__ = version_tuple = (2, 7, 0)
31
+ __version__ = version = '2.7.2'
32
+ __version_tuple__ = version_tuple = (2, 7, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from pathlib import Path
2
3
 
3
4
  from openpyxl import Workbook
4
5
 
@@ -24,7 +25,7 @@ class ExcelFile(BaseOutput):
24
25
  """
25
26
 
26
27
  def __init__(
27
- self, excel_path: str, tabs: dict[str, str], updatetabs: list[str]
28
+ self, excel_path: Path | str, tabs: dict[str, str], updatetabs: list[str]
28
29
  ) -> None:
29
30
  super().__init__(updatetabs)
30
31
  self.workbook = Workbook()
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from os.path import join
2
+ from pathlib import Path
3
3
  from typing import Any
4
4
 
5
5
  from hdx.utilities.dictandlist import dict_of_lists_add
@@ -184,7 +184,7 @@ class JsonFile(BaseOutput):
184
184
  newrow[hxl_row[key]] = row[key]
185
185
  self.add_data_row(name, newrow)
186
186
 
187
- def save(self, folder: str | None = None, **kwargs: Any) -> list[str]:
187
+ def save(self, folder: Path | str | None = None, **kwargs: Any) -> list[Path]:
188
188
  """Save JSON file and any addition subsets of that JSON defined in the additional configuration
189
189
 
190
190
  Args:
@@ -197,7 +197,8 @@ class JsonFile(BaseOutput):
197
197
  filepaths = []
198
198
  filepath = self.configuration["output"]
199
199
  if folder:
200
- filepath = join(folder, filepath)
200
+ folder = Path(folder)
201
+ filepath = folder / filepath
201
202
  logger.info(f"Writing JSON to {filepath}")
202
203
  save_json(self.json, filepath)
203
204
  filepaths.append(filepath)
@@ -262,7 +263,7 @@ class JsonFile(BaseOutput):
262
263
  continue
263
264
  filedetailspath = filedetails["filepath"]
264
265
  if folder:
265
- filedetailspath = join(folder, filedetailspath)
266
+ filedetailspath = folder / filedetailspath
266
267
  logger.info(f"Writing JSON to {filedetailspath}")
267
268
  save_json(json, filedetailspath)
268
269
  filepaths.append(filedetailspath)
@@ -2,6 +2,7 @@ import logging
2
2
  from collections.abc import Callable, Sequence
3
3
  from copy import copy
4
4
  from datetime import datetime
5
+ from pathlib import Path
5
6
  from traceback import format_exc
6
7
  from typing import Any, Optional
7
8
 
@@ -422,7 +423,7 @@ class Runner:
422
423
  def add_resource_downloader(
423
424
  self,
424
425
  datasetinfo: dict,
425
- folder: str = "",
426
+ folder: Path | str = "",
426
427
  force_add_to_run: bool = False,
427
428
  ) -> str:
428
429
  """Add resource downloader to the run. If running specific scrapers rather than
@@ -445,7 +446,7 @@ class Runner:
445
446
  def add_resource_downloaders(
446
447
  self,
447
448
  configuration: dict,
448
- folder: str = "",
449
+ folder: Path | str = "",
449
450
  force_add_to_run: bool = False,
450
451
  ) -> list[str]:
451
452
  """Add multiple resource downloaders to the run. If running specific scrapers
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from os.path import join
2
+ from pathlib import Path
3
3
  from shutil import copy2
4
4
 
5
5
  from slugify import slugify
@@ -19,11 +19,11 @@ class ResourceDownloader(BaseScraper):
19
19
  folder: Folder to which to download. Default is "".
20
20
  """
21
21
 
22
- def __init__(self, datasetinfo, folder):
22
+ def __init__(self, datasetinfo: dict, folder: Path | str):
23
23
  # ResourceDownloader only outputs to sources
24
24
  name = f"resource_downloader_{slugify(datasetinfo['hxltag'].lower(), separator='_')}"
25
25
  super().__init__(name, datasetinfo, {})
26
- self.folder = folder
26
+ self.folder = Path(folder)
27
27
 
28
28
  def run(self) -> None:
29
29
  """Runs one resource downloader given dataset information
@@ -35,7 +35,7 @@ class ResourceDownloader(BaseScraper):
35
35
  resource = reader.read_hdx_metadata(self.datasetinfo)
36
36
  url, path = reader.download_resource(resource, file_prefix=self.name)
37
37
  logger.info(f"Downloading {url} to {path}")
38
- copy2(path, join(self.folder, self.datasetinfo["filename"]))
38
+ copy2(path, self.folder / self.datasetinfo["filename"])
39
39
 
40
40
  def add_sources(self) -> None:
41
41
  """Add source for resource download
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from pathlib import Path
2
3
 
3
4
  from hdx.utilities.loader import LoadError, load_json
4
5
 
@@ -25,7 +26,7 @@ class Fallbacks:
25
26
  @classmethod
26
27
  def add(
27
28
  cls,
28
- fallbacks_path: str,
29
+ fallbacks_path: Path | str,
29
30
  levels_mapping: dict[str, str] = default_levels_mapping,
30
31
  sources_key: str = "sources",
31
32
  admin_name_mapping: dict[str, str] = default_admin_name_mapping,
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  from copy import copy
3
+ from pathlib import Path
3
4
 
4
5
  from hdx.utilities.loader import load_yaml
5
6
  from hdx.utilities.matching import get_code_from_name
@@ -20,7 +21,7 @@ class Lookup:
20
21
  classobject: Child class
21
22
  """
22
23
 
23
- def __init__(self, yaml_config_path: str, classobject: type):
24
+ def __init__(self, yaml_config_path: Path | str, classobject: type):
24
25
  configuration = load_yaml(script_dir_plus_file(yaml_config_path, classobject))
25
26
  self._configuration = configuration
26
27
  initial_lookup = configuration.get("initial_lookup", {})
@@ -2,7 +2,7 @@ import glob
2
2
  import logging
3
3
  from collections.abc import Iterator, Sequence
4
4
  from datetime import datetime
5
- from os.path import join
5
+ from pathlib import Path
6
6
  from typing import Any
7
7
  from urllib.parse import parse_qsl
8
8
 
@@ -41,9 +41,9 @@ class Read(Retrieve):
41
41
  def __init__(
42
42
  self,
43
43
  downloader: Download,
44
- fallback_dir: str,
45
- saved_dir: str,
46
- temp_dir: str,
44
+ fallback_dir: Path | str,
45
+ saved_dir: Path | str,
46
+ temp_dir: Path | str,
47
47
  save: bool = False,
48
48
  use_saved: bool = False,
49
49
  prefix: str = "",
@@ -65,9 +65,9 @@ class Read(Retrieve):
65
65
  @classmethod
66
66
  def create_readers(
67
67
  cls,
68
- fallback_dir: str,
69
- saved_dir: str,
70
- temp_dir: str,
68
+ fallback_dir: Path | str,
69
+ saved_dir: Path | str,
70
+ temp_dir: Path | str,
71
71
  save: bool = False,
72
72
  use_saved: bool = False,
73
73
  ignore: Sequence[str] = tuple(),
@@ -275,7 +275,7 @@ class Read(Retrieve):
275
275
  Returns:
276
276
  The dataset that was read or None
277
277
  """
278
- saved_path = join(self.saved_dir, f"{dataset_name}.json")
278
+ saved_path = self.saved_dir / f"{dataset_name}.json"
279
279
  if self.use_saved:
280
280
  logger.info(f"Using saved dataset {dataset_name} in {saved_path}")
281
281
  dataset = Dataset.load_from_json(saved_path)
@@ -319,7 +319,7 @@ class Read(Retrieve):
319
319
  list of datasets resulting from query
320
320
  """
321
321
 
322
- saved_path = join(self.saved_dir, filename)
322
+ saved_path = self.saved_dir / filename
323
323
  if self.use_saved:
324
324
  logger.info(
325
325
  f"Using saved datasets in {filename}_n.json in {self.saved_dir}"
@@ -461,7 +461,7 @@ class Read(Retrieve):
461
461
  url = resource["url"]
462
462
  try:
463
463
  _, path = self.download_resource(resource, **kwargs)
464
- data = hxl.data(path, InputOptions(allow_local=True)).cache()
464
+ data = hxl.data(str(path), InputOptions(allow_local=True)).cache()
465
465
  data.display_tags
466
466
  return data
467
467
  except hxl.HXLException:
@@ -488,7 +488,7 @@ class Read(Retrieve):
488
488
  """
489
489
  try:
490
490
  _, path = self.construct_filename_and_download(name, format, url, **kwargs)
491
- return hxl.info(path, InputOptions(allow_local=True))
491
+ return hxl.info(str(path), InputOptions(allow_local=True))
492
492
  except hxl.HXLException:
493
493
  logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
494
494
  return None