hdx-python-scraper 2.3.5__tar.gz → 2.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.config/pre-commit-config.yaml +7 -6
  2. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.config/ruff.toml +6 -4
  3. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.github/workflows/publish.yaml +4 -3
  4. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.github/workflows/run-python-tests.yaml +6 -5
  5. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/CONTRIBUTING.md +4 -5
  6. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/PKG-INFO +6 -5
  7. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/documentation/.readthedocs.yaml +1 -0
  8. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/documentation/main.md +28 -2
  9. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/documentation/pydoc-markdown.yaml +12 -15
  10. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/pyproject.toml +21 -18
  11. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/requirements.txt +107 -117
  12. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/__init__.py +0 -0
  13. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/_version.py +2 -2
  14. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/base_scraper.py +4 -4
  15. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/outputs/googlesheets.py +1 -1
  16. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/runner.py +12 -12
  17. hdx_python_scraper-2.3.5/src/hdx/scraper/configurable/scraper.py → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers/configurable_scraper.py +5 -5
  18. {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers}/rowparser.py +58 -23
  19. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/reader.py +93 -22
  20. hdx_python_scraper-2.5.2/src/hdx/scraper/framework/utilities/sector.py +63 -0
  21. hdx_python_scraper-2.5.2/src/hdx/scraper/framework/utilities/sector_configuration.yaml +138 -0
  22. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/sources.py +3 -3
  23. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/config/project_configuration.yaml +2 -1
  24. hdx_python_scraper-2.5.2/tests/fixtures/input/global-coordination-groups-beta.json +1 -0
  25. hdx_python_scraper-2.5.2/tests/fixtures/input/sector_global_coordination_groups_beta_csv.csv +17 -0
  26. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework}/conftest.py +5 -4
  27. hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/custom/__init__.py +0 -0
  28. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/custom}/affected_targeted_reached.py +2 -2
  29. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/custom}/education_closures.py +1 -1
  30. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/custom}/education_enrolment.py +1 -1
  31. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_custom.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/custom/test_custom.py +8 -8
  32. hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/outputs/__init__.py +0 -0
  33. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/outputs}/test_output.py +6 -6
  34. hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/__init__.py +0 -0
  35. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_aggregation.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_aggregation.py +5 -5
  36. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_appenddata.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_appenddata.py +7 -7
  37. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_global.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_global.py +4 -4
  38. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_multipleurls.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_multipleurls.py +4 -4
  39. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_national.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_national.py +17 -17
  40. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_regionaltoplevel.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_regionaltoplevel.py +7 -7
  41. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_resource_downloaders.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_resource_downloaders.py +3 -3
  42. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_subnational.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_subnational.py +9 -8
  43. hdx_python_scraper-2.3.5/tests/hdx/scraper/test_scrapers_timeseries.py → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers/test_timeseries.py +4 -4
  44. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/scrapers}/unhcr_myanmar_idps.py +2 -2
  45. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework}/test_runner.py +2 -2
  46. hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities/__init__.py +0 -0
  47. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities}/test_readers.py +72 -3
  48. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities}/test_regionlookup.py +1 -1
  49. hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities/test_sector.py +11 -0
  50. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities}/test_sources.py +5 -3
  51. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework/utilities}/test_utils.py +1 -1
  52. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.config/coveragerc +0 -0
  53. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.config/pytest.ini +0 -0
  54. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/.gitignore +0 -0
  55. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/LICENSE +0 -0
  56. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/README.md +0 -0
  57. {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/outputs}/__init__.py +0 -0
  58. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/outputs/base.py +0 -0
  59. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/outputs/excelfile.py +0 -0
  60. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/outputs/json.py +0 -0
  61. {hdx_python_scraper-2.3.5/src/hdx/scraper/outputs → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers}/__init__.py +0 -0
  62. {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers}/aggregator.py +0 -0
  63. {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers}/resource_downloader.py +0 -0
  64. {hdx_python_scraper-2.3.5/src/hdx/scraper/configurable → hdx_python_scraper-2.5.2/src/hdx/scraper/framework/scrapers}/timeseries.py +0 -0
  65. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/__init__.py +0 -0
  66. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/fallbacks.py +0 -0
  67. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/region_lookup.py +0 -0
  68. {hdx_python_scraper-2.3.5/src/hdx/scraper → hdx_python_scraper-2.5.2/src/hdx/scraper/framework}/utilities/writer.py +0 -0
  69. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
  70. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/additional-json.json +0 -0
  71. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  72. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  73. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
  74. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
  75. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
  76. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
  77. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
  78. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
  79. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
  80. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
  81. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
  82. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
  83. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
  84. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
  85. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  86. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  87. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/fallbacks.json +0 -0
  88. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  89. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  90. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
  91. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
  92. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
  93. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
  94. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
  95. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
  96. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
  97. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
  98. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
  99. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
  100. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
  101. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  102. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  103. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  104. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  105. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  106. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  107. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  108. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  109. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
  110. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
  111. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/population.json +0 -0
  112. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  113. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  114. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
  115. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
  116. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
  117. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
  118. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
  119. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
  120. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
  121. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
  122. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
  123. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  124. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  125. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  126. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
  127. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
  128. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
  129. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
  130. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
  131. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
  132. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
  133. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
  134. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
  135. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/test_output.xlsx +0 -0
  136. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/test_scraper_all.json +0 -0
  137. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/test_scraper_other.json +0 -0
  138. {hdx_python_scraper-2.3.5 → hdx_python_scraper-2.5.2}/tests/fixtures/test_scraper_population.json +0 -0
  139. {hdx_python_scraper-2.3.5/tests/hdx/scraper → hdx_python_scraper-2.5.2/tests/hdx/scraper/framework}/__init__.py +0 -0
@@ -1,15 +1,15 @@
1
1
  default_language_version:
2
- python: python3.11
2
+ python: python3.12
3
3
  repos:
4
4
  - repo: https://github.com/pre-commit/pre-commit-hooks
5
- rev: v4.5.0
5
+ rev: v5.0.0
6
6
  hooks:
7
7
  - id: trailing-whitespace
8
8
  - id: end-of-file-fixer
9
9
  exclude: test_scraper_.*\.json
10
10
  - id: check-ast
11
11
  - repo: https://github.com/astral-sh/ruff-pre-commit
12
- rev: v0.1.14
12
+ rev: v0.8.6
13
13
  hooks:
14
14
  # Run the linter.
15
15
  - id: ruff
@@ -17,10 +17,11 @@ repos:
17
17
  # Run the formatter.
18
18
  - id: ruff-format
19
19
  args: [--config, .config/ruff.toml]
20
- - repo: https://github.com/jazzband/pip-tools
21
- rev: 7.3.0
20
+ - repo: https://github.com/astral-sh/uv-pre-commit
21
+ rev: 0.5.15
22
22
  hooks:
23
+ # Run the pip compile
23
24
  - id: pip-compile
24
25
  name: pip-compile requirements.txt
25
26
  files: pyproject.toml
26
- args: [pyproject.toml, --resolver=backtracking, --all-extras, --upgrade, -q, -o, requirements.txt]
27
+ args: [ pyproject.toml, --resolver=backtracking, --all-extras, --upgrade, -q, -o, requirements.txt ]
@@ -1,14 +1,16 @@
1
1
  line-length = 79
2
2
  exclude = ["_version.py"]
3
- ignore = [
4
- "E501" # Line too long
5
- ]
3
+
4
+ [lint]
6
5
  # List of rules: https://docs.astral.sh/ruff/rules/
7
6
  select = [
8
7
  "E", # pycodestyle - default
9
8
  "F", # pyflakes - default
10
9
  "I" # isort
11
10
  ]
11
+ ignore = [
12
+ "E501" # Line too long
13
+ ]
12
14
 
13
- [isort]
15
+ [lint.isort]
14
16
  known-local-folder = ["hdx"]
@@ -9,19 +9,20 @@ jobs:
9
9
  runs-on: ubuntu-latest
10
10
 
11
11
  steps:
12
- - uses: actions/checkout@v3
12
+ - uses: actions/checkout@v4
13
13
  - name: Get history and tags for versioning to work
14
14
  run: |
15
15
  git fetch --prune --unshallow
16
16
  git fetch --depth=1 origin +refs/tags/*:refs/tags/*
17
17
  - name: Set up Python
18
- uses: actions/setup-python@v4
18
+ uses: actions/setup-python@v5
19
19
  with:
20
20
  python-version: '3.x'
21
21
  - name: Install dependencies
22
22
  run: |
23
23
  python -m pip install --upgrade pip
24
- pip install --upgrade hatch
24
+ - name: Install Hatch
25
+ uses: pypa/hatch@install
25
26
  - name: Build with hatch
26
27
  run: |
27
28
  hatch build
@@ -18,24 +18,25 @@ jobs:
18
18
  runs-on: ubuntu-latest
19
19
 
20
20
  steps:
21
- - uses: actions/checkout@v3
21
+ - uses: actions/checkout@v4
22
22
  - name: Set up Python
23
- uses: actions/setup-python@v4
23
+ uses: actions/setup-python@v5
24
24
  with:
25
25
  python-version: '3.x'
26
26
  - name: Install dependencies
27
27
  run: |
28
28
  python -m pip install --upgrade pip
29
- pip install --upgrade hatch
29
+ - name: Install Hatch
30
+ uses: pypa/hatch@install
30
31
  - name: Test with hatch/pytest
31
32
  env:
32
33
  GSHEET_AUTH: ${{ secrets.GSHEET_AUTH }}
33
34
  run: |
34
- hatch run test:test
35
+ hatch test
35
36
  - name: Check styling
36
37
  if: always()
37
38
  run: |
38
- hatch run lint:style
39
+ hatch fmt --check
39
40
  - name: Publish Unit Test Results
40
41
  uses: EnricoMi/publish-unit-test-result-action@v2
41
42
  if: always()
@@ -2,10 +2,10 @@
2
2
 
3
3
  ## Environment
4
4
 
5
- Development is currently done using Python 3.11. We recommend using a virtual
5
+ Development is currently done using Python 3.12. We recommend using a virtual
6
6
  environment such as ``venv``:
7
7
 
8
- python3.11 -m venv venv
8
+ python3.12 -m venv venv
9
9
  source venv/bin/activate
10
10
 
11
11
  In your virtual environment, please install all packages for
@@ -21,14 +21,13 @@ you make a git commit:
21
21
  pre-commit install
22
22
 
23
23
  The configuration file for this project is in a
24
- non-start location. Thus, you will need to edit your
24
+ non-standard location. Thus, you will need to edit your
25
25
  `.git/hooks/pre-commit` file to reflect this. Change
26
26
  the line that begins with `ARGS` to:
27
27
 
28
28
  ARGS=(hook-impl --config=.config/pre-commit-config.yaml --hook-type=pre-commit)
29
29
 
30
30
  With pre-commit, all code is formatted according to
31
- [black]("https://github.com/psf/black") and
32
31
  [ruff](https://github.com/charliermarsh/ruff) guidelines.
33
32
 
34
33
  To check if your changes pass pre-commit without committing, run:
@@ -41,7 +40,7 @@ To run the tests and view coverage, execute:
41
40
 
42
41
  pytest -c .config/pytest.ini --cov hdx --cov-config .config/coveragerc
43
42
 
44
- Follow the example set out already in ``api.rst`` as you write the documentation.
43
+ Follow the example set out already in ``documentation/main.md`` as you write the documentation.
45
44
 
46
45
  ## Packages
47
46
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: hdx-python-scraper
3
- Version: 2.3.5
3
+ Version: 2.5.2
4
4
  Summary: HDX Python scraper utilities to assemble data from multiple sources
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -26,13 +26,14 @@ Classifier: Programming Language :: Python :: 3.12
26
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Requires-Python: >=3.8
28
28
  Requires-Dist: gspread
29
- Requires-Dist: hdx-python-api>=6.2.1
30
- Requires-Dist: hdx-python-country>=3.6.4
29
+ Requires-Dist: hdx-python-api>=6.3.7
30
+ Requires-Dist: hdx-python-country>=3.8.6
31
+ Requires-Dist: hdx-python-utilities>=3.8.2
31
32
  Requires-Dist: regex
32
33
  Provides-Extra: dev
33
34
  Requires-Dist: pre-commit; extra == 'dev'
34
35
  Provides-Extra: pandas
35
- Requires-Dist: pandas>=2.1.3; extra == 'pandas'
36
+ Requires-Dist: pandas>=2.2.2; extra == 'pandas'
36
37
  Provides-Extra: test
37
38
  Requires-Dist: pytest; extra == 'test'
38
39
  Requires-Dist: pytest-cov; extra == 'test'
@@ -11,5 +11,6 @@ build:
11
11
  jobs:
12
12
  pre_build:
13
13
  - pip install --upgrade mkdocs
14
+ - pip install mkdocs-material
14
15
  - pip install pydoc-markdown
15
16
  - pydoc-markdown documentation/pydoc-markdown.yaml
@@ -26,6 +26,8 @@ install with:
26
26
  pip install hdx-python-scraper[pandas]
27
27
 
28
28
  ## Breaking Changes
29
+ From 2.5.0, package names have changed to avoid namespace clashes.
30
+
29
31
  From 2.3.0, resource name is used when available instead of creating name from
30
32
  url so tests that use saved data from the Read class may break. file_type
31
33
  parameters in various Read methods renamed to format.
@@ -335,7 +337,7 @@ default is:
335
337
  ### Custom Scrapers
336
338
 
337
339
  It is possible to define custom scrapers written in Python which must inherit
338
- [BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/base_scraper.py),
340
+ [BaseScraper](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/src/hdx/scraper/framework/base_scraper.py),
339
341
  calling its constructor and providing a `run` method. Other methods where a default
340
342
  implementation has been provided can be overridden such as `add_sources` and
341
343
  `add_population`. There are also two hooks for running steps at particular points.
@@ -392,7 +394,7 @@ from regions to values. In this case, since national and regional each have only
392
394
  header and HXL hashtag, there is only one dictionary to populate for each.
393
395
 
394
396
  An example of a custom scraper can be seen
395
- [here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/education_closures.py).
397
+ [here](https://github.com/OCHA-DAP/hdx-python-scraper/blob/main/tests/hdx/scraper/framework/custom/education_closures.py).
396
398
 
397
399
  An example of overriding `add_sources` to customise the source information that is
398
400
  output is as follows:
@@ -1089,6 +1091,30 @@ same HXL hashtags.
1089
1091
  use_hxl: True
1090
1092
  should_overwrite_sources: True
1091
1093
 
1094
+ The xlsx2csv option below converts the xlsx to csv before processing.
1095
+
1096
+ idps_somalia:
1097
+ dataset: "somalia-internally-displaced-persons-idps"
1098
+ format: "xlsx"
1099
+ xlsx2csv: True
1100
+ filter_cols:
1101
+ - "Reason"
1102
+ - "Year"
1103
+ prefilter: "'drought' in Reason.lower() and int(Year) in (self.today.year - 1, self.today.year)"
1104
+ admin:
1105
+ - value: "SOM"
1106
+ - "Current (Arrival) District"
1107
+ input:
1108
+ - "Number of Individuals"
1109
+ sum:
1110
+ - formula: "int(Number of Individuals)"
1111
+ mustbepopulated: True
1112
+ output:
1113
+ - "IDPs"
1114
+ output_hxl:
1115
+ - "#affected+idps+ind"
1116
+ source_date_format: "%Y-%m-%d"
1117
+
1092
1118
  ## Population Data
1093
1119
 
1094
1120
  Population data is treated as a special class of data. By default, configurable and
@@ -3,13 +3,13 @@ loaders:
3
3
  search_path:
4
4
  - ../src
5
5
  packages:
6
- - hdx.scraper
6
+ - hdx.scraper.framework
7
7
  renderer:
8
8
  type: mkdocs
9
9
  output_directory: docs
10
10
  mkdocs_config:
11
11
  site_name: HDX Python Scraper
12
- theme: mkdocs
12
+ theme: material
13
13
  repo_url: "https://github.com/OCHA-DAP/hdx-python-scraper"
14
14
  markdown:
15
15
  source_linker:
@@ -21,21 +21,18 @@ renderer:
21
21
  source: main.md
22
22
  - title: API Documentation
23
23
  children:
24
- - title: Source Readers
24
+ - title: Runner
25
25
  contents:
26
- - hdx.scraper.utilities.readers.*
27
- - title: Configurable Scrapers
26
+ - hdx.scraper.framework.runner.Runner.*
27
+ - title: Scraper Base Class
28
28
  contents:
29
- - hdx.scraper.configurable.*
30
- - title: Base Class for Scrapers
29
+ - hdx.scraper.framework.base_scraper.BaseScraper.*
30
+ - title: Scrapers
31
31
  contents:
32
- - hdx.scraper.base_scraper.BaseScraper.*
33
- - title: Fallbacks
34
- contents:
35
- - hdx.scraper.utilities.fallbacks.Fallbacks
36
- - title: Run Scrapers
37
- contents:
38
- - hdx.scraper.runner.Runner.*
32
+ - hdx.scraper.framework.scrapers.*
39
33
  - title: Outputs
40
34
  contents:
41
- - hdx.scraper.outputs.*
35
+ - hdx.scraper.framework.outputs.*
36
+ - title: Utilities
37
+ contents:
38
+ - hdx.scraper.framework.utilities.*
@@ -34,8 +34,9 @@ classifiers = [
34
34
  requires-python = ">=3.8"
35
35
 
36
36
  dependencies = [
37
- "hdx-python-api>=6.2.1",
38
- "hdx-python-country>=3.6.4",
37
+ "hdx-python-api>=6.3.7",
38
+ "hdx-python-country>=3.8.6",
39
+ "hdx-python-utilities>=3.8.2",
39
40
  "gspread",
40
41
  "regex",
41
42
  ]
@@ -49,7 +50,7 @@ content-type = "text/markdown"
49
50
  Homepage = "https://github.com/OCHA-DAP/hdx-python-scraper"
50
51
 
51
52
  [project.optional-dependencies]
52
- pandas = ["pandas>=2.1.3"]
53
+ pandas = ["pandas>=2.2.2"]
53
54
  test = ["pytest", "pytest-cov"]
54
55
  dev = ["pre-commit"]
55
56
 
@@ -64,7 +65,10 @@ dev = ["pre-commit"]
64
65
  packages = ["src/hdx"]
65
66
 
66
67
  [tool.hatch.build.hooks.vcs]
67
- version-file = "src/hdx/scraper/_version.py"
68
+ version-file = "src/hdx/scraper/framework/_version.py"
69
+
70
+ [tool.hatch.metadata]
71
+ allow-direct-references = true
68
72
 
69
73
  # Versioning
70
74
 
@@ -77,25 +81,24 @@ version_scheme = "python-simplified-semver"
77
81
 
78
82
  # Tests
79
83
 
80
- [tool.hatch.envs.test]
84
+ [tool.hatch.envs.hatch-test]
81
85
  features = ["pandas", "test"]
82
86
 
83
- [tool.hatch.envs.test.scripts]
84
- test = """
87
+ [[tool.hatch.envs.hatch-test.matrix]]
88
+ python = ["3.12"]
89
+
90
+ [tool.hatch.envs.hatch-test.scripts]
91
+ run = """
85
92
  pytest -c .config/pytest.ini --rootdir=. --junitxml=test-results.xml \
86
93
  --cov --cov-config=.config/coveragerc --no-cov-on-fail \
87
94
  --cov-report=lcov --cov-report=term-missing
88
95
  """
89
96
 
90
- [[tool.hatch.envs.test.matrix]]
91
- python = ["3.12"]
92
-
93
- [tool.hatch.envs.lint]
94
- detached = true
95
- dependencies = ["ruff"]
97
+ [tool.hatch.envs.hatch-static-analysis]
98
+ dependencies = ["ruff==0.8.6"]
96
99
 
97
- [tool.hatch.envs.lint.scripts]
98
- style = [
99
- "ruff check --config .config/ruff.toml --diff {args:.}",
100
- "ruff format --config .config/ruff.toml --diff {args:.}",
101
- ]
100
+ [tool.hatch.envs.hatch-static-analysis.scripts]
101
+ format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
102
+ format-fix = ["ruff format --config .config/ruff.toml {args:.}",]
103
+ lint-check = ["ruff check --config .config/ruff.toml {args:.}",]
104
+ lint-fix = ["ruff check --config .config/ruff.toml --fix {args:.}",]