hdx-python-scraper 2.3.8__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/PKG-INFO +3 -3
  2. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/pyproject.toml +2 -2
  3. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/requirements.txt +22 -23
  4. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/_version.py +2 -2
  5. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/rowparser.py +57 -22
  6. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/reader.py +9 -6
  7. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_readers.py +4 -4
  8. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/coveragerc +0 -0
  9. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/pre-commit-config.yaml +0 -0
  10. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/pytest.ini +0 -0
  11. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.config/ruff.toml +0 -0
  12. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.github/workflows/publish.yaml +0 -0
  13. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.github/workflows/run-python-tests.yaml +0 -0
  14. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/.gitignore +0 -0
  15. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/CONTRIBUTING.md +0 -0
  16. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/LICENSE +0 -0
  17. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/README.md +0 -0
  18. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/.readthedocs.yaml +0 -0
  19. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/main.md +0 -0
  20. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/documentation/pydoc-markdown.yaml +0 -0
  21. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/__init__.py +0 -0
  22. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/base_scraper.py +0 -0
  23. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/__init__.py +0 -0
  24. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/aggregator.py +0 -0
  25. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/resource_downloader.py +0 -0
  26. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/scraper.py +0 -0
  27. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/configurable/timeseries.py +0 -0
  28. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/__init__.py +0 -0
  29. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/base.py +0 -0
  30. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/excelfile.py +0 -0
  31. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/googlesheets.py +0 -0
  32. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/outputs/json.py +0 -0
  33. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/runner.py +0 -0
  34. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/__init__.py +0 -0
  35. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/fallbacks.py +0 -0
  36. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/region_lookup.py +0 -0
  37. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/sources.py +0 -0
  38. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/src/hdx/scraper/utilities/writer.py +0 -0
  39. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/config/project_configuration.yaml +0 -0
  40. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
  41. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/additional-json.json +0 -0
  42. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  43. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  44. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
  45. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
  46. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
  47. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
  48. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
  49. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
  50. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
  51. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
  52. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_closures_broken.xls +0 -0
  53. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
  54. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/education_enrolment_enrollment_data.xlsx +0 -0
  55. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
  56. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  57. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  58. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/fallbacks.json +0 -0
  59. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  60. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
  61. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
  62. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
  63. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
  64. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
  65. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
  66. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
  67. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
  68. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
  69. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
  70. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
  71. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
  72. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  73. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  74. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  75. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
  76. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
  77. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
  78. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  79. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
  80. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
  81. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
  82. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population.json +0 -0
  83. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  84. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
  85. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
  86. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
  87. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
  88. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
  89. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
  90. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +0 -0
  91. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
  92. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
  93. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
  94. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
  95. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
  96. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
  97. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
  98. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
  99. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
  100. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/unocha-office-locations.json +0 -0
  101. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
  102. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
  103. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
  104. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
  105. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
  106. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_output.xlsx +0 -0
  107. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_all.json +0 -0
  108. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_other.json +0 -0
  109. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/fixtures/test_scraper_population.json +0 -0
  110. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/__init__.py +0 -0
  111. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/affected_targeted_reached.py +0 -0
  112. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/conftest.py +0 -0
  113. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_closures.py +0 -0
  114. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/education_enrolment.py +0 -0
  115. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_output.py +0 -0
  116. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_regionlookup.py +0 -0
  117. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_runner.py +0 -0
  118. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
  119. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
  120. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_custom.py +0 -0
  121. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_global.py +0 -0
  122. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
  123. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_national.py +0 -0
  124. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
  125. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
  126. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
  127. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
  128. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_sources.py +0 -0
  129. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/test_utils.py +0 -0
  130. {hdx_python_scraper-2.3.8 → hdx_python_scraper-2.4.0}/tests/hdx/scraper/unhcr_myanmar_idps.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hdx-python-scraper
3
- Version: 2.3.8
3
+ Version: 2.4.0
4
4
  Summary: HDX Python scraper utilities to assemble data from multiple sources
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -27,8 +27,8 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
27
  Requires-Python: >=3.8
28
28
  Requires-Dist: gspread
29
29
  Requires-Dist: hdx-python-api>=6.3.1
30
- Requires-Dist: hdx-python-country>=3.7.2
31
- Requires-Dist: hdx-python-utilities>=3.7.0
30
+ Requires-Dist: hdx-python-country>=3.7.6
31
+ Requires-Dist: hdx-python-utilities>=3.7.2
32
32
  Requires-Dist: regex
33
33
  Provides-Extra: dev
34
34
  Requires-Dist: pre-commit; extra == 'dev'
@@ -35,8 +35,8 @@ requires-python = ">=3.8"
35
35
 
36
36
  dependencies = [
37
37
  "hdx-python-api>=6.3.1",
38
- "hdx-python-country>=3.7.2",
39
- "hdx-python-utilities>=3.7.0",
38
+ "hdx-python-country>=3.7.6",
39
+ "hdx-python-utilities>=3.7.2",
40
40
  "gspread",
41
41
  "regex",
42
42
  ]
@@ -10,7 +10,7 @@ attrs==23.2.0
10
10
  # referencing
11
11
  cachetools==5.3.3
12
12
  # via google-auth
13
- certifi==2024.6.2
13
+ certifi==2024.7.4
14
14
  # via requests
15
15
  cffi==1.16.0
16
16
  # via cryptography
@@ -24,7 +24,7 @@ ckanapi==4.8
24
24
  # via hdx-python-api
25
25
  click==8.1.7
26
26
  # via typer
27
- coverage==7.5.3
27
+ coverage==7.6.0
28
28
  # via pytest-cov
29
29
  cryptography==42.0.8
30
30
  # via pyopenssl
@@ -40,36 +40,36 @@ docopt==0.6.2
40
40
  # num2words
41
41
  docutils==0.21.2
42
42
  # via defopt
43
- email-validator==2.1.2
43
+ email-validator==2.2.0
44
44
  # via hdx-python-api
45
45
  et-xmlfile==1.1.0
46
46
  # via openpyxl
47
- filelock==3.15.3
47
+ filelock==3.15.4
48
48
  # via virtualenv
49
49
  frictionless==5.17.0
50
50
  # via hdx-python-utilities
51
- google-auth==2.30.0
51
+ google-auth==2.32.0
52
52
  # via
53
53
  # google-auth-oauthlib
54
54
  # gspread
55
- google-auth-oauthlib==1.2.0
55
+ google-auth-oauthlib==1.2.1
56
56
  # via gspread
57
57
  gspread==6.1.2
58
58
  # via hdx-python-scraper (pyproject.toml)
59
59
  hdx-python-api==6.3.1
60
60
  # via hdx-python-scraper (pyproject.toml)
61
- hdx-python-country==3.7.2
61
+ hdx-python-country==3.7.6
62
62
  # via
63
63
  # hdx-python-scraper (pyproject.toml)
64
64
  # hdx-python-api
65
- hdx-python-utilities==3.7.0
65
+ hdx-python-utilities==3.7.2
66
66
  # via
67
67
  # hdx-python-scraper (pyproject.toml)
68
68
  # hdx-python-api
69
69
  # hdx-python-country
70
- humanize==4.9.0
70
+ humanize==4.10.0
71
71
  # via frictionless
72
- identify==2.5.36
72
+ identify==2.6.0
73
73
  # via pre-commit
74
74
  idna==3.7
75
75
  # via
@@ -77,7 +77,7 @@ idna==3.7
77
77
  # requests
78
78
  ijson==3.3.0
79
79
  # via hdx-python-utilities
80
- inflect==7.2.1
80
+ inflect==7.3.1
81
81
  # via quantulum3
82
82
  iniconfig==2.0.0
83
83
  # via pytest
@@ -89,7 +89,7 @@ jsonlines==4.0.0
89
89
  # via hdx-python-utilities
90
90
  jsonpath-ng==1.6.1
91
91
  # via libhxl
92
- jsonschema==4.22.0
92
+ jsonschema==4.23.0
93
93
  # via
94
94
  # frictionless
95
95
  # tableschema-to-template
@@ -101,11 +101,11 @@ libhxl==5.2.1
101
101
  # hdx-python-country
102
102
  loguru==0.7.2
103
103
  # via hdx-python-utilities
104
- makefun==1.15.2
104
+ makefun==1.15.4
105
105
  # via hdx-python-api
106
106
  markdown-it-py==3.0.0
107
107
  # via rich
108
- marko==2.1.1
108
+ marko==2.1.2
109
109
  # via frictionless
110
110
  markupsafe==2.1.5
111
111
  # via jinja2
@@ -123,7 +123,7 @@ numpy==2.0.0
123
123
  # via pandas
124
124
  oauthlib==3.2.2
125
125
  # via requests-oauthlib
126
- openpyxl==3.1.4
126
+ openpyxl==3.1.5
127
127
  # via hdx-python-utilities
128
128
  packaging==24.1
129
129
  # via pytest
@@ -153,9 +153,9 @@ pyasn1-modules==0.4.0
153
153
  # via google-auth
154
154
  pycparser==2.22
155
155
  # via cffi
156
- pydantic==2.7.4
156
+ pydantic==2.8.2
157
157
  # via frictionless
158
- pydantic-core==2.18.4
158
+ pydantic-core==2.20.1
159
159
  # via pydantic
160
160
  pygments==2.18.0
161
161
  # via rich
@@ -190,7 +190,7 @@ pyyaml==6.0.1
190
190
  # frictionless
191
191
  # pre-commit
192
192
  # tableschema-to-template
193
- quantulum3==0.9.1
193
+ quantulum3==0.9.2
194
194
  # via hdx-python-api
195
195
  ratelimit==2.2.1
196
196
  # via hdx-python-utilities
@@ -216,7 +216,7 @@ rfc3986==2.0.0
216
216
  # via frictionless
217
217
  rich==13.7.1
218
218
  # via typer
219
- rpds-py==0.18.1
219
+ rpds-py==0.19.0
220
220
  # via
221
221
  # jsonschema
222
222
  # referencing
@@ -226,7 +226,7 @@ ruamel-yaml==0.18.6
226
226
  # via hdx-python-utilities
227
227
  ruamel-yaml-clib==0.2.8
228
228
  # via ruamel-yaml
229
- setuptools==70.1.0
229
+ setuptools==70.3.0
230
230
  # via ckanapi
231
231
  shellingham==1.5.4
232
232
  # via typer
@@ -260,7 +260,6 @@ typer==0.12.3
260
260
  typing-extensions==4.12.2
261
261
  # via
262
262
  # frictionless
263
- # inflect
264
263
  # pydantic
265
264
  # pydantic-core
266
265
  # typeguard
@@ -275,9 +274,9 @@ urllib3==2.2.2
275
274
  # via
276
275
  # libhxl
277
276
  # requests
278
- validators==0.28.3
277
+ validators==0.32.0
279
278
  # via frictionless
280
- virtualenv==20.26.2
279
+ virtualenv==20.26.3
281
280
  # via pre-commit
282
281
  wheel==0.43.0
283
282
  # via libhxl
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '2.3.8'
16
- __version_tuple__ = version_tuple = (2, 3, 8)
15
+ __version__ = version = '2.4.0'
16
+ __version_tuple__ = version_tuple = (2, 4, 0)
@@ -185,20 +185,14 @@ class RowParser:
185
185
  Returns:
186
186
  Iterator[Dict]: Input data with prefilter applied if specified and sorted if specified or deemed necessary
187
187
  """
188
- rows = []
189
- for row in iterator:
190
- if self.header_to_hxltag:
191
- newrow = {}
192
- for header in row:
193
- newrow[self.header_to_hxltag[header]] = row[header]
194
- row = newrow
195
- if self.stop_row:
196
- if all(
197
- row[key] == value for key, value in self.stop_row.items()
198
- ):
199
- break
200
- for newrow in self.flatten(row):
201
- rows.append(newrow)
188
+ if self.header_to_hxltag:
189
+ iterator = self.header_to_hxltag_rows(iterator)
190
+ if self.stop_row:
191
+ iterator = self.stop_rows(iterator)
192
+ if self.flatteninfo:
193
+ iterator = self.flatten_rows(iterator)
194
+ if self.prefilter:
195
+ iterator = (row for row in iterator if eval(self.prefilter))
202
196
  if not self.sort:
203
197
  if self.datecol:
204
198
  for subset in self.subsets:
@@ -212,15 +206,59 @@ class RowParser:
212
206
  )
213
207
  self.sort = {"keys": [self.datecol], "reverse": True}
214
208
  break
215
- if self.prefilter:
216
- rows = [row for row in rows if eval(self.prefilter)]
217
209
  if self.sort:
218
210
  keys = self.sort["keys"]
219
211
  reverse = self.sort.get("reverse", False)
220
- rows = sorted(rows, key=itemgetter(*keys), reverse=reverse)
221
- return rows
212
+ iterator = sorted(iterator, key=itemgetter(*keys), reverse=reverse)
213
+ return iterator
214
+
215
+ def header_to_hxltag_rows(
216
+ self, iterator: Iterator[Dict]
217
+ ) -> Generator[Dict, None, None]:
218
+ """Convert headers to HXL tags in keys
219
+
220
+ Args:
221
+ iterator (Iterator[Dict]): Input data
222
+
223
+ Returns:
224
+ Generator[Dict]: Rows where keys are HXL tags
225
+ """
226
+ for row in iterator:
227
+ newrow = {}
228
+ for header in row:
229
+ newrow[self.header_to_hxltag[header]] = row[header]
230
+ yield newrow
231
+
232
+ def stop_rows(
233
+ self, iterator: Iterator[Dict]
234
+ ) -> Generator[Dict, None, None]:
235
+ """Stop processing rows after condition met
236
+
237
+ Args:
238
+ iterator (Iterator[Dict]): Input data
239
+
240
+ Returns:
241
+ Generator[Dict]: Rows up to stop condition
242
+ """
243
+ for row in iterator:
244
+ if all(row[key] == value for key, value in self.stop_row.items()):
245
+ break
246
+ yield row
247
+
248
+ def flatten_rows(self, iterator: Iterator[Dict]) -> Iterator[Dict]:
249
+ """Flatten rows
250
+
251
+ Args:
252
+ iterator (Iterator[Dict]): Input data
253
+
254
+ Returns:
255
+ Generator[Dict]: Flattened rows
256
+ """
257
+ for row in iterator:
258
+ for newrow in self.flatten_row(row):
259
+ yield newrow
222
260
 
223
- def flatten(self, row: Dict) -> Generator[Dict, None, None]:
261
+ def flatten_row(self, row: Dict) -> Generator[Dict, None, None]:
224
262
  """Flatten a wide spreadsheet format into a long one
225
263
 
226
264
  Args:
@@ -229,9 +267,6 @@ class RowParser:
229
267
  Returns:
230
268
  Generator[Dict]: Flattened row(s)
231
269
  """
232
- if not self.flatteninfo:
233
- yield row
234
- return
235
270
  counters = [-1 for _ in self.flatteninfo]
236
271
  while True:
237
272
  newrow = copy.deepcopy(row)
@@ -206,15 +206,18 @@ class Read(Retrieve):
206
206
  if headers is None:
207
207
  headers = 1
208
208
  datasetinfo["headers"] = 1
209
- kwargs["headers"] = headers
210
- if isinstance(headers, list):
211
- kwargs["fill_merged_cells"] = True
212
209
  format = datasetinfo["format"]
213
210
  kwargs["format"] = format
214
- if not sheet and format in ("xls", "xlsx"):
215
- sheet = 1
211
+ if format in ("xls", "xlsx"):
212
+ if not sheet:
213
+ sheet = 1
214
+ if isinstance(headers, list):
215
+ kwargs["fill_merged_cells"] = True
216
+ elif "fill_merged_cells" not in kwargs:
217
+ kwargs["fill_merged_cells"] = False
216
218
  if sheet:
217
219
  kwargs["sheet"] = sheet
220
+ kwargs["headers"] = headers
218
221
  compression = datasetinfo.get("compression")
219
222
  if compression:
220
223
  kwargs["compression"] = compression
@@ -302,7 +305,7 @@ class Read(Retrieve):
302
305
  f"Using saved datasets in {filename}_n.json in {self.saved_dir}"
303
306
  )
304
307
  datasets = []
305
- for file_path in glob.glob(f"{saved_path}_*.json"):
308
+ for file_path in sorted(glob.glob(f"{saved_path}_*.json")):
306
309
  datasets.append(Dataset.load_from_json(file_path))
307
310
  else:
308
311
  datasets = Dataset.search_in_hdx(
@@ -30,7 +30,7 @@ class TestReaders:
30
30
  assert getattr(clone_reader, property) == value
31
31
 
32
32
  def test_read_dataset(self, configuration, monkeypatch):
33
- def test_read_from_hdx(dataset_name, _):
33
+ def read_from_hdx(dataset_name, _):
34
34
  if dataset_name == "None":
35
35
  return None
36
36
  dataset = Dataset({"name": dataset_name})
@@ -58,7 +58,7 @@ class TestReaders:
58
58
  today=parse_date("2021-02-01"),
59
59
  ) as reader:
60
60
  monkeypatch.setattr(
61
- Dataset, "read_from_hdx", test_read_from_hdx
61
+ Dataset, "read_from_hdx", read_from_hdx
62
62
  )
63
63
  dataset_name = "None"
64
64
  dataset = reader.read_dataset(dataset_name)
@@ -89,7 +89,7 @@ class TestReaders:
89
89
  def test_search_datasets(self, configuration, monkeypatch):
90
90
  filename = "TestDataset"
91
91
 
92
- def test_search_in_hdx(*args, **kwargs):
92
+ def search_in_hdx(*args, **kwargs):
93
93
  datasets = []
94
94
  for i in range(2):
95
95
  dataset = Dataset({"name": f"{filename}_{i}"})
@@ -117,7 +117,7 @@ class TestReaders:
117
117
  today=parse_date("2021-02-01"),
118
118
  ) as reader:
119
119
  monkeypatch.setattr(
120
- Dataset, "search_in_hdx", test_search_in_hdx
120
+ Dataset, "search_in_hdx", search_in_hdx
121
121
  )
122
122
  datasets = reader.search_datasets(filename)
123
123
  assert len(datasets) == 2