hdx-python-utilities 4.0.0__tar.gz → 4.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/PKG-INFO +1 -1
  2. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/index.md +6 -1
  3. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/_version.py +2 -2
  4. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/base_downloader.py +2 -1
  5. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/compare.py +7 -2
  6. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/dictandlist.py +2 -1
  7. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/downloader.py +28 -25
  8. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/email.py +3 -2
  9. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/file_hashing.py +2 -1
  10. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/frictionless_wrapper.py +1 -1
  11. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/html.py +2 -1
  12. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/loader.py +11 -10
  13. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/path.py +45 -39
  14. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/retriever.py +25 -22
  15. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/saver.py +9 -9
  16. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/session.py +2 -1
  17. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/state.py +3 -2
  18. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/useragent.py +10 -9
  19. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/conftest.py +3 -3
  20. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_compare.py +2 -4
  21. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_dictandlist.py +5 -4
  22. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_downloader.py +39 -38
  23. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_email.py +2 -4
  24. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_file_hashing.py +8 -10
  25. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_html.py +1 -3
  26. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_loader.py +16 -16
  27. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_path.py +20 -18
  28. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_retriever.py +39 -27
  29. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_saver.py +36 -35
  30. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_state.py +5 -6
  31. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_useragent.py +5 -7
  32. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_zip_crc.py +4 -6
  33. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.coveragerc +0 -0
  34. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/publish-test.yaml +0 -0
  35. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/publish.yaml +0 -0
  36. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/run-python-tests.yaml +0 -0
  37. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.gitignore +0 -0
  38. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.pre-commit-config.yaml +0 -0
  39. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/CONTRIBUTING.md +0 -0
  40. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/LICENSE +0 -0
  41. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/README.md +0 -0
  42. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/.readthedocs.yaml +0 -0
  43. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/mkdocs.yaml +0 -0
  44. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/hatch.toml +0 -0
  45. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/pyproject.toml +0 -0
  46. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/pytest.ini +0 -0
  47. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/requirements.txt +0 -0
  48. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/ruff.toml +0 -0
  49. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/__init__.py +0 -0
  50. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/dateparse.py +0 -0
  51. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/easy_logging.py +0 -0
  52. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/encoding.py +0 -0
  53. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/error_handler.py +0 -0
  54. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/errors_onexit.py +0 -0
  55. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/matching.py +0 -0
  56. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/text.py +0 -0
  57. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/uuid.py +0 -0
  58. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/zip_crc.py +0 -0
  59. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/compare/test_csv_processing.csv +0 -0
  60. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/compare/test_csv_processing2.csv +0 -0
  61. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/empty.yaml +0 -0
  62. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_config.json +0 -0
  63. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_config.yaml +0 -0
  64. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_email_configuration.json +0 -0
  65. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_email_configuration.yaml +0 -0
  66. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/json_csv.yaml +0 -0
  67. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/logging_config.json +0 -0
  68. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/logging_config.yaml +0 -0
  69. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/project_configuration.json +0 -0
  70. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/project_configuration.yaml +0 -0
  71. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/smtp_config.json +0 -0
  72. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/smtp_config.yaml +0 -0
  73. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config.yaml +0 -0
  74. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config2.yaml +0 -0
  75. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config3.yaml +0 -0
  76. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config_wrong.yaml +0 -0
  77. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/basicauth.txt +0 -0
  78. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/bearertoken.txt +0 -0
  79. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params.json +0 -0
  80. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params.yaml +0 -0
  81. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params_tree.yaml +0 -0
  82. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_csv_processing.csv +0 -0
  83. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_csv_processing_blanks.csv +0 -0
  84. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data.csv +0 -0
  85. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data.xlsx +0 -0
  86. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data1.csv/empty.txt +0 -0
  87. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data2.csv +0 -0
  88. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_json_processing.json +0 -0
  89. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_xls_processing.xls +0 -0
  90. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_xlsx_processing.xlsx +0 -0
  91. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/bad_header.zip +0 -0
  92. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/bad_index.xlsx +0 -0
  93. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/empty.zip +0 -0
  94. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/test.xlsx +0 -0
  95. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/test_shapefile.zip +0 -0
  96. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/valid_sig_invalid_body.zip +0 -0
  97. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/html/response.html +0 -0
  98. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty.json +0 -0
  99. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty.yaml +0 -0
  100. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty_list.json +0 -0
  101. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.csv +0 -0
  102. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.json +0 -0
  103. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.txt +0 -0
  104. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.yaml +0 -0
  105. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/retriever-test.csv +0 -0
  106. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.csv +0 -0
  107. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.json +0 -0
  108. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.txt +0 -0
  109. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.yaml +0 -0
  110. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test_hxl.csv +0 -0
  111. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out.csv +0 -0
  112. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out.json +0 -0
  113. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out2.csv +0 -0
  114. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out2.json +0 -0
  115. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out5.json +0 -0
  116. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out6.json +0 -0
  117. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out7.json +0 -0
  118. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out8.csv +0 -0
  119. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out8.json +0 -0
  120. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-false.json +0 -0
  121. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-false.yaml +0 -0
  122. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-true.json +0 -0
  123. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-true.yaml +0 -0
  124. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-false.json +0 -0
  125. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-false.yaml +0 -0
  126. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-true.json +0 -0
  127. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-true.yaml +0 -0
  128. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/state/analysis_dates.txt +0 -0
  129. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/state/last_build_date.txt +0 -0
  130. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/test_data.csv +0 -0
  131. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_dateparse.py +0 -0
  132. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_easy_logging.py +0 -0
  133. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_encoding.py +0 -0
  134. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_error_handler.py +0 -0
  135. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_matching.py +0 -0
  136. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_text.py +0 -0
  137. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_uuid.py +0 -0
  138. {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hdx-python-utilities
3
- Version: 4.0.0
3
+ Version: 4.0.2
4
4
  Summary: HDX Python Utilities for streaming tabular data, date and time handling and other helpful functions
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-utilities
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -33,7 +33,12 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit
33
33
  The library has detailed API documentation which can be found in the menu at the top.
34
34
 
35
35
  ## Breaking Changes
36
- From 4.0.0, Python 3.10 or later is required
36
+ From 4.0.1, the library is Path aware. The following methods return Path, not str:
37
+ download_file, get_path_for_url, stream_path, get_temp_dir, script_dir_plus_file,
38
+ script_dir. The context managers in hdx.utilities.path that yield a path
39
+ now yield it as a Path object, not a str.
40
+
41
+ From 4.0.0, Python 3.10 or later is required
37
42
 
38
43
  From 3.8.0, multiple_replace, match_template_variables, earliest_index,
39
44
  get_matching_text_in_strs, get_matching_text,
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '4.0.0'
32
- __version_tuple__ = version_tuple = (4, 0, 0)
31
+ __version__ = version = '4.0.2'
32
+ __version_tuple__ = version_tuple = (4, 0, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,5 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from collections.abc import Iterator, Sequence
3
+ from pathlib import Path
3
4
  from typing import Any
4
5
 
5
6
 
@@ -32,7 +33,7 @@ class BaseDownload(ABC):
32
33
  """
33
34
 
34
35
  @abstractmethod
35
- def download_file(self, url: str, *args: Any, **kwargs: Any) -> str:
36
+ def download_file(self, url: str, *args: Any, **kwargs: Any) -> Path:
36
37
  """Download file from url.
37
38
 
38
39
  Args:
@@ -1,6 +1,7 @@
1
1
  """File compare utilities."""
2
2
 
3
3
  from os import linesep
4
+ from pathlib import Path
4
5
 
5
6
  try:
6
7
  from cydifflib import ndiff
@@ -8,7 +9,9 @@ except ImportError:
8
9
  from difflib import ndiff
9
10
 
10
11
 
11
- def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]:
12
+ def compare_files(
13
+ path1: Path | str, path2: Path | str, encoding: str = "utf-8"
14
+ ) -> list[str]:
12
15
  """Returns the delta between two files using -, ?, + format excluding lines
13
16
  that are the same.
14
17
 
@@ -26,7 +29,9 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]:
26
29
  return [x for x in diff if x[0] in ["-", "+", "?"]]
27
30
 
28
31
 
29
- def assert_files_same(path1: str, path2: str, encoding: str = "utf-8") -> None:
32
+ def assert_files_same(
33
+ path1: Path | str, path2: Path | str, encoding: str = "utf-8"
34
+ ) -> None:
30
35
  """Asserts that two files are the same and returns delta using.
31
36
 
32
37
  -, ?, + format if not
@@ -3,6 +3,7 @@
3
3
  import itertools
4
4
  import warnings
5
5
  from collections.abc import Callable, Mapping, MutableMapping, Sequence
6
+ from pathlib import Path
6
7
  from typing import Any
7
8
 
8
9
  from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource
@@ -410,7 +411,7 @@ def read_list_from_csv(
410
411
 
411
412
 
412
413
  def write_list_to_csv(
413
- filepath: str,
414
+ filepath: Path | str,
414
415
  rows: list[Sequence | Mapping],
415
416
  headers: int | Sequence[str] | None = None,
416
417
  columns: Sequence[int] | Sequence[str] | None = None,
@@ -5,7 +5,7 @@ import logging
5
5
  from collections.abc import Callable, Iterator, Sequence
6
6
  from copy import deepcopy
7
7
  from os import remove
8
- from os.path import exists, isfile, join, split, splitext
8
+ from os.path import exists, isfile, split, splitext
9
9
  from pathlib import Path
10
10
  from typing import Any
11
11
  from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
@@ -65,7 +65,7 @@ class Download(BaseDownload):
65
65
  def __init__(
66
66
  self,
67
67
  user_agent: str | None = None,
68
- user_agent_config_yaml: str | None = None,
68
+ user_agent_config_yaml: Path | str | None = None,
69
69
  user_agent_lookup: str | None = None,
70
70
  use_env: bool = True,
71
71
  fail_on_missing_file: bool = True,
@@ -133,12 +133,12 @@ class Download(BaseDownload):
133
133
  @staticmethod
134
134
  def get_path_for_url(
135
135
  url: str,
136
- folder: str | None = None,
136
+ folder: Path | str | None = None,
137
137
  filename: str | None = None,
138
- path: str | None = None,
138
+ path: Path | str | None = None,
139
139
  overwrite: bool = False,
140
140
  keep: bool = False,
141
- ) -> str:
141
+ ) -> Path:
142
142
  """Get filename from url and join to provided folder or temporary
143
143
  folder if no folder supplied, ensuring uniqueness.
144
144
 
@@ -164,7 +164,8 @@ class Download(BaseDownload):
164
164
  filename, extension = splitext(filename)
165
165
  if not folder:
166
166
  folder = get_temp_dir()
167
- path = join(folder, f"{filename}{extension}")
167
+ folder = Path(folder)
168
+ path = folder / f"{filename}{extension}"
168
169
  if overwrite:
169
170
  try:
170
171
  remove(path)
@@ -174,7 +175,7 @@ class Download(BaseDownload):
174
175
  count = 0
175
176
  while exists(path):
176
177
  count += 1
177
- path = join(folder, f"{filename}{count}{extension}")
178
+ path = folder / f"{filename}{count}{extension}"
178
179
  return path
179
180
 
180
181
  def get_full_url(self, url: str) -> str:
@@ -254,7 +255,7 @@ class Download(BaseDownload):
254
255
 
255
256
  def normal_setup(
256
257
  self,
257
- url: str,
258
+ url: Path | str,
258
259
  stream: bool = True,
259
260
  post: bool = False,
260
261
  parameters: dict | None = None,
@@ -281,6 +282,7 @@ class Download(BaseDownload):
281
282
  self.close_response()
282
283
  self.response = None
283
284
  try:
285
+ url = str(url)
284
286
  spliturl = urlsplit(url)
285
287
  if not spliturl.scheme:
286
288
  if isfile(url):
@@ -336,7 +338,7 @@ class Download(BaseDownload):
336
338
  }
337
339
  )
338
340
 
339
- def hash_stream(self, url: str) -> str:
341
+ def hash_stream(self, url: Path | str) -> str:
340
342
  """Stream file from url and hash it using MD5. Must call setup method
341
343
  first.
342
344
 
@@ -357,7 +359,7 @@ class Download(BaseDownload):
357
359
  f"Download of {url} failed in retrieval of stream!" % url
358
360
  )
359
361
 
360
- def stream_path(self, path: str, errormsg: str):
362
+ def stream_path(self, path: Path | str, errormsg: str) -> Path:
361
363
  """Stream file from url and store in provided path. Must call setup
362
364
  method first.
363
365
 
@@ -370,12 +372,13 @@ class Download(BaseDownload):
370
372
  """
371
373
  f = None
372
374
  try:
373
- f = open(path, "wb")
375
+ path = Path(path)
376
+ f = path.open("wb")
374
377
  for chunk in self.response.iter_content(chunk_size=10240):
375
378
  if chunk: # filter out keep-alive new chunks
376
379
  f.write(chunk)
377
380
  f.flush()
378
- return f.name
381
+ return path
379
382
  except Exception as e:
380
383
  raise DownloadError(errormsg) from e
381
384
  finally:
@@ -384,10 +387,10 @@ class Download(BaseDownload):
384
387
 
385
388
  def stream_file(
386
389
  self,
387
- url: str,
388
- folder: str | None = None,
390
+ url: Path | str,
391
+ folder: Path | str | None = None,
389
392
  filename: str | None = None,
390
- path: str | None = None,
393
+ path: Path | str | None = None,
391
394
  overwrite: bool = False,
392
395
  keep: bool = False,
393
396
  ) -> str:
@@ -414,9 +417,9 @@ class Download(BaseDownload):
414
417
 
415
418
  def download_file(
416
419
  self,
417
- url: str,
420
+ url: Path | str,
418
421
  **kwargs: Any,
419
- ) -> str:
422
+ ) -> Path:
420
423
  """Download file from url and store in provided folder or temporary
421
424
  folder if no folder supplied.
422
425
 
@@ -460,7 +463,7 @@ class Download(BaseDownload):
460
463
  path, f"Download of {url} failed in retrieval of stream!"
461
464
  )
462
465
 
463
- def download(self, url: str, **kwargs: Any) -> requests.Response:
466
+ def download(self, url: Path | str, **kwargs: Any) -> requests.Response:
464
467
  """Download url.
465
468
 
466
469
  Args:
@@ -539,7 +542,7 @@ class Download(BaseDownload):
539
542
  """
540
543
  return self.response.json()
541
544
 
542
- def download_text(self, url: str, **kwargs: Any) -> str:
545
+ def download_text(self, url: Path | str, **kwargs: Any) -> str:
543
546
  """Download url as text.
544
547
 
545
548
  Args:
@@ -557,7 +560,7 @@ class Download(BaseDownload):
557
560
  self.download(url, **kwargs)
558
561
  return self.get_text()
559
562
 
560
- def download_yaml(self, url: str, **kwargs: Any) -> Any:
563
+ def download_yaml(self, url: Path | str, **kwargs: Any) -> Any:
561
564
  """Download url as YAML.
562
565
 
563
566
  Args:
@@ -575,7 +578,7 @@ class Download(BaseDownload):
575
578
  self.download(url, **kwargs)
576
579
  return self.get_yaml()
577
580
 
578
- def download_json(self, url: str, **kwargs: Any) -> Any:
581
+ def download_json(self, url: Path | str, **kwargs: Any) -> Any:
579
582
  """Download url as JSON.
580
583
 
581
584
  Args:
@@ -595,7 +598,7 @@ class Download(BaseDownload):
595
598
 
596
599
  def get_frictionless_tableresource(
597
600
  self,
598
- url: str,
601
+ url: Path | str,
599
602
  ignore_blank_rows: bool = True,
600
603
  infer_types: bool = False,
601
604
  **kwargs: Any,
@@ -641,7 +644,7 @@ class Download(BaseDownload):
641
644
 
642
645
  def _get_tabular_rows(
643
646
  self,
644
- url: str,
647
+ url: Path | str,
645
648
  headers: int | Sequence[int] | Sequence[str] = 1,
646
649
  dict_form: bool = False,
647
650
  include_headers: bool = False,
@@ -704,7 +707,7 @@ class Download(BaseDownload):
704
707
  xlsx2csv = kwargs.pop("xlsx2csv", False)
705
708
  if xlsx2csv:
706
709
  path = self.download_file(url)
707
- outpath = path.replace(".xlsx", ".csv")
710
+ outpath = path.with_suffix(".csv")
708
711
  sheet = kwargs.pop("sheet", 1)
709
712
  if isinstance(sheet, int):
710
713
  sheet_args = {"sheetid": sheet}
@@ -1276,7 +1279,7 @@ class Download(BaseDownload):
1276
1279
  cls,
1277
1280
  custom_configs: dict[str, dict],
1278
1281
  user_agent: str | None = None,
1279
- user_agent_config_yaml: str | None = None,
1282
+ user_agent_config_yaml: Path | str | None = None,
1280
1283
  user_agent_lookup: str | None = None,
1281
1284
  use_env: bool = True,
1282
1285
  fail_on_missing_file: bool = True,
@@ -5,7 +5,8 @@ import smtplib
5
5
  from collections.abc import Sequence
6
6
  from email.mime.multipart import MIMEMultipart
7
7
  from email.mime.text import MIMEText
8
- from os.path import expanduser, join
8
+ from os.path import expanduser
9
+ from pathlib import Path
9
10
  from typing import Any
10
11
 
11
12
  from hdx.utilities.loader import load_json, load_yaml
@@ -45,7 +46,7 @@ class Email:
45
46
  email_config_yaml (str): Path to YAML HDX configuration. Defaults to ~/hdx_email_configuration.yaml.
46
47
  """
47
48
 
48
- default_email_config_yaml = join(expanduser("~"), "hdx_email_configuration.yaml")
49
+ default_email_config_yaml = Path(expanduser("~")) / "hdx_email_configuration.yaml"
49
50
 
50
51
  def __init__(self, **kwargs: Any) -> None:
51
52
  email_config_found = False
@@ -3,6 +3,7 @@ import logging
3
3
  import zipfile
4
4
  from io import BytesIO, IOBase
5
5
  from os import fstat
6
+ from pathlib import Path
6
7
 
7
8
  from openpyxl import load_workbook
8
9
  from openpyxl.utils.exceptions import InvalidFileException
@@ -90,7 +91,7 @@ def crc_zip_fp(fp: IOBase) -> str:
90
91
  return get_crc_sum(file_crcs)
91
92
 
92
93
 
93
- def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]:
94
+ def get_size_and_hash(filepath: Path | str, file_format: str) -> tuple[int, str]:
94
95
  """Return the size and hash of file
95
96
 
96
97
  Args:
@@ -188,7 +188,7 @@ def get_frictionless_tableresource(
188
188
  http_session = kwargs.pop("http_session", session)
189
189
  with system.use_context(http_session=http_session):
190
190
  if url:
191
- resource = TableResource(path=url, **kwargs)
191
+ resource = TableResource(path=str(url), **kwargs)
192
192
  else:
193
193
  resource = TableResource(data=data, **kwargs)
194
194
  resource.open()
@@ -1,6 +1,7 @@
1
1
  """HTML parsing utilities."""
2
2
 
3
3
  import logging
4
+ from pathlib import Path
4
5
  from typing import Any
5
6
 
6
7
  try:
@@ -20,7 +21,7 @@ if BeautifulSoup is not None:
20
21
  url: str,
21
22
  downloader: Download = None,
22
23
  user_agent: str | None = None,
23
- user_agent_config_yaml: str | None = None,
24
+ user_agent_config_yaml: Path | str | None = None,
24
25
  user_agent_lookup: str | None = None,
25
26
  **kwargs: Any,
26
27
  ) -> BeautifulSoup:
@@ -1,7 +1,8 @@
1
1
  """Loading utilities for YAML, JSON etc."""
2
2
 
3
3
  import json
4
- from collections.abc import Mapping, Sequence
4
+ from collections.abc import Mapping, MutableMapping, Sequence
5
+ from pathlib import Path
5
6
  from typing import Any
6
7
  from warnings import warn
7
8
 
@@ -15,7 +16,7 @@ class LoadError(Exception):
15
16
 
16
17
 
17
18
  def load_text(
18
- path: str,
19
+ path: Path | str,
19
20
  encoding: str = "utf-8",
20
21
  strip: bool = False,
21
22
  replace_newlines: str | None = None,
@@ -59,7 +60,7 @@ def load_text(
59
60
 
60
61
 
61
62
  def load_yaml(
62
- path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
63
+ path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
63
64
  ) -> Any:
64
65
  """Load YAML file into an ordered dictionary.
65
66
 
@@ -83,7 +84,7 @@ def load_yaml(
83
84
 
84
85
 
85
86
  def load_json(
86
- path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
87
+ path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
87
88
  ) -> Any:
88
89
  """Load JSON file into an ordered dictionary (dict for Python 3.7+)
89
90
 
@@ -152,11 +153,11 @@ def load_and_merge_json(
152
153
 
153
154
 
154
155
  def load_yaml_into_existing_dict(
155
- data: dict,
156
- path: str,
156
+ data: MutableMapping,
157
+ path: Path | str,
157
158
  encoding: str = "utf-8",
158
159
  loaderror_if_empty: bool = True,
159
- ) -> Mapping:
160
+ ) -> MutableMapping:
160
161
  """Merge YAML file that is in dictionary form into existing dictionary.
161
162
 
162
163
  Args:
@@ -173,11 +174,11 @@ def load_yaml_into_existing_dict(
173
174
 
174
175
 
175
176
  def load_json_into_existing_dict(
176
- data: dict,
177
- path: str,
177
+ data: MutableMapping,
178
+ path: Path | str,
178
179
  encoding: str = "utf-8",
179
180
  loaderror_if_empty: bool = True,
180
- ) -> Mapping:
181
+ ) -> MutableMapping:
181
182
  """Merge JSON file that is in dictionary form into existing dictionary.
182
183
 
183
184
  Args:
@@ -4,17 +4,15 @@ import contextlib
4
4
  import inspect
5
5
  import logging
6
6
  import sys
7
- from collections.abc import Generator, Iterable, Sequence
7
+ from collections.abc import Iterable, Iterator, Sequence
8
8
  from os import getenv, makedirs, remove
9
9
  from os.path import (
10
- abspath,
11
10
  basename,
12
11
  dirname,
13
12
  exists,
14
- join,
15
- realpath,
16
13
  splitext,
17
14
  )
15
+ from pathlib import Path
18
16
  from shutil import rmtree
19
17
  from tempfile import gettempdir
20
18
  from typing import Any
@@ -33,7 +31,7 @@ class NotFoundError(Exception):
33
31
  pass
34
32
 
35
33
 
36
- def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str:
34
+ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> Path:
37
35
  """Get current script's directory.
38
36
 
39
37
  Args:
@@ -43,36 +41,42 @@ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str:
43
41
  Returns:
44
42
  Current script's directory
45
43
  """
46
- if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze
47
- path = abspath(sys.executable) # pragma: no cover
44
+ if getattr(sys, "frozen", False):
45
+ # Frozen (PyInstaller, etc.): Use the executable path
46
+ path = Path(sys.executable) # pragma: no cover
48
47
  else:
49
- path = inspect.getabsfile(pyobject)
50
- if follow_symlinks:
51
- path = realpath(path)
52
- return dirname(path)
48
+ # Standard: Use the object's file path
49
+ path = Path(inspect.getfile(pyobject))
50
+
51
+ # Resolve symlinks if requested, otherwise just make absolute
52
+ path = path.resolve() if follow_symlinks else path.absolute()
53
+
54
+ return path.parent
53
55
 
54
56
 
55
57
  def script_dir_plus_file(
56
58
  filename: str, pyobject: Any, follow_symlinks: bool = True
57
- ) -> str:
59
+ ) -> Path:
58
60
  """Get current script's directory and then append a filename.
59
61
 
60
62
  Args:
61
63
  filename: Filename to append to directory path
62
64
  pyobject: Any Python object in the script
65
+ return_path: Whether to return a Path object. Defaults to returning str.
66
+
63
67
  follow_symlinks: Follow symlinks or not. Defaults to True.
64
68
 
65
69
  Returns:
66
70
  Current script's directory and with filename appended
67
71
  """
68
- return join(script_dir(pyobject, follow_symlinks), filename)
72
+ return script_dir(pyobject, follow_symlinks) / filename
69
73
 
70
74
 
71
75
  def get_temp_dir(
72
- folder: str | None = None,
76
+ folder: Path | str | None = None,
73
77
  delete_if_exists: bool = False,
74
- tempdir: str | None = None,
75
- ) -> str:
78
+ tempdir: Path | str | None = None,
79
+ ) -> Path:
76
80
  """Get a temporary directory. Looks for environment variable TEMP_DIR and
77
81
  falls back on os.gettempdir if a root temporary directory is not supplied.
78
82
  If a folder is supplied, creates that folder within the temporary
@@ -88,8 +92,9 @@ def get_temp_dir(
88
92
  """
89
93
  if tempdir is None:
90
94
  tempdir = getenv("TEMP_DIR", gettempdir())
95
+ tempdir = Path(tempdir)
91
96
  if folder:
92
- tempdir = join(tempdir, folder)
97
+ tempdir = tempdir / folder
93
98
  if exists(tempdir):
94
99
  if delete_if_exists:
95
100
  rmtree(tempdir)
@@ -101,12 +106,12 @@ def get_temp_dir(
101
106
 
102
107
  @contextlib.contextmanager
103
108
  def temp_dir(
104
- folder: str | None = None,
109
+ folder: Path | str | None = None,
105
110
  delete_if_exists: bool = False,
106
111
  delete_on_success: bool = True,
107
112
  delete_on_failure: bool = True,
108
- tempdir: str | None = None,
109
- ) -> Generator[str, Any, None]:
113
+ tempdir: Path | str | None = None,
114
+ ) -> Iterator[Path]:
110
115
  """Get a temporary directory optionally with folder appended (and created
111
116
  if it doesn't exist)
112
117
 
@@ -132,7 +137,7 @@ def temp_dir(
132
137
  raise
133
138
 
134
139
 
135
- def read_or_create_batch(folder: str, batch: str | None = None) -> str:
140
+ def read_or_create_batch(folder: Path, batch: str | None = None) -> str:
136
141
  """Get batch or create it if it doesn't exist.
137
142
 
138
143
  Args:
@@ -142,7 +147,7 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str:
142
147
  Returns:
143
148
  Batch
144
149
  """
145
- batch_file = join(folder, "batch.txt")
150
+ batch_file = folder / "batch.txt"
146
151
  if exists(batch_file):
147
152
  batch = load_text(batch_file, strip=True)
148
153
  logger.info(f"File BATCH = {batch}")
@@ -156,13 +161,13 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str:
156
161
 
157
162
  @contextlib.contextmanager
158
163
  def temp_dir_batch(
159
- folder: str | None = None,
164
+ folder: Path | str | None = None,
160
165
  delete_if_exists: bool = False,
161
166
  delete_on_success: bool = True,
162
167
  delete_on_failure: bool = True,
163
168
  batch: str | None = None,
164
- tempdir: str | None = None,
165
- ) -> Generator[dict, Any, None]:
169
+ tempdir: Path | str | None = None,
170
+ ) -> Iterator[dict]:
166
171
  """Get a temporary directory and batch id. Yields a dictionary with key
167
172
  folder which is the temporary directory optionally with folder appended
168
173
  (and created if it doesn't exist). In key batch is a batch code to be
@@ -219,7 +224,7 @@ def progress_storing_folder(
219
224
  iterator: Iterable[dict],
220
225
  key: str,
221
226
  wheretostart: str | None = None,
222
- ) -> Generator[tuple[dict, dict], Any, None]:
227
+ ) -> Iterator[tuple[dict, dict]]:
223
228
  """Store progress in folder in key folder of info dictionary parameter.
224
229
  Yields 2 dictionaries. The first is the info dictionary. It contains in key
225
230
  folder the folder being used to store progress and in key progress the
@@ -238,7 +243,7 @@ def progress_storing_folder(
238
243
  A tuple of the form (info dictionary, next object in iterator)
239
244
  """
240
245
  folder = info["folder"]
241
- progress_file = join(folder, "progress.txt")
246
+ progress_file = folder / "progress.txt"
242
247
 
243
248
  if not wheretostart:
244
249
  contents = getenv("WHERETOSTART")
@@ -277,8 +282,8 @@ def progress_storing_folder(
277
282
 
278
283
  @contextlib.contextmanager
279
284
  def wheretostart_tempdir_batch(
280
- folder: str, batch: str | None = None, tempdir: str | None = None
281
- ) -> Generator[dict, Any, None]:
285
+ folder: Path | str, batch: str | None = None, tempdir: Path | str | None = None
286
+ ) -> Iterator[dict]:
282
287
  """Get a temporary directory and batch id. Deletes any existing folder if
283
288
  WHERETOSTART environment variable is set to RESET. Yields a dictionary with
284
289
  key folder which is the temporary directory optionally with folder appended
@@ -311,12 +316,12 @@ def wheretostart_tempdir_batch(
311
316
 
312
317
 
313
318
  def progress_storing_tempdir(
314
- folder: str,
319
+ folder: Path | str,
315
320
  iterator: Iterable[dict],
316
321
  key: str,
317
322
  batch: str | None = None,
318
- tempdir: str | None = None,
319
- ) -> Generator[tuple[dict, dict], Any, None]:
323
+ tempdir: Path | str | None = None,
324
+ ) -> Iterator[tuple[dict, dict]]:
320
325
  """Store progress in temporary directory. The folder persists until the
321
326
  final iteration allowing which iteration to start at and the batch code to
322
327
  be persisted between runs. Yields 2 dictionaries. The first contains key
@@ -344,11 +349,11 @@ def progress_storing_tempdir(
344
349
 
345
350
 
346
351
  def multiple_progress_storing_tempdir(
347
- folder: str,
352
+ folder: Path | str,
348
353
  iterators: Sequence[Iterable[dict]],
349
354
  keys: Sequence[str],
350
355
  batch: str | None = None,
351
- ) -> Generator[tuple[int, dict, dict], Any, None]:
356
+ ) -> Iterator[tuple[int, dict, dict]]:
352
357
  """Store progress in temporary directory. The folder persists until the
353
358
  final iteration of the last iterator allowing which iteration to start at
354
359
  and the batch code to be persisted between runs. Yields 2 dictionaries. The
@@ -387,7 +392,7 @@ def multiple_progress_storing_tempdir(
387
392
  tempdir = info["folder"]
388
393
  batch = info["batch"]
389
394
  for i, key in enumerate(keys):
390
- progress_file = join(tempdir, "progress.txt")
395
+ progress_file = tempdir / "progress.txt"
391
396
  if wheretostartenv:
392
397
  wheretostart = get_wheretostart(
393
398
  wheretostartenv, "Environment variable", key
@@ -416,18 +421,19 @@ def multiple_progress_storing_tempdir(
416
421
 
417
422
 
418
423
  def get_filename_extension_from_url(
419
- url: str, second_last: bool = False, use_query: bool = False
424
+ url: Path | str, second_last: bool = False, use_query: bool = False
420
425
  ) -> tuple[str, str]:
421
426
  """Get separately filename and extension from url.
422
427
 
423
428
  Args:
424
- url: URL to download
429
+ url: URL or path to download
425
430
  second_last: Get second last segment of url as well. Defaults to False.
426
431
  use_query: Include query parameters as well. Defaults to False.
427
432
 
428
433
  Returns:
429
434
  Tuple of (filename, extension)
430
435
  """
436
+ url = str(url)
431
437
  split_url = urlsplit(unquote_plus(url))
432
438
  urlpath = split_url.path
433
439
  last_part = basename(urlpath)
@@ -448,12 +454,12 @@ def get_filename_extension_from_url(
448
454
 
449
455
 
450
456
  def get_filename_from_url(
451
- url: str, second_last: bool = False, use_query: bool = False
457
+ url: Path | str, second_last: bool = False, use_query: bool = False
452
458
  ) -> str:
453
459
  """Get filename including extension from url.
454
460
 
455
461
  Args:
456
- url: URL
462
+ url: URL or path
457
463
  second_last: Get second last segment of url as well. Defaults to False.
458
464
  use_query: Include query parameters as well. Defaults to False.
459
465