hdx-python-utilities 3.7.4__tar.gz → 3.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/pre-commit-config.yaml +3 -3
  2. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/PKG-INFO +3 -2
  3. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/main.md +47 -17
  4. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/pyproject.toml +4 -0
  5. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/requirements.txt +36 -33
  6. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/_version.py +2 -2
  7. hdx_python_utilities-3.8.0/src/hdx/utilities/error_handler.py +185 -0
  8. hdx_python_utilities-3.8.0/src/hdx/utilities/errors_onexit.py +14 -0
  9. hdx_python_utilities-3.8.0/src/hdx/utilities/matching.py +339 -0
  10. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/saver.py +1 -1
  11. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/text.py +1 -232
  12. hdx_python_utilities-3.8.0/tests/hdx/utilities/test_error_handler.py +56 -0
  13. hdx_python_utilities-3.7.4/tests/hdx/utilities/test_text.py → hdx_python_utilities-3.8.0/tests/hdx/utilities/test_matching.py +176 -117
  14. hdx_python_utilities-3.8.0/tests/hdx/utilities/test_text.py +123 -0
  15. hdx_python_utilities-3.7.4/src/hdx/utilities/errors_onexit.py +0 -53
  16. hdx_python_utilities-3.7.4/tests/hdx/utilities/test_errors_onexit.py +0 -24
  17. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/coveragerc +0 -0
  18. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/pytest.ini +0 -0
  19. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/ruff.toml +0 -0
  20. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.github/workflows/publish.yaml +0 -0
  21. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.github/workflows/run-python-tests.yaml +0 -0
  22. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.gitignore +0 -0
  23. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/CONTRIBUTING.md +0 -0
  24. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/LICENSE +0 -0
  25. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/README.md +0 -0
  26. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/.readthedocs.yaml +0 -0
  27. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/pydoc-markdown.yaml +0 -0
  28. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/__init__.py +0 -0
  29. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/base_downloader.py +0 -0
  30. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/compare.py +0 -0
  31. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/dateparse.py +0 -0
  32. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/dictandlist.py +0 -0
  33. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/downloader.py +0 -0
  34. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/easy_logging.py +0 -0
  35. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/email.py +0 -0
  36. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/encoding.py +0 -0
  37. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/frictionless_wrapper.py +0 -0
  38. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/html.py +0 -0
  39. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/loader.py +0 -0
  40. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/path.py +0 -0
  41. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/retriever.py +0 -0
  42. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/session.py +0 -0
  43. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/state.py +0 -0
  44. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/typehint.py +0 -0
  45. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/useragent.py +0 -0
  46. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/uuid.py +0 -0
  47. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/compare/test_csv_processing.csv +0 -0
  48. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/compare/test_csv_processing2.csv +0 -0
  49. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/empty.yaml +0 -0
  50. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_config.json +0 -0
  51. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_config.yaml +0 -0
  52. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_email_configuration.json +0 -0
  53. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_email_configuration.yaml +0 -0
  54. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/json_csv.yaml +0 -0
  55. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/logging_config.json +0 -0
  56. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/logging_config.yaml +0 -0
  57. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/project_configuration.json +0 -0
  58. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/project_configuration.yaml +0 -0
  59. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/smtp_config.json +0 -0
  60. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/smtp_config.yaml +0 -0
  61. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config.yaml +0 -0
  62. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config2.yaml +0 -0
  63. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config3.yaml +0 -0
  64. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config_wrong.yaml +0 -0
  65. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/basicauth.txt +0 -0
  66. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/bearertoken.txt +0 -0
  67. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params.json +0 -0
  68. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params.yaml +0 -0
  69. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params_tree.yaml +0 -0
  70. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_csv_processing.csv +0 -0
  71. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_csv_processing_blanks.csv +0 -0
  72. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data.csv +0 -0
  73. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data.xlsx +0 -0
  74. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data1.csv/empty.txt +0 -0
  75. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data2.csv +0 -0
  76. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_json_processing.json +0 -0
  77. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_xls_processing.xls +0 -0
  78. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_xlsx_processing.xlsx +0 -0
  79. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/html/response.html +0 -0
  80. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/loader/empty.json +0 -0
  81. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/loader/empty.yaml +0 -0
  82. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.csv +0 -0
  83. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.json +0 -0
  84. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.txt +0 -0
  85. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.yaml +0 -0
  86. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/retriever-test.csv +0 -0
  87. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.csv +0 -0
  88. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.json +0 -0
  89. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.txt +0 -0
  90. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.yaml +0 -0
  91. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test_hxl.csv +0 -0
  92. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out.csv +0 -0
  93. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out.json +0 -0
  94. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out2.csv +0 -0
  95. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out2.json +0 -0
  96. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out5.json +0 -0
  97. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out6.json +0 -0
  98. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out7.json +0 -0
  99. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out8.csv +0 -0
  100. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out8.json +0 -0
  101. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-false.json +0 -0
  102. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-false.yaml +0 -0
  103. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-true.json +0 -0
  104. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-true.yaml +0 -0
  105. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-false.json +0 -0
  106. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-false.yaml +0 -0
  107. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-true.json +0 -0
  108. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-true.yaml +0 -0
  109. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/state/analysis_dates.txt +0 -0
  110. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/state/last_build_date.txt +0 -0
  111. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/test_data.csv +0 -0
  112. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/conftest.py +0 -0
  113. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_compare.py +0 -0
  114. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_dateparse.py +0 -0
  115. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_dictandlist.py +0 -0
  116. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_downloader.py +0 -0
  117. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_easy_logging.py +0 -0
  118. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_email.py +0 -0
  119. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_encoding.py +0 -0
  120. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_html.py +0 -0
  121. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_loader.py +0 -0
  122. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_path.py +0 -0
  123. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_retriever.py +0 -0
  124. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_saver.py +0 -0
  125. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_state.py +0 -0
  126. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_useragent.py +0 -0
  127. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_uuid.py +0 -0
  128. {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/utils.py +0 -0
@@ -2,7 +2,7 @@ default_language_version:
2
2
  python: python3.12
3
3
  repos:
4
4
  - repo: https://github.com/pre-commit/pre-commit-hooks
5
- rev: v4.6.0
5
+ rev: v5.0.0
6
6
  hooks:
7
7
  - id: trailing-whitespace
8
8
  exclude: test_loader.py
@@ -10,7 +10,7 @@ repos:
10
10
  exclude: (test_csv_processing_blanks.csv|test.txt)
11
11
  - id: check-ast
12
12
  - repo: https://github.com/astral-sh/ruff-pre-commit
13
- rev: v0.6.4
13
+ rev: v0.8.6
14
14
  hooks:
15
15
  # Run the linter.
16
16
  - id: ruff
@@ -19,7 +19,7 @@ repos:
19
19
  - id: ruff-format
20
20
  args: [--config, .config/ruff.toml]
21
21
  - repo: https://github.com/astral-sh/uv-pre-commit
22
- rev: 0.4.5
22
+ rev: 0.5.15
23
23
  hooks:
24
24
  # Run the pip compile
25
25
  - id: pip-compile
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: hdx-python-utilities
3
- Version: 3.7.4
3
+ Version: 3.8.0
4
4
  Summary: HDX Python Utilities for streaming tabular data, date and time handling and other helpful functions
5
5
  Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-utilities
6
6
  Author-email: Michael Rans <rans@email.com>
@@ -30,6 +30,7 @@ Requires-Dist: ijson>=3.2.3
30
30
  Requires-Dist: jsonlines>=4.0.0
31
31
  Requires-Dist: loguru
32
32
  Requires-Dist: openpyxl>=3.1.2
33
+ Requires-Dist: pyphonetics
33
34
  Requires-Dist: python-dateutil<2.9.1,>=2.9.0
34
35
  Requires-Dist: ratelimit
35
36
  Requires-Dist: requests-file
@@ -18,6 +18,7 @@ Python developers. Note that these are not specific to HDX.
18
18
  1. [State utility](#state-utility)
19
19
  1. [Path utilities](#path-utilities)
20
20
  1. [Text processing](#text-processing)
21
+ 1. [Matching utilities](#matching-utilities)
21
22
  1. [Encoding utilities](#encoding-utilities)
22
23
  1. [Check valid UUID](#valid-uuid)
23
24
  1. [Easy building and packaging](#easy-building-and-packaging)
@@ -31,6 +32,12 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit
31
32
  The library has detailed API documentation which can be found in the menu at the top.
32
33
 
33
34
  ## Breaking Changes
35
+ From 3.8.0, multiple_replace, match_template_variables, earliest_index,
36
+ get_matching_text_in_strs, get_matching_text,
37
+ get_matching_then_nonmatching_text moved from hdx.utilities.text to
38
+ hdx.utilities.matching. ErrorsOnExit renamed to ErrorHandler with changed
39
+ functionality.
40
+
34
41
  From 3.5.5, Python 3.7 no longer supported
35
42
 
36
43
  From 3.3.7, improved parse_date and parse_date_range by default will attempt to parse
@@ -648,17 +655,23 @@ Then use the logger like this:
648
655
  There is a class that allows collecting of errors to be logged later, typically on exit.
649
656
  It is called ErrorHandler and can be used as follows:
650
657
 
651
- with ErrorsOnExit() as errors:
658
+ with ErrorHandler() as errors:
652
659
  ...
653
- errors.add("MY ERROR MESSAGE")
660
+ errors.add("MY ERROR MESSAGE")
661
+ errors.add("MY WARNING MESSAGE", "category 1", "warning")
654
662
  ...
655
- errors.add("ANOTHER ERROR MESSAGE")
656
-
657
- The above code will collect the errors, in this case "MY ERROR MESSAGE" and "ANOTHER
658
- ERROR MESSAGE". On leaving the `with` block, the errors will be logged and the code will
659
- exit with the error code 1 (ie. `sys.exit(1)` will be called). If there are no errors,
660
- the code will not exit and execution will continue after the `with` block (ie.
661
- `sys.exit(1)` will not be called).
663
+ errors.add("ERROR MESSAGE", "category 1", "warning")
664
+ errors.add("ANOTHER ERROR MESSAGE", "category 1", "warning")
665
+
666
+ The above code will collect the errors and warnings, in this case
667
+ "MY ERROR MESSAGE", "category 1 - ERROR MESSAGE",
668
+ "category 1 - ANOTHER ERROR MESSAGE" and "category 1 - MY WARNING MESSAGE".
669
+ On leaving the `with` block, the errors and warnings will be logged by category
670
+ and sorted. The code will exit with the error code 1 (ie.`sys.exit(1)` will be
671
+ called) if there are errors and `should_exit_on_error` is True (the default
672
+ for this parameter in the constructor). If there are no errors, the code will
673
+ not exit and execution will continue after the `with` block (ie.`sys.exit(1)`
674
+ will not be called).
662
675
 
663
676
  ## State utility
664
677
 
@@ -803,13 +816,35 @@ Examples:
803
816
  assert remove_string("lala, 01/02/2020 ", "01/02/2020", PUNCTUATION_MINUS_BRACKETS) == "lala "
804
817
  assert remove_string("lala,(01/02/2020) ", "01/02/2020", PUNCTUATION_MINUS_BRACKETS) == "lala,() "
805
818
 
819
+ # Extract words from a string sentence into a list
820
+ result = get_words_in_sentence("Korea (Democratic People's Republic of)")
821
+ assert result == ["Korea", "Democratic", "People's", "Republic", "of"]
822
+
823
+ ## Matching utilities
824
+
825
+ Examples:
826
+
827
+ possible_names = ["Al Maharah", "Ad Dali", "Dhamar"]
828
+ phonetics = Phonetics()
829
+ assert phonetics.match(possible_names, "al dali") == 1
830
+
831
+ org_type_lookup = {"Donor": "433", "National NGO": "441", "Other": "443"}
832
+ lookup = {
833
+ normalise(k): v for k, v in org_type_lookup.items()
834
+ }
835
+ assert get_code_from_name("NATIONAL_NGO", lookup, [], fuzzy_match=False) == "441"
836
+
837
+ a = "The quick brown fox jumped over the lazy dog. It was so fast!"
838
+
806
839
  # Replace multiple strings in a string simultaneously
807
840
  result = multiple_replace(a, {"quick": "slow", "fast": "slow", "lazy": "busy"})
808
841
  assert result == "The slow brown fox jumped over the busy dog. It was so slow!"
809
842
 
810
- # Extract words from a string sentence into a list
811
- result = get_words_in_sentence("Korea (Democratic People's Republic of)")
812
- assert result == ["Korea", "Democratic", "People's", "Republic", "of"]
843
+ # Look for template variables in a string (ie. {{XXX}})
844
+ assert match_template_variables("dasdda{{abc}}gff") == ("{{abc}}", "abc")
845
+
846
+ # Search a string for each of a list of strings and return the earliest index
847
+ assert earliest_index(a, ["dog", "lala", "fox", "haha", "quick"]) == 4
813
848
 
814
849
  # Find matching text in strings
815
850
  a = "The quick brown fox jumped over the lazy dog. It was so fast!"
@@ -818,11 +853,6 @@ Examples:
818
853
  result = get_matching_text([a, b, c], match_min_size=10)
819
854
  assert result == " brown fox over the It was so fast!"
820
855
 
821
- # Search a string for each of a list of strings and return the earliest index
822
- assert earliest_index(a, ["dog", "lala", "fox", "haha", "quick"]) == 4
823
-
824
- # Look for template variables in a string (ie. {{XXX}})
825
- assert match_template_variables("dasdda{{abc}}gff") == ("{{abc}}", "abc")
826
856
 
827
857
  ## Encoding utilities
828
858
 
@@ -47,6 +47,7 @@ dependencies = [
47
47
  "jsonlines>=4.0.0",
48
48
  # /end frictionless extras
49
49
  "loguru",
50
+ "pyphonetics",
50
51
  "python-dateutil>=2.9.0, <2.9.1",
51
52
  "ratelimit",
52
53
  "requests-file",
@@ -105,6 +106,9 @@ run = """
105
106
  --cov-report=lcov --cov-report=term-missing
106
107
  """
107
108
 
109
+ [tool.hatch.envs.hatch-static-analysis]
110
+ dependencies = ["ruff==0.8.6"]
111
+
108
112
  [tool.hatch.envs.hatch-static-analysis.scripts]
109
113
  format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
110
114
  format-fix = ["ruff format --config .config/ruff.toml {args:.}",]
@@ -2,7 +2,7 @@
2
2
  # uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt
3
3
  annotated-types==0.7.0
4
4
  # via pydantic
5
- attrs==24.2.0
5
+ attrs==24.3.0
6
6
  # via
7
7
  # frictionless
8
8
  # jsonlines
@@ -10,25 +10,25 @@ attrs==24.2.0
10
10
  # referencing
11
11
  beautifulsoup4==4.12.3
12
12
  # via hdx-python-utilities (pyproject.toml)
13
- certifi==2024.8.30
13
+ certifi==2024.12.14
14
14
  # via requests
15
15
  cfgv==3.4.0
16
16
  # via pre-commit
17
17
  chardet==5.2.0
18
18
  # via frictionless
19
- charset-normalizer==3.3.2
19
+ charset-normalizer==3.4.1
20
20
  # via requests
21
- click==8.1.7
21
+ click==8.1.8
22
22
  # via typer
23
- coverage==7.6.1
23
+ coverage==7.6.10
24
24
  # via pytest-cov
25
- distlib==0.3.8
25
+ distlib==0.3.9
26
26
  # via virtualenv
27
- dnspython==2.6.1
27
+ dnspython==2.7.0
28
28
  # via email-validator
29
29
  email-validator==2.2.0
30
30
  # via hdx-python-utilities (pyproject.toml)
31
- et-xmlfile==1.1.0
31
+ et-xmlfile==2.0.0
32
32
  # via openpyxl
33
33
  filelock==3.16.1
34
34
  # via virtualenv
@@ -36,9 +36,9 @@ frictionless==5.18.0
36
36
  # via hdx-python-utilities (pyproject.toml)
37
37
  html5lib==1.1
38
38
  # via hdx-python-utilities (pyproject.toml)
39
- humanize==4.10.0
39
+ humanize==4.11.0
40
40
  # via frictionless
41
- identify==2.6.1
41
+ identify==2.6.5
42
42
  # via pre-commit
43
43
  idna==3.10
44
44
  # via
@@ -48,9 +48,9 @@ ijson==3.3.0
48
48
  # via hdx-python-utilities (pyproject.toml)
49
49
  iniconfig==2.0.0
50
50
  # via pytest
51
- isodate==0.6.1
51
+ isodate==0.7.2
52
52
  # via frictionless
53
- jinja2==3.1.4
53
+ jinja2==3.1.5
54
54
  # via frictionless
55
55
  jsonlines==4.0.0
56
56
  # via hdx-python-utilities (pyproject.toml)
@@ -58,9 +58,9 @@ jsonschema==4.23.0
58
58
  # via
59
59
  # frictionless
60
60
  # tableschema-to-template
61
- jsonschema-specifications==2023.12.1
61
+ jsonschema-specifications==2024.10.1
62
62
  # via jsonschema
63
- loguru==0.7.2
63
+ loguru==0.7.3
64
64
  # via
65
65
  # hdx-python-utilities (pyproject.toml)
66
66
  # pytest-loguru
@@ -68,7 +68,7 @@ markdown-it-py==3.0.0
68
68
  # via rich
69
69
  marko==2.1.2
70
70
  # via frictionless
71
- markupsafe==2.1.5
71
+ markupsafe==3.0.2
72
72
  # via jinja2
73
73
  mdurl==0.1.2
74
74
  # via markdown-it-py
@@ -76,7 +76,7 @@ nodeenv==1.9.1
76
76
  # via pre-commit
77
77
  openpyxl==3.1.5
78
78
  # via hdx-python-utilities (pyproject.toml)
79
- packaging==24.1
79
+ packaging==24.2
80
80
  # via pytest
81
81
  petl==1.7.15
82
82
  # via frictionless
@@ -84,19 +84,21 @@ platformdirs==4.3.6
84
84
  # via virtualenv
85
85
  pluggy==1.5.0
86
86
  # via pytest
87
- pre-commit==3.8.0
87
+ pre-commit==4.0.1
88
88
  # via hdx-python-utilities (pyproject.toml)
89
- pydantic==2.9.2
89
+ pydantic==2.10.5
90
90
  # via frictionless
91
- pydantic-core==2.23.4
91
+ pydantic-core==2.27.2
92
92
  # via pydantic
93
- pygments==2.18.0
93
+ pygments==2.19.1
94
94
  # via rich
95
- pytest==8.3.3
95
+ pyphonetics==0.5.3
96
+ # via hdx-python-utilities (pyproject.toml)
97
+ pytest==8.3.4
96
98
  # via
97
99
  # hdx-python-utilities (pyproject.toml)
98
100
  # pytest-cov
99
- pytest-cov==5.0.0
101
+ pytest-cov==6.0.0
100
102
  # via hdx-python-utilities (pyproject.toml)
101
103
  pytest-loguru==0.4.0
102
104
  # via hdx-python-utilities (pyproject.toml)
@@ -125,24 +127,23 @@ requests-file==2.1.0
125
127
  # via hdx-python-utilities (pyproject.toml)
126
128
  rfc3986==2.0.0
127
129
  # via frictionless
128
- rich==13.8.1
130
+ rich==13.9.4
129
131
  # via typer
130
- rpds-py==0.20.0
132
+ rpds-py==0.22.3
131
133
  # via
132
134
  # jsonschema
133
135
  # referencing
134
- ruamel-yaml==0.18.6
136
+ ruamel-yaml==0.18.10
135
137
  # via hdx-python-utilities (pyproject.toml)
136
- ruamel-yaml-clib==0.2.8
138
+ ruamel-yaml-clib==0.2.12
137
139
  # via ruamel-yaml
138
140
  shellingham==1.5.4
139
141
  # via typer
140
- simpleeval==0.9.13
142
+ simpleeval==1.0.3
141
143
  # via frictionless
142
- six==1.16.0
144
+ six==1.17.0
143
145
  # via
144
146
  # html5lib
145
- # isodate
146
147
  # python-dateutil
147
148
  soupsieve==2.6
148
149
  # via beautifulsoup4
@@ -154,7 +155,7 @@ tabulate==0.9.0
154
155
  # via frictionless
155
156
  text-unidecode==1.3
156
157
  # via python-slugify
157
- typer==0.12.5
158
+ typer==0.15.1
158
159
  # via frictionless
159
160
  typing-extensions==4.12.2
160
161
  # via
@@ -162,17 +163,19 @@ typing-extensions==4.12.2
162
163
  # pydantic
163
164
  # pydantic-core
164
165
  # typer
165
- urllib3==2.2.3
166
+ unidecode==1.3.8
167
+ # via pyphonetics
168
+ urllib3==2.3.0
166
169
  # via requests
167
170
  validators==0.34.0
168
171
  # via frictionless
169
- virtualenv==20.26.6
172
+ virtualenv==20.28.1
170
173
  # via pre-commit
171
174
  webencodings==0.5.1
172
175
  # via html5lib
173
176
  xlrd==2.0.1
174
177
  # via hdx-python-utilities (pyproject.toml)
175
- xlsx2csv==0.8.3
178
+ xlsx2csv==0.8.4
176
179
  # via hdx-python-utilities (pyproject.toml)
177
180
  xlsxwriter==3.2.0
178
181
  # via tableschema-to-template
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.7.4'
16
- __version_tuple__ = version_tuple = (3, 7, 4)
15
+ __version__ = version = '3.8.0'
16
+ __version_tuple__ = version_tuple = (3, 8, 0)
@@ -0,0 +1,185 @@
1
+ """Collect errors and warnings by category and log them."""
2
+
3
+ import logging
4
+ import sys
5
+ from typing import Any, Optional
6
+
7
+ from hdx.utilities.dictandlist import dict_of_sets_add
8
+ from hdx.utilities.typehint import ListTuple
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ErrorHandler:
    """Record errors and warnings by category and log them later.

    Messages are stored in `shared_errors`, which has one dictionary for
    "error" messages and one for "warning" messages. Each of those is keyed
    by category and filled through `dict_of_sets_add`. Messages are logged by
    calling `log`, or automatically on leaving a `with` block when the object
    is used as a context manager. Within each category the messages are
    logged in sorted order.

    Args:
        should_exit_on_error (bool): Whether to exit with a 1 code if there are errors. Default is True.
    """

    def __init__(
        self,
        should_exit_on_error: bool = True,
    ):
        self.should_exit_on_error = should_exit_on_error
        # One category -> messages dictionary per supported message type.
        self.shared_errors = {
            "error": {},
            "warning": {},
        }

    def add(
        self, message: str, category: str = "", message_type: str = "error"
    ) -> None:
        """Add a message to be logged, prefixed by the category if supplied.

        Surrounding whitespace is stripped from the message. Output format:
            category - message
        or just the message when no category is given.

        Args:
            message (str): Error message
            category (str): Error category. Defaults to "".
            message_type (str): The type of message (error or warning). Default is "error"

        Returns:
            None

        Raises:
            KeyError: If message_type is not a key of `shared_errors`
        """
        message = message.strip()
        output = f"{category} - {message}" if category else message
        dict_of_sets_add(self.shared_errors[message_type], category, output)

    @staticmethod
    def missing_value_message(value_type: str, value: Any) -> str:
        """Generate a message for a missing value of a specific type.

        The message has the fixed format:
            type n not found

        Args:
            value_type (str): The type of value that is missing
            value (Any): The specific missing value (converted with str)

        Returns:
            str: A formatted message stating the missing value and its type
        """
        # !s applies str() explicitly, so a custom __format__ is never used.
        return f"{value_type} {value!s} not found"

    def add_missing_value(
        self,
        value_type: str,
        value: Any,
        category: str = "",
        message_type: str = "error",
    ) -> None:
        """Add a message (typically a warning or error) about a missing value.

        The message is built by `missing_value_message` and stored by `add`,
        giving the output format:
            category - type n not found

        Args:
            value_type (str): Type of value e.g. "sector"
            value (Any): Missing value
            category (str): Error category. Defaults to "".
            message_type (str): The type of message (error or warning). Default is "error"

        Returns:
            None
        """
        self.add(
            self.missing_value_message(value_type, value),
            category,
            message_type,
        )

    def multi_valued_message(
        self, text: str, values: ListTuple
    ) -> Optional[str]:
        """Generate a message describing a list of values.

        The message has the fixed format:
            n {text}. First 10 values: n1, n2, n3...
        where n is the total number of values. If there are 10 or fewer
        values, ". First 10 values" is omitted and all values are listed.
        Values are cast to string.

        Args:
            text (str): Descriptive text for the issue (e.g., "invalid values")
            values (ListTuple): The list of related values of concern

        Returns:
            Optional[str]: The formatted message, or None if values is empty
        """
        if not values:
            return None
        # Count before truncating so the message reports the full total.
        no_values = len(values)
        if no_values > 10:
            values = values[:10]
            message_suffix = ". First 10 values"
        else:
            message_suffix = ""
        return f"{no_values} {text}{message_suffix}: {', '.join(map(str, values))}"

    def add_multi_valued(
        self,
        text: str,
        values: ListTuple,
        category: str = "",
        message_type: str = "error",
    ) -> bool:
        """Add a message (typically a warning or error) about a list of values.

        The message is built by `multi_valued_message` and stored by `add`,
        giving the output format:
            category - n {text}. First 10 values: n1, n2, n3...
        If there are 10 or fewer values, ". First 10 values" is omitted.
        Values are cast to string. Nothing is added if values is empty.

        Args:
            text (str): Text to use e.g. "negative values removed"
            values (ListTuple): List of values of concern
            category (str): Error category. Defaults to "".
            message_type (str): The type of message (error or warning). Default is "error"

        Returns:
            bool: True if a message was added, False if not
        """
        message = self.multi_valued_message(text, values)
        if message is None:
            return False
        self.add(message, category, message_type)
        return True

    def log(self) -> None:
        """Log all errors, then all warnings, sorted within each category.

        Returns:
            None
        """
        # Only the message collections are needed, so iterate values().
        for category_errors in self.shared_errors["error"].values():
            for error in sorted(category_errors):
                logger.error(error)
        for category_warnings in self.shared_errors["warning"].values():
            for warning in sorted(category_warnings):
                logger.warning(warning)

    def exit_on_error(self) -> None:
        """Exit with a 1 code if there are errors and should_exit_on_error
        is True

        Returns:
            None
        """
        if self.should_exit_on_error and self.shared_errors["error"]:
            sys.exit(1)

    def __enter__(self) -> "ErrorHandler":
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        # Always log what was collected, even if the block raised.
        self.log()
        # Only exit when no exception is propagating; returning None lets any
        # exception raised inside the with block continue unchanged.
        if exc_type is None:
            self.exit_on_error()
@@ -0,0 +1,14 @@
1
+ """Collect and log errors on exit."""
2
+
3
+ import warnings
4
+
5
+ from hdx.utilities.error_handler import ErrorHandler
6
+
7
+
8
class ErrorsOnExit(ErrorHandler):  # pragma: no cover
    """Deprecated name for ErrorHandler, kept for backward compatibility.

    Constructing it emits a DeprecationWarning and then initialises the
    ErrorHandler base class with its default arguments.
    """

    def __init__(self) -> None:
        # stacklevel=2 attributes the warning to the code that constructs
        # ErrorsOnExit rather than to this module, so the report points at
        # the line that needs updating.
        warnings.warn(
            "The ErrorsOnExit class was renamed ErrorHandler and will be removed in future!",
            DeprecationWarning,
            stacklevel=2,
        )
        super().__init__()