hdx-python-utilities 3.7.4__tar.gz → 3.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/pre-commit-config.yaml +3 -3
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/PKG-INFO +3 -2
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/main.md +47 -17
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/pyproject.toml +4 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/requirements.txt +36 -33
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/_version.py +2 -2
- hdx_python_utilities-3.8.0/src/hdx/utilities/error_handler.py +185 -0
- hdx_python_utilities-3.8.0/src/hdx/utilities/errors_onexit.py +14 -0
- hdx_python_utilities-3.8.0/src/hdx/utilities/matching.py +339 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/saver.py +1 -1
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/text.py +1 -232
- hdx_python_utilities-3.8.0/tests/hdx/utilities/test_error_handler.py +56 -0
- hdx_python_utilities-3.7.4/tests/hdx/utilities/test_text.py → hdx_python_utilities-3.8.0/tests/hdx/utilities/test_matching.py +176 -117
- hdx_python_utilities-3.8.0/tests/hdx/utilities/test_text.py +123 -0
- hdx_python_utilities-3.7.4/src/hdx/utilities/errors_onexit.py +0 -53
- hdx_python_utilities-3.7.4/tests/hdx/utilities/test_errors_onexit.py +0 -24
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/coveragerc +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/pytest.ini +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.config/ruff.toml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.github/workflows/publish.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/.gitignore +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/CONTRIBUTING.md +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/LICENSE +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/README.md +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/__init__.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/base_downloader.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/compare.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/dateparse.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/dictandlist.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/downloader.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/easy_logging.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/email.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/encoding.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/frictionless_wrapper.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/html.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/loader.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/path.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/retriever.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/session.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/state.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/typehint.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/useragent.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/src/hdx/utilities/uuid.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/compare/test_csv_processing.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/compare/test_csv_processing2.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/empty.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_config.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_config.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_email_configuration.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/hdx_email_configuration.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/json_csv.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/logging_config.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/logging_config.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/project_configuration.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/project_configuration.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/smtp_config.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/smtp_config.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config2.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config3.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/config/user_agent_config_wrong.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/basicauth.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/bearertoken.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/extra_params_tree.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_csv_processing.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_csv_processing_blanks.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data.xlsx +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data1.csv/empty.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_data2.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_json_processing.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_xls_processing.xls +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/downloader/test_xlsx_processing.xlsx +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/html/response.html +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/loader/empty.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/loader/empty.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/fallbacks/test.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/retriever-test.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/retriever/test_hxl.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out2.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out2.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out5.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out6.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out7.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out8.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/out8.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-false.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-true.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-false_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-false.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-true.json +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/saver/pretty-true_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/state/analysis_dates.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/state/last_build_date.txt +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/fixtures/test_data.csv +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/conftest.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_compare.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_dateparse.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_dictandlist.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_downloader.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_easy_logging.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_email.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_encoding.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_html.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_loader.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_path.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_retriever.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_saver.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_state.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_useragent.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/test_uuid.py +0 -0
- {hdx_python_utilities-3.7.4 → hdx_python_utilities-3.8.0}/tests/hdx/utilities/utils.py +0 -0
|
@@ -2,7 +2,7 @@ default_language_version:
|
|
|
2
2
|
python: python3.12
|
|
3
3
|
repos:
|
|
4
4
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
5
|
-
rev:
|
|
5
|
+
rev: v5.0.0
|
|
6
6
|
hooks:
|
|
7
7
|
- id: trailing-whitespace
|
|
8
8
|
exclude: test_loader.py
|
|
@@ -10,7 +10,7 @@ repos:
|
|
|
10
10
|
exclude: (test_csv_processing_blanks.csv|test.txt)
|
|
11
11
|
- id: check-ast
|
|
12
12
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
-
rev: v0.6
|
|
13
|
+
rev: v0.8.6
|
|
14
14
|
hooks:
|
|
15
15
|
# Run the linter.
|
|
16
16
|
- id: ruff
|
|
@@ -19,7 +19,7 @@ repos:
|
|
|
19
19
|
- id: ruff-format
|
|
20
20
|
args: [--config, .config/ruff.toml]
|
|
21
21
|
- repo: https://github.com/astral-sh/uv-pre-commit
|
|
22
|
-
rev: 0.
|
|
22
|
+
rev: 0.5.15
|
|
23
23
|
hooks:
|
|
24
24
|
# Run the pip compile
|
|
25
25
|
- id: pip-compile
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-utilities
|
|
3
|
-
Version: 3.7.4
|
|
3
|
+
Version: 3.8.0
|
|
4
4
|
Summary: HDX Python Utilities for streaming tabular data, date and time handling and other helpful functions
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-utilities
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -30,6 +30,7 @@ Requires-Dist: ijson>=3.2.3
|
|
|
30
30
|
Requires-Dist: jsonlines>=4.0.0
|
|
31
31
|
Requires-Dist: loguru
|
|
32
32
|
Requires-Dist: openpyxl>=3.1.2
|
|
33
|
+
Requires-Dist: pyphonetics
|
|
33
34
|
Requires-Dist: python-dateutil<2.9.1,>=2.9.0
|
|
34
35
|
Requires-Dist: ratelimit
|
|
35
36
|
Requires-Dist: requests-file
|
|
@@ -18,6 +18,7 @@ Python developers. Note that these are not specific to HDX.
|
|
|
18
18
|
1. [State utility](#state-utility)
|
|
19
19
|
1. [Path utilities](#path-utilities)
|
|
20
20
|
1. [Text processing](#text-processing)
|
|
21
|
+
1. [Matching utilities](#matching-utilities)
|
|
21
22
|
1. [Encoding utilities](#encoding-utilities)
|
|
22
23
|
1. [Check valid UUID](#valid-uuid)
|
|
23
24
|
1. [Easy building and packaging](#easy-building-and-packaging)
|
|
@@ -31,6 +32,12 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit
|
|
|
31
32
|
The library has detailed API documentation which can be found in the menu at the top.
|
|
32
33
|
|
|
33
34
|
## Breaking Changes
|
|
35
|
+
From 3.8.0, multiple_replace, match_template_variables, earliest_index,
|
|
36
|
+
get_matching_text_in_strs, get_matching_text,
|
|
37
|
+
get_matching_then_nonmatching_text moved from hdx.utilities.text to
|
|
38
|
+
hdx.utilities.matching. ErrorsOnExit renamed ErrorHandler with changed
|
|
39
|
+
functionality.
|
|
40
|
+
|
|
34
41
|
From 3.5.5, Python 3.7 no longer supported
|
|
35
42
|
|
|
36
43
|
From 3.3.7, improved parse_date and parse_date_range by default will attempt to parse
|
|
@@ -648,17 +655,23 @@ Then use the logger like this:
|
|
|
648
655
|
There is a class that allows collecting of errors to be logged later, typically on exit.
|
|
649
656
|
It is called ErrorsOnExit and can be used as follows:
|
|
650
657
|
|
|
651
|
-
with
|
|
658
|
+
with ErrorHandler() as errors:
|
|
652
659
|
...
|
|
653
|
-
errors.
|
|
660
|
+
errors.add("MY ERROR MESSAGE")
|
|
661
|
+
errors.add("MY WARNING MESSAGE", "category 1", "warning")
|
|
654
662
|
...
|
|
655
|
-
errors.add("
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
`
|
|
663
|
+
errors.add("ERROR MESSAGE", "category 1", "warning")
|
|
664
|
+
errors.add("ANOTHER ERROR MESSAGE", "category 1", "warning")
|
|
665
|
+
|
|
666
|
+
The above code will collect the errors and warnings, in this case
|
|
667
|
+
"MY ERROR MESSAGE", "category 1 - ERROR MESSAGE",
|
|
668
|
+
"category 1 - ANOTHER ERROR MESSAGE" and "category 1 - MY WARNING MESSAGE".
|
|
669
|
+
On leaving the `with` block, the errors and warnings will be logged by category
|
|
670
|
+
and sorted. The code will exit with the error code 1 (i.e. `sys.exit(1)` will be
|
|
671
|
+
called) if there are errors and `should_exit_on_error` is True (the default
|
|
672
|
+
for this parameter in the constructor). If there are no errors, the code will
|
|
673
|
+
not exit and execution will continue after the `with` block (i.e. `sys.exit(1)`
|
|
674
|
+
will not be called).
|
|
662
675
|
|
|
663
676
|
## State utility
|
|
664
677
|
|
|
@@ -803,13 +816,35 @@ Examples:
|
|
|
803
816
|
assert remove_string("lala, 01/02/2020 ", "01/02/2020", PUNCTUATION_MINUS_BRACKETS) == "lala "
|
|
804
817
|
assert remove_string("lala,(01/02/2020) ", "01/02/2020", PUNCTUATION_MINUS_BRACKETS) == "lala,() "
|
|
805
818
|
|
|
819
|
+
# Extract words from a string sentence into a list
|
|
820
|
+
result = get_words_in_sentence("Korea (Democratic People's Republic of)")
|
|
821
|
+
assert result == ["Korea", "Democratic", "People's", "Republic", "of"]
|
|
822
|
+
|
|
823
|
+
## Matching utilities
|
|
824
|
+
|
|
825
|
+
Examples:
|
|
826
|
+
|
|
827
|
+
possible_names = ["Al Maharah", "Ad Dali", "Dhamar"]
|
|
828
|
+
phonetics = Phonetics()
|
|
829
|
+
assert phonetics.match(possible_names, "al dali") == 1
|
|
830
|
+
|
|
831
|
+
org_type_lookup = {"Donor": "433", "National NGO": "441", "Other": "443"}
|
|
832
|
+
lookup = {
|
|
833
|
+
normalise(k): v for k, v in org_type_lookup.items()
|
|
834
|
+
}
|
|
835
|
+
assert get_code_from_name("NATIONAL_NGO", lookup, [], fuzzy_match=False) == "441"
|
|
836
|
+
|
|
837
|
+
a = "The quick brown fox jumped over the lazy dog. It was so fast!"
|
|
838
|
+
|
|
806
839
|
# Replace multiple strings in a string simultaneously
|
|
807
840
|
result = multiple_replace(a, {"quick": "slow", "fast": "slow", "lazy": "busy"})
|
|
808
841
|
assert result == "The slow brown fox jumped over the busy dog. It was so slow!"
|
|
809
842
|
|
|
810
|
-
#
|
|
811
|
-
|
|
812
|
-
|
|
843
|
+
# Look for template variables in a string (ie. {{XXX}})
|
|
844
|
+
assert match_template_variables("dasdda{{abc}}gff") == ("{{abc}}", "abc")
|
|
845
|
+
|
|
846
|
+
# Search a string for each of a list of strings and return the earliest index
|
|
847
|
+
assert earliest_index(a, ["dog", "lala", "fox", "haha", "quick"]) == 4
|
|
813
848
|
|
|
814
849
|
# Find matching text in strings
|
|
815
850
|
a = "The quick brown fox jumped over the lazy dog. It was so fast!"
|
|
@@ -818,11 +853,6 @@ Examples:
|
|
|
818
853
|
result = get_matching_text([a, b, c], match_min_size=10)
|
|
819
854
|
assert result == " brown fox over the It was so fast!"
|
|
820
855
|
|
|
821
|
-
# Search a string for each of a list of strings and return the earliest index
|
|
822
|
-
assert earliest_index(a, ["dog", "lala", "fox", "haha", "quick"]) == 4
|
|
823
|
-
|
|
824
|
-
# Look for template variables in a string (ie. {{XXX}})
|
|
825
|
-
assert match_template_variables("dasdda{{abc}}gff") == ("{{abc}}", "abc")
|
|
826
856
|
|
|
827
857
|
## Encoding utilities
|
|
828
858
|
|
|
@@ -47,6 +47,7 @@ dependencies = [
|
|
|
47
47
|
"jsonlines>=4.0.0",
|
|
48
48
|
# /end frictionless extras
|
|
49
49
|
"loguru",
|
|
50
|
+
"pyphonetics",
|
|
50
51
|
"python-dateutil>=2.9.0, <2.9.1",
|
|
51
52
|
"ratelimit",
|
|
52
53
|
"requests-file",
|
|
@@ -105,6 +106,9 @@ run = """
|
|
|
105
106
|
--cov-report=lcov --cov-report=term-missing
|
|
106
107
|
"""
|
|
107
108
|
|
|
109
|
+
[tool.hatch.envs.hatch-static-analysis]
|
|
110
|
+
dependencies = ["ruff==0.8.6"]
|
|
111
|
+
|
|
108
112
|
[tool.hatch.envs.hatch-static-analysis.scripts]
|
|
109
113
|
format-check = ["ruff format --config .config/ruff.toml --check --diff {args:.}",]
|
|
110
114
|
format-fix = ["ruff format --config .config/ruff.toml {args:.}",]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt
|
|
3
3
|
annotated-types==0.7.0
|
|
4
4
|
# via pydantic
|
|
5
|
-
attrs==24.2.0
|
|
5
|
+
attrs==24.3.0
|
|
6
6
|
# via
|
|
7
7
|
# frictionless
|
|
8
8
|
# jsonlines
|
|
@@ -10,25 +10,25 @@ attrs==24.2.0
|
|
|
10
10
|
# referencing
|
|
11
11
|
beautifulsoup4==4.12.3
|
|
12
12
|
# via hdx-python-utilities (pyproject.toml)
|
|
13
|
-
certifi==2024.
|
|
13
|
+
certifi==2024.12.14
|
|
14
14
|
# via requests
|
|
15
15
|
cfgv==3.4.0
|
|
16
16
|
# via pre-commit
|
|
17
17
|
chardet==5.2.0
|
|
18
18
|
# via frictionless
|
|
19
|
-
charset-normalizer==3.
|
|
19
|
+
charset-normalizer==3.4.1
|
|
20
20
|
# via requests
|
|
21
|
-
click==8.1.
|
|
21
|
+
click==8.1.8
|
|
22
22
|
# via typer
|
|
23
|
-
coverage==7.6.
|
|
23
|
+
coverage==7.6.10
|
|
24
24
|
# via pytest-cov
|
|
25
|
-
distlib==0.3.
|
|
25
|
+
distlib==0.3.9
|
|
26
26
|
# via virtualenv
|
|
27
|
-
dnspython==2.
|
|
27
|
+
dnspython==2.7.0
|
|
28
28
|
# via email-validator
|
|
29
29
|
email-validator==2.2.0
|
|
30
30
|
# via hdx-python-utilities (pyproject.toml)
|
|
31
|
-
et-xmlfile==
|
|
31
|
+
et-xmlfile==2.0.0
|
|
32
32
|
# via openpyxl
|
|
33
33
|
filelock==3.16.1
|
|
34
34
|
# via virtualenv
|
|
@@ -36,9 +36,9 @@ frictionless==5.18.0
|
|
|
36
36
|
# via hdx-python-utilities (pyproject.toml)
|
|
37
37
|
html5lib==1.1
|
|
38
38
|
# via hdx-python-utilities (pyproject.toml)
|
|
39
|
-
humanize==4.
|
|
39
|
+
humanize==4.11.0
|
|
40
40
|
# via frictionless
|
|
41
|
-
identify==2.6.
|
|
41
|
+
identify==2.6.5
|
|
42
42
|
# via pre-commit
|
|
43
43
|
idna==3.10
|
|
44
44
|
# via
|
|
@@ -48,9 +48,9 @@ ijson==3.3.0
|
|
|
48
48
|
# via hdx-python-utilities (pyproject.toml)
|
|
49
49
|
iniconfig==2.0.0
|
|
50
50
|
# via pytest
|
|
51
|
-
isodate==0.
|
|
51
|
+
isodate==0.7.2
|
|
52
52
|
# via frictionless
|
|
53
|
-
jinja2==3.1.
|
|
53
|
+
jinja2==3.1.5
|
|
54
54
|
# via frictionless
|
|
55
55
|
jsonlines==4.0.0
|
|
56
56
|
# via hdx-python-utilities (pyproject.toml)
|
|
@@ -58,9 +58,9 @@ jsonschema==4.23.0
|
|
|
58
58
|
# via
|
|
59
59
|
# frictionless
|
|
60
60
|
# tableschema-to-template
|
|
61
|
-
jsonschema-specifications==
|
|
61
|
+
jsonschema-specifications==2024.10.1
|
|
62
62
|
# via jsonschema
|
|
63
|
-
loguru==0.7.
|
|
63
|
+
loguru==0.7.3
|
|
64
64
|
# via
|
|
65
65
|
# hdx-python-utilities (pyproject.toml)
|
|
66
66
|
# pytest-loguru
|
|
@@ -68,7 +68,7 @@ markdown-it-py==3.0.0
|
|
|
68
68
|
# via rich
|
|
69
69
|
marko==2.1.2
|
|
70
70
|
# via frictionless
|
|
71
|
-
markupsafe==
|
|
71
|
+
markupsafe==3.0.2
|
|
72
72
|
# via jinja2
|
|
73
73
|
mdurl==0.1.2
|
|
74
74
|
# via markdown-it-py
|
|
@@ -76,7 +76,7 @@ nodeenv==1.9.1
|
|
|
76
76
|
# via pre-commit
|
|
77
77
|
openpyxl==3.1.5
|
|
78
78
|
# via hdx-python-utilities (pyproject.toml)
|
|
79
|
-
packaging==24.
|
|
79
|
+
packaging==24.2
|
|
80
80
|
# via pytest
|
|
81
81
|
petl==1.7.15
|
|
82
82
|
# via frictionless
|
|
@@ -84,19 +84,21 @@ platformdirs==4.3.6
|
|
|
84
84
|
# via virtualenv
|
|
85
85
|
pluggy==1.5.0
|
|
86
86
|
# via pytest
|
|
87
|
-
pre-commit==
|
|
87
|
+
pre-commit==4.0.1
|
|
88
88
|
# via hdx-python-utilities (pyproject.toml)
|
|
89
|
-
pydantic==2.
|
|
89
|
+
pydantic==2.10.5
|
|
90
90
|
# via frictionless
|
|
91
|
-
pydantic-core==2.
|
|
91
|
+
pydantic-core==2.27.2
|
|
92
92
|
# via pydantic
|
|
93
|
-
pygments==2.
|
|
93
|
+
pygments==2.19.1
|
|
94
94
|
# via rich
|
|
95
|
-
|
|
95
|
+
pyphonetics==0.5.3
|
|
96
|
+
# via hdx-python-utilities (pyproject.toml)
|
|
97
|
+
pytest==8.3.4
|
|
96
98
|
# via
|
|
97
99
|
# hdx-python-utilities (pyproject.toml)
|
|
98
100
|
# pytest-cov
|
|
99
|
-
pytest-cov==
|
|
101
|
+
pytest-cov==6.0.0
|
|
100
102
|
# via hdx-python-utilities (pyproject.toml)
|
|
101
103
|
pytest-loguru==0.4.0
|
|
102
104
|
# via hdx-python-utilities (pyproject.toml)
|
|
@@ -125,24 +127,23 @@ requests-file==2.1.0
|
|
|
125
127
|
# via hdx-python-utilities (pyproject.toml)
|
|
126
128
|
rfc3986==2.0.0
|
|
127
129
|
# via frictionless
|
|
128
|
-
rich==13.
|
|
130
|
+
rich==13.9.4
|
|
129
131
|
# via typer
|
|
130
|
-
rpds-py==0.
|
|
132
|
+
rpds-py==0.22.3
|
|
131
133
|
# via
|
|
132
134
|
# jsonschema
|
|
133
135
|
# referencing
|
|
134
|
-
ruamel-yaml==0.18.
|
|
136
|
+
ruamel-yaml==0.18.10
|
|
135
137
|
# via hdx-python-utilities (pyproject.toml)
|
|
136
|
-
ruamel-yaml-clib==0.2.
|
|
138
|
+
ruamel-yaml-clib==0.2.12
|
|
137
139
|
# via ruamel-yaml
|
|
138
140
|
shellingham==1.5.4
|
|
139
141
|
# via typer
|
|
140
|
-
simpleeval==0.
|
|
142
|
+
simpleeval==1.0.3
|
|
141
143
|
# via frictionless
|
|
142
|
-
six==1.
|
|
144
|
+
six==1.17.0
|
|
143
145
|
# via
|
|
144
146
|
# html5lib
|
|
145
|
-
# isodate
|
|
146
147
|
# python-dateutil
|
|
147
148
|
soupsieve==2.6
|
|
148
149
|
# via beautifulsoup4
|
|
@@ -154,7 +155,7 @@ tabulate==0.9.0
|
|
|
154
155
|
# via frictionless
|
|
155
156
|
text-unidecode==1.3
|
|
156
157
|
# via python-slugify
|
|
157
|
-
typer==0.
|
|
158
|
+
typer==0.15.1
|
|
158
159
|
# via frictionless
|
|
159
160
|
typing-extensions==4.12.2
|
|
160
161
|
# via
|
|
@@ -162,17 +163,19 @@ typing-extensions==4.12.2
|
|
|
162
163
|
# pydantic
|
|
163
164
|
# pydantic-core
|
|
164
165
|
# typer
|
|
165
|
-
|
|
166
|
+
unidecode==1.3.8
|
|
167
|
+
# via pyphonetics
|
|
168
|
+
urllib3==2.3.0
|
|
166
169
|
# via requests
|
|
167
170
|
validators==0.34.0
|
|
168
171
|
# via frictionless
|
|
169
|
-
virtualenv==20.
|
|
172
|
+
virtualenv==20.28.1
|
|
170
173
|
# via pre-commit
|
|
171
174
|
webencodings==0.5.1
|
|
172
175
|
# via html5lib
|
|
173
176
|
xlrd==2.0.1
|
|
174
177
|
# via hdx-python-utilities (pyproject.toml)
|
|
175
|
-
xlsx2csv==0.8.
|
|
178
|
+
xlsx2csv==0.8.4
|
|
176
179
|
# via hdx-python-utilities (pyproject.toml)
|
|
177
180
|
xlsxwriter==3.2.0
|
|
178
181
|
# via tableschema-to-template
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Collect errors and warnings by category and log them."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
from hdx.utilities.dictandlist import dict_of_sets_add
|
|
8
|
+
from hdx.utilities.typehint import ListTuple
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ErrorHandler:
|
|
14
|
+
"""Class that enables recording of errors and warnings.
|
|
15
|
+
|
|
16
|
+
Errors and warnings can be logged by calling the `output` method or
|
|
17
|
+
automatically logged on exit. Messages are output grouped by category and
|
|
18
|
+
sorted.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
should_exit_on_error (bool): Whether to exit with a 1 code if there are errors. Default is True.
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
should_exit_on_error: bool = True,
|
|
28
|
+
):
|
|
29
|
+
self.should_exit_on_error = should_exit_on_error
|
|
30
|
+
self.shared_errors = {
|
|
31
|
+
"error": {},
|
|
32
|
+
"warning": {},
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
def add(
|
|
36
|
+
self, message: str, category: str = "", message_type: str = "error"
|
|
37
|
+
) -> None:
|
|
38
|
+
"""Add an error or warning to be logged. Prepend category if supplied. Output format:
|
|
39
|
+
error category - {text}
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
message (str): Error message
|
|
43
|
+
category (str): Error category. Defaults to "".
|
|
44
|
+
message_type (str): The type of message (error or warning). Default is "error"
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
None
|
|
48
|
+
"""
|
|
49
|
+
message = message.strip()
|
|
50
|
+
if category:
|
|
51
|
+
output = f"{category} - {message}"
|
|
52
|
+
else:
|
|
53
|
+
output = message
|
|
54
|
+
dict_of_sets_add(self.shared_errors[message_type], category, output)
|
|
55
|
+
|
|
56
|
+
@staticmethod
|
|
57
|
+
def missing_value_message(value_type: str, value: Any) -> str:
|
|
58
|
+
"""
|
|
59
|
+
Generate a formatted message for a missing value of a specific type in
|
|
60
|
+
a fixed format:
|
|
61
|
+
error category - type n not found
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
value_type (str): The type of value that is missing
|
|
65
|
+
value (Any): The specific missing value
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
str: A formatted message stating the missing value and its type
|
|
69
|
+
"""
|
|
70
|
+
return f"{value_type} {str(value)} not found"
|
|
71
|
+
|
|
72
|
+
def add_missing_value(
|
|
73
|
+
self,
|
|
74
|
+
value_type: str,
|
|
75
|
+
value: Any,
|
|
76
|
+
category: str = "",
|
|
77
|
+
message_type: str = "error",
|
|
78
|
+
) -> None:
|
|
79
|
+
"""
|
|
80
|
+
Add a new message (typically a warning or error) concerning a missing value
|
|
81
|
+
to a dictionary of messages in a fixed format:
|
|
82
|
+
error category - type n not found
|
|
83
|
+
identifier is usually a dataset name.
|
|
84
|
+
Args:
|
|
85
|
+
value_type (str): Type of value e.g. "sector"
|
|
86
|
+
value (Any): Missing value
|
|
87
|
+
category (str): Error category. Defaults to "".
|
|
88
|
+
message_type (str): The type of message (error or warning). Default is "error"
|
|
89
|
+
Returns:
|
|
90
|
+
None
|
|
91
|
+
"""
|
|
92
|
+
self.add(
|
|
93
|
+
self.missing_value_message(value_type, value),
|
|
94
|
+
category,
|
|
95
|
+
message_type,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def multi_valued_message(
|
|
99
|
+
self, text: str, values: ListTuple
|
|
100
|
+
) -> Optional[str]:
|
|
101
|
+
"""
|
|
102
|
+
Generate a formatted message for a list of values in a fixed format:
|
|
103
|
+
error category - n {text}. First 10 values: n1,n2,n3...
|
|
104
|
+
If there are 10 or fewer values, ". First 10 values" is omitted. identifier is usually
|
|
105
|
+
a dataset name. Values are cast to string.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
text (str): Descriptive text for the issue (e.g., "invalid values")
|
|
109
|
+
values (ListTuple): The list of related values of concern
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
Optional[str]: A formatted string in the format defined above
|
|
113
|
+
"""
|
|
114
|
+
if not values:
|
|
115
|
+
return None
|
|
116
|
+
no_values = len(values)
|
|
117
|
+
if no_values > 10:
|
|
118
|
+
values = values[:10]
|
|
119
|
+
message_suffix = ". First 10 values"
|
|
120
|
+
else:
|
|
121
|
+
message_suffix = ""
|
|
122
|
+
return f"{no_values} {text}{message_suffix}: {', '.join(map(str, values))}"
|
|
123
|
+
|
|
124
|
+
def add_multi_valued(
|
|
125
|
+
self,
|
|
126
|
+
text: str,
|
|
127
|
+
values: ListTuple,
|
|
128
|
+
category: str = "",
|
|
129
|
+
message_type: str = "error",
|
|
130
|
+
) -> bool:
|
|
131
|
+
"""
|
|
132
|
+
Add a new message (typically a warning or error) concerning a list of
|
|
133
|
+
values to a set of messages in a fixed format:
|
|
134
|
+
error category - n {text}. First 10 values: n1,n2,n3...
|
|
135
|
+
If there are 10 or fewer values, ". First 10 values" is omitted. identifier is usually
|
|
136
|
+
a dataset name. Values are cast to string.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
text (str): Text to use e.g. "negative values removed"
|
|
140
|
+
values (ListTuple): List of values of concern
|
|
141
|
+
category (str): Error category. Defaults to "".
|
|
142
|
+
message_type (str): The type of message (error or warning). Default is "error"
|
|
143
|
+
Returns:
|
|
144
|
+
bool: True if a message was added, False if not
|
|
145
|
+
"""
|
|
146
|
+
message = self.multi_valued_message(text, values)
|
|
147
|
+
if message is None:
|
|
148
|
+
return False
|
|
149
|
+
self.add(message, category, message_type)
|
|
150
|
+
return True
|
|
151
|
+
|
|
152
|
+
def log(self) -> None:
|
|
153
|
+
"""
|
|
154
|
+
Log errors and warnings, grouped by category and sorted
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
None
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
for _, errors in self.shared_errors["error"].items():
|
|
161
|
+
errors = sorted(errors)
|
|
162
|
+
for error in errors:
|
|
163
|
+
logger.error(error)
|
|
164
|
+
for _, warnings in self.shared_errors["warning"].items():
|
|
165
|
+
warnings = sorted(warnings)
|
|
166
|
+
for warning in warnings:
|
|
167
|
+
logger.warning(warning)
|
|
168
|
+
|
|
169
|
+
def exit_on_error(self) -> None:
|
|
170
|
+
"""Exit with a 1 code if there are errors and should_exit_on_error
|
|
171
|
+
is True
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
None
|
|
175
|
+
"""
|
|
176
|
+
if self.should_exit_on_error and self.shared_errors["error"]:
|
|
177
|
+
sys.exit(1)
|
|
178
|
+
|
|
179
|
+
def __enter__(self) -> "ErrorHandler":
|
|
180
|
+
return self
|
|
181
|
+
|
|
182
|
+
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
|
183
|
+
self.log()
|
|
184
|
+
if exc_type is None:
|
|
185
|
+
self.exit_on_error()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Collect and log errors on exit."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
from hdx.utilities.error_handler import ErrorHandler
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ErrorsOnExit(ErrorHandler): # pragma: no cover
|
|
9
|
+
def __init__(self) -> None:
|
|
10
|
+
warnings.warn(
|
|
11
|
+
"The ErrorsOnExit class was renamed ErrorHandler and will be removed in future!",
|
|
12
|
+
DeprecationWarning,
|
|
13
|
+
)
|
|
14
|
+
super().__init__()
|