hdx-python-utilities 3.7.1__tar.gz → 3.7.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/PKG-INFO +2 -1
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/documentation/main.md +3 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/pyproject.toml +1 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/requirements.txt +13 -11
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/_version.py +2 -2
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/downloader.py +19 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/retriever.py +1 -1
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/text.py +39 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_downloader.py +76 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_text.py +9 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.config/coveragerc +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.config/pre-commit-config.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.config/pytest.ini +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.config/ruff.toml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.github/workflows/publish.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.gitignore +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/CONTRIBUTING.md +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/LICENSE +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/README.md +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/__init__.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/base_downloader.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/compare.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/dateparse.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/dictandlist.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/easy_logging.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/email.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/encoding.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/errors_onexit.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/frictionless_wrapper.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/html.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/loader.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/path.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/saver.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/session.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/state.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/typehint.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/useragent.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/uuid.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/compare/test_csv_processing.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/compare/test_csv_processing2.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/empty.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_config.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_config.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_email_configuration.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_email_configuration.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/json_csv.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/logging_config.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/logging_config.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/project_configuration.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/project_configuration.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/smtp_config.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/smtp_config.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/user_agent_config.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/user_agent_config2.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/user_agent_config3.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/user_agent_config_wrong.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/basicauth.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/bearertoken.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/extra_params.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/extra_params.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/extra_params_tree.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_csv_processing.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_csv_processing_blanks.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data.xlsx +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data1.csv/empty.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data2.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_json_processing.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_xls_processing.xls +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_xlsx_processing.xlsx +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/html/response.html +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/loader/empty.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/loader/empty.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/fallbacks/test.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/fallbacks/test.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/fallbacks/test.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/fallbacks/test.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/retriever-test.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test_hxl.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out2.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out2.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out5.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out6.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out7.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out8.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/out8.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-false_sortkeys-false.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-false_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-false_sortkeys-true.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-false_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-true_sortkeys-false.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-true_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-true_sortkeys-true.json +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/saver/pretty-true_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/state/analysis_dates.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/state/last_build_date.txt +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/test_data.csv +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/conftest.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_compare.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_dateparse.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_dictandlist.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_easy_logging.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_email.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_encoding.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_errors_onexit.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_html.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_loader.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_path.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_retriever.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_saver.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_state.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_useragent.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_uuid.py +0 -0
- {hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: hdx-python-utilities
|
|
3
|
-
Version: 3.7.1
|
|
3
|
+
Version: 3.7.3
|
|
4
4
|
Summary: HDX Python Utilities for streaming tabular data, date and time handling and other helpful functions
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-utilities
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -36,6 +36,7 @@ Requires-Dist: requests-file
|
|
|
36
36
|
Requires-Dist: ruamel-yaml
|
|
37
37
|
Requires-Dist: tableschema-to-template>=0.0.13
|
|
38
38
|
Requires-Dist: xlrd>=2.0.1
|
|
39
|
+
Requires-Dist: xlsx2csv
|
|
39
40
|
Requires-Dist: xlwt>=1.3.0
|
|
40
41
|
Provides-Extra: dev
|
|
41
42
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
@@ -785,6 +785,9 @@ Examples:
|
|
|
785
785
|
|
|
786
786
|
a = "The quick brown fox jumped over the lazy dog. It was so fast!"
|
|
787
787
|
|
|
788
|
+
# Normalise text eg. to support name matching
|
|
789
|
+
assert normalise("£^*& ()+-[]<>?|\ Al DhaleZ'eÉ / الضالع,,..1234''#~~### ") == "al dhalezee 1234"
|
|
790
|
+
|
|
788
791
|
# Remove whitespace and punctuation from end of string
|
|
789
792
|
assert remove_end_characters('lalala,.,"') == "lalala"
|
|
790
793
|
assert remove_end_characters('lalala, .\t/,"', f"{punctuation}{whitespace}") == "lalala"
|
|
@@ -10,7 +10,7 @@ attrs==23.2.0
|
|
|
10
10
|
# referencing
|
|
11
11
|
beautifulsoup4==4.12.3
|
|
12
12
|
# via hdx-python-utilities (pyproject.toml)
|
|
13
|
-
certifi==2024.
|
|
13
|
+
certifi==2024.7.4
|
|
14
14
|
# via requests
|
|
15
15
|
cfgv==3.4.0
|
|
16
16
|
# via pre-commit
|
|
@@ -20,7 +20,7 @@ charset-normalizer==3.3.2
|
|
|
20
20
|
# via requests
|
|
21
21
|
click==8.1.7
|
|
22
22
|
# via typer
|
|
23
|
-
coverage==7.
|
|
23
|
+
coverage==7.6.0
|
|
24
24
|
# via pytest-cov
|
|
25
25
|
distlib==0.3.8
|
|
26
26
|
# via virtualenv
|
|
@@ -36,9 +36,9 @@ frictionless==5.17.0
|
|
|
36
36
|
# via hdx-python-utilities (pyproject.toml)
|
|
37
37
|
html5lib==1.1
|
|
38
38
|
# via hdx-python-utilities (pyproject.toml)
|
|
39
|
-
humanize==4.
|
|
39
|
+
humanize==4.10.0
|
|
40
40
|
# via frictionless
|
|
41
|
-
identify==2.
|
|
41
|
+
identify==2.6.0
|
|
42
42
|
# via pre-commit
|
|
43
43
|
idna==3.7
|
|
44
44
|
# via
|
|
@@ -54,7 +54,7 @@ jinja2==3.1.4
|
|
|
54
54
|
# via frictionless
|
|
55
55
|
jsonlines==4.0.0
|
|
56
56
|
# via hdx-python-utilities (pyproject.toml)
|
|
57
|
-
jsonschema==4.
|
|
57
|
+
jsonschema==4.23.0
|
|
58
58
|
# via
|
|
59
59
|
# frictionless
|
|
60
60
|
# tableschema-to-template
|
|
@@ -84,15 +84,15 @@ platformdirs==4.2.2
|
|
|
84
84
|
# via virtualenv
|
|
85
85
|
pluggy==1.5.0
|
|
86
86
|
# via pytest
|
|
87
|
-
pre-commit==3.
|
|
87
|
+
pre-commit==3.8.0
|
|
88
88
|
# via hdx-python-utilities (pyproject.toml)
|
|
89
|
-
pydantic==2.8.
|
|
89
|
+
pydantic==2.8.2
|
|
90
90
|
# via frictionless
|
|
91
|
-
pydantic-core==2.20.
|
|
91
|
+
pydantic-core==2.20.1
|
|
92
92
|
# via pydantic
|
|
93
93
|
pygments==2.18.0
|
|
94
94
|
# via rich
|
|
95
|
-
pytest==8.
|
|
95
|
+
pytest==8.3.2
|
|
96
96
|
# via
|
|
97
97
|
# hdx-python-utilities (pyproject.toml)
|
|
98
98
|
# pytest-cov
|
|
@@ -127,7 +127,7 @@ rfc3986==2.0.0
|
|
|
127
127
|
# via frictionless
|
|
128
128
|
rich==13.7.1
|
|
129
129
|
# via typer
|
|
130
|
-
rpds-py==0.
|
|
130
|
+
rpds-py==0.19.1
|
|
131
131
|
# via
|
|
132
132
|
# jsonschema
|
|
133
133
|
# referencing
|
|
@@ -164,7 +164,7 @@ typing-extensions==4.12.2
|
|
|
164
164
|
# typer
|
|
165
165
|
urllib3==2.2.2
|
|
166
166
|
# via requests
|
|
167
|
-
validators==0.
|
|
167
|
+
validators==0.33.0
|
|
168
168
|
# via frictionless
|
|
169
169
|
virtualenv==20.26.3
|
|
170
170
|
# via pre-commit
|
|
@@ -172,6 +172,8 @@ webencodings==0.5.1
|
|
|
172
172
|
# via html5lib
|
|
173
173
|
xlrd==2.0.1
|
|
174
174
|
# via hdx-python-utilities (pyproject.toml)
|
|
175
|
+
xlsx2csv==0.8.3
|
|
176
|
+
# via hdx-python-utilities (pyproject.toml)
|
|
175
177
|
xlsxwriter==3.2.0
|
|
176
178
|
# via tableschema-to-template
|
|
177
179
|
xlwt==1.3.0
|
|
@@ -15,6 +15,7 @@ from frictionless.resources import TableResource
|
|
|
15
15
|
from ratelimit import RateLimitDecorator, sleep_and_retry
|
|
16
16
|
from requests import Request
|
|
17
17
|
from ruamel.yaml import YAML
|
|
18
|
+
from xlsx2csv import Xlsx2csv
|
|
18
19
|
|
|
19
20
|
from .base_downloader import BaseDownload, DownloadError
|
|
20
21
|
from .frictionless_wrapper import get_frictionless_tableresource
|
|
@@ -669,6 +670,7 @@ class Download(BaseDownload):
|
|
|
669
670
|
**kwargs:
|
|
670
671
|
format (Optional[str]): Type of file. Defaults to inferring.
|
|
671
672
|
file_type (Optional[str]): Type of file. Defaults to inferring.
|
|
673
|
+
xlsx2csv (bool): Whether to convert xlsx files. Defaults to False.
|
|
672
674
|
encoding (Optional[str]): Type of encoding. Defaults to inferring.
|
|
673
675
|
compression (Optional[str]): Type of compression. Defaults to inferring.
|
|
674
676
|
delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring.
|
|
@@ -690,6 +692,20 @@ class Download(BaseDownload):
|
|
|
690
692
|
"""
|
|
691
693
|
if headers is None:
|
|
692
694
|
raise DownloadError("Argument headers cannot be None!")
|
|
695
|
+
xlsx2csv = kwargs.pop("xlsx2csv", False)
|
|
696
|
+
if xlsx2csv:
|
|
697
|
+
path = self.download_file(url)
|
|
698
|
+
outpath = path.replace(".xlsx", ".csv")
|
|
699
|
+
sheet = kwargs.pop("sheet", 1)
|
|
700
|
+
if isinstance(sheet, int):
|
|
701
|
+
sheet_args = {"sheetid": sheet}
|
|
702
|
+
else:
|
|
703
|
+
sheet_args = {"sheetname": sheet}
|
|
704
|
+
Xlsx2csv(path).convert(outpath, **sheet_args)
|
|
705
|
+
url = outpath
|
|
706
|
+
kwargs["format"] = "csv" # format takes precedence over file_type
|
|
707
|
+
kwargs.pop("fill_merged_cells", None)
|
|
708
|
+
|
|
693
709
|
resource = self.get_frictionless_tableresource(
|
|
694
710
|
url,
|
|
695
711
|
ignore_blank_rows=ignore_blank_rows,
|
|
@@ -771,6 +787,7 @@ class Download(BaseDownload):
|
|
|
771
787
|
**kwargs:
|
|
772
788
|
format (Optional[str]): Type of file. Defaults to inferring.
|
|
773
789
|
file_type (Optional[str]): Type of file. Defaults to inferring.
|
|
790
|
+
xlsx2csv (bool): Whether to convert xlsx files. Defaults to False.
|
|
774
791
|
encoding (Optional[str]): Type of encoding. Defaults to inferring.
|
|
775
792
|
compression (Optional[str]): Type of compression. Defaults to inferring.
|
|
776
793
|
delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring.
|
|
@@ -875,6 +892,7 @@ class Download(BaseDownload):
|
|
|
875
892
|
**kwargs:
|
|
876
893
|
format (Optional[str]): Type of file. Defaults to inferring.
|
|
877
894
|
file_type (Optional[str]): Type of file. Defaults to inferring.
|
|
895
|
+
xlsx2csv (bool): Whether to convert xlsx files. Defaults to False.
|
|
878
896
|
encoding (Optional[str]): Type of encoding. Defaults to inferring.
|
|
879
897
|
compression (Optional[str]): Type of compression. Defaults to inferring.
|
|
880
898
|
delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring.
|
|
@@ -950,6 +968,7 @@ class Download(BaseDownload):
|
|
|
950
968
|
**kwargs:
|
|
951
969
|
format (Optional[str]): Type of file. Defaults to inferring.
|
|
952
970
|
file_type (Optional[str]): Type of file. Defaults to inferring.
|
|
971
|
+
xlsx2csv (bool): Whether to convert xlsx files. Defaults to False.
|
|
953
972
|
encoding (Optional[str]): Type of encoding. Defaults to inferring.
|
|
954
973
|
compression (Optional[str]): Type of compression. Defaults to inferring.
|
|
955
974
|
delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring.
|
|
@@ -418,7 +418,7 @@ class Retrieve(BaseDownload):
|
|
|
418
418
|
filename (Optional[str]): Filename of saved file. Defaults to getting from url.
|
|
419
419
|
logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename.
|
|
420
420
|
fallback (bool): Whether to use static fallback if download fails. Defaults to False.
|
|
421
|
-
**kwargs: Parameters to pass to download_file
|
|
421
|
+
**kwargs: Parameters to pass to download_file and get_tabular_rows calls
|
|
422
422
|
|
|
423
423
|
Returns:
|
|
424
424
|
Tuple[List[str],Iterator[ListDict]]: Tuple (headers, iterator where each row is a list or dictionary)
|
|
@@ -4,6 +4,7 @@ import difflib
|
|
|
4
4
|
import logging
|
|
5
5
|
import re
|
|
6
6
|
import string
|
|
7
|
+
import unicodedata
|
|
7
8
|
from string import punctuation
|
|
8
9
|
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
9
10
|
|
|
@@ -16,6 +17,44 @@ PUNCTUATION_MINUS_BRACKETS = r"""!"#$%&'*+,-./:;<=>?@\^_`|~"""
|
|
|
16
17
|
TEMPLATE_VARIABLES = re.compile("{{.*?}}")
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
KEEP_CHARS_SAME = set(string.ascii_lowercase).union(set(string.digits))
|
|
21
|
+
CHANGE_TO_LOWERCASE = set(string.ascii_uppercase)
|
|
22
|
+
MAP_TO_SPACE = set(string.punctuation).union(set(string.whitespace))
|
|
23
|
+
MAP_TO_SPACE.remove("'")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def normalise(text: str) -> str:
|
|
27
|
+
"""
|
|
28
|
+
Normalise text, for example to support name matching. Accented characters
|
|
29
|
+
are replaced with non-accented if possible. Any punctuation and whitespace
|
|
30
|
+
is replaced with a space except for ' which is replaced with blank.
|
|
31
|
+
Multiple spaces are replaced with a single space. Uppercase is replaced
|
|
32
|
+
with lowercase. Spaces at start and end are removed. All non-ASCII
|
|
33
|
+
characters are removed.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
text (str): Text to normalise
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
str: Normalised text
|
|
40
|
+
"""
|
|
41
|
+
chars = []
|
|
42
|
+
space = False
|
|
43
|
+
for chr in unicodedata.normalize("NFD", text):
|
|
44
|
+
if chr in KEEP_CHARS_SAME:
|
|
45
|
+
chars.append(chr)
|
|
46
|
+
space = False
|
|
47
|
+
elif chr in CHANGE_TO_LOWERCASE:
|
|
48
|
+
chars.append(chr.lower())
|
|
49
|
+
space = False
|
|
50
|
+
elif chr in MAP_TO_SPACE:
|
|
51
|
+
if space:
|
|
52
|
+
continue
|
|
53
|
+
chars.append(" ")
|
|
54
|
+
space = True
|
|
55
|
+
return "".join(chars).strip()
|
|
56
|
+
|
|
57
|
+
|
|
19
58
|
def remove_end_characters(
|
|
20
59
|
string: str, characters_to_remove: str = punctuation
|
|
21
60
|
) -> str:
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_downloader.py
RENAMED
|
@@ -1220,6 +1220,82 @@ class TestDownloader:
|
|
|
1220
1220
|
"Tulkarm",
|
|
1221
1221
|
]
|
|
1222
1222
|
|
|
1223
|
+
def test_get_tabular_rows_xlsx2csv(self, fixtureurlexcel):
|
|
1224
|
+
expected_headers = [
|
|
1225
|
+
"GWNO",
|
|
1226
|
+
"EVENT_ID_CNTY",
|
|
1227
|
+
"EVENT_ID_NO_CNTY",
|
|
1228
|
+
"EVENT_DATE",
|
|
1229
|
+
"YEAR",
|
|
1230
|
+
"TIME_PRECISION",
|
|
1231
|
+
"EVENT_TYPE",
|
|
1232
|
+
"ACTOR1",
|
|
1233
|
+
"ALLY_ACTOR_1",
|
|
1234
|
+
"INTER1",
|
|
1235
|
+
"ACTOR2",
|
|
1236
|
+
"ALLY_ACTOR_2",
|
|
1237
|
+
"INTER2",
|
|
1238
|
+
"INTERACTION",
|
|
1239
|
+
"COUNTRY",
|
|
1240
|
+
"ADMIN1",
|
|
1241
|
+
"ADMIN2",
|
|
1242
|
+
"ADMIN3",
|
|
1243
|
+
"LOCATION",
|
|
1244
|
+
"LATITUDE",
|
|
1245
|
+
"LONGITUDE",
|
|
1246
|
+
"GEO_PRECISION",
|
|
1247
|
+
"SOURCE",
|
|
1248
|
+
"NOTES",
|
|
1249
|
+
"FATALITIES",
|
|
1250
|
+
]
|
|
1251
|
+
|
|
1252
|
+
expected_row = [
|
|
1253
|
+
"615",
|
|
1254
|
+
"1416RTA",
|
|
1255
|
+
None,
|
|
1256
|
+
"18/04/2001",
|
|
1257
|
+
"2001",
|
|
1258
|
+
"1",
|
|
1259
|
+
"Violence against civilians",
|
|
1260
|
+
"Police Forces of Algeria (1999-)",
|
|
1261
|
+
None,
|
|
1262
|
+
"1",
|
|
1263
|
+
"Civilians (Algeria)",
|
|
1264
|
+
"Berber Ethnic Group (Algeria)",
|
|
1265
|
+
"7",
|
|
1266
|
+
"17",
|
|
1267
|
+
"Algeria",
|
|
1268
|
+
"Tizi Ouzou",
|
|
1269
|
+
"Beni-Douala",
|
|
1270
|
+
None,
|
|
1271
|
+
"Beni Douala",
|
|
1272
|
+
"36.61954",
|
|
1273
|
+
"4.08282",
|
|
1274
|
+
"1",
|
|
1275
|
+
"Associated Press Online",
|
|
1276
|
+
"A Berber student was shot while in police custody at a police station in "
|
|
1277
|
+
"Beni Douala. He later died on Apr.21.",
|
|
1278
|
+
"1",
|
|
1279
|
+
]
|
|
1280
|
+
|
|
1281
|
+
with Download() as downloader:
|
|
1282
|
+
headers, iterator = downloader.get_tabular_rows(
|
|
1283
|
+
fixtureurlexcel,
|
|
1284
|
+
format="xlsx",
|
|
1285
|
+
xlsx2csv=True,
|
|
1286
|
+
)
|
|
1287
|
+
assert headers == expected_headers
|
|
1288
|
+
assert list(iterator)[0] == expected_row
|
|
1289
|
+
|
|
1290
|
+
headers, iterator = downloader.get_tabular_rows(
|
|
1291
|
+
fixtureurlexcel,
|
|
1292
|
+
format="xlsx",
|
|
1293
|
+
xlsx2csv=True,
|
|
1294
|
+
sheet="test_data",
|
|
1295
|
+
)
|
|
1296
|
+
assert headers == expected_headers
|
|
1297
|
+
assert list(iterator)[0] == expected_row
|
|
1298
|
+
|
|
1223
1299
|
def test_get_tabular_rows_json(self, fixturejsonurl):
|
|
1224
1300
|
with Download() as downloader:
|
|
1225
1301
|
headers, iterator = downloader.get_tabular_rows(
|
|
@@ -15,6 +15,7 @@ from hdx.utilities.text import (
|
|
|
15
15
|
get_words_in_sentence,
|
|
16
16
|
match_template_variables,
|
|
17
17
|
multiple_replace,
|
|
18
|
+
normalise,
|
|
18
19
|
number_format,
|
|
19
20
|
only_allowed_in_str,
|
|
20
21
|
remove_end_characters,
|
|
@@ -28,6 +29,14 @@ class TestText:
|
|
|
28
29
|
b = "The quicker brown fox leapt over the slower fox. It was so fast!"
|
|
29
30
|
c = "The quick brown fox climbed over the lazy dog. It was so fast!"
|
|
30
31
|
|
|
32
|
+
def test_normalise(self):
|
|
33
|
+
assert (
|
|
34
|
+
normalise(
|
|
35
|
+
"£^*& ()+-[]<>?|\ Al DhaleZ'eÉ / الضالع,,..1234''#~~### "
|
|
36
|
+
)
|
|
37
|
+
== "al dhalezee 1234"
|
|
38
|
+
)
|
|
39
|
+
|
|
31
40
|
def test_remove_end_characters(self):
|
|
32
41
|
assert remove_end_characters('lalala,.,"') == "lalala"
|
|
33
42
|
assert (
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/.github/workflows/run-python-tests.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/base_downloader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/errors_onexit.py
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/src/hdx/utilities/frictionless_wrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_config.json
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/hdx_config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/json_csv.yaml
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/logging_config.json
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/logging_config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/smtp_config.json
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/config/smtp_config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/basicauth.txt
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/bearertoken.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data.csv
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data.xlsx
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/downloader/test_data2.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.json
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test.yaml
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/retriever/test_hxl.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/state/analysis_dates.txt
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/fixtures/state/last_build_date.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_compare.py
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_dateparse.py
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_dictandlist.py
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_easy_logging.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_encoding.py
RENAMED
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_errors_onexit.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_retriever.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hdx_python_utilities-3.7.1 → hdx_python_utilities-3.7.3}/tests/hdx/utilities/test_useragent.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|