hdx-python-utilities 4.0.0__tar.gz → 4.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/PKG-INFO +1 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/index.md +6 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/_version.py +2 -2
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/base_downloader.py +2 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/compare.py +7 -2
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/dictandlist.py +2 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/downloader.py +28 -25
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/email.py +3 -2
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/file_hashing.py +2 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/frictionless_wrapper.py +1 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/html.py +2 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/loader.py +11 -10
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/path.py +45 -39
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/retriever.py +25 -22
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/saver.py +9 -9
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/session.py +2 -1
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/state.py +3 -2
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/useragent.py +10 -9
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/conftest.py +3 -3
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_compare.py +2 -4
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_dictandlist.py +5 -4
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_downloader.py +39 -38
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_email.py +2 -4
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_file_hashing.py +8 -10
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_html.py +1 -3
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_loader.py +16 -16
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_path.py +20 -18
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_retriever.py +39 -27
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_saver.py +36 -35
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_state.py +5 -6
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_useragent.py +5 -7
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_zip_crc.py +4 -6
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.coveragerc +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/publish-test.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/publish.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.gitignore +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/.pre-commit-config.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/CONTRIBUTING.md +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/LICENSE +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/README.md +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/documentation/mkdocs.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/hatch.toml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/pyproject.toml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/pytest.ini +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/requirements.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/ruff.toml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/__init__.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/dateparse.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/easy_logging.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/encoding.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/error_handler.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/errors_onexit.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/matching.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/text.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/uuid.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/zip_crc.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/compare/test_csv_processing.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/compare/test_csv_processing2.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/empty.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_config.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_config.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_email_configuration.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/hdx_email_configuration.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/json_csv.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/logging_config.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/logging_config.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/project_configuration.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/project_configuration.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/smtp_config.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/smtp_config.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config2.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config3.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/config/user_agent_config_wrong.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/basicauth.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/bearertoken.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/extra_params_tree.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_csv_processing.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_csv_processing_blanks.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data.xlsx +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data1.csv/empty.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_data2.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_json_processing.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_xls_processing.xls +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/downloader/test_xlsx_processing.xlsx +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/bad_header.zip +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/bad_index.xlsx +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/empty.zip +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/test.xlsx +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/test_shapefile.zip +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/file_hashing/valid_sig_invalid_body.zip +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/html/response.html +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/loader/empty_list.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/fallbacks/test.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/retriever-test.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/retriever/test_hxl.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out2.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out2.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out5.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out6.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out7.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out8.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/out8.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-false.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-true.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-false_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-false.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-false.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-true.json +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/saver/pretty-true_sortkeys-true.yaml +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/state/analysis_dates.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/state/last_build_date.txt +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/fixtures/test_data.csv +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_dateparse.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_easy_logging.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_encoding.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_error_handler.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_matching.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_text.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/test_uuid.py +0 -0
- {hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/tests/hdx/utilities/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hdx-python-utilities
|
|
3
|
-
Version: 4.0.0
|
|
3
|
+
Version: 4.0.2
|
|
4
4
|
Summary: HDX Python Utilities for streaming tabular data, date and time handling and other helpful functions
|
|
5
5
|
Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-utilities
|
|
6
6
|
Author-email: Michael Rans <rans@email.com>
|
|
@@ -33,7 +33,12 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit
|
|
|
33
33
|
The library has detailed API documentation which can be found in the menu at the top.
|
|
34
34
|
|
|
35
35
|
## Breaking Changes
|
|
36
|
-
From 4.0.0, Python 3.10 or later is required
|
|
36
|
+
From 4.0.1, the library is Path-aware. The following methods return Path, not str:
|
|
37
|
+
download_file, get_path_for_url, stream_path, get_temp_dir, script_dir_plus_file,
|
|
38
|
+
script_dir. The context managers in hdx.utilities.path where they yield a path
|
|
39
|
+
do so as a Path object, not a str.
|
|
40
|
+
|
|
41
|
+
From 4.0.0, Python 3.10 or later is required
|
|
37
42
|
|
|
38
43
|
From 3.8.0, multiple_replace, match_template_variables, earliest_index,
|
|
39
44
|
get_matching_text_in_strs, get_matching_text,
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '4.0.
|
|
32
|
-
__version_tuple__ = version_tuple = (4, 0,
|
|
31
|
+
__version__ = version = '4.0.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (4, 0, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
{hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/base_downloader.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from collections.abc import Iterator, Sequence
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
from typing import Any
|
|
4
5
|
|
|
5
6
|
|
|
@@ -32,7 +33,7 @@ class BaseDownload(ABC):
|
|
|
32
33
|
"""
|
|
33
34
|
|
|
34
35
|
@abstractmethod
|
|
35
|
-
def download_file(self, url: str, *args: Any, **kwargs: Any) ->
|
|
36
|
+
def download_file(self, url: str, *args: Any, **kwargs: Any) -> Path:
|
|
36
37
|
"""Download file from url.
|
|
37
38
|
|
|
38
39
|
Args:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""File compare utilities."""
|
|
2
2
|
|
|
3
3
|
from os import linesep
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
try:
|
|
6
7
|
from cydifflib import ndiff
|
|
@@ -8,7 +9,9 @@ except ImportError:
|
|
|
8
9
|
from difflib import ndiff
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
def compare_files(
|
|
12
|
+
def compare_files(
|
|
13
|
+
path1: Path | str, path2: Path | str, encoding: str = "utf-8"
|
|
14
|
+
) -> list[str]:
|
|
12
15
|
"""Returns the delta between two files using -, ?, + format excluding lines
|
|
13
16
|
that are the same.
|
|
14
17
|
|
|
@@ -26,7 +29,9 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]:
|
|
|
26
29
|
return [x for x in diff if x[0] in ["-", "+", "?"]]
|
|
27
30
|
|
|
28
31
|
|
|
29
|
-
def assert_files_same(
|
|
32
|
+
def assert_files_same(
|
|
33
|
+
path1: Path | str, path2: Path | str, encoding: str = "utf-8"
|
|
34
|
+
) -> None:
|
|
30
35
|
"""Asserts that two files are the same and returns delta using.
|
|
31
36
|
|
|
32
37
|
-, ?, + format if not
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import itertools
|
|
4
4
|
import warnings
|
|
5
5
|
from collections.abc import Callable, Mapping, MutableMapping, Sequence
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
9
|
from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource
|
|
@@ -410,7 +411,7 @@ def read_list_from_csv(
|
|
|
410
411
|
|
|
411
412
|
|
|
412
413
|
def write_list_to_csv(
|
|
413
|
-
filepath: str,
|
|
414
|
+
filepath: Path | str,
|
|
414
415
|
rows: list[Sequence | Mapping],
|
|
415
416
|
headers: int | Sequence[str] | None = None,
|
|
416
417
|
columns: Sequence[int] | Sequence[str] | None = None,
|
|
@@ -5,7 +5,7 @@ import logging
|
|
|
5
5
|
from collections.abc import Callable, Iterator, Sequence
|
|
6
6
|
from copy import deepcopy
|
|
7
7
|
from os import remove
|
|
8
|
-
from os.path import exists, isfile,
|
|
8
|
+
from os.path import exists, isfile, split, splitext
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
from typing import Any
|
|
11
11
|
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
|
@@ -65,7 +65,7 @@ class Download(BaseDownload):
|
|
|
65
65
|
def __init__(
|
|
66
66
|
self,
|
|
67
67
|
user_agent: str | None = None,
|
|
68
|
-
user_agent_config_yaml: str | None = None,
|
|
68
|
+
user_agent_config_yaml: Path | str | None = None,
|
|
69
69
|
user_agent_lookup: str | None = None,
|
|
70
70
|
use_env: bool = True,
|
|
71
71
|
fail_on_missing_file: bool = True,
|
|
@@ -133,12 +133,12 @@ class Download(BaseDownload):
|
|
|
133
133
|
@staticmethod
|
|
134
134
|
def get_path_for_url(
|
|
135
135
|
url: str,
|
|
136
|
-
folder: str | None = None,
|
|
136
|
+
folder: Path | str | None = None,
|
|
137
137
|
filename: str | None = None,
|
|
138
|
-
path: str | None = None,
|
|
138
|
+
path: Path | str | None = None,
|
|
139
139
|
overwrite: bool = False,
|
|
140
140
|
keep: bool = False,
|
|
141
|
-
) ->
|
|
141
|
+
) -> Path:
|
|
142
142
|
"""Get filename from url and join to provided folder or temporary
|
|
143
143
|
folder if no folder supplied, ensuring uniqueness.
|
|
144
144
|
|
|
@@ -164,7 +164,8 @@ class Download(BaseDownload):
|
|
|
164
164
|
filename, extension = splitext(filename)
|
|
165
165
|
if not folder:
|
|
166
166
|
folder = get_temp_dir()
|
|
167
|
-
|
|
167
|
+
folder = Path(folder)
|
|
168
|
+
path = folder / f"{filename}{extension}"
|
|
168
169
|
if overwrite:
|
|
169
170
|
try:
|
|
170
171
|
remove(path)
|
|
@@ -174,7 +175,7 @@ class Download(BaseDownload):
|
|
|
174
175
|
count = 0
|
|
175
176
|
while exists(path):
|
|
176
177
|
count += 1
|
|
177
|
-
path =
|
|
178
|
+
path = folder / f"{filename}{count}{extension}"
|
|
178
179
|
return path
|
|
179
180
|
|
|
180
181
|
def get_full_url(self, url: str) -> str:
|
|
@@ -254,7 +255,7 @@ class Download(BaseDownload):
|
|
|
254
255
|
|
|
255
256
|
def normal_setup(
|
|
256
257
|
self,
|
|
257
|
-
url: str,
|
|
258
|
+
url: Path | str,
|
|
258
259
|
stream: bool = True,
|
|
259
260
|
post: bool = False,
|
|
260
261
|
parameters: dict | None = None,
|
|
@@ -281,6 +282,7 @@ class Download(BaseDownload):
|
|
|
281
282
|
self.close_response()
|
|
282
283
|
self.response = None
|
|
283
284
|
try:
|
|
285
|
+
url = str(url)
|
|
284
286
|
spliturl = urlsplit(url)
|
|
285
287
|
if not spliturl.scheme:
|
|
286
288
|
if isfile(url):
|
|
@@ -336,7 +338,7 @@ class Download(BaseDownload):
|
|
|
336
338
|
}
|
|
337
339
|
)
|
|
338
340
|
|
|
339
|
-
def hash_stream(self, url: str) -> str:
|
|
341
|
+
def hash_stream(self, url: Path | str) -> str:
|
|
340
342
|
"""Stream file from url and hash it using MD5. Must call setup method
|
|
341
343
|
first.
|
|
342
344
|
|
|
@@ -357,7 +359,7 @@ class Download(BaseDownload):
|
|
|
357
359
|
f"Download of {url} failed in retrieval of stream!" % url
|
|
358
360
|
)
|
|
359
361
|
|
|
360
|
-
def stream_path(self, path: str, errormsg: str):
|
|
362
|
+
def stream_path(self, path: Path | str, errormsg: str) -> Path:
|
|
361
363
|
"""Stream file from url and store in provided path. Must call setup
|
|
362
364
|
method first.
|
|
363
365
|
|
|
@@ -370,12 +372,13 @@ class Download(BaseDownload):
|
|
|
370
372
|
"""
|
|
371
373
|
f = None
|
|
372
374
|
try:
|
|
373
|
-
|
|
375
|
+
path = Path(path)
|
|
376
|
+
f = path.open("wb")
|
|
374
377
|
for chunk in self.response.iter_content(chunk_size=10240):
|
|
375
378
|
if chunk: # filter out keep-alive new chunks
|
|
376
379
|
f.write(chunk)
|
|
377
380
|
f.flush()
|
|
378
|
-
return
|
|
381
|
+
return path
|
|
379
382
|
except Exception as e:
|
|
380
383
|
raise DownloadError(errormsg) from e
|
|
381
384
|
finally:
|
|
@@ -384,10 +387,10 @@ class Download(BaseDownload):
|
|
|
384
387
|
|
|
385
388
|
def stream_file(
|
|
386
389
|
self,
|
|
387
|
-
url: str,
|
|
388
|
-
folder: str | None = None,
|
|
390
|
+
url: Path | str,
|
|
391
|
+
folder: Path | str | None = None,
|
|
389
392
|
filename: str | None = None,
|
|
390
|
-
path: str | None = None,
|
|
393
|
+
path: Path | str | None = None,
|
|
391
394
|
overwrite: bool = False,
|
|
392
395
|
keep: bool = False,
|
|
393
396
|
) -> str:
|
|
@@ -414,9 +417,9 @@ class Download(BaseDownload):
|
|
|
414
417
|
|
|
415
418
|
def download_file(
|
|
416
419
|
self,
|
|
417
|
-
url: str,
|
|
420
|
+
url: Path | str,
|
|
418
421
|
**kwargs: Any,
|
|
419
|
-
) ->
|
|
422
|
+
) -> Path:
|
|
420
423
|
"""Download file from url and store in provided folder or temporary
|
|
421
424
|
folder if no folder supplied.
|
|
422
425
|
|
|
@@ -460,7 +463,7 @@ class Download(BaseDownload):
|
|
|
460
463
|
path, f"Download of {url} failed in retrieval of stream!"
|
|
461
464
|
)
|
|
462
465
|
|
|
463
|
-
def download(self, url: str, **kwargs: Any) -> requests.Response:
|
|
466
|
+
def download(self, url: Path | str, **kwargs: Any) -> requests.Response:
|
|
464
467
|
"""Download url.
|
|
465
468
|
|
|
466
469
|
Args:
|
|
@@ -539,7 +542,7 @@ class Download(BaseDownload):
|
|
|
539
542
|
"""
|
|
540
543
|
return self.response.json()
|
|
541
544
|
|
|
542
|
-
def download_text(self, url: str, **kwargs: Any) -> str:
|
|
545
|
+
def download_text(self, url: Path | str, **kwargs: Any) -> str:
|
|
543
546
|
"""Download url as text.
|
|
544
547
|
|
|
545
548
|
Args:
|
|
@@ -557,7 +560,7 @@ class Download(BaseDownload):
|
|
|
557
560
|
self.download(url, **kwargs)
|
|
558
561
|
return self.get_text()
|
|
559
562
|
|
|
560
|
-
def download_yaml(self, url: str, **kwargs: Any) -> Any:
|
|
563
|
+
def download_yaml(self, url: Path | str, **kwargs: Any) -> Any:
|
|
561
564
|
"""Download url as YAML.
|
|
562
565
|
|
|
563
566
|
Args:
|
|
@@ -575,7 +578,7 @@ class Download(BaseDownload):
|
|
|
575
578
|
self.download(url, **kwargs)
|
|
576
579
|
return self.get_yaml()
|
|
577
580
|
|
|
578
|
-
def download_json(self, url: str, **kwargs: Any) -> Any:
|
|
581
|
+
def download_json(self, url: Path | str, **kwargs: Any) -> Any:
|
|
579
582
|
"""Download url as JSON.
|
|
580
583
|
|
|
581
584
|
Args:
|
|
@@ -595,7 +598,7 @@ class Download(BaseDownload):
|
|
|
595
598
|
|
|
596
599
|
def get_frictionless_tableresource(
|
|
597
600
|
self,
|
|
598
|
-
url: str,
|
|
601
|
+
url: Path | str,
|
|
599
602
|
ignore_blank_rows: bool = True,
|
|
600
603
|
infer_types: bool = False,
|
|
601
604
|
**kwargs: Any,
|
|
@@ -641,7 +644,7 @@ class Download(BaseDownload):
|
|
|
641
644
|
|
|
642
645
|
def _get_tabular_rows(
|
|
643
646
|
self,
|
|
644
|
-
url: str,
|
|
647
|
+
url: Path | str,
|
|
645
648
|
headers: int | Sequence[int] | Sequence[str] = 1,
|
|
646
649
|
dict_form: bool = False,
|
|
647
650
|
include_headers: bool = False,
|
|
@@ -704,7 +707,7 @@ class Download(BaseDownload):
|
|
|
704
707
|
xlsx2csv = kwargs.pop("xlsx2csv", False)
|
|
705
708
|
if xlsx2csv:
|
|
706
709
|
path = self.download_file(url)
|
|
707
|
-
outpath = path.
|
|
710
|
+
outpath = path.with_suffix(".csv")
|
|
708
711
|
sheet = kwargs.pop("sheet", 1)
|
|
709
712
|
if isinstance(sheet, int):
|
|
710
713
|
sheet_args = {"sheetid": sheet}
|
|
@@ -1276,7 +1279,7 @@ class Download(BaseDownload):
|
|
|
1276
1279
|
cls,
|
|
1277
1280
|
custom_configs: dict[str, dict],
|
|
1278
1281
|
user_agent: str | None = None,
|
|
1279
|
-
user_agent_config_yaml: str | None = None,
|
|
1282
|
+
user_agent_config_yaml: Path | str | None = None,
|
|
1280
1283
|
user_agent_lookup: str | None = None,
|
|
1281
1284
|
use_env: bool = True,
|
|
1282
1285
|
fail_on_missing_file: bool = True,
|
|
@@ -5,7 +5,8 @@ import smtplib
|
|
|
5
5
|
from collections.abc import Sequence
|
|
6
6
|
from email.mime.multipart import MIMEMultipart
|
|
7
7
|
from email.mime.text import MIMEText
|
|
8
|
-
from os.path import expanduser
|
|
8
|
+
from os.path import expanduser
|
|
9
|
+
from pathlib import Path
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
12
|
from hdx.utilities.loader import load_json, load_yaml
|
|
@@ -45,7 +46,7 @@ class Email:
|
|
|
45
46
|
email_config_yaml (str): Path to YAML HDX configuration. Defaults to ~/hdx_email_configuration.yaml.
|
|
46
47
|
"""
|
|
47
48
|
|
|
48
|
-
default_email_config_yaml =
|
|
49
|
+
default_email_config_yaml = Path(expanduser("~")) / "hdx_email_configuration.yaml"
|
|
49
50
|
|
|
50
51
|
def __init__(self, **kwargs: Any) -> None:
|
|
51
52
|
email_config_found = False
|
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import zipfile
|
|
4
4
|
from io import BytesIO, IOBase
|
|
5
5
|
from os import fstat
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
from openpyxl import load_workbook
|
|
8
9
|
from openpyxl.utils.exceptions import InvalidFileException
|
|
@@ -90,7 +91,7 @@ def crc_zip_fp(fp: IOBase) -> str:
|
|
|
90
91
|
return get_crc_sum(file_crcs)
|
|
91
92
|
|
|
92
93
|
|
|
93
|
-
def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]:
|
|
94
|
+
def get_size_and_hash(filepath: Path | str, file_format: str) -> tuple[int, str]:
|
|
94
95
|
"""Return the size and hash of file
|
|
95
96
|
|
|
96
97
|
Args:
|
{hdx_python_utilities-4.0.0 → hdx_python_utilities-4.0.2}/src/hdx/utilities/frictionless_wrapper.py
RENAMED
|
@@ -188,7 +188,7 @@ def get_frictionless_tableresource(
|
|
|
188
188
|
http_session = kwargs.pop("http_session", session)
|
|
189
189
|
with system.use_context(http_session=http_session):
|
|
190
190
|
if url:
|
|
191
|
-
resource = TableResource(path=url, **kwargs)
|
|
191
|
+
resource = TableResource(path=str(url), **kwargs)
|
|
192
192
|
else:
|
|
193
193
|
resource = TableResource(data=data, **kwargs)
|
|
194
194
|
resource.open()
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""HTML parsing utilities."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
6
7
|
try:
|
|
@@ -20,7 +21,7 @@ if BeautifulSoup is not None:
|
|
|
20
21
|
url: str,
|
|
21
22
|
downloader: Download = None,
|
|
22
23
|
user_agent: str | None = None,
|
|
23
|
-
user_agent_config_yaml: str | None = None,
|
|
24
|
+
user_agent_config_yaml: Path | str | None = None,
|
|
24
25
|
user_agent_lookup: str | None = None,
|
|
25
26
|
**kwargs: Any,
|
|
26
27
|
) -> BeautifulSoup:
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Loading utilities for YAML, JSON etc."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
from collections.abc import Mapping, Sequence
|
|
4
|
+
from collections.abc import Mapping, MutableMapping, Sequence
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
from typing import Any
|
|
6
7
|
from warnings import warn
|
|
7
8
|
|
|
@@ -15,7 +16,7 @@ class LoadError(Exception):
|
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
def load_text(
|
|
18
|
-
path: str,
|
|
19
|
+
path: Path | str,
|
|
19
20
|
encoding: str = "utf-8",
|
|
20
21
|
strip: bool = False,
|
|
21
22
|
replace_newlines: str | None = None,
|
|
@@ -59,7 +60,7 @@ def load_text(
|
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
def load_yaml(
|
|
62
|
-
path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
|
|
63
|
+
path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
|
|
63
64
|
) -> Any:
|
|
64
65
|
"""Load YAML file into an ordered dictionary.
|
|
65
66
|
|
|
@@ -83,7 +84,7 @@ def load_yaml(
|
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
def load_json(
|
|
86
|
-
path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
|
|
87
|
+
path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
|
|
87
88
|
) -> Any:
|
|
88
89
|
"""Load JSON file into an ordered dictionary (dict for Python 3.7+)
|
|
89
90
|
|
|
@@ -152,11 +153,11 @@ def load_and_merge_json(
|
|
|
152
153
|
|
|
153
154
|
|
|
154
155
|
def load_yaml_into_existing_dict(
|
|
155
|
-
data:
|
|
156
|
-
path: str,
|
|
156
|
+
data: MutableMapping,
|
|
157
|
+
path: Path | str,
|
|
157
158
|
encoding: str = "utf-8",
|
|
158
159
|
loaderror_if_empty: bool = True,
|
|
159
|
-
) ->
|
|
160
|
+
) -> MutableMapping:
|
|
160
161
|
"""Merge YAML file that is in dictionary form into existing dictionary.
|
|
161
162
|
|
|
162
163
|
Args:
|
|
@@ -173,11 +174,11 @@ def load_yaml_into_existing_dict(
|
|
|
173
174
|
|
|
174
175
|
|
|
175
176
|
def load_json_into_existing_dict(
|
|
176
|
-
data:
|
|
177
|
-
path: str,
|
|
177
|
+
data: MutableMapping,
|
|
178
|
+
path: Path | str,
|
|
178
179
|
encoding: str = "utf-8",
|
|
179
180
|
loaderror_if_empty: bool = True,
|
|
180
|
-
) ->
|
|
181
|
+
) -> MutableMapping:
|
|
181
182
|
"""Merge JSON file that is in dictionary form into existing dictionary.
|
|
182
183
|
|
|
183
184
|
Args:
|
|
@@ -4,17 +4,15 @@ import contextlib
|
|
|
4
4
|
import inspect
|
|
5
5
|
import logging
|
|
6
6
|
import sys
|
|
7
|
-
from collections.abc import
|
|
7
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
8
8
|
from os import getenv, makedirs, remove
|
|
9
9
|
from os.path import (
|
|
10
|
-
abspath,
|
|
11
10
|
basename,
|
|
12
11
|
dirname,
|
|
13
12
|
exists,
|
|
14
|
-
join,
|
|
15
|
-
realpath,
|
|
16
13
|
splitext,
|
|
17
14
|
)
|
|
15
|
+
from pathlib import Path
|
|
18
16
|
from shutil import rmtree
|
|
19
17
|
from tempfile import gettempdir
|
|
20
18
|
from typing import Any
|
|
@@ -33,7 +31,7 @@ class NotFoundError(Exception):
|
|
|
33
31
|
pass
|
|
34
32
|
|
|
35
33
|
|
|
36
|
-
def script_dir(pyobject: Any, follow_symlinks: bool = True) ->
|
|
34
|
+
def script_dir(pyobject: Any, follow_symlinks: bool = True) -> Path:
|
|
37
35
|
"""Get current script's directory.
|
|
38
36
|
|
|
39
37
|
Args:
|
|
@@ -43,36 +41,42 @@ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str:
|
|
|
43
41
|
Returns:
|
|
44
42
|
Current script's directory
|
|
45
43
|
"""
|
|
46
|
-
if getattr(sys, "frozen", False):
|
|
47
|
-
|
|
44
|
+
if getattr(sys, "frozen", False):
|
|
45
|
+
# Frozen (PyInstaller, etc.): Use the executable path
|
|
46
|
+
path = Path(sys.executable) # pragma: no cover
|
|
48
47
|
else:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
# Standard: Use the object's file path
|
|
49
|
+
path = Path(inspect.getfile(pyobject))
|
|
50
|
+
|
|
51
|
+
# Resolve symlinks if requested, otherwise just make absolute
|
|
52
|
+
path = path.resolve() if follow_symlinks else path.absolute()
|
|
53
|
+
|
|
54
|
+
return path.parent
|
|
53
55
|
|
|
54
56
|
|
|
55
57
|
def script_dir_plus_file(
|
|
56
58
|
filename: str, pyobject: Any, follow_symlinks: bool = True
|
|
57
|
-
) ->
|
|
59
|
+
) -> Path:
|
|
58
60
|
"""Get current script's directory and then append a filename.
|
|
59
61
|
|
|
60
62
|
Args:
|
|
61
63
|
filename: Filename to append to directory path
|
|
62
64
|
pyobject: Any Python object in the script
|
|
65
|
+
return_path: Whether to return a Path object. Defaults to returning str.
|
|
66
|
+
|
|
63
67
|
follow_symlinks: Follow symlinks or not. Defaults to True.
|
|
64
68
|
|
|
65
69
|
Returns:
|
|
66
70
|
Current script's directory and with filename appended
|
|
67
71
|
"""
|
|
68
|
-
return
|
|
72
|
+
return script_dir(pyobject, follow_symlinks) / filename
|
|
69
73
|
|
|
70
74
|
|
|
71
75
|
def get_temp_dir(
|
|
72
|
-
folder: str | None = None,
|
|
76
|
+
folder: Path | str | None = None,
|
|
73
77
|
delete_if_exists: bool = False,
|
|
74
|
-
tempdir: str | None = None,
|
|
75
|
-
) ->
|
|
78
|
+
tempdir: Path | str | None = None,
|
|
79
|
+
) -> Path:
|
|
76
80
|
"""Get a temporary directory. Looks for environment variable TEMP_DIR and
|
|
77
81
|
falls back on os.gettempdir if a root temporary directory is not supplied.
|
|
78
82
|
If a folder is supplied, creates that folder within the temporary
|
|
@@ -88,8 +92,9 @@ def get_temp_dir(
|
|
|
88
92
|
"""
|
|
89
93
|
if tempdir is None:
|
|
90
94
|
tempdir = getenv("TEMP_DIR", gettempdir())
|
|
95
|
+
tempdir = Path(tempdir)
|
|
91
96
|
if folder:
|
|
92
|
-
tempdir =
|
|
97
|
+
tempdir = tempdir / folder
|
|
93
98
|
if exists(tempdir):
|
|
94
99
|
if delete_if_exists:
|
|
95
100
|
rmtree(tempdir)
|
|
@@ -101,12 +106,12 @@ def get_temp_dir(
|
|
|
101
106
|
|
|
102
107
|
@contextlib.contextmanager
|
|
103
108
|
def temp_dir(
|
|
104
|
-
folder: str | None = None,
|
|
109
|
+
folder: Path | str | None = None,
|
|
105
110
|
delete_if_exists: bool = False,
|
|
106
111
|
delete_on_success: bool = True,
|
|
107
112
|
delete_on_failure: bool = True,
|
|
108
|
-
tempdir: str | None = None,
|
|
109
|
-
) ->
|
|
113
|
+
tempdir: Path | str | None = None,
|
|
114
|
+
) -> Iterator[Path]:
|
|
110
115
|
"""Get a temporary directory optionally with folder appended (and created
|
|
111
116
|
if it doesn't exist)
|
|
112
117
|
|
|
@@ -132,7 +137,7 @@ def temp_dir(
|
|
|
132
137
|
raise
|
|
133
138
|
|
|
134
139
|
|
|
135
|
-
def read_or_create_batch(folder:
|
|
140
|
+
def read_or_create_batch(folder: Path, batch: str | None = None) -> str:
|
|
136
141
|
"""Get batch or create it if it doesn't exist.
|
|
137
142
|
|
|
138
143
|
Args:
|
|
@@ -142,7 +147,7 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str:
|
|
|
142
147
|
Returns:
|
|
143
148
|
Batch
|
|
144
149
|
"""
|
|
145
|
-
batch_file =
|
|
150
|
+
batch_file = folder / "batch.txt"
|
|
146
151
|
if exists(batch_file):
|
|
147
152
|
batch = load_text(batch_file, strip=True)
|
|
148
153
|
logger.info(f"File BATCH = {batch}")
|
|
@@ -156,13 +161,13 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str:
|
|
|
156
161
|
|
|
157
162
|
@contextlib.contextmanager
|
|
158
163
|
def temp_dir_batch(
|
|
159
|
-
folder: str | None = None,
|
|
164
|
+
folder: Path | str | None = None,
|
|
160
165
|
delete_if_exists: bool = False,
|
|
161
166
|
delete_on_success: bool = True,
|
|
162
167
|
delete_on_failure: bool = True,
|
|
163
168
|
batch: str | None = None,
|
|
164
|
-
tempdir: str | None = None,
|
|
165
|
-
) ->
|
|
169
|
+
tempdir: Path | str | None = None,
|
|
170
|
+
) -> Iterator[dict]:
|
|
166
171
|
"""Get a temporary directory and batch id. Yields a dictionary with key
|
|
167
172
|
folder which is the temporary directory optionally with folder appended
|
|
168
173
|
(and created if it doesn't exist). In key batch is a batch code to be
|
|
@@ -219,7 +224,7 @@ def progress_storing_folder(
|
|
|
219
224
|
iterator: Iterable[dict],
|
|
220
225
|
key: str,
|
|
221
226
|
wheretostart: str | None = None,
|
|
222
|
-
) ->
|
|
227
|
+
) -> Iterator[tuple[dict, dict]]:
|
|
223
228
|
"""Store progress in folder in key folder of info dictionary parameter.
|
|
224
229
|
Yields 2 dictionaries. The first is the info dictionary. It contains in key
|
|
225
230
|
folder the folder being used to store progress and in key progress the
|
|
@@ -238,7 +243,7 @@ def progress_storing_folder(
|
|
|
238
243
|
A tuple of the form (info dictionary, next object in iterator)
|
|
239
244
|
"""
|
|
240
245
|
folder = info["folder"]
|
|
241
|
-
progress_file =
|
|
246
|
+
progress_file = folder / "progress.txt"
|
|
242
247
|
|
|
243
248
|
if not wheretostart:
|
|
244
249
|
contents = getenv("WHERETOSTART")
|
|
@@ -277,8 +282,8 @@ def progress_storing_folder(
|
|
|
277
282
|
|
|
278
283
|
@contextlib.contextmanager
|
|
279
284
|
def wheretostart_tempdir_batch(
|
|
280
|
-
folder: str, batch: str | None = None, tempdir: str | None = None
|
|
281
|
-
) ->
|
|
285
|
+
folder: Path | str, batch: str | None = None, tempdir: Path | str | None = None
|
|
286
|
+
) -> Iterator[dict]:
|
|
282
287
|
"""Get a temporary directory and batch id. Deletes any existing folder if
|
|
283
288
|
WHERETOSTART environment variable is set to RESET. Yields a dictionary with
|
|
284
289
|
key folder which is the temporary directory optionally with folder appended
|
|
@@ -311,12 +316,12 @@ def wheretostart_tempdir_batch(
|
|
|
311
316
|
|
|
312
317
|
|
|
313
318
|
def progress_storing_tempdir(
|
|
314
|
-
folder: str,
|
|
319
|
+
folder: Path | str,
|
|
315
320
|
iterator: Iterable[dict],
|
|
316
321
|
key: str,
|
|
317
322
|
batch: str | None = None,
|
|
318
|
-
tempdir: str | None = None,
|
|
319
|
-
) ->
|
|
323
|
+
tempdir: Path | str | None = None,
|
|
324
|
+
) -> Iterator[tuple[dict, dict]]:
|
|
320
325
|
"""Store progress in temporary directory. The folder persists until the
|
|
321
326
|
final iteration allowing which iteration to start at and the batch code to
|
|
322
327
|
be persisted between runs. Yields 2 dictionaries. The first contains key
|
|
@@ -344,11 +349,11 @@ def progress_storing_tempdir(
|
|
|
344
349
|
|
|
345
350
|
|
|
346
351
|
def multiple_progress_storing_tempdir(
|
|
347
|
-
folder: str,
|
|
352
|
+
folder: Path | str,
|
|
348
353
|
iterators: Sequence[Iterable[dict]],
|
|
349
354
|
keys: Sequence[str],
|
|
350
355
|
batch: str | None = None,
|
|
351
|
-
) ->
|
|
356
|
+
) -> Iterator[tuple[int, dict, dict]]:
|
|
352
357
|
"""Store progress in temporary directory. The folder persists until the
|
|
353
358
|
final iteration of the last iterator allowing which iteration to start at
|
|
354
359
|
and the batch code to be persisted between runs. Yields 2 dictionaries. The
|
|
@@ -387,7 +392,7 @@ def multiple_progress_storing_tempdir(
|
|
|
387
392
|
tempdir = info["folder"]
|
|
388
393
|
batch = info["batch"]
|
|
389
394
|
for i, key in enumerate(keys):
|
|
390
|
-
progress_file =
|
|
395
|
+
progress_file = tempdir / "progress.txt"
|
|
391
396
|
if wheretostartenv:
|
|
392
397
|
wheretostart = get_wheretostart(
|
|
393
398
|
wheretostartenv, "Environment variable", key
|
|
@@ -416,18 +421,19 @@ def multiple_progress_storing_tempdir(
|
|
|
416
421
|
|
|
417
422
|
|
|
418
423
|
def get_filename_extension_from_url(
|
|
419
|
-
url: str, second_last: bool = False, use_query: bool = False
|
|
424
|
+
url: Path | str, second_last: bool = False, use_query: bool = False
|
|
420
425
|
) -> tuple[str, str]:
|
|
421
426
|
"""Get separately filename and extension from url.
|
|
422
427
|
|
|
423
428
|
Args:
|
|
424
|
-
url: URL to download
|
|
429
|
+
url: URL or path to download
|
|
425
430
|
second_last: Get second last segment of url as well. Defaults to False.
|
|
426
431
|
use_query: Include query parameters as well. Defaults to False.
|
|
427
432
|
|
|
428
433
|
Returns:
|
|
429
434
|
Tuple of (filename, extension)
|
|
430
435
|
"""
|
|
436
|
+
url = str(url)
|
|
431
437
|
split_url = urlsplit(unquote_plus(url))
|
|
432
438
|
urlpath = split_url.path
|
|
433
439
|
last_part = basename(urlpath)
|
|
@@ -448,12 +454,12 @@ def get_filename_extension_from_url(
|
|
|
448
454
|
|
|
449
455
|
|
|
450
456
|
def get_filename_from_url(
|
|
451
|
-
url: str, second_last: bool = False, use_query: bool = False
|
|
457
|
+
url: Path | str, second_last: bool = False, use_query: bool = False
|
|
452
458
|
) -> str:
|
|
453
459
|
"""Get filename including extension from url.
|
|
454
460
|
|
|
455
461
|
Args:
|
|
456
|
-
url: URL
|
|
462
|
+
url: URL or path
|
|
457
463
|
second_last: Get second last segment of url as well. Defaults to False.
|
|
458
464
|
use_query: Include query parameters as well. Defaults to False.
|
|
459
465
|
|