aikosh 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aikosh/__init__.py +196 -0
- aikosh/_version.py +1 -0
- aikosh/config.py +238 -0
- aikosh/datasets/__init__.py +42 -0
- aikosh/datasets/_download_helpers.py +92 -0
- aikosh/datasets/api.py +313 -0
- aikosh/datasets/journey.py +604 -0
- aikosh/environment.py +41 -0
- aikosh/exceptions.py +95 -0
- aikosh/helpers.py +141 -0
- aikosh/http_utils.py +30 -0
- aikosh/models/__init__.py +40 -0
- aikosh/models/api.py +296 -0
- aikosh/models/journey.py +385 -0
- aikosh/py.typed +0 -0
- aikosh-0.1.0.dist-info/METADATA +385 -0
- aikosh-0.1.0.dist-info/RECORD +19 -0
- aikosh-0.1.0.dist-info/WHEEL +5 -0
- aikosh-0.1.0.dist-info/top_level.txt +1 -0
aikosh/__init__.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""AIKosh platform SDK."""
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import inspect
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import aikosh
|
|
8
|
+
from aikosh._version import __version__
|
|
9
|
+
from aikosh.config import get_access_key, set_access_key, set_api_key
|
|
10
|
+
from aikosh.datasets.journey import (
|
|
11
|
+
download,
|
|
12
|
+
get_dataset_metadata_journey as get_dataset_metadata,
|
|
13
|
+
get_metadata,
|
|
14
|
+
list_directory,
|
|
15
|
+
ping,
|
|
16
|
+
to_json,
|
|
17
|
+
get_datasets_filter_info,
|
|
18
|
+
)
|
|
19
|
+
from aikosh.exceptions import enrich_api_exception
|
|
20
|
+
from aikosh.datasets.journey import list_files as _list_files_dataset
|
|
21
|
+
from aikosh.models.journey import list_files as _list_files_model
|
|
22
|
+
from aikosh.models.journey import get_models_filter_info
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_model_metadata(model_id: str, **kwargs: Any) -> dict[str, Any]:
    """Model metadata; prefer :func:`get_metadata` with ``type='model'``.

    Convenience shim that fixes the asset type to ``"model"`` and hands
    everything else to :func:`get_metadata`.

    Parameters
    ----------
    model_id:
        Identifier of the model; passed to :func:`get_metadata` unchanged.
    **kwargs:
        Extra keyword arguments forwarded verbatim to :func:`get_metadata`.

    Returns
    -------
    dict
        Whatever :func:`get_metadata` returns for the ``"model"`` asset type.
    """
    asset_type = "model"
    return get_metadata(asset_type, model_id, **kwargs)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def list_files(
    type: str,
    identifier: str,
    filters: dict[str, Any] | None = None,
    *,
    api_base_url: str | None = None,
    timeout: float = 60.0,
    transport: Any = None,
    trust_env: bool | None = None,
) -> dict[str, Any]:
    """
    List files inside a dataset or model (same entry point as ``list_directory`` / ``get_metadata`` / ``download``).

    Parameters
    ----------
    type:
        ``"dataset"`` (aliases: ``"datasets"``, ``"data"``) or ``"model"`` (alias: ``"models"``).
        Matching is case-insensitive and ignores surrounding whitespace.
    identifier:
        Dataset or model UUID. It is converted with ``str`` and stripped before use.
    filters:
        Same filters supported by the domain-specific helpers
        (``directory_path``, ``version_id``, ``page``, ``limit``).
    api_base_url, timeout, transport, trust_env:
        Forwarded unchanged to the domain-specific ``list_files`` helper.

    Returns
    -------
    dict
        ``{"status": "success", "type": ..., "identifier": ..., "<asset>_id": ..., "data": ...}``.
        The legacy ``dataset_id`` / ``model_id`` keys are preserved for backward compatibility.
        When ``data`` is a mapping containing ``externalUrl``, a ``msg`` entry is added to it
        pointing the caller at the external source.

    Raises
    ------
    APIError
        Re-raised untouched when the domain helper already produced one.
    Exception
        Any other failure -- including an unsupported ``type`` or the not-yet-implemented
        ``"metadata"`` type -- is passed through :func:`enrich_api_exception` and the
        resulting exception is raised, chained to the original.
    """
    from aikosh.exceptions import APIError

    t = (type or "").strip().lower()
    iid = str(identifier).strip()
    try:
        # Resolve the asset kind once so both kinds share a single call path.
        if t in {"dataset", "datasets", "data"}:
            asset_type, lister = "dataset", _list_files_dataset
        elif t in {"model", "models"}:
            asset_type, lister = "model", _list_files_model
        elif t == "metadata":
            raise NotImplementedError("Metadata functionality is not implemented yet.")
        else:
            raise ValueError(f"list_files() received unsupported type={t!r}. Expected 'dataset' or 'model' or 'metadata'.")

        out = lister(
            iid,
            filters,
            api_base_url=api_base_url,
            timeout=timeout,
            transport=transport,
            trust_env=trust_env,
        )
        out["type"] = asset_type
        out["identifier"] = iid

        # Only annotate mapping payloads: a missing/None/list ``data`` or an absent
        # ``source`` must not turn an otherwise successful listing into an error.
        data = out.get("data")
        if isinstance(data, dict) and "externalUrl" in data:
            data["msg"] = f'{asset_type} is onboarded from the external source : {data.get("source", "unknown")}. Kindly redirect to the mentioned URL'
        return out

    except APIError:
        # Don't re-wrap errors already enriched by the inner handlers — pass them through as-is
        raise
    except Exception as exc:
        raise enrich_api_exception(
            exc,
            operation="list_files.validate_type(type must be 'dataset' or 'model' or 'metadata'.)",
            filters_used={"type": t},
        ) from exc
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# (module import path, human-readable label) pairs scanned by ``list_functions``.
# Only the first entry is used when ``list_functions(include_submodules=False)`` is called.
_FEATURE_MODULES: tuple[tuple[str, str], ...] = (
    ("aikosh", "Top-level helpers"),
    ("aikosh.datasets", "Datasets module"),
    ("aikosh.models", "Models module"),
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _describe_callable(obj: Any) -> str:
|
|
117
|
+
"""Return the first non-empty docstring line, else '(no description)'."""
|
|
118
|
+
doc = inspect.getdoc(obj) or ""
|
|
119
|
+
for line in doc.splitlines():
|
|
120
|
+
text = line.strip()
|
|
121
|
+
if text:
|
|
122
|
+
return text
|
|
123
|
+
return "(no description)"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def list_functions(
    *,
    include_submodules: bool = True,
    print_output: bool = True,
) -> dict[str, dict[str, str]]:
    """
    List all user-facing functions exposed by the AIKosh SDK, with one-line descriptions.

    The listing is built at call time: every module named in ``_FEATURE_MODULES`` is
    imported, its ``__all__`` is walked, and each callable entry is paired with the
    first non-blank line of its docstring. Modules that fail to import, and modules
    that expose no callables, are left out of the result.

    Parameters
    ----------
    include_submodules:
        If True (default), also include ``aikosh.datasets`` and ``aikosh.models``.
        If False, only the first ``_FEATURE_MODULES`` entry is inspected.
    print_output:
        If True (default), pretty-print the listing to stdout.

    Returns
    -------
    dict
        Mapping of ``module_path -> {function_name: one_liner}``.
    """
    if include_submodules:
        targets = list(_FEATURE_MODULES)
    else:
        targets = [_FEATURE_MODULES[0]]

    catalog: dict[str, dict[str, str]] = {}
    for module_path, _ in targets:
        try:
            module = importlib.import_module(module_path)
        except Exception:
            # Best effort: a module that cannot be imported is omitted from the listing.
            continue

        exported = list(getattr(module, "__all__", []) or [])
        # Non-callable exports (e.g. ``__version__``) and missing names are skipped.
        described = {
            export: _describe_callable(member)
            for export in exported
            if (member := getattr(module, export, None)) is not None and callable(member)
        }
        if described:
            catalog[module_path] = described

    if not print_output:
        return catalog

    for module_path, described in catalog.items():
        title = module_path
        if module_path == "aikosh":
            # Only the top-level package heading carries the version.
            title = f"{module_path} (v{__version__})"
        column = max((len(export) for export in described), default=0)
        print()
        print(title)
        print("-" * len(title))
        for export, summary in described.items():
            print(f" {export:<{column}} {summary}")

    return catalog
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Public names of the top-level ``aikosh`` package. ``list_functions`` reads this
# list to decide which top-level callables to describe.
__all__ = [
    "__version__",
    "get_access_key",
    "set_access_key",
    "set_api_key",
    "get_metadata",
    "get_dataset_metadata",
    "get_model_metadata",
    "list_directory",
    "list_files",
    "download",
    "to_json",
    "ping",
    "list_functions",
    "get_datasets_filter_info",
    "get_models_filter_info",
]
|
aikosh/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string; imported by ``aikosh/__init__.py`` and exported as ``aikosh.__version__``.
__version__ = "0.1.0"
|
aikosh/config.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from threading import Lock
|
|
5
|
+
|
|
6
|
+
from aikosh.exceptions import ConfigurationError
|
|
7
|
+
|
|
8
|
+
# Environment variables consulted by ``get_access_key`` when no key was set explicitly.
_ENV_ACCESS_KEY = "AIKOSH_ACCESS_KEY"
_ENV_API_KEY = "AIKOSH_API_KEY"

# ``_lock`` is held whenever ``_access_key`` (the key stored by ``set_access_key``)
# is read or written.
_lock = Lock()
_access_key: str | None = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def set_access_key(key: str) -> None:
    """Store the API access key for subsequent requests.

    The key is converted with ``str`` and stripped of surrounding whitespace, then
    stored in the module-level ``_access_key`` while holding ``_lock``.

    Raises
    ------
    ConfigurationError
        If *key* is falsy or blank once stripped.
    """
    global _access_key
    normalized = str(key).strip() if key else ""
    if not normalized:
        raise ConfigurationError("Access key must be a non-empty string.")
    with _lock:
        _access_key = normalized
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def set_api_key(key: str) -> None:
    """Alias for :func:`set_access_key` (matches user-journey naming).

    Validation and storage are delegated entirely to :func:`set_access_key`.
    """
    return set_access_key(key)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_access_key() -> str:
    """Return the configured access key (explicit, ``AIKOSH_API_KEY``, or ``AIKOSH_ACCESS_KEY``).

    Lookup order:

    1. the key stored by :func:`set_access_key` (read under ``_lock``);
    2. the ``AIKOSH_API_KEY`` environment variable;
    3. the ``AIKOSH_ACCESS_KEY`` environment variable.

    Environment values are stripped, and blank values are treated as unset.

    Raises
    ------
    ConfigurationError
        If none of the sources yields a non-empty key.
    """
    with _lock:
        explicit = _access_key
    if explicit:
        return explicit

    env_values = (
        os.environ.get(variable, "").strip()
        for variable in (_ENV_API_KEY, _ENV_ACCESS_KEY)
    )
    from_env = next((candidate for candidate in env_values if candidate), None)
    if from_env is not None:
        return from_env

    raise ConfigurationError(
        f"No API key configured. Call set_api_key(...) / set_access_key(...), "
        f"or set {_ENV_API_KEY} / {_ENV_ACCESS_KEY}."
    )
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Allowed keys for filters, list directory, metadata, listfiles
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
# For filters-related configs / responses
# Key names permitted for filter payloads. Entries that are commented out are
# deliberately absent from the list.
# NOTE(review): presumably used to whitelist keys of filter responses -- the
# consumer is not in this module; confirm against the callers.
ALLOWED_FILTER_KEYS = [
    'data',
    'datasetTypesList',
    'id',
    'licensesList',
    'message',
    'modelTypesList',
    'name',
    'organisationList',
    'sector',
    'sectorsList',
    # 'slug',
    'status',
    'type'
]
|
|
63
|
+
|
|
64
|
+
# For "list directory" operations
# Key names permitted for "list directory" payloads. Entries that are commented
# out are deliberately absent from the list.
# NOTE(review): presumably used to whitelist response keys -- the consumer is
# not in this module; confirm against the callers.
ALLOWED_LIST_DIRECTORY_KEYS = [
    # 'createdAt',
    'currentOwner',
    'data',
    'datasetMetadata',
    'dataset_external_url',
    'dataset_source',
    'designation',
    'downloads',
    'externally_hosted',
    'id',
    'items',
    # 'imageUrl',
    'license',
    # 'logo',
    'metadata',
    'modelType',
    'model_external_url',
    'model_source',
    'name',
    'organization',
    'page',
    # 'profilepic',
    # 'recent_upvotes',
    'remark',
    'reviewStatus',
    'role',
    'sector',
    'shortDescription',
    'size',
    # 'slug',
    'sourceOrg',
    'stats',
    'status',
    'tag',
    'tags',
    'total',
    # 'trendingWeightage',
    'type',
    # 'updatedAt',
    'upvotes',
    # 'username',
    'version_score',
    'views',
    'vote'
]
|
|
111
|
+
|
|
112
|
+
# For metadata operations
# Key names permitted for metadata payloads. Entries that are commented out are
# deliberately absent from the list.
# NOTE(review): presumably used to whitelist response keys -- the consumer is
# not in this module; confirm against the callers.
# NOTE(review): 'token' is allowed here -- confirm it never carries a credential.
ALLOWED_METADATA_KEYS = [
    # 'aiReady',
    # 'approvalRequired',
    # 'approvedDate',
    'author',
    'baseDirectory',
    'bdpDatasetID',
    'createdAt',
    'createdBy',
    'createdDate',
    'currentOwner',
    'data',
    # 'dataCollectionMethod',
    'dataType',
    'datasetId',
    'datasetMetadata',
    # 'datasetOrigin',
    'datasetType',
    'dataset_external_url',
    'dataset_id',
    'dataset_source',
    # 'deleteRequest',
    # 'deleteRequestRemark',
    'designation',
    # 'downloadApprovalRequired',
    'downloads',
    # 'dynamicUrl',
    'endDate',
    # 'expiredAt',
    # 'expiredDays',
    'externally_hosted',
    'fileDownloadableUrl',
    'filePath',
    'fileSource',
    'frequency',
    'fullDescription',
    # 'geographicalCoverage',
    'id',
    'identifier',
    # 'imageUrl',
    # 'isFlaggedBySystemAdmin',
    'isFree',
    'isTryoutAvailable',
    'license',
    'likes',
    # 'logo',
    'metadata',
    'modelFormat',
    'modelId',
    'modelType',
    'model_external_url',
    'model_id',
    'model_source',
    # 'nId',
    'name',
    'organization',
    'organizationId',
    'permission',
    'permissionType',
    'platform',
    # 'platformSource',
    # 'primaryKey',
    # 'profilepic',
    'purpose',
    # 'rawPermissions',
    # 'recent_upvotes',
    'redirectionSource',
    'relatedDataset',
    'relatedModel',
    'remark',
    'reviewStatus',
    'role',
    'sector',
    'shortDescription',
    'size',
    # 'slug',
    'sourceOrg',
    'startDate',
    'stats',
    'status',
    'tag',
    'tags',
    # 'timeGranularity',
    'title',
    'token',
    # 'trainingDataset',
    # 'trainingModel',
    # 'trendingWeightage',
    'type',
    # 'updatedAt',
    # 'updatedDate',
    'uploadedBy',
    'upvotes',
    # 'username',
    'version_score',
    'views',
    'visibility',
    'vote'
]
|
|
212
|
+
|
|
213
|
+
# For listfiles operations
# Key names permitted for "list files" payloads. Entries that are commented out
# are deliberately absent from the list.
# NOTE(review): presumably used to whitelist response keys -- the consumer is
# not in this module; confirm against the callers.
# NOTE(review): 'model_id' is listed but 'dataset_id' is not, and 'type' is
# commented out -- confirm this matches what the list-files helpers return.
ALLOWED_LISTFILES_KEYS = [
    'currentPage',
    'data',
    'directories',
    'directoryInfo',
    'fileTypeCount',
    'files',
    'identifier',
    # 'key',
    'limit',
    'model_id',
    'name',
    # 'path',
    'relativeUrl',
    'status',
    'totalChildren',
    'totalDirectories',
    'totalFiles',
    'totalPages',
    # 'type',
    'msg',
    'externalUrl',
    'source'
]
|
|
238
|
+
|
|
aikosh/datasets/__init__.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Datasets domain module (staging SDK + journey helpers)."""
|
|
2
|
+
|
|
3
|
+
from aikosh.datasets.api import (
|
|
4
|
+
get_dataset_metadata,
|
|
5
|
+
get_dataset_version_download_url,
|
|
6
|
+
get_file_download_url,
|
|
7
|
+
get_filters,
|
|
8
|
+
list_datasets,
|
|
9
|
+
list_file_details,
|
|
10
|
+
require_uuid_string,
|
|
11
|
+
stream_download_url_to_path,
|
|
12
|
+
)
|
|
13
|
+
from aikosh.datasets.journey import (
|
|
14
|
+
download,
|
|
15
|
+
get_dataset_metadata_journey,
|
|
16
|
+
get_metadata,
|
|
17
|
+
list_directory,
|
|
18
|
+
list_files,
|
|
19
|
+
ping,
|
|
20
|
+
to_json,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Public names of ``aikosh.datasets``: the low-level helpers imported from
# ``aikosh.datasets.api`` followed by the higher-level ones imported from
# ``aikosh.datasets.journey``.
__all__ = [
    # low-level API
    "require_uuid_string",
    "get_filters",
    "list_datasets",
    "get_dataset_metadata",
    "list_file_details",
    "get_dataset_version_download_url",
    "get_file_download_url",
    "stream_download_url_to_path",
    # journey façade
    "list_directory",
    "list_files",
    "get_metadata",
    "get_dataset_metadata_journey",
    "download",
    "to_json",
    "ping",
]
|
|
42
|
+
|
|
aikosh/datasets/_download_helpers.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
# Matches every run of characters that is NOT an ASCII letter, digit, ".", "_" or "-".
_FILENAME_SAFE = re.compile(r"[^A-Za-z0-9._-]+")


def sanitize_filename(name: str, max_len: int = 120) -> str:
    """Reduce *name* to a conservative, filesystem-friendly file name.

    Each run of characters outside ``A-Z a-z 0-9 . _ -`` collapses to a single
    ``_``; leading and trailing ``.`` / ``_`` are then removed. If nothing is
    left, ``"download"`` is used. The result is cut to at most *max_len*
    characters.
    """
    safe = _FILENAME_SAFE.sub("_", name).strip("._") or "download"
    return safe[:max_len]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def infer_filename_from_url(download_url: str, fallback: str) -> str:
    """Derive a file name from the last path segment of *download_url*.

    The URL path is percent-decoded first, so ``.../my%20data.csv`` yields
    ``"my data.csv"``. The query string and fragment are ignored.

    Parameters
    ----------
    download_url:
        URL the file will be fetched from.
    fallback:
        Name returned when the URL path has no usable final segment.

    Returns
    -------
    str
        The decoded final path segment, or *fallback* when that segment is
        empty, ``"."`` or ``".."`` (neither can name a file).
    """
    from urllib.parse import unquote

    url_path = unquote(urlparse(download_url).path)
    # Decoding happens before taking the basename so an encoded "/" (%2F)
    # cannot smuggle a directory component into the returned name.
    leaf = Path(url_path).name
    if leaf in ("", ".", ".."):
        return fallback
    return leaf
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def filename_from_content_disposition(value: str) -> str | None:
|
|
28
|
+
parts = [p.strip() for p in value.split(";") if p.strip()]
|
|
29
|
+
for part in parts[1:]:
|
|
30
|
+
if part.lower().startswith("filename*="):
|
|
31
|
+
_, v = part.split("=", 1)
|
|
32
|
+
v = v.strip().strip('"')
|
|
33
|
+
if "''" in v:
|
|
34
|
+
v = v.split("''", 1)[1]
|
|
35
|
+
v = v.strip()
|
|
36
|
+
if v:
|
|
37
|
+
return Path(v).name
|
|
38
|
+
if part.lower().startswith("filename="):
|
|
39
|
+
_, v = part.split("=", 1)
|
|
40
|
+
v = v.strip().strip('"').strip()
|
|
41
|
+
if v:
|
|
42
|
+
return Path(v).name
|
|
43
|
+
return None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def extension_from_content_type(value: str) -> str | None:
|
|
47
|
+
ct = value.split(";", 1)[0].strip().lower()
|
|
48
|
+
if ct in {"application/zip", "application/x-zip-compressed"}:
|
|
49
|
+
return ".zip"
|
|
50
|
+
if ct in {"application/gzip", "application/x-gzip"}:
|
|
51
|
+
return ".gz"
|
|
52
|
+
if ct in {"application/x-tar"}:
|
|
53
|
+
return ".tar"
|
|
54
|
+
if ct in {"application/x-7z-compressed"}:
|
|
55
|
+
return ".7z"
|
|
56
|
+
return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def choose_output_filename(
    *,
    requested_filename: str | None,
    url_filename: str,
    fallback_name: str,
    headers: httpx.Headers,
) -> str:
    """Pick the file name a download should be saved under.

    Precedence:

    1. *requested_filename*, returned as-is when truthy;
    2. the name from the ``content-disposition`` response header;
    3. *url_filename*;
    4. *fallback_name*.

    For choices 2-4, a name without a suffix gets the extension inferred from
    the ``content-type`` header, when that header maps to one.
    """
    if requested_filename:
        return requested_filename

    disposition = headers.get("content-disposition", "")
    header_name = None
    if disposition:
        header_name = filename_from_content_disposition(disposition)
    chosen = header_name or url_filename or fallback_name

    if not Path(chosen).suffix:
        # No extension yet: let the media type supply one if it is a known archive type.
        guessed_ext = extension_from_content_type(headers.get("content-type", ""))
        if guessed_ext:
            chosen = f"{chosen}{guessed_ext}"
    return chosen
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def destination_is_directory(dest: Path, *, has_explicit_filename: bool) -> bool:
    """Report whether *dest* should be treated as a directory.

    *dest* counts as a directory when its string form ends in a path
    separator, when it already exists as a directory, or when the caller
    supplied an explicit file name. Otherwise it is still treated as a
    directory by default.

    NOTE(review): every path through this function returns ``True`` -- even an
    existing regular file is reported as a directory. Confirm this is intended.
    """
    ends_with_separator = str(dest).endswith(("/", "\\"))
    if ends_with_separator or (dest.exists() and dest.is_dir()) or has_explicit_filename:
        return True
    # No explicit filename; treat as directory by default.
    # Users can force a file path by providing `filename=` in the download request.
    return True
|
|
92
|
+
|