kash-shell 0.3.34-py3-none-any.whl → 0.3.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/config/env_settings.py +0 -3
- kash/config/logger.py +2 -0
- kash/config/logger_basic.py +10 -1
- kash/config/settings.py +0 -12
- kash/config/setup.py +15 -0
- kash/config/text_styles.py +1 -1
- kash/config/warm_slow_imports.py +60 -0
- kash/exec/action_decorators.py +2 -2
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_items.py +4 -2
- kash/exec/llm_transforms.py +4 -0
- kash/file_storage/file_store.py +4 -0
- kash/llm_utils/llm_completion.py +115 -19
- kash/llm_utils/llms.py +8 -7
- kash/mcp/mcp_cli.py +17 -5
- kash/mcp/mcp_server_routes.py +6 -4
- kash/model/actions_model.py +17 -5
- kash/model/items_model.py +24 -12
- kash/model/params_model.py +4 -4
- kash/shell/shell_main.py +3 -14
- kash/utils/common/import_utils.py +136 -12
- kash/utils/common/s3_utils.py +89 -7
- kash/web_content/web_extract.py +0 -1
- kash/web_content/web_fetch.py +270 -98
- kash/web_gen/templates/youtube_webpage.html.jinja +3 -2
- kash/workspaces/workspaces.py +2 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.36.dist-info}/METADATA +2 -1
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.36.dist-info}/RECORD +31 -30
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.36.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.36.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.36.dist-info}/licenses/LICENSE +0 -0
kash/model/items_model.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from collections.abc import Sequence
 from copy import deepcopy
 from dataclasses import asdict, field, is_dataclass
@@ -192,9 +193,16 @@ class ItemId:
         from kash.web_content.canon_url import canonicalize_url
 
         item_id = None
-        if
+        if (
+            item.type == ItemType.resource
+            and item.format == Format.url
+            and item.url
+            and not item.source
+        ):
+            # This is a plain URL resource, so its identity is its URL.
             item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
         elif item.type == ItemType.concept and item.title:
+            # This is a concept, so its identity is its title.
             item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
         elif item.source and item.source.cacheable and item.source.operation.has_known_inputs:
             # We know the source of this and if the action was cacheable, we can create
@@ -636,8 +644,8 @@ class Item:
         pull_body_heading: bool = False,
     ) -> str:
         """
-        Get or infer a title for this item, falling back to the
-
+        Get or infer a title for this item, falling back to the URL, description or
+        body text. Optionally, include the last operation as a parenthetical at the end
         of the title. Will use "Untitled" if all else fails.
         """
         # First special case: if we are pulling the title from the body header, check
@@ -651,12 +659,9 @@ class Item:
         if not self.title and self.url:
             return abbrev_str(self.url, max_len)
 
-
-
-        # Use the title or the path if possible, falling back to description or even body text.
+        # Use semantic sources for titles. The original filename is preserved separately.
         base_title = (
             self.title
-            or filename_stem
             or self.description
             or (not self.is_binary and self.abbrev_body(max_len))
             or UNTITLED
@@ -666,7 +671,11 @@ class Item:
         # indicating the last operation, if there was one. This makes filename slugs
         # more readable.
         suffix = ""
-        if
+        if (
+            add_ops_suffix
+            and self.type.allows_op_suffix
+            and not re.search(r"step\d+", base_title)  # Just in case, never add suffix twice.
+        ):
             last_op = self.history and self.history[-1].action_name
             if last_op:
                 step_num = len(self.history) + 1 if self.history else 1
@@ -894,18 +903,19 @@ class Item:
         if action_context:
             # Default the output item type and format to the action's declared output_type
             # and format if not explicitly set.
-            if "type" not in updates:
+            if "type" not in updates and action_context.action.output_type:
                 updates["type"] = action_context.action.output_type
             # If we were not given a format override, we leave the output type the same.
             elif action_context.action.output_format:
                 # Check an overridden format and then our own format.
-                new_output_format = updates.get("format"
+                new_output_format = updates.get("format")
                 if new_output_format and action_context.action.output_format != new_output_format:
                     log.warning(
-                        "Output item format `%s` does not match declared output format `%s` for action `%s`",
+                        "Output item format `%s` does not match declared output format `%s` for action `%s` on item: %s",
                         new_output_format,
                         action_context.action.output_format,
                         action_context.action.name,
+                        self,
                     )
 
         new_item = self.new_copy_with(update_timestamp=True, **updates)
@@ -927,7 +937,9 @@ class Item:
 
         # Fall back to action title template if we have it and title wasn't explicitly set.
         if "title" not in updates:
-
+            # Avoid using filenames as titles when deriving. Prefer existing semantic title
+            # or derive from body heading/URL.
+            prev_title = self.title or self.pick_title(pull_body_heading=True)
 
             if action:
                 new_item.title = action.format_title(prev_title)
kash/model/params_model.py
CHANGED
@@ -206,10 +206,10 @@ A list of parameter declarations, possibly with default values.
 
 # These are the default models for typical use cases.
 # The user may override them with parameters.
-DEFAULT_CAREFUL_LLM = LLM.
-DEFAULT_STRUCTURED_LLM = LLM.
-DEFAULT_STANDARD_LLM = LLM.
-DEFAULT_FAST_LLM = LLM.
+DEFAULT_CAREFUL_LLM = LLM.claude_sonnet_4_5
+DEFAULT_STRUCTURED_LLM = LLM.claude_sonnet_4_5
+DEFAULT_STANDARD_LLM = LLM.claude_sonnet_4_5
+DEFAULT_FAST_LLM = LLM.claude_haiku_4_5
 
 
 # Parameters set globally such as in the workspace.
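For orientation, a minimal sketch of referencing these defaults (the import path is assumed from this package's layout; the enum members are the ones set above):

from kash.model.params_model import DEFAULT_FAST_LLM, DEFAULT_STANDARD_LLM

# Pick the cheaper model for bulk work and the standard one for drafting:
bulk_model = DEFAULT_FAST_LLM        # LLM.claude_haiku_4_5
draft_model = DEFAULT_STANDARD_LLM   # LLM.claude_sonnet_4_5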
kash/shell/shell_main.py
CHANGED
@@ -70,20 +70,9 @@ def build_parser() -> argparse.ArgumentParser:
 
 
 def _import_packages():
-
-
-
-        import uvicorn.protocols.http.h11_impl  # noqa: F401
-        import uvicorn.protocols.websockets.websockets_impl  # noqa: F401
-        import xonsh.completers.init  # noqa: F401
-        import xonsh.pyghooks  # noqa: F401
-
-        import kash.actions  # noqa: F401
-        import kash.local_server  # noqa: F401
-        import kash.local_server.local_server  # noqa: F401
-        import kash.mcp.mcp_server_sse  # noqa: F401
-    except ImportError as e:
-        log.warning(f"Error pre-importing packages: {e}")
+    from kash.config.warm_slow_imports import warm_slow_imports
+
+    warm_slow_imports(include_extras=False)
 
     imports_done_event.set()
 
kash/utils/common/import_utils.py
CHANGED
@@ -74,26 +74,108 @@ def import_recursive(
     return tallies
 
 
+def _import_modules_from_package(
+    package: types.ModuleType,
+    package_name: str,
+    max_depth: int = 1,
+    include_private: bool = True,
+    current_depth: int = 0,
+    imported_modules: dict[str, types.ModuleType] | None = None,
+) -> dict[str, types.ModuleType]:
+    """
+    Internal helper to recursively import modules from a package.
+
+    Args:
+        package: The package module to import from
+        package_name: The fully qualified name of the package
+        max_depth: Maximum recursion depth (1 = direct children only)
+        include_private: Whether to import private modules (starting with _)
+        current_depth: Current recursion depth (internal use)
+        imported_modules: Dictionary to accumulate imported modules
+
+    Returns:
+        Dictionary mapping module names to their imported module objects
+    """
+    if imported_modules is None:
+        imported_modules = {}
+
+    if current_depth >= max_depth:
+        return imported_modules
+
+    # Get the module's __path__ if it's a package
+    if not hasattr(package, "__path__"):
+        return imported_modules
+
+    try:
+        for _finder, module_name, ispkg in pkgutil.iter_modules(
+            package.__path__, f"{package_name}."
+        ):
+            # Skip private modules unless requested
+            if not include_private and module_name.split(".")[-1].startswith("_"):
+                continue
+
+            # Skip test modules - they often have special import requirements
+            # and aren't needed for warming the import cache
+            module_parts = module_name.split(".")
+            if any(
+                part in ("tests", "test", "testing", "_test", "_tests") for part in module_parts
+            ):
+                continue
+
+            # Skip already imported modules
+            if module_name in imported_modules:
+                continue
+
+            try:
+                module = importlib.import_module(module_name)
+                imported_modules[module_name] = module
+
+                # Recursively import submodules if it's a package
+                if ispkg and current_depth + 1 < max_depth:
+                    _import_modules_from_package(
+                        module,
+                        module_name,
+                        max_depth=max_depth,
+                        include_private=include_private,
+                        current_depth=current_depth + 1,
+                        imported_modules=imported_modules,
+                    )
+
+            except Exception as e:
+                # Handle various import failures gracefully
+                # This includes ImportError, pytest.Skipped, and other exceptions
+                error_type = type(e).__name__
+                if error_type not in ("ImportError", "AttributeError", "TypeError"):
+                    log.debug(f" Skipped {module_name}: {error_type}: {e}")
+                # Don't log common/expected import errors to reduce noise
+
+    except Exception as e:
+        log.warning(f"Error iterating modules in {package_name}: {e}")
+
+    return imported_modules
+
+
 def import_namespace_modules(namespace: str) -> dict[str, types.ModuleType]:
     """
     Find and import all modules or packages within a namespace package.
     Returns a dictionary mapping module names to their imported module objects.
     """
-
+    # Import the main module first
+    main_module = importlib.import_module(namespace)  # Propagate import errors
 
     # Get the package to access its __path__
-
-    if not package or not hasattr(package, "__path__"):
+    if not hasattr(main_module, "__path__"):
         raise ImportError(f"`{namespace}` is not a package or namespace package")
 
-    log.info(f"Discovering modules in `{namespace}` namespace, searching: {
+    log.info(f"Discovering modules in `{namespace}` namespace, searching: {main_module.__path__}")
+
+    # Use the common helper with depth=1 (no recursion) and include_private=True
+    modules = _import_modules_from_package(
+        main_module, namespace, max_depth=1, include_private=True
+    )
 
-    #
-    modules =
-    for _finder, module_name, _ispkg in pkgutil.iter_modules(package.__path__, f"{namespace}."):
-        module = importlib.import_module(module_name)  # Propagate import errors
-        log.info(f"Imported module: {module_name} from {module.__file__}")
-        modules[module_name] = module
+    # Add the main module itself
+    modules[namespace] = main_module
 
     log.info(f"Imported {len(modules)} modules from namespace `{namespace}`")
     return modules
@@ -106,8 +188,13 @@ def recursive_reload(
     Recursively reload all modules in the given package that match the filter function.
     Returns a list of module names that were reloaded.
 
-    :
-
+    Args:
+        package: The package to reload.
+        filter_func: A function that takes a module name and returns True if the
+            module should be reloaded.
+
+    Returns:
+        List of module names that were reloaded.
     """
     package_name = package.__name__
     modules = {
@@ -124,3 +211,40 @@ def recursive_reload(
         importlib.reload(modules[name])
 
     return module_names
+
+
+def warm_import_library(
+    library_name: str, max_depth: int = 3, include_private: bool = False
+) -> dict[str, types.ModuleType]:
+    """
+    Recursively import all submodules of a library to warm the import cache.
+    This is useful for servers where you want to pay the import cost upfront
+    rather than during request handling.
+
+    Args:
+        library_name: Name of the library to import (e.g., 'litellm', 'openai')
+        max_depth: Maximum depth to recurse into submodules
+        include_private: Whether to import private modules (starting with _)
+
+    Returns:
+        Dictionary mapping module names to their imported module objects
+    """
+    try:
+        # Import the main module first
+        main_module = importlib.import_module(library_name)
+
+        # Use the common helper for recursive imports
+        imported_modules = _import_modules_from_package(
+            main_module, library_name, max_depth=max_depth, include_private=include_private
+        )
+
+        # Add the main module itself
+        imported_modules[library_name] = main_module
+
+    except ImportError as e:
+        log.warning(f"Could not import {library_name}: {e}")
+        return {}
+
+    log.info(f"Warmed {len(imported_modules)} modules from {library_name}")
+
+    return imported_modules
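A short usage sketch of the new import helpers (the library and namespace names are illustrative; the signatures are as defined above):

from kash.utils.common.import_utils import import_namespace_modules, warm_import_library

# Pay the import cost of a heavy dependency upfront, e.g. at server startup:
warmed = warm_import_library("litellm", max_depth=3)

# Import all direct children of a namespace package (plugin-style discovery):
plugins = import_namespace_modules("kash.actions")

print(f"Warmed {len(warmed)} modules; discovered {len(plugins)} plugin modules")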
kash/utils/common/s3_utils.py
CHANGED
@@ -1,13 +1,19 @@
 from __future__ import annotations
 
+import os
 import shutil
 import subprocess
+from logging import getLogger
 from pathlib import Path
 
+from dotenv import find_dotenv, load_dotenv
 from sidematter_format.sidematter_format import Sidematter
+from strif import abbrev_str
 
 from kash.utils.common.url import Url, is_s3_url, parse_s3_url
 
+log = getLogger(__name__)
+
 
 def check_aws_cli() -> None:
     """
@@ -19,6 +25,54 @@ def check_aws_cli() -> None:
     )
 
 
+def run_aws_command(cmd: list[str]) -> subprocess.CompletedProcess[str]:
+    """
+    Run an AWS CLI command and capture output.
+    Raises a RuntimeError with stdout/stderr on failure.
+    """
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        env=os.environ,
+    )
+
+    if result.returncode != 0:
+        # Build a detailed error message
+        error_parts = [f"AWS command failed with exit code {result.returncode}"]
+        error_parts.append(f"Command: {' '.join(cmd)}")
+
+        if result.stdout:
+            error_parts.append(f"stdout: {result.stdout}")
+        if result.stderr:
+            error_parts.append(f"stderr: {result.stderr}")
+
+        raise RuntimeError("\n".join(error_parts))
+
+    return result
+
+
+def reload_aws_env_vars() -> None:
+    """
+    Fresh reload of AWS env vars from .env.local.
+    """
+
+    def aws_creds() -> set[tuple[str, str]]:
+        return {(k, abbrev_str(v, 5)) for k, v in os.environ.items() if k.startswith("AWS_")}
+
+    if len(aws_creds()) == 0:
+        dotenv_path = find_dotenv(".env.local", usecwd=True) or find_dotenv(".env", usecwd=True)
+        load_dotenv(dotenv_path, override=True)
+        if len(aws_creds()) > 0:
+            log.info(
+                "Loaded %s, found AWS credentials: %s",
+                dotenv_path,
+                aws_creds(),
+            )
+        else:
+            log.warning("No AWS credentials found in env or .env files")
+
+
 def get_s3_parent_folder(url: Url) -> Url | None:
     """
     Get the parent folder of an S3 URL, or None if not an S3 URL.
@@ -47,6 +101,7 @@ def s3_sync_to_folder(
     - For a single file: the file URL (and sidematter file/dir URLs if included).
     - For a directory: the destination parent prefix URL (non-recursive reporting).
     """
+    reload_aws_env_vars()
 
     src_path = Path(src_path)
     if not src_path.exists():
@@ -71,7 +126,7 @@ def s3_sync_to_folder(
     for p in sync_paths:
         if p.is_file():
             # Use sync with include/exclude to leverage default short-circuiting
-            subprocess.run(
+            run_aws_command(
                 [
                     "aws",
                     "s3",
@@ -82,27 +137,54 @@ def s3_sync_to_folder(
                     "*",
                     "--include",
                     p.name,
-                ],
-                check=True,
+                ]
             )
             targets.append(Url(dest_prefix + p.name))
         elif p.is_dir():
             dest_dir = dest_prefix + p.name + "/"
-
+            run_aws_command(["aws", "s3", "sync", str(p), dest_dir])
             targets.append(Url(dest_dir))
 
         return targets
     else:
         # Directory mode: sync whole directory.
-        subprocess.run(
+        run_aws_command(
             [
                 "aws",
                 "s3",
                 "sync",
                 str(src_path),
                 dest_prefix,
-            ],
-            check=True,
+            ]
         )
         targets.append(Url(dest_prefix))
         return targets
+
+
+def s3_download_file(s3_url: Url, target_path: str | Path) -> None:
+    """
+    Download a file from S3 to a local path using the AWS CLI.
+
+    Args:
+        s3_url: The S3 URL to download from (s3://bucket/path/to/file)
+        target_path: The local path to save the file to
+    """
+    reload_aws_env_vars()
+
+    if not is_s3_url(s3_url):
+        raise ValueError(f"Source must be an s3:// URL: {s3_url}")
+
+    check_aws_cli()
+
+    target_path = Path(target_path)
+
+    # Use aws s3 cp to download the file
+    run_aws_command(
+        [
+            "aws",
+            "s3",
+            "cp",
+            str(s3_url),
+            str(target_path),
+        ]
+    )