kash-shell 0.3.34__py3-none-any.whl → 0.3.36__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
kash/model/items_model.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
3
4
  from collections.abc import Sequence
4
5
  from copy import deepcopy
5
6
  from dataclasses import asdict, field, is_dataclass
@@ -192,9 +193,16 @@ class ItemId:
192
193
  from kash.web_content.canon_url import canonicalize_url
193
194
 
194
195
  item_id = None
195
- if item.type == ItemType.resource and item.format == Format.url and item.url:
196
+ if (
197
+ item.type == ItemType.resource
198
+ and item.format == Format.url
199
+ and item.url
200
+ and not item.source
201
+ ):
202
+ # This is a plain URL resource, so its identity is its URL.
196
203
  item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
197
204
  elif item.type == ItemType.concept and item.title:
205
+ # This is a concept, so its identity is its title.
198
206
  item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
199
207
  elif item.source and item.source.cacheable and item.source.operation.has_known_inputs:
200
208
  # We know the source of this and if the action was cacheable, we can create
@@ -636,8 +644,8 @@ class Item:
636
644
  pull_body_heading: bool = False,
637
645
  ) -> str:
638
646
  """
639
- Get or infer a title for this item, falling back to the filename, URL, description, or
640
- finally body text. Optionally, include the last operation as a parenthetical at the end
647
+ Get or infer a title for this item, falling back to the URL, description or
648
+ body text. Optionally, include the last operation as a parenthetical at the end
641
649
  of the title. Will use "Untitled" if all else fails.
642
650
  """
643
651
  # First special case: if we are pulling the title from the body header, check
@@ -651,12 +659,9 @@ class Item:
651
659
  if not self.title and self.url:
652
660
  return abbrev_str(self.url, max_len)
653
661
 
654
- filename_stem = self.filename_stem()
655
-
656
- # Use the title or the path if possible, falling back to description or even body text.
662
+ # Use semantic sources for titles. The original filename is preserved separately.
657
663
  base_title = (
658
664
  self.title
659
- or filename_stem
660
665
  or self.description
661
666
  or (not self.is_binary and self.abbrev_body(max_len))
662
667
  or UNTITLED
@@ -666,7 +671,11 @@ class Item:
666
671
  # indicating the last operation, if there was one. This makes filename slugs
667
672
  # more readable.
668
673
  suffix = ""
669
- if add_ops_suffix and self.type.allows_op_suffix:
674
+ if (
675
+ add_ops_suffix
676
+ and self.type.allows_op_suffix
677
+ and not re.search(r"step\d+", base_title) # Just in case, never add suffix twice.
678
+ ):
670
679
  last_op = self.history and self.history[-1].action_name
671
680
  if last_op:
672
681
  step_num = len(self.history) + 1 if self.history else 1
@@ -894,18 +903,19 @@ class Item:
894
903
  if action_context:
895
904
  # Default the output item type and format to the action's declared output_type
896
905
  # and format if not explicitly set.
897
- if "type" not in updates:
906
+ if "type" not in updates and action_context.action.output_type:
898
907
  updates["type"] = action_context.action.output_type
899
908
  # If we were not given a format override, we leave the output type the same.
900
909
  elif action_context.action.output_format:
901
910
  # Check an overridden format and then our own format.
902
- new_output_format = updates.get("format", self.format)
911
+ new_output_format = updates.get("format")
903
912
  if new_output_format and action_context.action.output_format != new_output_format:
904
913
  log.warning(
905
- "Output item format `%s` does not match declared output format `%s` for action `%s`",
914
+ "Output item format `%s` does not match declared output format `%s` for action `%s` on item: %s",
906
915
  new_output_format,
907
916
  action_context.action.output_format,
908
917
  action_context.action.name,
918
+ self,
909
919
  )
910
920
 
911
921
  new_item = self.new_copy_with(update_timestamp=True, **updates)
@@ -927,7 +937,9 @@ class Item:
927
937
 
928
938
  # Fall back to action title template if we have it and title wasn't explicitly set.
929
939
  if "title" not in updates:
930
- prev_title = self.title or (Path(self.store_path).stem if self.store_path else UNTITLED)
940
+ # Avoid using filenames as titles when deriving. Prefer existing semantic title
941
+ # or derive from body heading/URL.
942
+ prev_title = self.title or self.pick_title(pull_body_heading=True)
931
943
 
932
944
  if action:
933
945
  new_item.title = action.format_title(prev_title)
@@ -206,10 +206,10 @@ A list of parameter declarations, possibly with default values.
206
206
 
207
207
  # These are the default models for typical use cases.
208
208
  # The user may override them with parameters.
209
- DEFAULT_CAREFUL_LLM = LLM.gpt_5
210
- DEFAULT_STRUCTURED_LLM = LLM.gpt_5
211
- DEFAULT_STANDARD_LLM = LLM.gpt_5
212
- DEFAULT_FAST_LLM = LLM.gpt_5_mini
209
+ DEFAULT_CAREFUL_LLM = LLM.claude_sonnet_4_5
210
+ DEFAULT_STRUCTURED_LLM = LLM.claude_sonnet_4_5
211
+ DEFAULT_STANDARD_LLM = LLM.claude_sonnet_4_5
212
+ DEFAULT_FAST_LLM = LLM.claude_haiku_4_5
213
213
 
214
214
 
215
215
  # Parameters set globally such as in the workspace.
kash/shell/shell_main.py CHANGED
@@ -70,20 +70,9 @@ def build_parser() -> argparse.ArgumentParser:
70
70
 
71
71
 
72
72
  def _import_packages():
73
- try:
74
- # Slowest packages:
75
- import uvicorn.protocols # noqa: F401
76
- import uvicorn.protocols.http.h11_impl # noqa: F401
77
- import uvicorn.protocols.websockets.websockets_impl # noqa: F401
78
- import xonsh.completers.init # noqa: F401
79
- import xonsh.pyghooks # noqa: F401
80
-
81
- import kash.actions # noqa: F401
82
- import kash.local_server # noqa: F401
83
- import kash.local_server.local_server # noqa: F401
84
- import kash.mcp.mcp_server_sse # noqa: F401
85
- except ImportError as e:
86
- log.warning(f"Error pre-importing packages: {e}")
73
+ from kash.config.warm_slow_imports import warm_slow_imports
74
+
75
+ warm_slow_imports(include_extras=False)
87
76
 
88
77
  imports_done_event.set()
89
78
 
@@ -74,26 +74,108 @@ def import_recursive(
74
74
  return tallies
75
75
 
76
76
 
77
+ def _import_modules_from_package(
78
+ package: types.ModuleType,
79
+ package_name: str,
80
+ max_depth: int = 1,
81
+ include_private: bool = True,
82
+ current_depth: int = 0,
83
+ imported_modules: dict[str, types.ModuleType] | None = None,
84
+ ) -> dict[str, types.ModuleType]:
85
+ """
86
+ Internal helper to recursively import modules from a package.
87
+
88
+ Args:
89
+ package: The package module to import from
90
+ package_name: The fully qualified name of the package
91
+ max_depth: Maximum recursion depth (1 = direct children only)
92
+ include_private: Whether to import private modules (starting with _)
93
+ current_depth: Current recursion depth (internal use)
94
+ imported_modules: Dictionary to accumulate imported modules
95
+
96
+ Returns:
97
+ Dictionary mapping module names to their imported module objects
98
+ """
99
+ if imported_modules is None:
100
+ imported_modules = {}
101
+
102
+ if current_depth >= max_depth:
103
+ return imported_modules
104
+
105
+ # Get the module's __path__ if it's a package
106
+ if not hasattr(package, "__path__"):
107
+ return imported_modules
108
+
109
+ try:
110
+ for _finder, module_name, ispkg in pkgutil.iter_modules(
111
+ package.__path__, f"{package_name}."
112
+ ):
113
+ # Skip private modules unless requested
114
+ if not include_private and module_name.split(".")[-1].startswith("_"):
115
+ continue
116
+
117
+ # Skip test modules - they often have special import requirements
118
+ # and aren't needed for warming the import cache
119
+ module_parts = module_name.split(".")
120
+ if any(
121
+ part in ("tests", "test", "testing", "_test", "_tests") for part in module_parts
122
+ ):
123
+ continue
124
+
125
+ # Skip already imported modules
126
+ if module_name in imported_modules:
127
+ continue
128
+
129
+ try:
130
+ module = importlib.import_module(module_name)
131
+ imported_modules[module_name] = module
132
+
133
+ # Recursively import submodules if it's a package
134
+ if ispkg and current_depth + 1 < max_depth:
135
+ _import_modules_from_package(
136
+ module,
137
+ module_name,
138
+ max_depth=max_depth,
139
+ include_private=include_private,
140
+ current_depth=current_depth + 1,
141
+ imported_modules=imported_modules,
142
+ )
143
+
144
+ except Exception as e:
145
+ # Handle various import failures gracefully
146
+ # This includes ImportError, pytest.Skipped, and other exceptions
147
+ error_type = type(e).__name__
148
+ if error_type not in ("ImportError", "AttributeError", "TypeError"):
149
+ log.debug(f" Skipped {module_name}: {error_type}: {e}")
150
+ # Don't log common/expected import errors to reduce noise
151
+
152
+ except Exception as e:
153
+ log.warning(f"Error iterating modules in {package_name}: {e}")
154
+
155
+ return imported_modules
156
+
157
+
77
158
  def import_namespace_modules(namespace: str) -> dict[str, types.ModuleType]:
78
159
  """
79
160
  Find and import all modules or packages within a namespace package.
80
161
  Returns a dictionary mapping module names to their imported module objects.
81
162
  """
82
- importlib.import_module(namespace) # Propagate import errors
163
+ # Import the main module first
164
+ main_module = importlib.import_module(namespace) # Propagate import errors
83
165
 
84
166
  # Get the package to access its __path__
85
- package = sys.modules.get(namespace)
86
- if not package or not hasattr(package, "__path__"):
167
+ if not hasattr(main_module, "__path__"):
87
168
  raise ImportError(f"`{namespace}` is not a package or namespace package")
88
169
 
89
- log.info(f"Discovering modules in `{namespace}` namespace, searching: {package.__path__}")
170
+ log.info(f"Discovering modules in `{namespace}` namespace, searching: {main_module.__path__}")
171
+
172
+ # Use the common helper with depth=1 (no recursion) and include_private=True
173
+ modules = _import_modules_from_package(
174
+ main_module, namespace, max_depth=1, include_private=True
175
+ )
90
176
 
91
- # Iterate through all modules in the namespace package
92
- modules = {}
93
- for _finder, module_name, _ispkg in pkgutil.iter_modules(package.__path__, f"{namespace}."):
94
- module = importlib.import_module(module_name) # Propagate import errors
95
- log.info(f"Imported module: {module_name} from {module.__file__}")
96
- modules[module_name] = module
177
+ # Add the main module itself
178
+ modules[namespace] = main_module
97
179
 
98
180
  log.info(f"Imported {len(modules)} modules from namespace `{namespace}`")
99
181
  return modules
@@ -106,8 +188,13 @@ def recursive_reload(
106
188
  Recursively reload all modules in the given package that match the filter function.
107
189
  Returns a list of module names that were reloaded.
108
190
 
109
- :param filter_func: A function that takes a module name and returns True if the
110
- module should be reloaded.
191
+ Args:
192
+ package: The package to reload.
193
+ filter_func: A function that takes a module name and returns True if the
194
+ module should be reloaded.
195
+
196
+ Returns:
197
+ List of module names that were reloaded.
111
198
  """
112
199
  package_name = package.__name__
113
200
  modules = {
@@ -124,3 +211,40 @@ def recursive_reload(
124
211
  importlib.reload(modules[name])
125
212
 
126
213
  return module_names
214
+
215
+
216
+ def warm_import_library(
217
+ library_name: str, max_depth: int = 3, include_private: bool = False
218
+ ) -> dict[str, types.ModuleType]:
219
+ """
220
+ Recursively import all submodules of a library to warm the import cache.
221
+ This is useful for servers where you want to pay the import cost upfront
222
+ rather than during request handling.
223
+
224
+ Args:
225
+ library_name: Name of the library to import (e.g., 'litellm', 'openai')
226
+ max_depth: Maximum depth to recurse into submodules
227
+ include_private: Whether to import private modules (starting with _)
228
+
229
+ Returns:
230
+ Dictionary mapping module names to their imported module objects
231
+ """
232
+ try:
233
+ # Import the main module first
234
+ main_module = importlib.import_module(library_name)
235
+
236
+ # Use the common helper for recursive imports
237
+ imported_modules = _import_modules_from_package(
238
+ main_module, library_name, max_depth=max_depth, include_private=include_private
239
+ )
240
+
241
+ # Add the main module itself
242
+ imported_modules[library_name] = main_module
243
+
244
+ except ImportError as e:
245
+ log.warning(f"Could not import {library_name}: {e}")
246
+ return {}
247
+
248
+ log.info(f"Warmed {len(imported_modules)} modules from {library_name}")
249
+
250
+ return imported_modules
@@ -1,13 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  import shutil
4
5
  import subprocess
6
+ from logging import getLogger
5
7
  from pathlib import Path
6
8
 
9
+ from dotenv import find_dotenv, load_dotenv
7
10
  from sidematter_format.sidematter_format import Sidematter
11
+ from strif import abbrev_str
8
12
 
9
13
  from kash.utils.common.url import Url, is_s3_url, parse_s3_url
10
14
 
15
+ log = getLogger(__name__)
16
+
11
17
 
12
18
  def check_aws_cli() -> None:
13
19
  """
@@ -19,6 +25,54 @@ def check_aws_cli() -> None:
19
25
  )
20
26
 
21
27
 
28
+ def run_aws_command(cmd: list[str]) -> subprocess.CompletedProcess[str]:
29
+ """
30
+ Run an AWS CLI command and capture output.
31
+ Raises a RuntimeError with stdout/stderr on failure.
32
+ """
33
+ result = subprocess.run(
34
+ cmd,
35
+ capture_output=True,
36
+ text=True,
37
+ env=os.environ,
38
+ )
39
+
40
+ if result.returncode != 0:
41
+ # Build a detailed error message
42
+ error_parts = [f"AWS command failed with exit code {result.returncode}"]
43
+ error_parts.append(f"Command: {' '.join(cmd)}")
44
+
45
+ if result.stdout:
46
+ error_parts.append(f"stdout: {result.stdout}")
47
+ if result.stderr:
48
+ error_parts.append(f"stderr: {result.stderr}")
49
+
50
+ raise RuntimeError("\n".join(error_parts))
51
+
52
+ return result
53
+
54
+
55
+ def reload_aws_env_vars() -> None:
56
+ """
57
+ Fresh reload of AWS env vars from .env.local.
58
+ """
59
+
60
+ def aws_creds() -> set[tuple[str, str]]:
61
+ return {(k, abbrev_str(v, 5)) for k, v in os.environ.items() if k.startswith("AWS_")}
62
+
63
+ if len(aws_creds()) == 0:
64
+ dotenv_path = find_dotenv(".env.local", usecwd=True) or find_dotenv(".env", usecwd=True)
65
+ load_dotenv(dotenv_path, override=True)
66
+ if len(aws_creds()) > 0:
67
+ log.info(
68
+ "Loaded %s, found AWS credentials: %s",
69
+ dotenv_path,
70
+ aws_creds(),
71
+ )
72
+ else:
73
+ log.warning("No AWS credentials found in env or .env files")
74
+
75
+
22
76
  def get_s3_parent_folder(url: Url) -> Url | None:
23
77
  """
24
78
  Get the parent folder of an S3 URL, or None if not an S3 URL.
@@ -47,6 +101,7 @@ def s3_sync_to_folder(
47
101
  - For a single file: the file URL (and sidematter file/dir URLs if included).
48
102
  - For a directory: the destination parent prefix URL (non-recursive reporting).
49
103
  """
104
+ reload_aws_env_vars()
50
105
 
51
106
  src_path = Path(src_path)
52
107
  if not src_path.exists():
@@ -71,7 +126,7 @@ def s3_sync_to_folder(
71
126
  for p in sync_paths:
72
127
  if p.is_file():
73
128
  # Use sync with include/exclude to leverage default short-circuiting
74
- subprocess.run(
129
+ run_aws_command(
75
130
  [
76
131
  "aws",
77
132
  "s3",
@@ -82,27 +137,54 @@ def s3_sync_to_folder(
82
137
  "*",
83
138
  "--include",
84
139
  p.name,
85
- ],
86
- check=True,
140
+ ]
87
141
  )
88
142
  targets.append(Url(dest_prefix + p.name))
89
143
  elif p.is_dir():
90
144
  dest_dir = dest_prefix + p.name + "/"
91
- subprocess.run(["aws", "s3", "sync", str(p), dest_dir], check=True)
145
+ run_aws_command(["aws", "s3", "sync", str(p), dest_dir])
92
146
  targets.append(Url(dest_dir))
93
147
 
94
148
  return targets
95
149
  else:
96
150
  # Directory mode: sync whole directory.
97
- subprocess.run(
151
+ run_aws_command(
98
152
  [
99
153
  "aws",
100
154
  "s3",
101
155
  "sync",
102
156
  str(src_path),
103
157
  dest_prefix,
104
- ],
105
- check=True,
158
+ ]
106
159
  )
107
160
  targets.append(Url(dest_prefix))
108
161
  return targets
162
+
163
+
164
+ def s3_download_file(s3_url: Url, target_path: str | Path) -> None:
165
+ """
166
+ Download a file from S3 to a local path using the AWS CLI.
167
+
168
+ Args:
169
+ s3_url: The S3 URL to download from (s3://bucket/path/to/file)
170
+ target_path: The local path to save the file to
171
+ """
172
+ reload_aws_env_vars()
173
+
174
+ if not is_s3_url(s3_url):
175
+ raise ValueError(f"Source must be an s3:// URL: {s3_url}")
176
+
177
+ check_aws_cli()
178
+
179
+ target_path = Path(target_path)
180
+
181
+ # Use aws s3 cp to download the file
182
+ run_aws_command(
183
+ [
184
+ "aws",
185
+ "s3",
186
+ "cp",
187
+ str(s3_url),
188
+ str(target_path),
189
+ ]
190
+ )
@@ -26,7 +26,6 @@ def fetch_page_content(
26
26
 
27
27
  Force re-fetching and updating the cache by setting `refetch` to true.
28
28
 
29
-
30
29
  For HTML and other text files, uses the `text_extractor` to extract
31
30
  clean text and page metadata.
32
31
  """