kash-shell 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. kash/actions/core/markdownify.py +5 -4
  2. kash/actions/core/readability.py +4 -4
  3. kash/actions/core/render_as_html.py +8 -6
  4. kash/actions/core/show_webpage.py +2 -2
  5. kash/actions/core/strip_html.py +2 -2
  6. kash/commands/base/basic_file_commands.py +24 -3
  7. kash/commands/base/diff_commands.py +38 -3
  8. kash/commands/base/files_command.py +5 -4
  9. kash/commands/base/reformat_command.py +1 -1
  10. kash/commands/base/show_command.py +1 -1
  11. kash/commands/extras/parse_uv_lock.py +12 -3
  12. kash/commands/workspace/selection_commands.py +1 -1
  13. kash/commands/workspace/workspace_commands.py +62 -16
  14. kash/config/env_settings.py +2 -42
  15. kash/config/logger.py +30 -25
  16. kash/config/logger_basic.py +6 -6
  17. kash/config/settings.py +23 -7
  18. kash/config/setup.py +33 -5
  19. kash/config/text_styles.py +25 -22
  20. kash/docs/load_source_code.py +1 -1
  21. kash/embeddings/cosine.py +12 -4
  22. kash/embeddings/embeddings.py +16 -6
  23. kash/embeddings/text_similarity.py +10 -4
  24. kash/exec/__init__.py +3 -0
  25. kash/exec/action_decorators.py +4 -19
  26. kash/exec/action_exec.py +46 -27
  27. kash/exec/fetch_url_metadata.py +8 -5
  28. kash/exec/importing.py +4 -4
  29. kash/exec/llm_transforms.py +2 -2
  30. kash/exec/preconditions.py +11 -19
  31. kash/exec/runtime_settings.py +134 -0
  32. kash/exec/shell_callable_action.py +5 -3
  33. kash/file_storage/file_store.py +91 -53
  34. kash/file_storage/item_file_format.py +6 -3
  35. kash/file_storage/store_filenames.py +7 -3
  36. kash/help/help_embeddings.py +2 -2
  37. kash/llm_utils/clean_headings.py +1 -1
  38. kash/{text_handling → llm_utils}/custom_sliding_transforms.py +0 -3
  39. kash/llm_utils/init_litellm.py +16 -0
  40. kash/llm_utils/llm_api_keys.py +6 -2
  41. kash/llm_utils/llm_completion.py +12 -5
  42. kash/local_server/__init__.py +1 -1
  43. kash/local_server/local_server_commands.py +2 -1
  44. kash/mcp/__init__.py +1 -1
  45. kash/mcp/mcp_cli.py +3 -2
  46. kash/mcp/mcp_server_commands.py +8 -2
  47. kash/mcp/mcp_server_routes.py +11 -12
  48. kash/media_base/media_cache.py +10 -3
  49. kash/media_base/transcription_deepgram.py +15 -2
  50. kash/model/__init__.py +1 -1
  51. kash/model/actions_model.py +9 -54
  52. kash/model/exec_model.py +79 -0
  53. kash/model/items_model.py +131 -81
  54. kash/model/operations_model.py +38 -15
  55. kash/model/paths_model.py +2 -0
  56. kash/shell/output/shell_output.py +10 -8
  57. kash/shell/shell_main.py +2 -2
  58. kash/shell/ui/shell_results.py +2 -1
  59. kash/shell/utils/exception_printing.py +2 -2
  60. kash/utils/common/format_utils.py +0 -14
  61. kash/utils/common/import_utils.py +46 -18
  62. kash/utils/common/task_stack.py +4 -15
  63. kash/utils/errors.py +14 -9
  64. kash/utils/file_utils/file_formats_model.py +61 -26
  65. kash/utils/file_utils/file_sort_filter.py +10 -3
  66. kash/utils/file_utils/filename_parsing.py +41 -16
  67. kash/{text_handling → utils/text_handling}/doc_normalization.py +23 -13
  68. kash/utils/text_handling/escape_html_tags.py +156 -0
  69. kash/{text_handling → utils/text_handling}/markdown_utils.py +82 -4
  70. kash/utils/text_handling/markdownify_utils.py +87 -0
  71. kash/{text_handling → utils/text_handling}/unified_diffs.py +1 -44
  72. kash/web_content/file_cache_utils.py +42 -34
  73. kash/web_content/local_file_cache.py +29 -12
  74. kash/web_content/web_extract.py +1 -1
  75. kash/web_content/web_extract_readabilipy.py +4 -2
  76. kash/web_content/web_fetch.py +42 -7
  77. kash/web_content/web_page_model.py +2 -1
  78. kash/web_gen/simple_webpage.py +1 -1
  79. kash/web_gen/templates/base_styles.css.jinja +139 -16
  80. kash/web_gen/templates/simple_webpage.html.jinja +1 -1
  81. kash/workspaces/__init__.py +12 -3
  82. kash/workspaces/selections.py +2 -2
  83. kash/workspaces/workspace_dirs.py +58 -0
  84. kash/workspaces/workspace_importing.py +2 -2
  85. kash/workspaces/workspace_output.py +2 -2
  86. kash/workspaces/workspaces.py +26 -90
  87. kash/xonsh_custom/load_into_xonsh.py +4 -2
  88. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/METADATA +4 -4
  89. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/RECORD +93 -89
  90. kash/shell/utils/argparse_utils.py +0 -20
  91. kash/utils/lang_utils/inflection.py +0 -18
  92. /kash/{text_handling → utils/text_handling}/markdown_render.py +0 -0
  93. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/WHEEL +0 -0
  94. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/entry_points.txt +0 -0
  95. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/licenses/LICENSE +0 -0
@@ -9,11 +9,12 @@ from kash.config.logger import get_logger
9
9
  from kash.config.settings import atomic_global_settings, global_settings
10
10
  from kash.model.items_model import Item
11
11
  from kash.model.media_model import MediaType
12
+ from kash.model.paths_model import StorePath
12
13
  from kash.utils.common.url import Url
13
14
  from kash.utils.errors import FileNotFound, InvalidInput
14
15
  from kash.utils.file_utils.file_formats_model import detect_media_type
15
16
  from kash.web_content.canon_url import canonicalize_url
16
- from kash.web_content.local_file_cache import Loadable, LocalFileCache
17
+ from kash.web_content.local_file_cache import CacheResult, Loadable, LocalFileCache
17
18
 
18
19
  log = get_logger(__name__)
19
20
 
@@ -40,7 +41,7 @@ def reset_content_cache_dir(path: Path):
40
41
 
41
42
  def cache_file(
42
43
  source: Url | Path | Loadable, global_cache: bool = False, expiration_sec: float | None = None
43
- ) -> tuple[Path, bool]:
44
+ ) -> CacheResult:
44
45
  """
45
46
  Return a local cached copy of the item. If it is an URL, content is fetched.
46
47
  If it is a Path or a Loadable, a cached copy is returned.
@@ -50,8 +51,7 @@ def cache_file(
50
51
  in which case the global cache is used.
51
52
  """
52
53
  cache = _global_content_cache if global_cache else _content_cache
53
- path, was_cached = cache.cache(source, expiration_sec)
54
- return path, was_cached
54
+ return cache.cache(source, expiration_sec)
55
55
 
56
56
 
57
57
  def cache_api_response(
@@ -64,9 +64,9 @@ def cache_api_response(
64
64
  Cache an API response. By default parse the response as JSON.
65
65
  """
66
66
  cache = _global_content_cache if global_cache else _content_cache
67
- path, was_cached = cache.cache(url, expiration_sec)
68
- result = parser(path.read_text())
69
- return result, was_cached
67
+ result = cache.cache(url, expiration_sec)
68
+ parsed_result = parser(result.content.path.read_text())
69
+ return parsed_result, result.was_cached
70
70
 
71
71
 
72
72
  def cache_resource(
@@ -74,7 +74,8 @@ def cache_resource(
74
74
  ) -> dict[MediaType, Path]:
75
75
  """
76
76
  Cache a resource item for an external local path or a URL, fetching or
77
- copying as needed. For media this may yield more than one format.
77
+ copying as needed and returning direct paths to the cached content.
78
+ For media this may yield more than one format.
78
79
  """
79
80
  from kash.exec.preconditions import is_resource
80
81
  from kash.media_base.media_services import is_media_url
@@ -83,62 +84,69 @@ def cache_resource(
83
84
  if not is_resource(item):
84
85
  raise ValueError(f"Item is not a resource: {item}")
85
86
 
86
- path = None
87
- result: dict[MediaType, Path] = {}
87
+ path: Path | None = None
88
+ results: dict[MediaType, Path] = {}
89
+ cache_result: CacheResult | None = None
90
+
91
+ # Cache the content using media or content cache.
88
92
  if item.url:
89
93
  if is_media_url(item.url):
90
- result = cache_media(item.url)
94
+ results = cache_media(item.url)
91
95
  else:
92
- path, _was_cached = cache_file(item.url, global_cache, expiration_sec)
96
+ cache_result = cache_file(item.url, global_cache, expiration_sec)
93
97
  elif item.external_path:
94
- path = Path(item.external_path)
95
- if not path.is_file():
96
- raise FileNotFound(f"External path not found: {path}")
97
- path, _was_cached = cache_file(path, global_cache, expiration_sec)
98
+ ext_path = Path(item.external_path)
99
+ if not ext_path.is_file():
100
+ raise FileNotFound(f"External path not found: {ext_path}")
101
+ cache_result = cache_file(ext_path, global_cache, expiration_sec)
98
102
  elif item.original_filename:
99
- path = Path(item.original_filename)
100
- if not path.is_file():
101
- raise FileNotFound(f"Original filename not found: {path}")
102
- path, _was_cached = cache_file(path, global_cache, expiration_sec)
103
+ orig_path = Path(item.original_filename)
104
+ if not orig_path.is_file():
105
+ raise FileNotFound(f"Original filename not found: {orig_path}")
106
+ cache_result = cache_file(orig_path, global_cache, expiration_sec)
103
107
  else:
104
108
  raise ValueError(f"Item has no URL or external path: {item}")
105
109
 
110
+ if cache_result:
111
+ path = cache_result.content.path
112
+
106
113
  # If we just have the local file path, determine its format.
107
- if not result and path:
108
- result = {detect_media_type(path): path}
114
+ if not results and path:
115
+ results = {detect_media_type(path): path}
109
116
 
110
117
  log.message(
111
118
  "Cached resource %s:\n%s",
112
119
  item.as_str_brief(),
113
120
  fmt_lines(
114
121
  f"{media_type.value}: {fmt_path(media_path)}"
115
- for media_type, media_path in result.items()
122
+ for media_type, media_path in results.items()
116
123
  ),
117
124
  )
118
125
 
119
- return result
126
+ return results
120
127
 
121
128
 
122
129
  def get_url_html(
123
130
  item: Item, global_cache: bool = False, expiration_sec: float | None = None
124
- ) -> tuple[Url, str]:
131
+ ) -> tuple[Url | StorePath, str]:
125
132
  """
126
133
  Returns the HTML content of an URL item, using the content cache,
127
134
  or the body of the item if it has a URL and HTML body.
128
135
  """
129
- from kash.exec.preconditions import has_html_body, is_url_item
130
-
131
- if not item.url:
132
- raise InvalidInput("Item must have a URL or an HTML body")
133
- url = Url(canonicalize_url(item.url))
136
+ from kash.exec.preconditions import has_html_body, is_url_resource
134
137
 
135
- if is_url_item(item):
136
- path, _was_cached = cache_file(url, global_cache, expiration_sec)
138
+ if is_url_resource(item) and item.url and not item.has_body:
139
+ # Need to fetch the content.
140
+ locator = Url(canonicalize_url(item.url))
141
+ path = cache_file(locator, global_cache, expiration_sec).content.path
137
142
  with open(path) as file:
138
143
  html_content = file.read()
139
144
  else:
140
145
  if not item.body or not has_html_body(item):
141
- raise InvalidInput("Item must have a URL or an HTML body")
146
+ raise InvalidInput("Item must be a URL resource or have an HTML body")
147
+ if not item.store_path:
148
+ raise InvalidInput("Item missing store path")
142
149
  html_content = item.body
150
+ locator = StorePath(item.store_path)
143
151
 
144
- return url, html_content
152
+ return locator, html_content
@@ -12,9 +12,10 @@ from strif import atomic_output_file, copyfile_atomic
12
12
 
13
13
  from kash.utils.common.url import Url, is_file_url, is_url, normalize_url, parse_file_url
14
14
  from kash.utils.errors import FileNotFound
15
+ from kash.utils.file_utils.file_formats import MimeType
15
16
  from kash.utils.file_utils.file_formats_model import choose_file_ext
16
17
  from kash.web_content.dir_store import DirStore
17
- from kash.web_content.web_fetch import download_url
18
+ from kash.web_content.web_fetch import HttpHeaders, download_url
18
19
 
19
20
  log = logging.getLogger(__name__)
20
21
 
@@ -73,9 +74,26 @@ An item that can be cached as a file.
73
74
  """
74
75
 
75
76
 
76
- def _suffix_for(cacheable: Cacheable) -> str | None:
77
+ @dataclass(frozen=True)
78
+ class CacheContent:
79
+ """
80
+ An item in the local file cache. If it was a cache miss for a web-fetched URL,
81
+ also has HTTP headers.
82
+ """
83
+
84
+ path: Path
85
+ headers: HttpHeaders | None
86
+
87
+
88
+ @dataclass(frozen=True)
89
+ class CacheResult:
90
+ content: CacheContent
91
+ was_cached: bool
92
+
93
+
94
+ def _suffix_for(cacheable: Cacheable, mime_type: MimeType | None = None) -> str | None:
77
95
  key = cacheable.key if isinstance(cacheable, Loadable) else cacheable
78
- file_ext = choose_file_ext(key)
96
+ file_ext = choose_file_ext(key, mime_type)
79
97
  return file_ext.dot_ext if file_ext else None
80
98
 
81
99
 
@@ -135,7 +153,7 @@ class LocalFileCache(DirStore):
135
153
  if backup_url and mode in (WebCacheMode.TEST, WebCacheMode.UPDATE):
136
154
  self._restore(backup_url)
137
155
 
138
- def _load_source(self, source: Cacheable) -> Path:
156
+ def _load_source(self, source: Cacheable) -> CacheContent:
139
157
  """
140
158
  Load or compute the given source and save it to the cache.
141
159
  """
@@ -147,6 +165,7 @@ class LocalFileCache(DirStore):
147
165
  suffix = _suffix_for(source)
148
166
  cache_path = self.path_for(key, folder=self.folder, suffix=_suffix_for(source))
149
167
 
168
+ headers = None
150
169
  if isinstance(source, Path) or (isinstance(source, str) and is_file_url(source)):
151
170
  # Local file or file:// URL.
152
171
  url_or_path = source
@@ -165,7 +184,8 @@ class LocalFileCache(DirStore):
165
184
  # URL.
166
185
  url = _normalize_url(source)
167
186
  log.info("Downloading to cache: %s -> %s", url, fmt_path(cache_path))
168
- download_url(url, cache_path)
187
+ headers = download_url(url, cache_path)
188
+ log.debug("Response headers: %s", headers)
169
189
  elif isinstance(source, Loadable):
170
190
  # Arbitrary loadable. Load and save (atomically).
171
191
  with atomic_output_file(
@@ -180,7 +200,7 @@ class LocalFileCache(DirStore):
180
200
  else:
181
201
  raise ValueError(f"Invalid source: {source}")
182
202
 
183
- return cache_path
203
+ return CacheContent(cache_path, headers)
184
204
 
185
205
  def _age_in_sec(self, cache_path: Path) -> float:
186
206
  now = time.time()
@@ -210,7 +230,7 @@ class LocalFileCache(DirStore):
210
230
 
211
231
  return cache_path is not None and not self._is_expired(cache_path, expiration_sec)
212
232
 
213
- def cache(self, source: Cacheable, expiration_sec: float | None = None) -> tuple[Path, bool]:
233
+ def cache(self, source: Cacheable, expiration_sec: float | None = None) -> CacheResult:
214
234
  """
215
235
  Returns cached download path of given URL and whether it was previously cached.
216
236
  For file:// URLs does a copy.
@@ -221,13 +241,10 @@ class LocalFileCache(DirStore):
221
241
 
222
242
  if cache_path and not self._is_expired(cache_path, expiration_sec):
223
243
  log.info("URL in cache, not fetching: %s: %s", key, fmt_path(cache_path))
224
- return cache_path, True
244
+ return CacheResult(CacheContent(cache_path, None), True)
225
245
  else:
226
246
  log.info("Caching new copy: %s", key)
227
- return (
228
- self._load_source(source),
229
- False,
230
- )
247
+ return CacheResult(self._load_source(source), False)
231
248
 
232
249
  def backup(self) -> None:
233
250
  if not self.backup_url:
@@ -22,7 +22,7 @@ def fetch_extract(
22
22
  """
23
23
  expiration_sec = 0 if refetch else None
24
24
  if use_cache:
25
- path, _was_cached = cache_file(url, expiration_sec=expiration_sec)
25
+ path = cache_file(url, expiration_sec=expiration_sec).content.path
26
26
  with open(path, "rb") as file:
27
27
  content = file.read()
28
28
  page_data = extractor(url, content)
@@ -1,9 +1,11 @@
1
+ from pathlib import Path
2
+
1
3
  from kash.utils.common.url import Url
2
4
  from kash.utils.errors import InvalidInput
3
5
  from kash.web_content.web_page_model import WebPageData
4
6
 
5
7
 
6
- def extract_text_readabilipy(url: Url, html: str) -> WebPageData:
8
+ def extract_text_readabilipy(locator: Url | Path, html: str) -> WebPageData:
7
9
  """
8
10
  Extracts text from HTML using readability.
9
11
  This requires Node readability. Justext is an alternative and seems good for
@@ -16,7 +18,7 @@ def extract_text_readabilipy(url: Url, html: str) -> WebPageData:
16
18
  raise InvalidInput("No clean HTML found")
17
19
 
18
20
  return WebPageData(
19
- url=url,
21
+ locator=locator,
20
22
  title=result["title"],
21
23
  byline=result["byline"],
22
24
  clean_html=result["content"],
@@ -1,14 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from dataclasses import dataclass
5
+ from functools import cached_property
2
6
  from pathlib import Path
3
- from typing import Any
7
+ from typing import TYPE_CHECKING, Any
4
8
  from urllib.parse import urlparse
5
9
 
6
- import httpx
7
10
  from strif import atomic_output_file, copyfile_atomic
8
- from tqdm import tqdm
9
11
 
10
12
  from kash.config.env_settings import KashEnv
11
13
  from kash.utils.common.url import Url
14
+ from kash.utils.file_utils.file_formats import MimeType
15
+
16
+ if TYPE_CHECKING:
17
+ from httpx import Client, Response
12
18
 
13
19
  log = logging.getLogger(__name__)
14
20
 
@@ -30,11 +36,13 @@ def fetch_url(
30
36
  timeout: int = DEFAULT_TIMEOUT,
31
37
  auth: Any | None = None,
32
38
  headers: dict[str, str] | None = None,
33
- ) -> httpx.Response:
39
+ ) -> Response:
34
40
  """
35
41
  Fetch a URL using httpx with logging and reasonable defaults.
36
42
  Raise httpx.HTTPError for non-2xx responses.
37
43
  """
44
+ import httpx
45
+
38
46
  with httpx.Client(
39
47
  follow_redirects=True,
40
48
  timeout=timeout,
@@ -48,36 +56,60 @@ def fetch_url(
48
56
  return response
49
57
 
50
58
 
59
+ @dataclass(frozen=True)
60
+ class HttpHeaders:
61
+ """
62
+ HTTP response headers.
63
+ """
64
+
65
+ headers: dict[str, str]
66
+
67
+ @cached_property
68
+ def mime_type(self) -> MimeType | None:
69
+ """Get content type header, if available."""
70
+ for key, value in self.headers.items():
71
+ if key.lower() == "content-type":
72
+ return MimeType(value)
73
+ return None
74
+
75
+
51
76
  def download_url(
52
77
  url: Url,
53
78
  target_filename: str | Path,
54
- session: httpx.Client | None = None,
79
+ session: Client | None = None,
55
80
  show_progress: bool = False,
56
81
  timeout: int = DEFAULT_TIMEOUT,
57
82
  auth: Any | None = None,
58
83
  headers: dict[str, str] | None = None,
59
- ) -> None:
84
+ ) -> HttpHeaders | None:
60
85
  """
61
86
  Download given file, optionally with progress bar, streaming to a target file.
62
87
  Also handles file:// and s3:// URLs. Output file is created atomically.
63
88
  Raise httpx.HTTPError for non-2xx responses.
89
+ Returns response headers for HTTP/HTTPS requests, None for other URL types.
64
90
  """
91
+ import httpx
92
+ from tqdm import tqdm
93
+
65
94
  target_filename = str(target_filename)
66
95
  parsed_url = urlparse(url)
67
96
  if show_progress:
68
97
  log.info("%s", url)
69
98
 
70
99
  if parsed_url.scheme == "file" or parsed_url.scheme == "":
71
- copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename)
100
+ copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename, make_parents=True)
101
+ return None
72
102
  elif parsed_url.scheme == "s3":
73
103
  import boto3 # pyright: ignore
74
104
 
75
105
  s3 = boto3.resource("s3")
76
106
  s3_path = parsed_url.path.lstrip("/")
77
107
  s3.Bucket(parsed_url.netloc).download_file(s3_path, target_filename)
108
+ return None
78
109
  else:
79
110
  client = session or httpx.Client(follow_redirects=True, timeout=timeout)
80
111
  response: httpx.Response | None = None
112
+ response_headers: dict[str, str] | None = None
81
113
  try:
82
114
  headers = headers or default_headers()
83
115
  log.debug("download_url: using headers: %s", headers)
@@ -90,6 +122,7 @@ def download_url(
90
122
  headers=headers,
91
123
  ) as response:
92
124
  response.raise_for_status()
125
+ response_headers = dict(response.headers)
93
126
  total_size = int(response.headers.get("content-length", "0"))
94
127
 
95
128
  with atomic_output_file(target_filename, make_parents=True) as temp_filename:
@@ -107,3 +140,5 @@ def download_url(
107
140
  client.close()
108
141
  if response:
109
142
  response.raise_for_status() # In case of errors during streaming
143
+
144
+ return HttpHeaders(response_headers) if response_headers else None
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  from typing import Protocol
2
3
 
3
4
  from prettyfmt import abbrev_obj
@@ -12,7 +13,7 @@ class WebPageData:
12
13
  Data about a web page, including URL, title and optionally description and extracted content.
13
14
  """
14
15
 
15
- url: Url
16
+ locator: Url | Path
16
17
  title: str | None = None
17
18
  byline: str | None = None
18
19
  description: str | None = None
@@ -15,7 +15,7 @@ def simple_webpage_render(
15
15
  return render_web_template(
16
16
  template_filename=page_template,
17
17
  data={
18
- "title": item.title,
18
+ "title": item.abbrev_title(),
19
19
  "add_title_h1": add_title_h1,
20
20
  "content_html": item.body_as_html(),
21
21
  "thumbnail_url": item.thumbnail_url,