kash-shell 0.3.17__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. kash/actions/core/{markdownify.py → markdownify_html.py} +3 -6
  2. kash/actions/core/minify_html.py +41 -0
  3. kash/commands/base/show_command.py +11 -1
  4. kash/commands/workspace/workspace_commands.py +10 -88
  5. kash/config/colors.py +6 -2
  6. kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
  7. kash/docs/markdown/topics/a2_installation.md +17 -30
  8. kash/docs/markdown/topics/a3_getting_started.md +5 -19
  9. kash/exec/__init__.py +3 -0
  10. kash/exec/action_exec.py +3 -3
  11. kash/exec/fetch_url_items.py +109 -0
  12. kash/exec/precondition_registry.py +3 -3
  13. kash/file_storage/file_store.py +24 -1
  14. kash/file_storage/store_filenames.py +4 -0
  15. kash/help/function_param_info.py +1 -1
  16. kash/help/help_pages.py +1 -1
  17. kash/help/help_printing.py +1 -1
  18. kash/llm_utils/llm_features.py +5 -1
  19. kash/llm_utils/llms.py +18 -8
  20. kash/media_base/media_cache.py +48 -24
  21. kash/media_base/media_services.py +63 -14
  22. kash/media_base/services/local_file_media.py +9 -1
  23. kash/model/items_model.py +22 -8
  24. kash/model/media_model.py +9 -1
  25. kash/model/params_model.py +9 -3
  26. kash/utils/common/function_inspect.py +97 -1
  27. kash/utils/common/parse_docstring.py +347 -0
  28. kash/utils/common/testing.py +58 -0
  29. kash/utils/common/url_slice.py +329 -0
  30. kash/utils/file_utils/file_formats.py +1 -1
  31. kash/utils/text_handling/markdown_utils.py +424 -16
  32. kash/web_content/web_extract.py +34 -15
  33. kash/web_content/web_page_model.py +10 -1
  34. kash/web_gen/templates/base_styles.css.jinja +137 -15
  35. kash/web_gen/templates/base_webpage.html.jinja +13 -17
  36. kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
  37. kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
  38. kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
  39. kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
  40. kash/web_gen/templates/content_styles.css.jinja +13 -8
  41. kash/web_gen/templates/simple_webpage.html.jinja +15 -481
  42. kash/workspaces/workspaces.py +10 -1
  43. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/METADATA +75 -72
  44. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/RECORD +47 -40
  45. kash/exec/fetch_url_metadata.py +0 -72
  46. kash/help/docstring_utils.py +0 -111
  47. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/WHEEL +0 -0
  48. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/entry_points.txt +0 -0
  49. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/licenses/LICENSE +0 -0
@@ -39,7 +39,7 @@ def kash_precondition(func: Callable[[Item], bool]) -> Precondition:
39
39
 
40
40
  def get_all_preconditions() -> dict[str, Precondition]:
41
41
  """
42
- Returns a copy of all registered preconditions.
42
+ Returns a copy of all registered preconditions (in alphabetical order).
43
43
  """
44
- # Return a copy for safety.
45
- return dict(_preconditions.copy())
44
+ # Return a copy for safety, sorted by key.
45
+ return dict(sorted(_preconditions.copy().items()))
@@ -83,6 +83,11 @@ class FileStore(Workspace):
83
83
  def base_dir(self) -> Path:
84
84
  return self.base_dir_path
85
85
 
86
+ @property
87
+ @override
88
+ def assets_dir(self) -> Path:
89
+ return self.base_dir / "assets"
90
+
86
91
  @synchronized
87
92
  @log_calls(level="warning", if_slower_than=2.0)
88
93
  def reload(self, auto_init: bool = True):
@@ -340,6 +345,18 @@ class FileStore(Workspace):
340
345
 
341
346
  return StorePath(store_path), old_store_path
342
347
 
348
+ def target_path_for(self, item: Item) -> Path:
349
+ """
350
+ Get the absolute path for an item. Use this if you need to work around the
351
+ usual save mechanism and write directly to the store yourself, at the location
352
+ the item usually would be saved.
353
+
354
+ If you write to this path, then set the item's `external_path` to indicate it's
355
+ already saved.
356
+ """
357
+ store_path, _old_store_path = self.store_path_for(item)
358
+ return self.base_dir / store_path
359
+
343
360
  def _tmp_path_for(self, item: Item) -> StorePath:
344
361
  """
345
362
  Find a path for an item in the tmp directory.
@@ -388,6 +405,7 @@ class FileStore(Workspace):
388
405
  # If external path already exists and is within the workspace, the file was
389
406
  # already saved (e.g. by an action that wrote the item directly to the store).
390
407
  external_path = item.external_path and Path(item.external_path).resolve()
408
+ skipped_save = False
391
409
  if external_path and self._is_in_store(external_path):
392
410
  log.info("Item with external_path already saved: %s", fmt_loc(external_path))
393
411
  rel_path = external_path.relative_to(self.base_dir)
@@ -463,12 +481,17 @@ class FileStore(Workspace):
463
481
  )
464
482
  os.unlink(full_path)
465
483
  store_path = old_store_path
484
+ skipped_save = True
466
485
 
467
486
  # Update in-memory store_path only after successful save.
468
487
  item.store_path = str(store_path)
469
488
  self._id_index_item(store_path)
470
489
 
471
- log.message("%s Saved item:\n%s", EMOJI_SAVED, fmt_lines([fmt_loc(store_path)]))
490
+ if not skipped_save:
491
+ log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
492
+ else:
493
+ log.info("%s Already saved: %s", EMOJI_SAVED, fmt_loc(store_path))
494
+
472
495
  return store_path
473
496
 
474
497
  @log_calls(level="debug")
@@ -30,6 +30,10 @@ def folder_for_type(item_type: ItemType) -> Path:
30
30
 
31
31
 
32
32
  def join_suffix(base_slug: str, full_suffix: str) -> str:
33
+ """
34
+ Create a store filename by joining a base slug and a full suffix, i.e. a filename
35
+ extension with or without an item type (`.html` or `.resource.html`, for example).
36
+ """
33
37
  return f"{base_slug}.{full_suffix.lstrip('.')}"
34
38
 
35
39
 
@@ -2,9 +2,9 @@ from collections.abc import Callable
2
2
  from dataclasses import replace
3
3
  from typing import Any
4
4
 
5
- from kash.help.docstring_utils import parse_docstring
6
5
  from kash.model.params_model import ALL_COMMON_PARAMS, Param
7
6
  from kash.utils.common.function_inspect import FuncParam, inspect_function_params
7
+ from kash.utils.common.parse_docstring import parse_docstring
8
8
 
9
9
 
10
10
  def _look_up_param_docs(func: Callable[..., Any], kw_params: list[FuncParam]) -> list[Param]:
kash/help/help_pages.py CHANGED
@@ -3,7 +3,6 @@ from rich.text import Text
3
3
  from kash.config.logger import get_logger
4
4
  from kash.config.text_styles import STYLE_HINT
5
5
  from kash.docs.all_docs import DocSelection, all_docs
6
- from kash.help.docstring_utils import parse_docstring
7
6
  from kash.shell.output.shell_formatting import format_name_and_value
8
7
  from kash.shell.output.shell_output import (
9
8
  PrintHooks,
@@ -12,6 +11,7 @@ from kash.shell.output.shell_output import (
12
11
  print_hrule,
13
12
  print_markdown,
14
13
  )
14
+ from kash.utils.common.parse_docstring import parse_docstring
15
15
 
16
16
  log = get_logger(__name__)
17
17
 
@@ -6,7 +6,6 @@ from kash.docs.all_docs import DocSelection
6
6
  from kash.exec.action_registry import look_up_action_class
7
7
  from kash.exec.command_registry import CommandFunction, look_up_command
8
8
  from kash.help.assistant import assist_preamble, assistance_unstructured
9
- from kash.help.docstring_utils import parse_docstring
10
9
  from kash.help.function_param_info import annotate_param_info
11
10
  from kash.help.help_lookups import look_up_faq
12
11
  from kash.help.tldr_help import tldr_help
@@ -22,6 +21,7 @@ from kash.shell.output.shell_output import (
22
21
  print_help,
23
22
  print_markdown,
24
23
  )
24
+ from kash.utils.common.parse_docstring import parse_docstring
25
25
  from kash.utils.errors import InvalidInput, NoMatch
26
26
  from kash.utils.file_formats.chat_format import ChatHistory, ChatMessage, ChatRole
27
27
 
@@ -56,13 +56,17 @@ FEATURES = {
56
56
  }
57
57
 
58
58
  preferred_llms: list[LLMName] = [
59
+ LLM.o4_mini,
60
+ LLM.o3,
59
61
  LLM.o3_mini,
60
62
  LLM.o1_mini,
61
63
  LLM.o1,
62
64
  LLM.gpt_4o_mini,
63
65
  LLM.gpt_4o,
64
66
  LLM.gpt_4,
67
+ LLM.claude_4_sonnet,
68
+ LLM.claude_4_opus,
65
69
  LLM.claude_3_7_sonnet,
66
- LLM.claude_3_5_sonnet,
67
70
  LLM.claude_3_5_haiku,
71
+ LLM.gemini_2_5_pro_preview_05_06,
68
72
  ]
kash/llm_utils/llms.py CHANGED
@@ -13,32 +13,42 @@ class LLM(LLMName, Enum):
13
13
  """
14
14
 
15
15
  # https://platform.openai.com/docs/models
16
- o1_mini = LLMName("o1-mini")
17
- o1 = LLMName("o1")
16
+ o4_mini = LLMName("o4-mini")
18
17
  o3 = LLMName("o3")
19
18
  o3_mini = LLMName("o3-mini")
20
- o4_mini = LLMName("o4-mini")
19
+ o1 = LLMName("o1")
20
+ o1_mini = LLMName("o1-mini")
21
+ o1_pro = LLMName("o1-pro")
21
22
  o1_preview = LLMName("o1-preview")
22
- gpt_4o_mini = LLMName("gpt-4o-mini")
23
+ gpt_4_1 = LLMName("gpt-4.1")
23
24
  gpt_4o = LLMName("gpt-4o")
25
+ gpt_4o_mini = LLMName("gpt-4o-mini")
24
26
  gpt_4 = LLMName("gpt-4")
25
- gpt_4_1 = LLMName("gpt-4.1")
27
+
26
28
  gpt_4_1_mini = LLMName("gpt-4.1-mini")
27
29
  gpt_4_1_nano = LLMName("gpt-4.1-nano")
28
30
 
29
31
  # https://docs.anthropic.com/en/docs/about-claude/models/all-models
32
+ claude_4_opus = LLMName("claude-opus-4-20250514")
33
+ claude_4_sonnet = LLMName("claude-sonnet-4-20250514")
30
34
  claude_3_7_sonnet = LLMName("claude-3-7-sonnet-latest")
31
- claude_3_5_sonnet = LLMName("claude-3-5-sonnet-latest")
32
35
  claude_3_5_haiku = LLMName("claude-3-5-haiku-latest")
33
36
 
34
37
  # https://ai.google.dev/gemini-api/docs/models
35
- gemini_2_5_pro_exp_03_25 = LLMName("gemini/gemini-2.5-pro-exp-03-25")
38
+ gemini_2_5_pro_preview_06_05 = LLMName("gemini/gemini-2.5-pro-preview-06-05")
39
+ gemini_2_5_pro_preview_05_06 = LLMName("gemini/gemini-2.5-pro-preview-05-06")
40
+ gemini_2_5_pro_preview_03_25 = LLMName("gemini/gemini-2.5-pro-preview-03-25")
41
+ gemini_2_5_flash_preview = LLMName("gemini-2.5-flash-preview-05-20")
36
42
  gemini_2_0_flash = LLMName("gemini/gemini-2_0-flash")
37
43
  gemini_2_0_flash_lite = LLMName("gemini/gemini-2.0-flash-lite")
38
44
  gemini_2_0_pro_exp_02_05 = LLMName("gemini/gemini-2.0-pro-exp-02-05")
39
45
 
40
46
  # https://docs.x.ai/docs/models
41
- xai_grok_2 = LLMName("xai/grok-2-latest")
47
+ xai_grok_3 = LLMName("xai/grok-3")
48
+ xai_grok_3_fast = LLMName("xai/grok-3-fast")
49
+ xai_grok_3_mini = LLMName("xai/grok-3-mini")
50
+ xai_grok_3_mini_fast = LLMName("xai/grok-3-mini-fast")
51
+ xai_grok_2 = LLMName("xai/grok-2")
42
52
 
43
53
  # https://api-docs.deepseek.com/quick_start/pricing
44
54
  deepseek_chat = LLMName("deepseek/deepseek-chat")
@@ -3,6 +3,7 @@ from functools import cache
3
3
  from pathlib import Path
4
4
 
5
5
  from prettyfmt import fmt_lines, fmt_path
6
+ from prettyfmt.prettyfmt import fmt_size_dual
6
7
  from strif import atomic_output_file
7
8
 
8
9
  from kash.config.logger import get_logger
@@ -14,6 +15,7 @@ from kash.media_base.media_services import (
14
15
  )
15
16
  from kash.utils.common.format_utils import fmt_loc
16
17
  from kash.utils.common.url import Url, as_file_url, is_url
18
+ from kash.utils.common.url_slice import parse_url_slice
17
19
  from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
18
20
  from kash.utils.file_utils.file_formats_model import MediaType
19
21
  from kash.web_content.dir_store import DirStore
@@ -51,14 +53,16 @@ class MediaCache(DirStore):
51
53
  super().__init__(root)
52
54
 
53
55
  def _write_transcript(self, url: Url, content: str) -> None:
54
- transcript_path = self.path_for(url, suffix=SUFFIX_TRANSCRIPT)
56
+ key = str(url) # Cache key is the URL (with slice fragment if present)
57
+ transcript_path = self.path_for(key, suffix=SUFFIX_TRANSCRIPT)
55
58
  with atomic_output_file(transcript_path) as temp_output:
56
59
  with open(temp_output, "w") as f:
57
60
  f.write(content)
58
61
  log.message("Transcript saved to cache: %s", fmt_path(transcript_path))
59
62
 
60
63
  def _read_transcript(self, url: Url) -> str | None:
61
- transcript_file = self.find(url, suffix=SUFFIX_TRANSCRIPT)
64
+ key = str(url) # Cache key is the URL (with slice fragment if present)
65
+ transcript_file = self.find(key, suffix=SUFFIX_TRANSCRIPT)
62
66
  if transcript_file:
63
67
  log.message("Video transcript already in cache: %s: %s", url, fmt_path(transcript_file))
64
68
  with open(transcript_file) as f:
@@ -66,12 +70,13 @@ class MediaCache(DirStore):
66
70
  return None
67
71
 
68
72
  def _downsample_audio(self, url: Url) -> Path:
69
- downsampled_audio_file = self.find(url, suffix=SUFFIX_16KMP3)
73
+ key = str(url) # Cache key is the URL (with slice fragment if present)
74
+ downsampled_audio_file = self.find(key, suffix=SUFFIX_16KMP3)
70
75
  if not downsampled_audio_file:
71
- full_audio_file = self.find(url, suffix=SUFFIX_MP3)
76
+ full_audio_file = self.find(key, suffix=SUFFIX_MP3)
72
77
  if not full_audio_file:
73
78
  raise ValueError("No audio file found for: %s" % url)
74
- downsampled_audio_file = self.path_for(url, suffix=SUFFIX_16KMP3)
79
+ downsampled_audio_file = self.path_for(key, suffix=SUFFIX_16KMP3)
75
80
  log.message(
76
81
  "Downsampling audio: %s -> %s",
77
82
  fmt_path(full_audio_file),
@@ -95,13 +100,18 @@ class MediaCache(DirStore):
95
100
  return transcript
96
101
 
97
102
  def cache(
98
- self, url: Url, refetch=False, media_types: list[MediaType] | None = None
103
+ self, url_or_slice: Url, refetch=False, media_types: list[MediaType] | None = None
99
104
  ) -> dict[MediaType, Path]:
100
105
  """
101
106
  Cache the media files for the given media URL. Returns paths to cached copies
102
107
  for each media type (video or audio). Returns cached copies if available,
103
108
  unless `refetch` is True.
104
109
  """
110
+ key = str(url_or_slice) # Cache key is the URL (with slice fragment if present)
111
+
112
+ # Extract base URL and slice information
113
+ base_url, slice = parse_url_slice(url_or_slice)
114
+
105
115
  cached_paths: dict[MediaType, Path] = {}
106
116
 
107
117
  if not media_types:
@@ -109,14 +119,18 @@ class MediaCache(DirStore):
109
119
 
110
120
  if not refetch:
111
121
  if MediaType.audio in media_types:
112
- audio_file = self.find(url, suffix=SUFFIX_MP3)
122
+ audio_file = self.find(key, suffix=SUFFIX_MP3)
113
123
  if audio_file:
114
- log.message("Audio already in cache: %s: %s", url, fmt_path(audio_file))
124
+ log.message(
125
+ "Audio already in cache: %s: %s", url_or_slice, fmt_path(audio_file)
126
+ )
115
127
  cached_paths[MediaType.audio] = audio_file
116
128
  if MediaType.video in media_types:
117
- video_file = self.find(url, suffix=SUFFIX_MP4)
129
+ video_file = self.find(key, suffix=SUFFIX_MP4)
118
130
  if video_file:
119
- log.message("Video already in cache: %s: %s", url, fmt_path(video_file))
131
+ log.message(
132
+ "Video already in cache: %s: %s", url_or_slice, fmt_path(video_file)
133
+ )
120
134
  cached_paths[MediaType.video] = video_file
121
135
  if set(media_types).issubset(cached_paths.keys()):
122
136
  return cached_paths
@@ -127,23 +141,30 @@ class MediaCache(DirStore):
127
141
  [t.name for t in cached_paths.keys()],
128
142
  )
129
143
 
130
- log.message("Downloading media: %s", url)
131
- media_paths = download_media_by_service(url, self.root, media_types)
144
+ log.message("Downloading media: %s", url_or_slice)
145
+ media_paths = download_media_by_service(
146
+ base_url, self.root, media_types=media_types, slice=slice
147
+ )
132
148
  if MediaType.audio in media_paths:
133
- audio_path = self.path_for(url, suffix=SUFFIX_MP3)
149
+ audio_path = self.path_for(key, suffix=SUFFIX_MP3)
134
150
  os.rename(media_paths[MediaType.audio], audio_path)
135
151
  cached_paths[MediaType.audio] = audio_path
136
152
  if MediaType.video in media_paths:
137
- video_path = self.path_for(url, suffix=SUFFIX_MP4)
153
+ video_path = self.path_for(key, suffix=SUFFIX_MP4)
138
154
  os.rename(media_paths[MediaType.video], video_path)
139
155
  cached_paths[MediaType.video] = video_path
140
156
 
141
157
  log.message(
142
158
  "Downloaded media and saved to cache:\n%s",
143
- fmt_lines([f"{t.name}: {fmt_path(p)}" for (t, p) in cached_paths.items()]),
159
+ fmt_lines(
160
+ [
161
+ f"{t.name}: {fmt_size_dual(p.stat().st_size)}: {fmt_path(p)} "
162
+ for (t, p) in cached_paths.items()
163
+ ]
164
+ ),
144
165
  )
145
166
 
146
- self._downsample_audio(url)
167
+ self._downsample_audio(url_or_slice)
147
168
 
148
169
  return cached_paths
149
170
 
@@ -156,30 +177,33 @@ class MediaCache(DirStore):
156
177
  """
157
178
  if not isinstance(url_or_path, Path) and is_url(url_or_path):
158
179
  # If it is a URL, cache it locally.
159
- url = url_or_path
160
- url = canonicalize_media_url(url)
161
- if not url:
180
+ url_or_slice = url_or_path
181
+ # Canonicalize the URL (preserving slice information if present)
182
+ canon = canonicalize_media_url(url_or_slice)
183
+ if not canon:
162
184
  log.error("Unrecognized media, current services: %s", get_media_services())
163
185
  raise InvalidInput(
164
186
  "Unrecognized media URL (is this media service configured?): %s" % url_or_path
165
187
  )
188
+ url_or_slice = canon
189
+
166
190
  if not refetch:
167
- transcript = self._read_transcript(url)
191
+ transcript = self._read_transcript(url_or_slice)
168
192
  if transcript:
169
193
  return transcript
170
194
  # Cache all formats since we usually will want them.
171
- self.cache(url, refetch)
195
+ self.cache(url_or_slice, refetch)
172
196
  elif isinstance(url_or_path, Path):
173
197
  # Treat local media files as file:// URLs.
174
198
  # Don't need to cache originals but we still will cache audio and transcriptions.
175
199
  if not url_or_path.exists():
176
200
  raise FileNotFound(f"File not found: {fmt_loc(url_or_path)}")
177
- url = as_file_url(url_or_path)
201
+ url_or_slice = as_file_url(url_or_path)
178
202
  else:
179
203
  raise InvalidInput(f"Not a media URL or path: {fmt_loc(url_or_path)}")
180
204
 
181
205
  # Now do the transcription.
182
- transcript = self._do_transcription(url, language=language)
206
+ transcript = self._do_transcription(url_or_slice, language=language)
183
207
  if not transcript:
184
- raise UnexpectedError("No transcript found for: %s" % url)
208
+ raise UnexpectedError("No transcript found for: %s" % url_or_slice)
185
209
  return transcript
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from pathlib import Path
3
5
 
@@ -7,6 +9,7 @@ from strif import AtomicVar
7
9
  from kash.media_base.services.local_file_media import LocalFileMedia
8
10
  from kash.model.media_model import MediaMetadata, MediaService
9
11
  from kash.utils.common.url import Url
12
+ from kash.utils.common.url_slice import Slice, add_slice_to_url, parse_url_slice
10
13
  from kash.utils.errors import InvalidInput
11
14
  from kash.utils.file_utils.file_formats_model import MediaType
12
15
 
@@ -32,14 +35,22 @@ def register_media_service(*services: MediaService) -> None:
32
35
  _media_services.update(lambda services: services + new_services)
33
36
 
34
37
 
35
- def canonicalize_media_url(url: Url) -> Url | None:
38
+ def canonicalize_media_url(url_or_slice: Url) -> Url | None:
36
39
  """
37
40
  Return the canonical form of a media URL from a supported service (like YouTube).
41
+ Preserves any slice information in URL fragments.
38
42
  """
43
+ base_url, slice = parse_url_slice(url_or_slice)
44
+
45
+ # Canonicalize the base URL
39
46
  for service in _media_services.copy():
40
- canonical_url = service.canonicalize(url)
47
+ canonical_url = service.canonicalize(base_url)
41
48
  if canonical_url:
42
- return canonical_url
49
+ # Add slice back to canonical URL if it existed
50
+ if slice:
51
+ return add_slice_to_url(canonical_url, slice)
52
+ else:
53
+ return canonical_url
43
54
  return None
44
55
 
45
56
 
@@ -51,10 +62,11 @@ def thumbnail_media_url(url: Url) -> Url | None:
51
62
  """
52
63
  Return a URL that links to the thumbnail of the media.
53
64
  """
65
+ base_url, _ = parse_url_slice(url)
54
66
  for service in _media_services.copy():
55
- canonical_url = service.canonicalize(url)
67
+ canonical_url = service.canonicalize(base_url)
56
68
  if canonical_url:
57
- return service.thumbnail_url(url)
69
+ return service.thumbnail_url(base_url)
58
70
  return None
59
71
 
60
72
 
@@ -62,18 +74,21 @@ def timestamp_media_url(url: Url, timestamp: float) -> Url:
62
74
  """
63
75
  Return a URL that links to the media at the given timestamp.
64
76
  """
77
+ base_url, _ = parse_url_slice(url)
65
78
  for service in _media_services.copy():
66
- canonical_url = service.canonicalize(url)
79
+ canonical_url = service.canonicalize(base_url)
67
80
  if canonical_url:
68
- return service.timestamp_url(url, timestamp)
81
+ return service.timestamp_url(base_url, timestamp)
69
82
  raise InvalidInput(f"Unrecognized media URL: {url}")
70
83
 
71
84
 
72
85
  def get_media_id(url: Url | None) -> str | None:
73
86
  if not url:
74
87
  return None
88
+
89
+ base_url, _ = parse_url_slice(url)
75
90
  for service in _media_services.copy():
76
- media_id = service.get_media_id(url)
91
+ media_id = service.get_media_id(base_url)
77
92
  if media_id:
78
93
  return media_id
79
94
  return None
@@ -84,10 +99,11 @@ def get_media_metadata(url: Url) -> MediaMetadata | None:
84
99
  """
85
100
  Return metadata for the media at the given URL.
86
101
  """
102
+ base_url, _ = parse_url_slice(url)
87
103
  for service in _media_services.copy():
88
- media_id = service.get_media_id(url)
104
+ media_id = service.get_media_id(base_url)
89
105
  if media_id: # This is an actual video, not a channel etc.
90
- return service.metadata(url)
106
+ return service.metadata(base_url)
91
107
  return None
92
108
 
93
109
 
@@ -95,18 +111,51 @@ def list_channel_items(url: Url) -> list[MediaMetadata]:
95
111
  """
96
112
  List all items in a channel.
97
113
  """
114
+ base_url, _ = parse_url_slice(url)
98
115
  for service in _media_services.copy():
99
- canonical_url = service.canonicalize(url)
116
+ canonical_url = service.canonicalize(base_url)
100
117
  if canonical_url:
101
- return service.list_channel_items(url)
118
+ return service.list_channel_items(base_url)
102
119
  raise InvalidInput(f"Unrecognized media URL: {url}")
103
120
 
104
121
 
105
122
  def download_media_by_service(
106
- url: Url, target_dir: Path, media_types: list[MediaType] | None = None
123
+ url: Url,
124
+ target_dir: Path,
125
+ *,
126
+ media_types: list[MediaType] | None = None,
127
+ slice: Slice | None = None,
107
128
  ) -> dict[MediaType, Path]:
108
129
  for service in _media_services.copy():
109
130
  canonical_url = service.canonicalize(url)
110
131
  if canonical_url:
111
- return service.download_media(url, target_dir, media_types=media_types)
132
+ return service.download_media(url, target_dir, media_types=media_types, slice=slice)
112
133
  raise ValueError(f"Unrecognized media URL: {url}")
134
+
135
+
136
+ ## Tests
137
+
138
+
139
+ def test_canonicalize_media_url_preserves_slice():
140
+ """Test that canonicalize_media_url preserves URL slice fragments."""
141
+
142
+ # Test with unrecognized URLs (should return None)
143
+ # This tests the slice extraction/reconstruction logic without requiring actual files
144
+ unrecognized_url = Url("https://unknown-service.com/video#~slice=10-30")
145
+ canonical_unknown = canonicalize_media_url(unrecognized_url)
146
+ assert canonical_unknown is None
147
+
148
+ # Test typical YouTube URL with slice (would work if YouTube service was registered)
149
+ youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=10-30")
150
+ # For now this returns None since YouTube service isn't registered in this test
151
+ # but the slice extraction/reconstruction logic is tested in url_slice.py
152
+ youtube_canonical = canonicalize_media_url(youtube_url)
153
+ assert youtube_canonical is None # No YouTube service registered
154
+
155
+ # Test HH:MM:SS format slice
156
+ hms_youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=01:30-02:45")
157
+ canonical_hms = canonicalize_media_url(hms_youtube_url)
158
+ assert canonical_hms is None # No YouTube service registered
159
+
160
+ # The actual slice functionality is thoroughly tested in url_slice.py
161
+ # This test ensures canonicalize_media_url doesn't break with slice URLs
@@ -13,6 +13,7 @@ from kash.file_storage.store_filenames import parse_item_filename
13
13
  from kash.model.media_model import MediaMetadata, MediaService, MediaUrlType
14
14
  from kash.utils.common.format_utils import fmt_loc
15
15
  from kash.utils.common.url import Url
16
+ from kash.utils.common.url_slice import Slice
16
17
  from kash.utils.errors import FileNotFound, InvalidInput
17
18
  from kash.utils.file_utils.file_formats_model import FileExt, MediaType
18
19
 
@@ -73,11 +74,18 @@ class LocalFileMedia(MediaService):
73
74
 
74
75
  @override
75
76
  def download_media(
76
- self, url: Url, target_dir: Path, media_types: list[MediaType] | None = None
77
+ self,
78
+ url: Url,
79
+ target_dir: Path,
80
+ *,
81
+ media_types: list[MediaType] | None = None,
82
+ slice: Slice | None = None,
77
83
  ) -> dict[MediaType, Path]:
78
84
  path = self._parse_file_url(url)
79
85
  if not path:
80
86
  raise InvalidInput(f"Not a local file URL: {url}")
87
+ if slice:
88
+ raise NotImplementedError("Slicing currently not supported for local files")
81
89
 
82
90
  _name, _item_type, format, file_ext = parse_item_filename(path)
83
91
  os.makedirs(target_dir, exist_ok=True)
kash/model/items_model.py CHANGED
@@ -675,9 +675,21 @@ class Item:
675
675
  raise FileFormatError(f"Config item is not YAML: {self.format}: {self}")
676
676
  return from_yaml_string(self.body)
677
677
 
678
+ def get_filename(self) -> str | None:
679
+ """
680
+ Get the store or external path filename of the item, including the
681
+ file extension.
682
+ """
683
+ if self.store_path:
684
+ return Path(self.store_path).name
685
+ elif self.external_path:
686
+ return Path(self.external_path).name
687
+ else:
688
+ return None
689
+
678
690
  def get_file_ext(self) -> FileExt:
679
691
  """
680
- Get or infer file extension.
692
+ Get or infer the base file extension for the item.
681
693
  """
682
694
  if self.file_ext:
683
695
  return self.file_ext
@@ -688,7 +700,8 @@ class Item:
688
700
 
689
701
  def get_full_suffix(self) -> str:
690
702
  """
691
- Get the full file extension suffix (e.g. "note.md") for this item.
703
+ Assemble the full file extension suffix (e.g. "resource.yml") for this item.
704
+ Without a leading dot.
692
705
  """
693
706
  if self.type == ItemType.extension:
694
707
  # Python files cannot have more than one . in them.
@@ -892,12 +905,14 @@ class Item:
892
905
 
893
906
  def fmt_loc(self) -> str:
894
907
  """
895
- Formatted store path, external path, or title. For error messages etc.
908
+ Formatted store path, external path, URL, or title. Use for logging etc.
896
909
  """
897
910
  if self.store_path:
898
911
  return fmt_store_path(self.store_path)
899
912
  elif self.external_path:
900
913
  return fmt_loc(self.external_path)
914
+ elif self.url:
915
+ return fmt_loc(self.url)
901
916
  else:
902
917
  return repr(self.pick_title())
903
918
 
@@ -915,10 +930,10 @@ class Item:
915
930
  key_filter={
916
931
  "store_path": 0,
917
932
  "type": 64,
918
- "title": 64,
933
+ "format": 64,
934
+ "title": 40,
919
935
  "url": 64,
920
936
  "external_path": 64,
921
- "context": 64,
922
937
  },
923
938
  )
924
939
  + f"[{len(self.body) if self.body else 0} body chars]"
@@ -932,13 +947,12 @@ class Item:
932
947
  "store_path": 0,
933
948
  "external_path": 64,
934
949
  "type": 64,
950
+ "format": 64,
935
951
  "state": 64,
936
- "title": 64,
952
+ "title": 40,
937
953
  "url": 64,
938
- "format": 64,
939
954
  "created_at": 64,
940
955
  "body": 64,
941
- "context": 64,
942
956
  },
943
957
  )
944
958
  + f"[{len(self.body) if self.body else 0} body chars]"
kash/model/media_model.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from abc import ABC, abstractmethod
2
4
  from datetime import date
3
5
  from enum import Enum
@@ -7,6 +9,7 @@ from prettyfmt import abbrev_obj
7
9
  from pydantic.dataclasses import dataclass
8
10
 
9
11
  from kash.utils.common.url import Url
12
+ from kash.utils.common.url_slice import Slice
10
13
  from kash.utils.file_utils.file_formats_model import MediaType
11
14
 
12
15
 
@@ -109,7 +112,12 @@ class MediaService(ABC):
109
112
 
110
113
  @abstractmethod
111
114
  def download_media(
112
- self, url: Url, target_dir: Path, media_types: list[MediaType] | None = None
115
+ self,
116
+ url: Url,
117
+ target_dir: Path,
118
+ *,
119
+ media_types: list[MediaType] | None = None,
120
+ slice: Slice | None = None,
113
121
  ) -> dict[MediaType, Path]:
114
122
  """
115
123
  Download media from URL and extract to audio or video formats.