kash-shell 0.3.17__py3-none-any.whl → 0.3.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/{markdownify.py → markdownify_html.py} +3 -6
- kash/actions/core/minify_html.py +41 -0
- kash/commands/base/show_command.py +11 -1
- kash/commands/workspace/workspace_commands.py +10 -88
- kash/config/colors.py +6 -2
- kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
- kash/docs/markdown/topics/a2_installation.md +17 -30
- kash/docs/markdown/topics/a3_getting_started.md +5 -19
- kash/exec/__init__.py +3 -0
- kash/exec/action_exec.py +3 -3
- kash/exec/fetch_url_items.py +109 -0
- kash/exec/precondition_registry.py +3 -3
- kash/file_storage/file_store.py +24 -1
- kash/file_storage/store_filenames.py +4 -0
- kash/help/function_param_info.py +1 -1
- kash/help/help_pages.py +1 -1
- kash/help/help_printing.py +1 -1
- kash/llm_utils/llm_features.py +5 -1
- kash/llm_utils/llms.py +18 -8
- kash/media_base/media_cache.py +48 -24
- kash/media_base/media_services.py +63 -14
- kash/media_base/services/local_file_media.py +9 -1
- kash/model/items_model.py +22 -8
- kash/model/media_model.py +9 -1
- kash/model/params_model.py +9 -3
- kash/utils/common/function_inspect.py +97 -1
- kash/utils/common/parse_docstring.py +347 -0
- kash/utils/common/testing.py +58 -0
- kash/utils/common/url_slice.py +329 -0
- kash/utils/file_utils/file_formats.py +1 -1
- kash/utils/text_handling/markdown_utils.py +424 -16
- kash/web_content/web_extract.py +34 -15
- kash/web_content/web_page_model.py +10 -1
- kash/web_gen/templates/base_styles.css.jinja +137 -15
- kash/web_gen/templates/base_webpage.html.jinja +13 -17
- kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
- kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
- kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
- kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
- kash/web_gen/templates/content_styles.css.jinja +13 -8
- kash/web_gen/templates/simple_webpage.html.jinja +15 -481
- kash/workspaces/workspaces.py +10 -1
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/METADATA +75 -72
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/RECORD +47 -40
- kash/exec/fetch_url_metadata.py +0 -72
- kash/help/docstring_utils.py +0 -111
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/licenses/LICENSE +0 -0
|
@@ -39,7 +39,7 @@ def kash_precondition(func: Callable[[Item], bool]) -> Precondition:
|
|
|
39
39
|
|
|
40
40
|
def get_all_preconditions() -> dict[str, Precondition]:
|
|
41
41
|
"""
|
|
42
|
-
Returns a copy of all registered preconditions.
|
|
42
|
+
Returns a copy of all registered preconditions (in alphabetical order).
|
|
43
43
|
"""
|
|
44
|
-
# Return a copy for safety.
|
|
45
|
-
return dict(_preconditions.copy())
|
|
44
|
+
# Return a copy for safety, sorted by key.
|
|
45
|
+
return dict(sorted(_preconditions.copy().items()))
|
kash/file_storage/file_store.py
CHANGED
|
@@ -83,6 +83,11 @@ class FileStore(Workspace):
|
|
|
83
83
|
def base_dir(self) -> Path:
|
|
84
84
|
return self.base_dir_path
|
|
85
85
|
|
|
86
|
+
@property
|
|
87
|
+
@override
|
|
88
|
+
def assets_dir(self) -> Path:
|
|
89
|
+
return self.base_dir / "assets"
|
|
90
|
+
|
|
86
91
|
@synchronized
|
|
87
92
|
@log_calls(level="warning", if_slower_than=2.0)
|
|
88
93
|
def reload(self, auto_init: bool = True):
|
|
@@ -340,6 +345,18 @@ class FileStore(Workspace):
|
|
|
340
345
|
|
|
341
346
|
return StorePath(store_path), old_store_path
|
|
342
347
|
|
|
348
|
+
def target_path_for(self, item: Item) -> Path:
|
|
349
|
+
"""
|
|
350
|
+
Get an the absolute path for an item. Use this if you need to work around the
|
|
351
|
+
usual save mechanism and write directly to the store yourself, at the location
|
|
352
|
+
the item usually would be saved.
|
|
353
|
+
|
|
354
|
+
If you write to this path, then set the item's `external_path` to indicate it's
|
|
355
|
+
already saved.
|
|
356
|
+
"""
|
|
357
|
+
store_path, _old_store_path = self.store_path_for(item)
|
|
358
|
+
return self.base_dir / store_path
|
|
359
|
+
|
|
343
360
|
def _tmp_path_for(self, item: Item) -> StorePath:
|
|
344
361
|
"""
|
|
345
362
|
Find a path for an item in the tmp directory.
|
|
@@ -388,6 +405,7 @@ class FileStore(Workspace):
|
|
|
388
405
|
# If external path already exists and is within the workspace, the file was
|
|
389
406
|
# already saved (e.g. by an action that wrote the item directly to the store).
|
|
390
407
|
external_path = item.external_path and Path(item.external_path).resolve()
|
|
408
|
+
skipped_save = False
|
|
391
409
|
if external_path and self._is_in_store(external_path):
|
|
392
410
|
log.info("Item with external_path already saved: %s", fmt_loc(external_path))
|
|
393
411
|
rel_path = external_path.relative_to(self.base_dir)
|
|
@@ -463,12 +481,17 @@ class FileStore(Workspace):
|
|
|
463
481
|
)
|
|
464
482
|
os.unlink(full_path)
|
|
465
483
|
store_path = old_store_path
|
|
484
|
+
skipped_save = True
|
|
466
485
|
|
|
467
486
|
# Update in-memory store_path only after successful save.
|
|
468
487
|
item.store_path = str(store_path)
|
|
469
488
|
self._id_index_item(store_path)
|
|
470
489
|
|
|
471
|
-
|
|
490
|
+
if not skipped_save:
|
|
491
|
+
log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
|
|
492
|
+
else:
|
|
493
|
+
log.info("%s Already saved: %s", EMOJI_SAVED, fmt_loc(store_path))
|
|
494
|
+
|
|
472
495
|
return store_path
|
|
473
496
|
|
|
474
497
|
@log_calls(level="debug")
|
|
@@ -30,6 +30,10 @@ def folder_for_type(item_type: ItemType) -> Path:
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def join_suffix(base_slug: str, full_suffix: str) -> str:
|
|
33
|
+
"""
|
|
34
|
+
Create a store filename by joining a base slug and a full suffix, i.e. a filename
|
|
35
|
+
extension with or without an item type (`.html` or `.resource.html`, for example).
|
|
36
|
+
"""
|
|
33
37
|
return f"{base_slug}.{full_suffix.lstrip('.')}"
|
|
34
38
|
|
|
35
39
|
|
kash/help/function_param_info.py
CHANGED
|
@@ -2,9 +2,9 @@ from collections.abc import Callable
|
|
|
2
2
|
from dataclasses import replace
|
|
3
3
|
from typing import Any
|
|
4
4
|
|
|
5
|
-
from kash.help.docstring_utils import parse_docstring
|
|
6
5
|
from kash.model.params_model import ALL_COMMON_PARAMS, Param
|
|
7
6
|
from kash.utils.common.function_inspect import FuncParam, inspect_function_params
|
|
7
|
+
from kash.utils.common.parse_docstring import parse_docstring
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _look_up_param_docs(func: Callable[..., Any], kw_params: list[FuncParam]) -> list[Param]:
|
kash/help/help_pages.py
CHANGED
|
@@ -3,7 +3,6 @@ from rich.text import Text
|
|
|
3
3
|
from kash.config.logger import get_logger
|
|
4
4
|
from kash.config.text_styles import STYLE_HINT
|
|
5
5
|
from kash.docs.all_docs import DocSelection, all_docs
|
|
6
|
-
from kash.help.docstring_utils import parse_docstring
|
|
7
6
|
from kash.shell.output.shell_formatting import format_name_and_value
|
|
8
7
|
from kash.shell.output.shell_output import (
|
|
9
8
|
PrintHooks,
|
|
@@ -12,6 +11,7 @@ from kash.shell.output.shell_output import (
|
|
|
12
11
|
print_hrule,
|
|
13
12
|
print_markdown,
|
|
14
13
|
)
|
|
14
|
+
from kash.utils.common.parse_docstring import parse_docstring
|
|
15
15
|
|
|
16
16
|
log = get_logger(__name__)
|
|
17
17
|
|
kash/help/help_printing.py
CHANGED
|
@@ -6,7 +6,6 @@ from kash.docs.all_docs import DocSelection
|
|
|
6
6
|
from kash.exec.action_registry import look_up_action_class
|
|
7
7
|
from kash.exec.command_registry import CommandFunction, look_up_command
|
|
8
8
|
from kash.help.assistant import assist_preamble, assistance_unstructured
|
|
9
|
-
from kash.help.docstring_utils import parse_docstring
|
|
10
9
|
from kash.help.function_param_info import annotate_param_info
|
|
11
10
|
from kash.help.help_lookups import look_up_faq
|
|
12
11
|
from kash.help.tldr_help import tldr_help
|
|
@@ -22,6 +21,7 @@ from kash.shell.output.shell_output import (
|
|
|
22
21
|
print_help,
|
|
23
22
|
print_markdown,
|
|
24
23
|
)
|
|
24
|
+
from kash.utils.common.parse_docstring import parse_docstring
|
|
25
25
|
from kash.utils.errors import InvalidInput, NoMatch
|
|
26
26
|
from kash.utils.file_formats.chat_format import ChatHistory, ChatMessage, ChatRole
|
|
27
27
|
|
kash/llm_utils/llm_features.py
CHANGED
|
@@ -56,13 +56,17 @@ FEATURES = {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
preferred_llms: list[LLMName] = [
|
|
59
|
+
LLM.o4_mini,
|
|
60
|
+
LLM.o3,
|
|
59
61
|
LLM.o3_mini,
|
|
60
62
|
LLM.o1_mini,
|
|
61
63
|
LLM.o1,
|
|
62
64
|
LLM.gpt_4o_mini,
|
|
63
65
|
LLM.gpt_4o,
|
|
64
66
|
LLM.gpt_4,
|
|
67
|
+
LLM.claude_4_sonnet,
|
|
68
|
+
LLM.claude_4_opus,
|
|
65
69
|
LLM.claude_3_7_sonnet,
|
|
66
|
-
LLM.claude_3_5_sonnet,
|
|
67
70
|
LLM.claude_3_5_haiku,
|
|
71
|
+
LLM.gemini_2_5_pro_preview_05_06,
|
|
68
72
|
]
|
kash/llm_utils/llms.py
CHANGED
|
@@ -13,32 +13,42 @@ class LLM(LLMName, Enum):
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
# https://platform.openai.com/docs/models
|
|
16
|
-
|
|
17
|
-
o1 = LLMName("o1")
|
|
16
|
+
o4_mini = LLMName("o4-mini")
|
|
18
17
|
o3 = LLMName("o3")
|
|
19
18
|
o3_mini = LLMName("o3-mini")
|
|
20
|
-
|
|
19
|
+
o1 = LLMName("o1")
|
|
20
|
+
o1_mini = LLMName("o1-mini")
|
|
21
|
+
o1_pro = LLMName("o1-pro")
|
|
21
22
|
o1_preview = LLMName("o1-preview")
|
|
22
|
-
|
|
23
|
+
gpt_4_1 = LLMName("gpt-4.1")
|
|
23
24
|
gpt_4o = LLMName("gpt-4o")
|
|
25
|
+
gpt_4o_mini = LLMName("gpt-4o-mini")
|
|
24
26
|
gpt_4 = LLMName("gpt-4")
|
|
25
|
-
|
|
27
|
+
|
|
26
28
|
gpt_4_1_mini = LLMName("gpt-4.1-mini")
|
|
27
29
|
gpt_4_1_nano = LLMName("gpt-4.1-nano")
|
|
28
30
|
|
|
29
31
|
# https://docs.anthropic.com/en/docs/about-claude/models/all-models
|
|
32
|
+
claude_4_opus = LLMName("claude-opus-4-20250514")
|
|
33
|
+
claude_4_sonnet = LLMName("claude-sonnet-4-20250514")
|
|
30
34
|
claude_3_7_sonnet = LLMName("claude-3-7-sonnet-latest")
|
|
31
|
-
claude_3_5_sonnet = LLMName("claude-3-5-sonnet-latest")
|
|
32
35
|
claude_3_5_haiku = LLMName("claude-3-5-haiku-latest")
|
|
33
36
|
|
|
34
37
|
# https://ai.google.dev/gemini-api/docs/models
|
|
35
|
-
|
|
38
|
+
gemini_2_5_pro_preview_06_05 = LLMName("gemini/gemini-2.5-pro-preview-06-05")
|
|
39
|
+
gemini_2_5_pro_preview_05_06 = LLMName("gemini/gemini-2.5-pro-preview-05-06")
|
|
40
|
+
gemini_2_5_pro_preview_03_25 = LLMName("gemini/gemini-2.5-pro-preview-03-25")
|
|
41
|
+
gemini_2_5_flash_preview = LLMName("gemini-2.5-flash-preview-05-20")
|
|
36
42
|
gemini_2_0_flash = LLMName("gemini/gemini-2_0-flash")
|
|
37
43
|
gemini_2_0_flash_lite = LLMName("gemini/gemini-2.0-flash-lite")
|
|
38
44
|
gemini_2_0_pro_exp_02_05 = LLMName("gemini/gemini-2.0-pro-exp-02-05")
|
|
39
45
|
|
|
40
46
|
# https://docs.x.ai/docs/models
|
|
41
|
-
|
|
47
|
+
xai_grok_3 = LLMName("xai/grok-3")
|
|
48
|
+
xai_grok_3_fast = LLMName("xai/grok-3-fast")
|
|
49
|
+
xai_grok_3_mini = LLMName("xai/grok-3-mini")
|
|
50
|
+
xai_grok_3_mini_fast = LLMName("xai/grok-3-mini-fast")
|
|
51
|
+
xai_grok_2 = LLMName("xai/grok-2")
|
|
42
52
|
|
|
43
53
|
# https://api-docs.deepseek.com/quick_start/pricing
|
|
44
54
|
deepseek_chat = LLMName("deepseek/deepseek-chat")
|
kash/media_base/media_cache.py
CHANGED
|
@@ -3,6 +3,7 @@ from functools import cache
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from prettyfmt import fmt_lines, fmt_path
|
|
6
|
+
from prettyfmt.prettyfmt import fmt_size_dual
|
|
6
7
|
from strif import atomic_output_file
|
|
7
8
|
|
|
8
9
|
from kash.config.logger import get_logger
|
|
@@ -14,6 +15,7 @@ from kash.media_base.media_services import (
|
|
|
14
15
|
)
|
|
15
16
|
from kash.utils.common.format_utils import fmt_loc
|
|
16
17
|
from kash.utils.common.url import Url, as_file_url, is_url
|
|
18
|
+
from kash.utils.common.url_slice import parse_url_slice
|
|
17
19
|
from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
|
|
18
20
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
19
21
|
from kash.web_content.dir_store import DirStore
|
|
@@ -51,14 +53,16 @@ class MediaCache(DirStore):
|
|
|
51
53
|
super().__init__(root)
|
|
52
54
|
|
|
53
55
|
def _write_transcript(self, url: Url, content: str) -> None:
|
|
54
|
-
|
|
56
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
57
|
+
transcript_path = self.path_for(key, suffix=SUFFIX_TRANSCRIPT)
|
|
55
58
|
with atomic_output_file(transcript_path) as temp_output:
|
|
56
59
|
with open(temp_output, "w") as f:
|
|
57
60
|
f.write(content)
|
|
58
61
|
log.message("Transcript saved to cache: %s", fmt_path(transcript_path))
|
|
59
62
|
|
|
60
63
|
def _read_transcript(self, url: Url) -> str | None:
|
|
61
|
-
|
|
64
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
65
|
+
transcript_file = self.find(key, suffix=SUFFIX_TRANSCRIPT)
|
|
62
66
|
if transcript_file:
|
|
63
67
|
log.message("Video transcript already in cache: %s: %s", url, fmt_path(transcript_file))
|
|
64
68
|
with open(transcript_file) as f:
|
|
@@ -66,12 +70,13 @@ class MediaCache(DirStore):
|
|
|
66
70
|
return None
|
|
67
71
|
|
|
68
72
|
def _downsample_audio(self, url: Url) -> Path:
|
|
69
|
-
|
|
73
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
74
|
+
downsampled_audio_file = self.find(key, suffix=SUFFIX_16KMP3)
|
|
70
75
|
if not downsampled_audio_file:
|
|
71
|
-
full_audio_file = self.find(
|
|
76
|
+
full_audio_file = self.find(key, suffix=SUFFIX_MP3)
|
|
72
77
|
if not full_audio_file:
|
|
73
78
|
raise ValueError("No audio file found for: %s" % url)
|
|
74
|
-
downsampled_audio_file = self.path_for(
|
|
79
|
+
downsampled_audio_file = self.path_for(key, suffix=SUFFIX_16KMP3)
|
|
75
80
|
log.message(
|
|
76
81
|
"Downsampling audio: %s -> %s",
|
|
77
82
|
fmt_path(full_audio_file),
|
|
@@ -95,13 +100,18 @@ class MediaCache(DirStore):
|
|
|
95
100
|
return transcript
|
|
96
101
|
|
|
97
102
|
def cache(
|
|
98
|
-
self,
|
|
103
|
+
self, url_or_slice: Url, refetch=False, media_types: list[MediaType] | None = None
|
|
99
104
|
) -> dict[MediaType, Path]:
|
|
100
105
|
"""
|
|
101
106
|
Cache the media files for the given media URL. Returns paths to cached copies
|
|
102
107
|
for each media type (video or audio). Returns cached copies if available,
|
|
103
108
|
unless `refetch` is True.
|
|
104
109
|
"""
|
|
110
|
+
key = str(url_or_slice) # Cache key is the URL (with slice fragment if present)
|
|
111
|
+
|
|
112
|
+
# Extract base URL and slice information
|
|
113
|
+
base_url, slice = parse_url_slice(url_or_slice)
|
|
114
|
+
|
|
105
115
|
cached_paths: dict[MediaType, Path] = {}
|
|
106
116
|
|
|
107
117
|
if not media_types:
|
|
@@ -109,14 +119,18 @@ class MediaCache(DirStore):
|
|
|
109
119
|
|
|
110
120
|
if not refetch:
|
|
111
121
|
if MediaType.audio in media_types:
|
|
112
|
-
audio_file = self.find(
|
|
122
|
+
audio_file = self.find(key, suffix=SUFFIX_MP3)
|
|
113
123
|
if audio_file:
|
|
114
|
-
log.message(
|
|
124
|
+
log.message(
|
|
125
|
+
"Audio already in cache: %s: %s", url_or_slice, fmt_path(audio_file)
|
|
126
|
+
)
|
|
115
127
|
cached_paths[MediaType.audio] = audio_file
|
|
116
128
|
if MediaType.video in media_types:
|
|
117
|
-
video_file = self.find(
|
|
129
|
+
video_file = self.find(key, suffix=SUFFIX_MP4)
|
|
118
130
|
if video_file:
|
|
119
|
-
log.message(
|
|
131
|
+
log.message(
|
|
132
|
+
"Video already in cache: %s: %s", url_or_slice, fmt_path(video_file)
|
|
133
|
+
)
|
|
120
134
|
cached_paths[MediaType.video] = video_file
|
|
121
135
|
if set(media_types).issubset(cached_paths.keys()):
|
|
122
136
|
return cached_paths
|
|
@@ -127,23 +141,30 @@ class MediaCache(DirStore):
|
|
|
127
141
|
[t.name for t in cached_paths.keys()],
|
|
128
142
|
)
|
|
129
143
|
|
|
130
|
-
log.message("Downloading media: %s",
|
|
131
|
-
media_paths = download_media_by_service(
|
|
144
|
+
log.message("Downloading media: %s", url_or_slice)
|
|
145
|
+
media_paths = download_media_by_service(
|
|
146
|
+
base_url, self.root, media_types=media_types, slice=slice
|
|
147
|
+
)
|
|
132
148
|
if MediaType.audio in media_paths:
|
|
133
|
-
audio_path = self.path_for(
|
|
149
|
+
audio_path = self.path_for(key, suffix=SUFFIX_MP3)
|
|
134
150
|
os.rename(media_paths[MediaType.audio], audio_path)
|
|
135
151
|
cached_paths[MediaType.audio] = audio_path
|
|
136
152
|
if MediaType.video in media_paths:
|
|
137
|
-
video_path = self.path_for(
|
|
153
|
+
video_path = self.path_for(key, suffix=SUFFIX_MP4)
|
|
138
154
|
os.rename(media_paths[MediaType.video], video_path)
|
|
139
155
|
cached_paths[MediaType.video] = video_path
|
|
140
156
|
|
|
141
157
|
log.message(
|
|
142
158
|
"Downloaded media and saved to cache:\n%s",
|
|
143
|
-
fmt_lines(
|
|
159
|
+
fmt_lines(
|
|
160
|
+
[
|
|
161
|
+
f"{t.name}: {fmt_size_dual(p.stat().st_size)}: {fmt_path(p)} "
|
|
162
|
+
for (t, p) in cached_paths.items()
|
|
163
|
+
]
|
|
164
|
+
),
|
|
144
165
|
)
|
|
145
166
|
|
|
146
|
-
self._downsample_audio(
|
|
167
|
+
self._downsample_audio(url_or_slice)
|
|
147
168
|
|
|
148
169
|
return cached_paths
|
|
149
170
|
|
|
@@ -156,30 +177,33 @@ class MediaCache(DirStore):
|
|
|
156
177
|
"""
|
|
157
178
|
if not isinstance(url_or_path, Path) and is_url(url_or_path):
|
|
158
179
|
# If it is a URL, cache it locally.
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
180
|
+
url_or_slice = url_or_path
|
|
181
|
+
# Canonicalize the URL (preserving slice information if present)
|
|
182
|
+
canon = canonicalize_media_url(url_or_slice)
|
|
183
|
+
if not canon:
|
|
162
184
|
log.error("Unrecognized media, current services: %s", get_media_services())
|
|
163
185
|
raise InvalidInput(
|
|
164
186
|
"Unrecognized media URL (is this media service configured?): %s" % url_or_path
|
|
165
187
|
)
|
|
188
|
+
url_or_slice = canon
|
|
189
|
+
|
|
166
190
|
if not refetch:
|
|
167
|
-
transcript = self._read_transcript(
|
|
191
|
+
transcript = self._read_transcript(url_or_slice)
|
|
168
192
|
if transcript:
|
|
169
193
|
return transcript
|
|
170
194
|
# Cache all formats since we usually will want them.
|
|
171
|
-
self.cache(
|
|
195
|
+
self.cache(url_or_slice, refetch)
|
|
172
196
|
elif isinstance(url_or_path, Path):
|
|
173
197
|
# Treat local media files as file:// URLs.
|
|
174
198
|
# Don't need to cache originals but we still will cache audio and transcriptions.
|
|
175
199
|
if not url_or_path.exists():
|
|
176
200
|
raise FileNotFound(f"File not found: {fmt_loc(url_or_path)}")
|
|
177
|
-
|
|
201
|
+
url_or_slice = as_file_url(url_or_path)
|
|
178
202
|
else:
|
|
179
203
|
raise InvalidInput(f"Not a media URL or path: {fmt_loc(url_or_path)}")
|
|
180
204
|
|
|
181
205
|
# Now do the transcription.
|
|
182
|
-
transcript = self._do_transcription(
|
|
206
|
+
transcript = self._do_transcription(url_or_slice, language=language)
|
|
183
207
|
if not transcript:
|
|
184
|
-
raise UnexpectedError("No transcript found for: %s" %
|
|
208
|
+
raise UnexpectedError("No transcript found for: %s" % url_or_slice)
|
|
185
209
|
return transcript
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
@@ -7,6 +9,7 @@ from strif import AtomicVar
|
|
|
7
9
|
from kash.media_base.services.local_file_media import LocalFileMedia
|
|
8
10
|
from kash.model.media_model import MediaMetadata, MediaService
|
|
9
11
|
from kash.utils.common.url import Url
|
|
12
|
+
from kash.utils.common.url_slice import Slice, add_slice_to_url, parse_url_slice
|
|
10
13
|
from kash.utils.errors import InvalidInput
|
|
11
14
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
12
15
|
|
|
@@ -32,14 +35,22 @@ def register_media_service(*services: MediaService) -> None:
|
|
|
32
35
|
_media_services.update(lambda services: services + new_services)
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
def canonicalize_media_url(
|
|
38
|
+
def canonicalize_media_url(url_or_slice: Url) -> Url | None:
|
|
36
39
|
"""
|
|
37
40
|
Return the canonical form of a media URL from a supported service (like YouTube).
|
|
41
|
+
Preserves any slice information in URL fragments.
|
|
38
42
|
"""
|
|
43
|
+
base_url, slice = parse_url_slice(url_or_slice)
|
|
44
|
+
|
|
45
|
+
# Canonicalize the base URL
|
|
39
46
|
for service in _media_services.copy():
|
|
40
|
-
canonical_url = service.canonicalize(
|
|
47
|
+
canonical_url = service.canonicalize(base_url)
|
|
41
48
|
if canonical_url:
|
|
42
|
-
|
|
49
|
+
# Add slice back to canonical URL if it existed
|
|
50
|
+
if slice:
|
|
51
|
+
return add_slice_to_url(canonical_url, slice)
|
|
52
|
+
else:
|
|
53
|
+
return canonical_url
|
|
43
54
|
return None
|
|
44
55
|
|
|
45
56
|
|
|
@@ -51,10 +62,11 @@ def thumbnail_media_url(url: Url) -> Url | None:
|
|
|
51
62
|
"""
|
|
52
63
|
Return a URL that links to the thumbnail of the media.
|
|
53
64
|
"""
|
|
65
|
+
base_url, _ = parse_url_slice(url)
|
|
54
66
|
for service in _media_services.copy():
|
|
55
|
-
canonical_url = service.canonicalize(
|
|
67
|
+
canonical_url = service.canonicalize(base_url)
|
|
56
68
|
if canonical_url:
|
|
57
|
-
return service.thumbnail_url(
|
|
69
|
+
return service.thumbnail_url(base_url)
|
|
58
70
|
return None
|
|
59
71
|
|
|
60
72
|
|
|
@@ -62,18 +74,21 @@ def timestamp_media_url(url: Url, timestamp: float) -> Url:
|
|
|
62
74
|
"""
|
|
63
75
|
Return a URL that links to the media at the given timestamp.
|
|
64
76
|
"""
|
|
77
|
+
base_url, _ = parse_url_slice(url)
|
|
65
78
|
for service in _media_services.copy():
|
|
66
|
-
canonical_url = service.canonicalize(
|
|
79
|
+
canonical_url = service.canonicalize(base_url)
|
|
67
80
|
if canonical_url:
|
|
68
|
-
return service.timestamp_url(
|
|
81
|
+
return service.timestamp_url(base_url, timestamp)
|
|
69
82
|
raise InvalidInput(f"Unrecognized media URL: {url}")
|
|
70
83
|
|
|
71
84
|
|
|
72
85
|
def get_media_id(url: Url | None) -> str | None:
|
|
73
86
|
if not url:
|
|
74
87
|
return None
|
|
88
|
+
|
|
89
|
+
base_url, _ = parse_url_slice(url)
|
|
75
90
|
for service in _media_services.copy():
|
|
76
|
-
media_id = service.get_media_id(
|
|
91
|
+
media_id = service.get_media_id(base_url)
|
|
77
92
|
if media_id:
|
|
78
93
|
return media_id
|
|
79
94
|
return None
|
|
@@ -84,10 +99,11 @@ def get_media_metadata(url: Url) -> MediaMetadata | None:
|
|
|
84
99
|
"""
|
|
85
100
|
Return metadata for the media at the given URL.
|
|
86
101
|
"""
|
|
102
|
+
base_url, _ = parse_url_slice(url)
|
|
87
103
|
for service in _media_services.copy():
|
|
88
|
-
media_id = service.get_media_id(
|
|
104
|
+
media_id = service.get_media_id(base_url)
|
|
89
105
|
if media_id: # This is an actual video, not a channel etc.
|
|
90
|
-
return service.metadata(
|
|
106
|
+
return service.metadata(base_url)
|
|
91
107
|
return None
|
|
92
108
|
|
|
93
109
|
|
|
@@ -95,18 +111,51 @@ def list_channel_items(url: Url) -> list[MediaMetadata]:
|
|
|
95
111
|
"""
|
|
96
112
|
List all items in a channel.
|
|
97
113
|
"""
|
|
114
|
+
base_url, _ = parse_url_slice(url)
|
|
98
115
|
for service in _media_services.copy():
|
|
99
|
-
canonical_url = service.canonicalize(
|
|
116
|
+
canonical_url = service.canonicalize(base_url)
|
|
100
117
|
if canonical_url:
|
|
101
|
-
return service.list_channel_items(
|
|
118
|
+
return service.list_channel_items(base_url)
|
|
102
119
|
raise InvalidInput(f"Unrecognized media URL: {url}")
|
|
103
120
|
|
|
104
121
|
|
|
105
122
|
def download_media_by_service(
|
|
106
|
-
url: Url,
|
|
123
|
+
url: Url,
|
|
124
|
+
target_dir: Path,
|
|
125
|
+
*,
|
|
126
|
+
media_types: list[MediaType] | None = None,
|
|
127
|
+
slice: Slice | None = None,
|
|
107
128
|
) -> dict[MediaType, Path]:
|
|
108
129
|
for service in _media_services.copy():
|
|
109
130
|
canonical_url = service.canonicalize(url)
|
|
110
131
|
if canonical_url:
|
|
111
|
-
return service.download_media(url, target_dir, media_types=media_types)
|
|
132
|
+
return service.download_media(url, target_dir, media_types=media_types, slice=slice)
|
|
112
133
|
raise ValueError(f"Unrecognized media URL: {url}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
## Tests
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_canonicalize_media_url_preserves_slice():
|
|
140
|
+
"""Test that canonicalize_media_url preserves URL slice fragments."""
|
|
141
|
+
|
|
142
|
+
# Test with unrecognized URLs (should return None)
|
|
143
|
+
# This tests the slice extraction/reconstruction logic without requiring actual files
|
|
144
|
+
unrecognized_url = Url("https://unknown-service.com/video#~slice=10-30")
|
|
145
|
+
canonical_unknown = canonicalize_media_url(unrecognized_url)
|
|
146
|
+
assert canonical_unknown is None
|
|
147
|
+
|
|
148
|
+
# Test typical YouTube URL with slice (would work if YouTube service was registered)
|
|
149
|
+
youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=10-30")
|
|
150
|
+
# For now this returns None since YouTube service isn't registered in this test
|
|
151
|
+
# but the slice extraction/reconstruction logic is tested in url_slice.py
|
|
152
|
+
youtube_canonical = canonicalize_media_url(youtube_url)
|
|
153
|
+
assert youtube_canonical is None # No YouTube service registered
|
|
154
|
+
|
|
155
|
+
# Test HH:MM:SS format slice
|
|
156
|
+
hms_youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=01:30-02:45")
|
|
157
|
+
canonical_hms = canonicalize_media_url(hms_youtube_url)
|
|
158
|
+
assert canonical_hms is None # No YouTube service registered
|
|
159
|
+
|
|
160
|
+
# The actual slice functionality is thoroughly tested in url_slice.py
|
|
161
|
+
# This test ensures canonicalize_media_url doesn't break with slice URLs
|
|
@@ -13,6 +13,7 @@ from kash.file_storage.store_filenames import parse_item_filename
|
|
|
13
13
|
from kash.model.media_model import MediaMetadata, MediaService, MediaUrlType
|
|
14
14
|
from kash.utils.common.format_utils import fmt_loc
|
|
15
15
|
from kash.utils.common.url import Url
|
|
16
|
+
from kash.utils.common.url_slice import Slice
|
|
16
17
|
from kash.utils.errors import FileNotFound, InvalidInput
|
|
17
18
|
from kash.utils.file_utils.file_formats_model import FileExt, MediaType
|
|
18
19
|
|
|
@@ -73,11 +74,18 @@ class LocalFileMedia(MediaService):
|
|
|
73
74
|
|
|
74
75
|
@override
|
|
75
76
|
def download_media(
|
|
76
|
-
self,
|
|
77
|
+
self,
|
|
78
|
+
url: Url,
|
|
79
|
+
target_dir: Path,
|
|
80
|
+
*,
|
|
81
|
+
media_types: list[MediaType] | None = None,
|
|
82
|
+
slice: Slice | None = None,
|
|
77
83
|
) -> dict[MediaType, Path]:
|
|
78
84
|
path = self._parse_file_url(url)
|
|
79
85
|
if not path:
|
|
80
86
|
raise InvalidInput(f"Not a local file URL: {url}")
|
|
87
|
+
if slice:
|
|
88
|
+
raise NotImplementedError("Slicing currently not supported for local files")
|
|
81
89
|
|
|
82
90
|
_name, _item_type, format, file_ext = parse_item_filename(path)
|
|
83
91
|
os.makedirs(target_dir, exist_ok=True)
|
kash/model/items_model.py
CHANGED
|
@@ -675,9 +675,21 @@ class Item:
|
|
|
675
675
|
raise FileFormatError(f"Config item is not YAML: {self.format}: {self}")
|
|
676
676
|
return from_yaml_string(self.body)
|
|
677
677
|
|
|
678
|
+
def get_filename(self) -> str | None:
|
|
679
|
+
"""
|
|
680
|
+
Get the store or external path filename of the item, including the
|
|
681
|
+
file extension.
|
|
682
|
+
"""
|
|
683
|
+
if self.store_path:
|
|
684
|
+
return Path(self.store_path).name
|
|
685
|
+
elif self.external_path:
|
|
686
|
+
return Path(self.external_path).name
|
|
687
|
+
else:
|
|
688
|
+
return None
|
|
689
|
+
|
|
678
690
|
def get_file_ext(self) -> FileExt:
|
|
679
691
|
"""
|
|
680
|
-
Get or infer file extension.
|
|
692
|
+
Get or infer the base file extension for the item.
|
|
681
693
|
"""
|
|
682
694
|
if self.file_ext:
|
|
683
695
|
return self.file_ext
|
|
@@ -688,7 +700,8 @@ class Item:
|
|
|
688
700
|
|
|
689
701
|
def get_full_suffix(self) -> str:
|
|
690
702
|
"""
|
|
691
|
-
|
|
703
|
+
Assemble the full file extension suffix (e.g. "resource.yml") for this item.
|
|
704
|
+
Without a leading dot.
|
|
692
705
|
"""
|
|
693
706
|
if self.type == ItemType.extension:
|
|
694
707
|
# Python files cannot have more than one . in them.
|
|
@@ -892,12 +905,14 @@ class Item:
|
|
|
892
905
|
|
|
893
906
|
def fmt_loc(self) -> str:
|
|
894
907
|
"""
|
|
895
|
-
Formatted store path, external path, or title.
|
|
908
|
+
Formatted store path, external path, URL, or title. Use for logging etc.
|
|
896
909
|
"""
|
|
897
910
|
if self.store_path:
|
|
898
911
|
return fmt_store_path(self.store_path)
|
|
899
912
|
elif self.external_path:
|
|
900
913
|
return fmt_loc(self.external_path)
|
|
914
|
+
elif self.url:
|
|
915
|
+
return fmt_loc(self.url)
|
|
901
916
|
else:
|
|
902
917
|
return repr(self.pick_title())
|
|
903
918
|
|
|
@@ -915,10 +930,10 @@ class Item:
|
|
|
915
930
|
key_filter={
|
|
916
931
|
"store_path": 0,
|
|
917
932
|
"type": 64,
|
|
918
|
-
"
|
|
933
|
+
"format": 64,
|
|
934
|
+
"title": 40,
|
|
919
935
|
"url": 64,
|
|
920
936
|
"external_path": 64,
|
|
921
|
-
"context": 64,
|
|
922
937
|
},
|
|
923
938
|
)
|
|
924
939
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
|
@@ -932,13 +947,12 @@ class Item:
|
|
|
932
947
|
"store_path": 0,
|
|
933
948
|
"external_path": 64,
|
|
934
949
|
"type": 64,
|
|
950
|
+
"format": 64,
|
|
935
951
|
"state": 64,
|
|
936
|
-
"title":
|
|
952
|
+
"title": 40,
|
|
937
953
|
"url": 64,
|
|
938
|
-
"format": 64,
|
|
939
954
|
"created_at": 64,
|
|
940
955
|
"body": 64,
|
|
941
|
-
"context": 64,
|
|
942
956
|
},
|
|
943
957
|
)
|
|
944
958
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
kash/model/media_model.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from datetime import date
|
|
3
5
|
from enum import Enum
|
|
@@ -7,6 +9,7 @@ from prettyfmt import abbrev_obj
|
|
|
7
9
|
from pydantic.dataclasses import dataclass
|
|
8
10
|
|
|
9
11
|
from kash.utils.common.url import Url
|
|
12
|
+
from kash.utils.common.url_slice import Slice
|
|
10
13
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
11
14
|
|
|
12
15
|
|
|
@@ -109,7 +112,12 @@ class MediaService(ABC):
|
|
|
109
112
|
|
|
110
113
|
@abstractmethod
|
|
111
114
|
def download_media(
|
|
112
|
-
self,
|
|
115
|
+
self,
|
|
116
|
+
url: Url,
|
|
117
|
+
target_dir: Path,
|
|
118
|
+
*,
|
|
119
|
+
media_types: list[MediaType] | None = None,
|
|
120
|
+
slice: Slice | None = None,
|
|
113
121
|
) -> dict[MediaType, Path]:
|
|
114
122
|
"""
|
|
115
123
|
Download media from URL and extract to audio or video formats.
|