kash-shell 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/minify_html.py +41 -0
- kash/commands/base/files_command.py +2 -2
- kash/commands/base/show_command.py +11 -1
- kash/config/colors.py +20 -8
- kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
- kash/docs/markdown/topics/a2_installation.md +17 -30
- kash/docs/markdown/topics/a3_getting_started.md +5 -19
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_metadata.py +9 -0
- kash/exec/precondition_registry.py +3 -3
- kash/file_storage/file_store.py +18 -1
- kash/llm_utils/llm_features.py +5 -1
- kash/llm_utils/llms.py +18 -8
- kash/media_base/media_cache.py +48 -24
- kash/media_base/media_services.py +63 -14
- kash/media_base/services/local_file_media.py +9 -1
- kash/model/actions_model.py +2 -2
- kash/model/items_model.py +4 -5
- kash/model/media_model.py +9 -1
- kash/model/params_model.py +9 -3
- kash/utils/common/function_inspect.py +97 -1
- kash/utils/common/testing.py +58 -0
- kash/utils/common/url_slice.py +329 -0
- kash/utils/file_utils/file_formats.py +1 -1
- kash/utils/text_handling/markdown_utils.py +424 -16
- kash/web_gen/templates/base_styles.css.jinja +204 -25
- kash/web_gen/templates/base_webpage.html.jinja +48 -26
- kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
- kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
- kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
- kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
- kash/web_gen/templates/content_styles.css.jinja +13 -8
- kash/web_gen/templates/simple_webpage.html.jinja +59 -21
- kash/web_gen/templates/tabbed_webpage.html.jinja +4 -2
- kash/workspaces/workspaces.py +10 -1
- {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/METADATA +75 -72
- {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/RECORD +40 -33
- {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/licenses/LICENSE +0 -0
kash/media_base/media_cache.py
CHANGED
|
@@ -3,6 +3,7 @@ from functools import cache
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
5
|
from prettyfmt import fmt_lines, fmt_path
|
|
6
|
+
from prettyfmt.prettyfmt import fmt_size_dual
|
|
6
7
|
from strif import atomic_output_file
|
|
7
8
|
|
|
8
9
|
from kash.config.logger import get_logger
|
|
@@ -14,6 +15,7 @@ from kash.media_base.media_services import (
|
|
|
14
15
|
)
|
|
15
16
|
from kash.utils.common.format_utils import fmt_loc
|
|
16
17
|
from kash.utils.common.url import Url, as_file_url, is_url
|
|
18
|
+
from kash.utils.common.url_slice import parse_url_slice
|
|
17
19
|
from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
|
|
18
20
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
19
21
|
from kash.web_content.dir_store import DirStore
|
|
@@ -51,14 +53,16 @@ class MediaCache(DirStore):
|
|
|
51
53
|
super().__init__(root)
|
|
52
54
|
|
|
53
55
|
def _write_transcript(self, url: Url, content: str) -> None:
|
|
54
|
-
|
|
56
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
57
|
+
transcript_path = self.path_for(key, suffix=SUFFIX_TRANSCRIPT)
|
|
55
58
|
with atomic_output_file(transcript_path) as temp_output:
|
|
56
59
|
with open(temp_output, "w") as f:
|
|
57
60
|
f.write(content)
|
|
58
61
|
log.message("Transcript saved to cache: %s", fmt_path(transcript_path))
|
|
59
62
|
|
|
60
63
|
def _read_transcript(self, url: Url) -> str | None:
|
|
61
|
-
|
|
64
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
65
|
+
transcript_file = self.find(key, suffix=SUFFIX_TRANSCRIPT)
|
|
62
66
|
if transcript_file:
|
|
63
67
|
log.message("Video transcript already in cache: %s: %s", url, fmt_path(transcript_file))
|
|
64
68
|
with open(transcript_file) as f:
|
|
@@ -66,12 +70,13 @@ class MediaCache(DirStore):
|
|
|
66
70
|
return None
|
|
67
71
|
|
|
68
72
|
def _downsample_audio(self, url: Url) -> Path:
|
|
69
|
-
|
|
73
|
+
key = str(url) # Cache key is the URL (with slice fragment if present)
|
|
74
|
+
downsampled_audio_file = self.find(key, suffix=SUFFIX_16KMP3)
|
|
70
75
|
if not downsampled_audio_file:
|
|
71
|
-
full_audio_file = self.find(
|
|
76
|
+
full_audio_file = self.find(key, suffix=SUFFIX_MP3)
|
|
72
77
|
if not full_audio_file:
|
|
73
78
|
raise ValueError("No audio file found for: %s" % url)
|
|
74
|
-
downsampled_audio_file = self.path_for(
|
|
79
|
+
downsampled_audio_file = self.path_for(key, suffix=SUFFIX_16KMP3)
|
|
75
80
|
log.message(
|
|
76
81
|
"Downsampling audio: %s -> %s",
|
|
77
82
|
fmt_path(full_audio_file),
|
|
@@ -95,13 +100,18 @@ class MediaCache(DirStore):
|
|
|
95
100
|
return transcript
|
|
96
101
|
|
|
97
102
|
def cache(
|
|
98
|
-
self,
|
|
103
|
+
self, url_or_slice: Url, refetch=False, media_types: list[MediaType] | None = None
|
|
99
104
|
) -> dict[MediaType, Path]:
|
|
100
105
|
"""
|
|
101
106
|
Cache the media files for the given media URL. Returns paths to cached copies
|
|
102
107
|
for each media type (video or audio). Returns cached copies if available,
|
|
103
108
|
unless `refetch` is True.
|
|
104
109
|
"""
|
|
110
|
+
key = str(url_or_slice) # Cache key is the URL (with slice fragment if present)
|
|
111
|
+
|
|
112
|
+
# Extract base URL and slice information
|
|
113
|
+
base_url, slice = parse_url_slice(url_or_slice)
|
|
114
|
+
|
|
105
115
|
cached_paths: dict[MediaType, Path] = {}
|
|
106
116
|
|
|
107
117
|
if not media_types:
|
|
@@ -109,14 +119,18 @@ class MediaCache(DirStore):
|
|
|
109
119
|
|
|
110
120
|
if not refetch:
|
|
111
121
|
if MediaType.audio in media_types:
|
|
112
|
-
audio_file = self.find(
|
|
122
|
+
audio_file = self.find(key, suffix=SUFFIX_MP3)
|
|
113
123
|
if audio_file:
|
|
114
|
-
log.message(
|
|
124
|
+
log.message(
|
|
125
|
+
"Audio already in cache: %s: %s", url_or_slice, fmt_path(audio_file)
|
|
126
|
+
)
|
|
115
127
|
cached_paths[MediaType.audio] = audio_file
|
|
116
128
|
if MediaType.video in media_types:
|
|
117
|
-
video_file = self.find(
|
|
129
|
+
video_file = self.find(key, suffix=SUFFIX_MP4)
|
|
118
130
|
if video_file:
|
|
119
|
-
log.message(
|
|
131
|
+
log.message(
|
|
132
|
+
"Video already in cache: %s: %s", url_or_slice, fmt_path(video_file)
|
|
133
|
+
)
|
|
120
134
|
cached_paths[MediaType.video] = video_file
|
|
121
135
|
if set(media_types).issubset(cached_paths.keys()):
|
|
122
136
|
return cached_paths
|
|
@@ -127,23 +141,30 @@ class MediaCache(DirStore):
|
|
|
127
141
|
[t.name for t in cached_paths.keys()],
|
|
128
142
|
)
|
|
129
143
|
|
|
130
|
-
log.message("Downloading media: %s",
|
|
131
|
-
media_paths = download_media_by_service(
|
|
144
|
+
log.message("Downloading media: %s", url_or_slice)
|
|
145
|
+
media_paths = download_media_by_service(
|
|
146
|
+
base_url, self.root, media_types=media_types, slice=slice
|
|
147
|
+
)
|
|
132
148
|
if MediaType.audio in media_paths:
|
|
133
|
-
audio_path = self.path_for(
|
|
149
|
+
audio_path = self.path_for(key, suffix=SUFFIX_MP3)
|
|
134
150
|
os.rename(media_paths[MediaType.audio], audio_path)
|
|
135
151
|
cached_paths[MediaType.audio] = audio_path
|
|
136
152
|
if MediaType.video in media_paths:
|
|
137
|
-
video_path = self.path_for(
|
|
153
|
+
video_path = self.path_for(key, suffix=SUFFIX_MP4)
|
|
138
154
|
os.rename(media_paths[MediaType.video], video_path)
|
|
139
155
|
cached_paths[MediaType.video] = video_path
|
|
140
156
|
|
|
141
157
|
log.message(
|
|
142
158
|
"Downloaded media and saved to cache:\n%s",
|
|
143
|
-
fmt_lines(
|
|
159
|
+
fmt_lines(
|
|
160
|
+
[
|
|
161
|
+
f"{t.name}: {fmt_size_dual(p.stat().st_size)}: {fmt_path(p)} "
|
|
162
|
+
for (t, p) in cached_paths.items()
|
|
163
|
+
]
|
|
164
|
+
),
|
|
144
165
|
)
|
|
145
166
|
|
|
146
|
-
self._downsample_audio(
|
|
167
|
+
self._downsample_audio(url_or_slice)
|
|
147
168
|
|
|
148
169
|
return cached_paths
|
|
149
170
|
|
|
@@ -156,30 +177,33 @@ class MediaCache(DirStore):
|
|
|
156
177
|
"""
|
|
157
178
|
if not isinstance(url_or_path, Path) and is_url(url_or_path):
|
|
158
179
|
# If it is a URL, cache it locally.
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
180
|
+
url_or_slice = url_or_path
|
|
181
|
+
# Canonicalize the URL (preserving slice information if present)
|
|
182
|
+
canon = canonicalize_media_url(url_or_slice)
|
|
183
|
+
if not canon:
|
|
162
184
|
log.error("Unrecognized media, current services: %s", get_media_services())
|
|
163
185
|
raise InvalidInput(
|
|
164
186
|
"Unrecognized media URL (is this media service configured?): %s" % url_or_path
|
|
165
187
|
)
|
|
188
|
+
url_or_slice = canon
|
|
189
|
+
|
|
166
190
|
if not refetch:
|
|
167
|
-
transcript = self._read_transcript(
|
|
191
|
+
transcript = self._read_transcript(url_or_slice)
|
|
168
192
|
if transcript:
|
|
169
193
|
return transcript
|
|
170
194
|
# Cache all formats since we usually will want them.
|
|
171
|
-
self.cache(
|
|
195
|
+
self.cache(url_or_slice, refetch)
|
|
172
196
|
elif isinstance(url_or_path, Path):
|
|
173
197
|
# Treat local media files as file:// URLs.
|
|
174
198
|
# Don't need to cache originals but we still will cache audio and transcriptions.
|
|
175
199
|
if not url_or_path.exists():
|
|
176
200
|
raise FileNotFound(f"File not found: {fmt_loc(url_or_path)}")
|
|
177
|
-
|
|
201
|
+
url_or_slice = as_file_url(url_or_path)
|
|
178
202
|
else:
|
|
179
203
|
raise InvalidInput(f"Not a media URL or path: {fmt_loc(url_or_path)}")
|
|
180
204
|
|
|
181
205
|
# Now do the transcription.
|
|
182
|
-
transcript = self._do_transcription(
|
|
206
|
+
transcript = self._do_transcription(url_or_slice, language=language)
|
|
183
207
|
if not transcript:
|
|
184
|
-
raise UnexpectedError("No transcript found for: %s" %
|
|
208
|
+
raise UnexpectedError("No transcript found for: %s" % url_or_slice)
|
|
185
209
|
return transcript
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
@@ -7,6 +9,7 @@ from strif import AtomicVar
|
|
|
7
9
|
from kash.media_base.services.local_file_media import LocalFileMedia
|
|
8
10
|
from kash.model.media_model import MediaMetadata, MediaService
|
|
9
11
|
from kash.utils.common.url import Url
|
|
12
|
+
from kash.utils.common.url_slice import Slice, add_slice_to_url, parse_url_slice
|
|
10
13
|
from kash.utils.errors import InvalidInput
|
|
11
14
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
12
15
|
|
|
@@ -32,14 +35,22 @@ def register_media_service(*services: MediaService) -> None:
|
|
|
32
35
|
_media_services.update(lambda services: services + new_services)
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
def canonicalize_media_url(
|
|
38
|
+
def canonicalize_media_url(url_or_slice: Url) -> Url | None:
|
|
36
39
|
"""
|
|
37
40
|
Return the canonical form of a media URL from a supported service (like YouTube).
|
|
41
|
+
Preserves any slice information in URL fragments.
|
|
38
42
|
"""
|
|
43
|
+
base_url, slice = parse_url_slice(url_or_slice)
|
|
44
|
+
|
|
45
|
+
# Canonicalize the base URL
|
|
39
46
|
for service in _media_services.copy():
|
|
40
|
-
canonical_url = service.canonicalize(
|
|
47
|
+
canonical_url = service.canonicalize(base_url)
|
|
41
48
|
if canonical_url:
|
|
42
|
-
|
|
49
|
+
# Add slice back to canonical URL if it existed
|
|
50
|
+
if slice:
|
|
51
|
+
return add_slice_to_url(canonical_url, slice)
|
|
52
|
+
else:
|
|
53
|
+
return canonical_url
|
|
43
54
|
return None
|
|
44
55
|
|
|
45
56
|
|
|
@@ -51,10 +62,11 @@ def thumbnail_media_url(url: Url) -> Url | None:
|
|
|
51
62
|
"""
|
|
52
63
|
Return a URL that links to the thumbnail of the media.
|
|
53
64
|
"""
|
|
65
|
+
base_url, _ = parse_url_slice(url)
|
|
54
66
|
for service in _media_services.copy():
|
|
55
|
-
canonical_url = service.canonicalize(
|
|
67
|
+
canonical_url = service.canonicalize(base_url)
|
|
56
68
|
if canonical_url:
|
|
57
|
-
return service.thumbnail_url(
|
|
69
|
+
return service.thumbnail_url(base_url)
|
|
58
70
|
return None
|
|
59
71
|
|
|
60
72
|
|
|
@@ -62,18 +74,21 @@ def timestamp_media_url(url: Url, timestamp: float) -> Url:
|
|
|
62
74
|
"""
|
|
63
75
|
Return a URL that links to the media at the given timestamp.
|
|
64
76
|
"""
|
|
77
|
+
base_url, _ = parse_url_slice(url)
|
|
65
78
|
for service in _media_services.copy():
|
|
66
|
-
canonical_url = service.canonicalize(
|
|
79
|
+
canonical_url = service.canonicalize(base_url)
|
|
67
80
|
if canonical_url:
|
|
68
|
-
return service.timestamp_url(
|
|
81
|
+
return service.timestamp_url(base_url, timestamp)
|
|
69
82
|
raise InvalidInput(f"Unrecognized media URL: {url}")
|
|
70
83
|
|
|
71
84
|
|
|
72
85
|
def get_media_id(url: Url | None) -> str | None:
|
|
73
86
|
if not url:
|
|
74
87
|
return None
|
|
88
|
+
|
|
89
|
+
base_url, _ = parse_url_slice(url)
|
|
75
90
|
for service in _media_services.copy():
|
|
76
|
-
media_id = service.get_media_id(
|
|
91
|
+
media_id = service.get_media_id(base_url)
|
|
77
92
|
if media_id:
|
|
78
93
|
return media_id
|
|
79
94
|
return None
|
|
@@ -84,10 +99,11 @@ def get_media_metadata(url: Url) -> MediaMetadata | None:
|
|
|
84
99
|
"""
|
|
85
100
|
Return metadata for the media at the given URL.
|
|
86
101
|
"""
|
|
102
|
+
base_url, _ = parse_url_slice(url)
|
|
87
103
|
for service in _media_services.copy():
|
|
88
|
-
media_id = service.get_media_id(
|
|
104
|
+
media_id = service.get_media_id(base_url)
|
|
89
105
|
if media_id: # This is an actual video, not a channel etc.
|
|
90
|
-
return service.metadata(
|
|
106
|
+
return service.metadata(base_url)
|
|
91
107
|
return None
|
|
92
108
|
|
|
93
109
|
|
|
@@ -95,18 +111,51 @@ def list_channel_items(url: Url) -> list[MediaMetadata]:
|
|
|
95
111
|
"""
|
|
96
112
|
List all items in a channel.
|
|
97
113
|
"""
|
|
114
|
+
base_url, _ = parse_url_slice(url)
|
|
98
115
|
for service in _media_services.copy():
|
|
99
|
-
canonical_url = service.canonicalize(
|
|
116
|
+
canonical_url = service.canonicalize(base_url)
|
|
100
117
|
if canonical_url:
|
|
101
|
-
return service.list_channel_items(
|
|
118
|
+
return service.list_channel_items(base_url)
|
|
102
119
|
raise InvalidInput(f"Unrecognized media URL: {url}")
|
|
103
120
|
|
|
104
121
|
|
|
105
122
|
def download_media_by_service(
|
|
106
|
-
url: Url,
|
|
123
|
+
url: Url,
|
|
124
|
+
target_dir: Path,
|
|
125
|
+
*,
|
|
126
|
+
media_types: list[MediaType] | None = None,
|
|
127
|
+
slice: Slice | None = None,
|
|
107
128
|
) -> dict[MediaType, Path]:
|
|
108
129
|
for service in _media_services.copy():
|
|
109
130
|
canonical_url = service.canonicalize(url)
|
|
110
131
|
if canonical_url:
|
|
111
|
-
return service.download_media(url, target_dir, media_types=media_types)
|
|
132
|
+
return service.download_media(url, target_dir, media_types=media_types, slice=slice)
|
|
112
133
|
raise ValueError(f"Unrecognized media URL: {url}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
## Tests
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_canonicalize_media_url_preserves_slice():
|
|
140
|
+
"""Test that canonicalize_media_url preserves URL slice fragments."""
|
|
141
|
+
|
|
142
|
+
# Test with unrecognized URLs (should return None)
|
|
143
|
+
# This tests the slice extraction/reconstruction logic without requiring actual files
|
|
144
|
+
unrecognized_url = Url("https://unknown-service.com/video#~slice=10-30")
|
|
145
|
+
canonical_unknown = canonicalize_media_url(unrecognized_url)
|
|
146
|
+
assert canonical_unknown is None
|
|
147
|
+
|
|
148
|
+
# Test typical YouTube URL with slice (would work if YouTube service was registered)
|
|
149
|
+
youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=10-30")
|
|
150
|
+
# For now this returns None since YouTube service isn't registered in this test
|
|
151
|
+
# but the slice extraction/reconstruction logic is tested in url_slice.py
|
|
152
|
+
youtube_canonical = canonicalize_media_url(youtube_url)
|
|
153
|
+
assert youtube_canonical is None # No YouTube service registered
|
|
154
|
+
|
|
155
|
+
# Test HH:MM:SS format slice
|
|
156
|
+
hms_youtube_url = Url("https://www.youtube.com/watch?v=dQw4w9WgXcQ#~slice=01:30-02:45")
|
|
157
|
+
canonical_hms = canonicalize_media_url(hms_youtube_url)
|
|
158
|
+
assert canonical_hms is None # No YouTube service registered
|
|
159
|
+
|
|
160
|
+
# The actual slice functionality is thoroughly tested in url_slice.py
|
|
161
|
+
# This test ensures canonicalize_media_url doesn't break with slice URLs
|
|
@@ -13,6 +13,7 @@ from kash.file_storage.store_filenames import parse_item_filename
|
|
|
13
13
|
from kash.model.media_model import MediaMetadata, MediaService, MediaUrlType
|
|
14
14
|
from kash.utils.common.format_utils import fmt_loc
|
|
15
15
|
from kash.utils.common.url import Url
|
|
16
|
+
from kash.utils.common.url_slice import Slice
|
|
16
17
|
from kash.utils.errors import FileNotFound, InvalidInput
|
|
17
18
|
from kash.utils.file_utils.file_formats_model import FileExt, MediaType
|
|
18
19
|
|
|
@@ -73,11 +74,18 @@ class LocalFileMedia(MediaService):
|
|
|
73
74
|
|
|
74
75
|
@override
|
|
75
76
|
def download_media(
|
|
76
|
-
self,
|
|
77
|
+
self,
|
|
78
|
+
url: Url,
|
|
79
|
+
target_dir: Path,
|
|
80
|
+
*,
|
|
81
|
+
media_types: list[MediaType] | None = None,
|
|
82
|
+
slice: Slice | None = None,
|
|
77
83
|
) -> dict[MediaType, Path]:
|
|
78
84
|
path = self._parse_file_url(url)
|
|
79
85
|
if not path:
|
|
80
86
|
raise InvalidInput(f"Not a local file URL: {url}")
|
|
87
|
+
if slice:
|
|
88
|
+
raise NotImplementedError("Slicing currently not supported for local files")
|
|
81
89
|
|
|
82
90
|
_name, _item_type, format, file_ext = parse_item_filename(path)
|
|
83
91
|
os.makedirs(target_dir, exist_ok=True)
|
kash/model/actions_model.py
CHANGED
|
@@ -102,9 +102,9 @@ class ActionResult:
|
|
|
102
102
|
shell_result: ShellResult | None = None
|
|
103
103
|
"""Customize control of how the action's result is displayed in the shell."""
|
|
104
104
|
|
|
105
|
-
def get_by_format(self,
|
|
105
|
+
def get_by_format(self, *formats: Format) -> Item:
|
|
106
106
|
"""Convenience method to get an item for actions that return multiple formats."""
|
|
107
|
-
return next(item for item in self.items if item.format
|
|
107
|
+
return next(item for item in self.items if item.format in formats)
|
|
108
108
|
|
|
109
109
|
def has_hints(self) -> bool:
|
|
110
110
|
return bool(
|
kash/model/items_model.py
CHANGED
|
@@ -915,10 +915,10 @@ class Item:
|
|
|
915
915
|
key_filter={
|
|
916
916
|
"store_path": 0,
|
|
917
917
|
"type": 64,
|
|
918
|
-
"
|
|
918
|
+
"format": 64,
|
|
919
|
+
"title": 40,
|
|
919
920
|
"url": 64,
|
|
920
921
|
"external_path": 64,
|
|
921
|
-
"context": 64,
|
|
922
922
|
},
|
|
923
923
|
)
|
|
924
924
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
|
@@ -932,13 +932,12 @@ class Item:
|
|
|
932
932
|
"store_path": 0,
|
|
933
933
|
"external_path": 64,
|
|
934
934
|
"type": 64,
|
|
935
|
+
"format": 64,
|
|
935
936
|
"state": 64,
|
|
936
|
-
"title":
|
|
937
|
+
"title": 40,
|
|
937
938
|
"url": 64,
|
|
938
|
-
"format": 64,
|
|
939
939
|
"created_at": 64,
|
|
940
940
|
"body": 64,
|
|
941
|
-
"context": 64,
|
|
942
941
|
},
|
|
943
942
|
)
|
|
944
943
|
+ f"[{len(self.body) if self.body else 0} body chars]"
|
kash/model/media_model.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from datetime import date
|
|
3
5
|
from enum import Enum
|
|
@@ -7,6 +9,7 @@ from prettyfmt import abbrev_obj
|
|
|
7
9
|
from pydantic.dataclasses import dataclass
|
|
8
10
|
|
|
9
11
|
from kash.utils.common.url import Url
|
|
12
|
+
from kash.utils.common.url_slice import Slice
|
|
10
13
|
from kash.utils.file_utils.file_formats_model import MediaType
|
|
11
14
|
|
|
12
15
|
|
|
@@ -109,7 +112,12 @@ class MediaService(ABC):
|
|
|
109
112
|
|
|
110
113
|
@abstractmethod
|
|
111
114
|
def download_media(
|
|
112
|
-
self,
|
|
115
|
+
self,
|
|
116
|
+
url: Url,
|
|
117
|
+
target_dir: Path,
|
|
118
|
+
*,
|
|
119
|
+
media_types: list[MediaType] | None = None,
|
|
120
|
+
slice: Slice | None = None,
|
|
113
121
|
) -> dict[MediaType, Path]:
|
|
114
122
|
"""
|
|
115
123
|
Download media from URL and extract to audio or video formats.
|
kash/model/params_model.py
CHANGED
|
@@ -206,10 +206,10 @@ A list of parameter declarations, possibly with default values.
|
|
|
206
206
|
|
|
207
207
|
# These are the default models for typical use cases.
|
|
208
208
|
# The user may override them with parameters.
|
|
209
|
-
DEFAULT_CAREFUL_LLM = LLM.
|
|
209
|
+
DEFAULT_CAREFUL_LLM = LLM.o3
|
|
210
210
|
DEFAULT_STRUCTURED_LLM = LLM.gpt_4o
|
|
211
|
-
DEFAULT_STANDARD_LLM = LLM.
|
|
212
|
-
DEFAULT_FAST_LLM = LLM.
|
|
211
|
+
DEFAULT_STANDARD_LLM = LLM.claude_4_sonnet
|
|
212
|
+
DEFAULT_FAST_LLM = LLM.o1_mini
|
|
213
213
|
|
|
214
214
|
|
|
215
215
|
# Parameters set globally such as in the workspace.
|
|
@@ -262,6 +262,12 @@ COMMON_ACTION_PARAMS: dict[str, Param] = {
|
|
|
262
262
|
valid_str_values=list(LLM),
|
|
263
263
|
is_open_ended=True,
|
|
264
264
|
),
|
|
265
|
+
"model_list": Param(
|
|
266
|
+
"model_list",
|
|
267
|
+
"A list of LLMs to use, as names separated by commas.",
|
|
268
|
+
type=str,
|
|
269
|
+
default_value=None,
|
|
270
|
+
),
|
|
265
271
|
"language": Param(
|
|
266
272
|
"language",
|
|
267
273
|
"The language of the input audio or text.",
|
|
@@ -4,7 +4,14 @@ from collections.abc import Callable
|
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from inspect import Parameter
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import (
|
|
8
|
+
Any,
|
|
9
|
+
Literal,
|
|
10
|
+
Union, # pyright: ignore[reportDeprecated]
|
|
11
|
+
cast,
|
|
12
|
+
get_args,
|
|
13
|
+
get_origin,
|
|
14
|
+
)
|
|
8
15
|
|
|
9
16
|
NO_DEFAULT = Parameter.empty # Alias for clarity
|
|
10
17
|
|
|
@@ -90,6 +97,23 @@ def _resolve_type_details(annotation: Any) -> tuple[type | None, type | None, bo
|
|
|
90
97
|
return (type(None), None, True)
|
|
91
98
|
# If multiple non_none_args (e.g., int | str), current_annotation remains the Union for now.
|
|
92
99
|
|
|
100
|
+
# Handle Literal types
|
|
101
|
+
if origin is Literal:
|
|
102
|
+
if args:
|
|
103
|
+
# Determine the common type of all literal values
|
|
104
|
+
literal_types = {type(arg) for arg in args}
|
|
105
|
+
if len(literal_types) == 1:
|
|
106
|
+
# All literals are the same type
|
|
107
|
+
final_effective_type = literal_types.pop()
|
|
108
|
+
else:
|
|
109
|
+
# Mixed types, fall back to the most common base type or str if all are basic types
|
|
110
|
+
if all(isinstance(arg, (str, int, float, bool)) for arg in args):
|
|
111
|
+
# For mixed basic types, use str as the effective type
|
|
112
|
+
final_effective_type = str
|
|
113
|
+
else:
|
|
114
|
+
final_effective_type = None
|
|
115
|
+
return final_effective_type, None, is_optional_flag
|
|
116
|
+
|
|
93
117
|
# Determine effective_type and inner_type from (potentially unwrapped) current_annotation
|
|
94
118
|
final_effective_type: type | None = None
|
|
95
119
|
final_inner_type: type | None = None
|
|
@@ -426,3 +450,75 @@ def test_inspect_function_parameters_updated():
|
|
|
426
450
|
is_explicitly_optional=True,
|
|
427
451
|
)
|
|
428
452
|
]
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def test_literal_types():
|
|
456
|
+
"""Test Literal type support in function parameter inspection."""
|
|
457
|
+
|
|
458
|
+
# Test string literals
|
|
459
|
+
def func_string_literal(converter: Literal["markitdown", "marker"] = "markitdown"):
|
|
460
|
+
return converter
|
|
461
|
+
|
|
462
|
+
params = inspect_function_params(func_string_literal)
|
|
463
|
+
assert len(params) == 1
|
|
464
|
+
param = params[0]
|
|
465
|
+
assert param.name == "converter"
|
|
466
|
+
assert param.effective_type is str
|
|
467
|
+
assert param.default == "markitdown"
|
|
468
|
+
assert param.is_explicitly_optional is False
|
|
469
|
+
|
|
470
|
+
# Test integer literals
|
|
471
|
+
def func_int_literal(count: Literal[1, 2, 3] = 1):
|
|
472
|
+
return count
|
|
473
|
+
|
|
474
|
+
params = inspect_function_params(func_int_literal)
|
|
475
|
+
assert len(params) == 1
|
|
476
|
+
param = params[0]
|
|
477
|
+
assert param.name == "count"
|
|
478
|
+
assert param.effective_type is int
|
|
479
|
+
assert param.default == 1
|
|
480
|
+
|
|
481
|
+
# Test mixed type literals (should default to str)
|
|
482
|
+
def func_mixed_literal(value: Literal["auto", 42]):
|
|
483
|
+
return value
|
|
484
|
+
|
|
485
|
+
params = inspect_function_params(func_mixed_literal)
|
|
486
|
+
assert len(params) == 1
|
|
487
|
+
param = params[0]
|
|
488
|
+
assert param.name == "value"
|
|
489
|
+
assert param.effective_type is str
|
|
490
|
+
assert param.default == NO_DEFAULT
|
|
491
|
+
|
|
492
|
+
# Test Literal directly (without TypeAlias to avoid scope issues)
|
|
493
|
+
def func_direct_literal(converter: Literal["markitdown", "marker"] = "markitdown"):
|
|
494
|
+
return converter
|
|
495
|
+
|
|
496
|
+
params = inspect_function_params(func_direct_literal)
|
|
497
|
+
assert len(params) == 1
|
|
498
|
+
param = params[0]
|
|
499
|
+
assert param.name == "converter"
|
|
500
|
+
assert param.effective_type is str
|
|
501
|
+
assert param.default == "markitdown"
|
|
502
|
+
|
|
503
|
+
# Test optional literal
|
|
504
|
+
def func_optional_literal(mode: Literal["fast", "slow"] | None = None):
|
|
505
|
+
return mode
|
|
506
|
+
|
|
507
|
+
params = inspect_function_params(func_optional_literal)
|
|
508
|
+
assert len(params) == 1
|
|
509
|
+
param = params[0]
|
|
510
|
+
assert param.name == "mode"
|
|
511
|
+
assert param.effective_type is str
|
|
512
|
+
assert param.is_explicitly_optional is True
|
|
513
|
+
assert param.default is None
|
|
514
|
+
|
|
515
|
+
# Test boolean literals
|
|
516
|
+
def func_bool_literal(flag: Literal[True, False] = True):
|
|
517
|
+
return flag
|
|
518
|
+
|
|
519
|
+
params = inspect_function_params(func_bool_literal)
|
|
520
|
+
assert len(params) == 1
|
|
521
|
+
param = params[0]
|
|
522
|
+
assert param.name == "flag"
|
|
523
|
+
assert param.effective_type is bool
|
|
524
|
+
assert param.default is True
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from typing import Literal, TypeAlias
|
|
7
|
+
|
|
8
|
+
TestMarker: TypeAlias = Literal["online", "integration"]
|
|
9
|
+
"""
|
|
10
|
+
Valid markers for tests. Currently just marking online tests (e.g. LLM APIs that
|
|
11
|
+
require keys) and more complex integration tests.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def enable_if(marker: TestMarker) -> Callable:
|
|
16
|
+
"""
|
|
17
|
+
Mark a test as having external dependencies.
|
|
18
|
+
|
|
19
|
+
Test runs only if the corresponding environment variable is set, e.g.
|
|
20
|
+
for the marker "online", checks for ENABLE_TESTS_ONLINE=1.
|
|
21
|
+
|
|
22
|
+
Automatically sets pytest markers when pytest is available, but safe to use in
|
|
23
|
+
runtime code as well.
|
|
24
|
+
|
|
25
|
+
Example usage:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
def test_foo():
|
|
29
|
+
...
|
|
30
|
+
|
|
31
|
+
@enable_if("online") # Only runs if ENABLE_TESTS_ONLINE=1
|
|
32
|
+
def test_bar():
|
|
33
|
+
...
|
|
34
|
+
```
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def decorator(func: Callable) -> Callable:
|
|
38
|
+
@wraps(func)
|
|
39
|
+
def wrapper(*args, **kwargs):
|
|
40
|
+
env_var = f"ENABLE_TESTS_{marker.upper()}"
|
|
41
|
+
if not os.getenv(env_var):
|
|
42
|
+
print(f"Skipping test function: {func.__name__} (set {env_var}=1 to enable)")
|
|
43
|
+
return
|
|
44
|
+
return func(*args, **kwargs)
|
|
45
|
+
|
|
46
|
+
# Set pytest markers automatically if pytest is available
|
|
47
|
+
try:
|
|
48
|
+
import pytest
|
|
49
|
+
|
|
50
|
+
wrapper = pytest.mark.integration(wrapper)
|
|
51
|
+
wrapper = getattr(pytest.mark, marker)(wrapper)
|
|
52
|
+
except ImportError:
|
|
53
|
+
# Pytest not available, which is fine for non-test runs
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
return wrapper
|
|
57
|
+
|
|
58
|
+
return decorator
|