kash-shell 0.3.17__py3-none-any.whl → 0.3.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/{markdownify.py → markdownify_html.py} +3 -6
- kash/actions/core/minify_html.py +41 -0
- kash/commands/base/show_command.py +11 -1
- kash/commands/workspace/workspace_commands.py +10 -88
- kash/config/colors.py +6 -2
- kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
- kash/docs/markdown/topics/a2_installation.md +17 -30
- kash/docs/markdown/topics/a3_getting_started.md +5 -19
- kash/exec/__init__.py +3 -0
- kash/exec/action_exec.py +3 -3
- kash/exec/fetch_url_items.py +109 -0
- kash/exec/precondition_registry.py +3 -3
- kash/file_storage/file_store.py +24 -1
- kash/file_storage/store_filenames.py +4 -0
- kash/help/function_param_info.py +1 -1
- kash/help/help_pages.py +1 -1
- kash/help/help_printing.py +1 -1
- kash/llm_utils/llm_features.py +5 -1
- kash/llm_utils/llms.py +18 -8
- kash/media_base/media_cache.py +48 -24
- kash/media_base/media_services.py +63 -14
- kash/media_base/services/local_file_media.py +9 -1
- kash/model/items_model.py +22 -8
- kash/model/media_model.py +9 -1
- kash/model/params_model.py +9 -3
- kash/utils/common/function_inspect.py +97 -1
- kash/utils/common/parse_docstring.py +347 -0
- kash/utils/common/testing.py +58 -0
- kash/utils/common/url_slice.py +329 -0
- kash/utils/file_utils/file_formats.py +1 -1
- kash/utils/text_handling/markdown_utils.py +424 -16
- kash/web_content/web_extract.py +34 -15
- kash/web_content/web_page_model.py +10 -1
- kash/web_gen/templates/base_styles.css.jinja +137 -15
- kash/web_gen/templates/base_webpage.html.jinja +13 -17
- kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
- kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
- kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
- kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
- kash/web_gen/templates/content_styles.css.jinja +13 -8
- kash/web_gen/templates/simple_webpage.html.jinja +15 -481
- kash/workspaces/workspaces.py +10 -1
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/METADATA +75 -72
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/RECORD +47 -40
- kash/exec/fetch_url_metadata.py +0 -72
- kash/help/docstring_utils.py +0 -111
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from kash.utils.common.url import Url
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class Slice:
    """
    A start and end time range, in seconds.

    Both endpoints must be finite numbers with `0 <= start_time < end_time`;
    anything else raises `ValueError` when the instance is created.
    """

    start_time: float
    end_time: float

    def __post_init__(self):
        # A chained comparison is false as soon as either endpoint is NaN, and the
        # upper bound rejects an infinite end, so only finite, ordered ranges pass.
        if not (0 <= self.start_time < self.end_time < float("inf")):
            raise ValueError(
                f"Not a valid time slice: got start={self.start_time}, end={self.end_time}"
            )

    @classmethod
    def parse(cls, slice_str: str) -> Slice:
        """
        Parse a slice string in format SSS-SSS or HH:MM:SS-HH:MM:SS.

        Each endpoint is parsed on its own, so the two sides may use different
        notations (for example `90-1:02:30`).

        Raises:
            ValueError: If there is no `-` separator, if either endpoint is not
                a recognized time, or if the resulting range is not a valid slice.
        """
        if "-" not in slice_str:
            raise ValueError(f"Not a valid time slice: {slice_str!r}")

        start_str, end_str = slice_str.split("-", 1)

        def parse_endpoint(text: str) -> float:
            # Prefer clock notation; fall back to a plain number of seconds.
            try:
                return _parse_time_format(text)
            except ValueError:
                return float(text)

        try:
            start_time = parse_endpoint(start_str)
            end_time = parse_endpoint(end_str)
        except ValueError as err:
            # Report the whole input, but keep the underlying cause attached.
            raise ValueError(f"Not a valid time slice: {slice_str!r}") from err

        return cls(start_time, end_time)

    def __str__(self) -> str:
        return f"{_format_seconds(self.start_time)}-{_format_seconds(self.end_time)}"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _parse_time_format(time_str: str) -> float:
|
|
53
|
+
"""
|
|
54
|
+
Parse time string in HH:MM:SS format and return seconds.
|
|
55
|
+
Supports formats like: HH:MM:SS, MM:SS, or just SS
|
|
56
|
+
"""
|
|
57
|
+
# Match HH:MM:SS, MM:SS, or SS format
|
|
58
|
+
time_pattern = r"^(?:(\d{1,2}):)?(?:(\d{1,2}):)?(\d{1,2}(?:\.\d+)?)$"
|
|
59
|
+
match = re.match(time_pattern, time_str.strip())
|
|
60
|
+
|
|
61
|
+
if not match:
|
|
62
|
+
raise ValueError(f"Invalid time format: {time_str}")
|
|
63
|
+
|
|
64
|
+
hours_str, minutes_str, seconds_str = match.groups()
|
|
65
|
+
|
|
66
|
+
# Default values
|
|
67
|
+
hours = int(hours_str) if hours_str else 0
|
|
68
|
+
minutes = int(minutes_str) if minutes_str else 0
|
|
69
|
+
seconds = float(seconds_str) if seconds_str else 0
|
|
70
|
+
|
|
71
|
+
# Handle case where we have MM:SS (2 components)
|
|
72
|
+
if hours_str and not minutes_str:
|
|
73
|
+
# This means we actually have MM:SS, not HH:MM
|
|
74
|
+
minutes = hours
|
|
75
|
+
hours = 0
|
|
76
|
+
|
|
77
|
+
total_seconds = hours * 3600 + minutes * 60 + seconds
|
|
78
|
+
return total_seconds
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _format_seconds(seconds: float) -> str:
|
|
82
|
+
"""
|
|
83
|
+
Format seconds as HH:MM:SS, MM:SS, or SS depending on the value.
|
|
84
|
+
Uses the most compact and natural representation.
|
|
85
|
+
"""
|
|
86
|
+
total_seconds = int(seconds)
|
|
87
|
+
fractional_part = seconds - total_seconds
|
|
88
|
+
|
|
89
|
+
hours = total_seconds // 3600
|
|
90
|
+
minutes = (total_seconds % 3600) // 60
|
|
91
|
+
secs = total_seconds % 60
|
|
92
|
+
|
|
93
|
+
# Add fractional seconds if present
|
|
94
|
+
if fractional_part > 0:
|
|
95
|
+
secs += fractional_part
|
|
96
|
+
|
|
97
|
+
if hours > 0:
|
|
98
|
+
# HH:MM:SS format
|
|
99
|
+
if fractional_part > 0:
|
|
100
|
+
return f"{hours}:{minutes:02d}:{secs:04.1f}"
|
|
101
|
+
else:
|
|
102
|
+
return f"{hours}:{minutes:02d}:{secs:02d}"
|
|
103
|
+
elif minutes > 0:
|
|
104
|
+
# MM:SS format
|
|
105
|
+
if fractional_part > 0:
|
|
106
|
+
return f"{minutes}:{secs:04.1f}"
|
|
107
|
+
else:
|
|
108
|
+
return f"{minutes}:{secs:02d}"
|
|
109
|
+
else:
|
|
110
|
+
# Just seconds
|
|
111
|
+
if fractional_part > 0:
|
|
112
|
+
return f"{secs:.1f}"
|
|
113
|
+
else:
|
|
114
|
+
return str(int(secs))
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def is_url_slice(url: Url) -> bool:
    """
    Report whether the URL carries a slice that `parse_url_slice` can parse.
    """
    _, found = parse_url_slice(url)
    return found is not None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def parse_url_slice(url: Url) -> tuple[Url, Slice | None]:
    """
    Split a URL into its base URL and the time slice encoded in it, if any.

    The slice is expected as a trailing `#~slice=START-END` marker, where
    START-END must be something `Slice.parse` accepts.

    Returns:
        `(base_url, slice)` when a valid slice is present, where `base_url` is
        everything before the marker. Otherwise `(url, None)` with the URL
        returned untouched: no marker, nothing after the marker, or text after
        the marker that is not a valid slice.
    """
    head, marker, tail = url.partition("#~slice=")

    # Either the marker is absent or nothing follows it.
    if not marker or not tail:
        return url, None

    try:
        parsed = Slice.parse(tail)
        return Url(head), parsed
    except ValueError:
        # Not a usable slice, so hand the URL back as a regular URL.
        return url, None
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def add_slice_to_url(url: Url, slice: Slice) -> Url:
    """
    Return the URL with its fragment replaced by `#~slice=<slice>`.

    Everything from the first `#` onward (if present) is dropped before the
    slice marker is appended.
    """
    hash_pos = url.find("#")
    stripped = url if hash_pos == -1 else Url(url[:hash_pos])
    return Url(f"{stripped}#~slice={slice}")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
## Tests
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_url_slice_functionality():
    """Exercise Slice construction, validation, and parsing, then the URL-level tests."""

    # A well-formed slice keeps its endpoints and renders compactly.
    basic = Slice(10.0, 20.0)
    assert basic.start_time == 10.0
    assert basic.end_time == 20.0
    assert str(basic) == "10-20"

    # Reversed, empty, negative, and all-zero ranges must be rejected.
    rejected_ranges = [(20.0, 10.0), (10.0, 10.0), (-5.0, 10.0), (0.0, 0.0)]
    for start, end in rejected_ranges:
        try:
            Slice(start, end)
        except ValueError as err:
            assert "Not a valid time slice" in str(err)
        else:
            raise AssertionError(f"Should have raised ValueError for start={start}, end={end}")

    # Parsing accepts both plain seconds and clock notation.
    # Each entry is (input_string, expected_start, expected_end).
    accepted = [
        ("10.5-25.7", 10.5, 25.7),  # decimal seconds
        ("30-60", 30.0, 60.0),  # integer seconds
        ("1:30-2:45", 90.0, 165.0),  # MM:SS format
        ("01:23:45-02:30:15", 5025.0, 9015.0),  # HH:MM:SS format
        ("00:01:30.5-00:02:45.25", 90.5, 165.25),  # HH:MM:SS with decimals
    ]
    for text, want_start, want_end in accepted:
        result = Slice.parse(text)
        assert result.start_time == want_start
        assert result.end_time == want_end

    # Malformed strings must be rejected with the standard message.
    for malformed in ["invalid", "10_20", "1:2:3:4-5:6:7"]:
        try:
            Slice.parse(malformed)
        except ValueError as err:
            assert "Not a valid time slice" in str(err)
        else:
            raise AssertionError(f"Should have raised ValueError for {malformed}")

    # Run all sub-tests
    test_url_slice_detection()
    test_url_slice_manipulation()
    test_parse_url_slice()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def test_url_slice_detection():
    """Check slice detection (is_url_slice) and extraction (parse_url_slice)."""

    plain = Url("https://example.com/video.mp4")
    with_slice = Url("https://example.com/video.mp4#~slice=10-30")
    with_other_fragment = Url("https://example.com/video.mp4#chapter1")

    # Only a URL carrying a well-formed slice marker is detected.
    assert not is_url_slice(plain)
    assert is_url_slice(with_slice)
    assert not is_url_slice(with_other_fragment)

    # Without a marker the URL comes back unchanged and there is no slice.
    base, found = parse_url_slice(plain)
    assert base == plain
    assert found is None

    # Plain-seconds notation.
    base, found = parse_url_slice(with_slice)
    assert base == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 10.0
    assert found.end_time == 30.0

    # Clock notation (MM:SS on both sides).
    clock_url = Url("https://example.com/video.mp4#~slice=01:30-02:45")
    base, found = parse_url_slice(clock_url)
    assert base == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 90.0  # 1:30 in seconds
    assert found.end_time == 165.0  # 2:45 in seconds

    # Slice marker at the very end of the URL.
    trailing_url = Url("https://example.com/video.mp4#~slice=30-60")
    assert is_url_slice(trailing_url)
    base, found = parse_url_slice(trailing_url)
    assert base == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 30.0
    assert found.end_time == 60.0

    # A marker followed by garbage is treated as an ordinary URL.
    garbage_url = Url("https://example.com/video.mp4#~slice=invalid-format")
    assert not is_url_slice(garbage_url)
    base, found = parse_url_slice(garbage_url)
    assert base == garbage_url
    assert found is None

    # A misspelled marker is not recognized.
    misspelled_url = Url("https://example.com/video.mp4#~slic=10-30")
    assert not is_url_slice(misspelled_url)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def test_url_slice_manipulation():
    """Check that add_slice_to_url attaches, round-trips, and replaces slices."""

    plain = Url("https://example.com/video.mp4")
    first = Slice(10.0, 30.0)

    # Attaching a slice appends the marker and makes the URL detectable.
    tagged = add_slice_to_url(plain, first)
    assert tagged == "https://example.com/video.mp4#~slice=10-30"
    assert is_url_slice(tagged)

    # Parsing the result recovers both the base URL and the slice.
    recovered_base, recovered = parse_url_slice(tagged)
    assert recovered_base == plain
    assert recovered is not None
    assert recovered.start_time == 10.0
    assert recovered.end_time == 30.0

    # Adding to an already-sliced URL replaces the old slice.
    second = Slice(20.0, 40.0)
    retagged = add_slice_to_url(tagged, second)
    assert retagged == "https://example.com/video.mp4#~slice=20-40"

    # An unrelated fragment is dropped in favor of the slice.
    chapter_url = Url("https://example.com/video.mp4#chapter1")
    tagged_chapter = add_slice_to_url(chapter_url, first)
    assert tagged_chapter == "https://example.com/video.mp4#~slice=10-30"
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def test_parse_url_slice():
    """Call parse_url_slice directly on URLs that do and do not carry a slice."""

    # URLs whose fragment is a well-formed slice: (url, start, end).
    with_slice = [
        ("https://example.com/video.mp4#~slice=10-30", 10.0, 30.0),
        ("https://example.com/video.mp4#~slice=1:30-2:45", 90.0, 165.0),
    ]
    for raw, want_start, want_end in with_slice:
        found = parse_url_slice(Url(raw))[1]
        assert found is not None
        assert found.start_time == want_start
        assert found.end_time == want_end

    # URLs that must yield no slice at all.
    without_slice = [
        "https://example.com/video.mp4",  # No fragment
        "https://example.com/video.mp4#chapter1",  # Fragment but no slice
        "https://example.com/video.mp4#~slice=",  # Empty slice
        "https://example.com/video.mp4#~slice=invalid",  # Invalid format
        "https://example.com/video.mp4#~slice=10",  # Missing end time
        "https://example.com/video.mp4#~slic=10-30",  # Wrong marker
    ]
    for raw in without_slice:
        found = parse_url_slice(Url(raw))[1]
        assert found is None
|
|
@@ -16,7 +16,7 @@ def is_fullpage_html(content: str) -> bool:
|
|
|
16
16
|
A full HTML document that is a full page (headers, footers, etc.) and
|
|
17
17
|
so probably best rendered in a browser.
|
|
18
18
|
"""
|
|
19
|
-
return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content
|
|
19
|
+
return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content, re.IGNORECASE))
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
_yaml_header_pattern = re.compile(r"^---\n\w+:", re.MULTILINE)
|