kash-shell 0.3.17__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. kash/actions/core/{markdownify.py → markdownify_html.py} +3 -6
  2. kash/actions/core/minify_html.py +41 -0
  3. kash/commands/base/show_command.py +11 -1
  4. kash/commands/workspace/workspace_commands.py +10 -88
  5. kash/config/colors.py +6 -2
  6. kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
  7. kash/docs/markdown/topics/a2_installation.md +17 -30
  8. kash/docs/markdown/topics/a3_getting_started.md +5 -19
  9. kash/exec/__init__.py +3 -0
  10. kash/exec/action_exec.py +3 -3
  11. kash/exec/fetch_url_items.py +109 -0
  12. kash/exec/precondition_registry.py +3 -3
  13. kash/file_storage/file_store.py +24 -1
  14. kash/file_storage/store_filenames.py +4 -0
  15. kash/help/function_param_info.py +1 -1
  16. kash/help/help_pages.py +1 -1
  17. kash/help/help_printing.py +1 -1
  18. kash/llm_utils/llm_features.py +5 -1
  19. kash/llm_utils/llms.py +18 -8
  20. kash/media_base/media_cache.py +48 -24
  21. kash/media_base/media_services.py +63 -14
  22. kash/media_base/services/local_file_media.py +9 -1
  23. kash/model/items_model.py +22 -8
  24. kash/model/media_model.py +9 -1
  25. kash/model/params_model.py +9 -3
  26. kash/utils/common/function_inspect.py +97 -1
  27. kash/utils/common/parse_docstring.py +347 -0
  28. kash/utils/common/testing.py +58 -0
  29. kash/utils/common/url_slice.py +329 -0
  30. kash/utils/file_utils/file_formats.py +1 -1
  31. kash/utils/text_handling/markdown_utils.py +424 -16
  32. kash/web_content/web_extract.py +34 -15
  33. kash/web_content/web_page_model.py +10 -1
  34. kash/web_gen/templates/base_styles.css.jinja +137 -15
  35. kash/web_gen/templates/base_webpage.html.jinja +13 -17
  36. kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
  37. kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
  38. kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
  39. kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
  40. kash/web_gen/templates/content_styles.css.jinja +13 -8
  41. kash/web_gen/templates/simple_webpage.html.jinja +15 -481
  42. kash/workspaces/workspaces.py +10 -1
  43. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/METADATA +75 -72
  44. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/RECORD +47 -40
  45. kash/exec/fetch_url_metadata.py +0 -72
  46. kash/help/docstring_utils.py +0 -111
  47. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/WHEEL +0 -0
  48. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/entry_points.txt +0 -0
  49. {kash_shell-0.3.17.dist-info → kash_shell-0.3.20.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,329 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+
6
+ from kash.utils.common.url import Url
7
+
8
+
9
@dataclass(frozen=True)
class Slice:
    """
    A start and end time range, in seconds.

    Both bounds must be finite numbers with `0 <= start_time < end_time`;
    anything else raises `ValueError` at construction time.
    """

    start_time: float
    end_time: float

    def __post_init__(self):
        # A single chained comparison rejects negative, empty, and reversed
        # ranges, and also NaN or infinite bounds: every comparison involving
        # NaN is False, so the chain fails and we raise.
        if not (0 <= self.start_time < self.end_time < float("inf")):
            raise ValueError(
                f"Not a valid time slice: got start={self.start_time}, end={self.end_time}"
            )

    @classmethod
    def parse(cls, slice_str: str) -> Slice:
        """
        Parse a slice string in format SSS-SSS or HH:MM:SS-HH:MM:SS.

        The string is split on the first "-". Both halves are first parsed as
        clock-style times; if that fails, both are parsed as plain float seconds.

        Raises:
            ValueError: If the string has no "-", if neither format parses, or
                if the resulting bounds do not form a valid slice.
        """
        if "-" not in slice_str:
            raise ValueError(f"Not a valid time slice: {slice_str!r}")

        start_str, end_str = slice_str.split("-", 1)

        try:
            # Try to parse as HH:MM:SS format first
            start_time = _parse_time_format(start_str)
            end_time = _parse_time_format(end_str)
        except ValueError:
            # Fall back to seconds format
            try:
                start_time = float(start_str)
                end_time = float(end_str)
            except ValueError:
                # The low-level float() error adds nothing for the caller.
                raise ValueError(f"Not a valid time slice: {slice_str!r}") from None

        # Construction validates the bounds (including NaN/inf from float()).
        return cls(start_time, end_time)

    def __str__(self) -> str:
        return f"{_format_seconds(self.start_time)}-{_format_seconds(self.end_time)}"
50
+
51
+
52
+ def _parse_time_format(time_str: str) -> float:
53
+ """
54
+ Parse time string in HH:MM:SS format and return seconds.
55
+ Supports formats like: HH:MM:SS, MM:SS, or just SS
56
+ """
57
+ # Match HH:MM:SS, MM:SS, or SS format
58
+ time_pattern = r"^(?:(\d{1,2}):)?(?:(\d{1,2}):)?(\d{1,2}(?:\.\d+)?)$"
59
+ match = re.match(time_pattern, time_str.strip())
60
+
61
+ if not match:
62
+ raise ValueError(f"Invalid time format: {time_str}")
63
+
64
+ hours_str, minutes_str, seconds_str = match.groups()
65
+
66
+ # Default values
67
+ hours = int(hours_str) if hours_str else 0
68
+ minutes = int(minutes_str) if minutes_str else 0
69
+ seconds = float(seconds_str) if seconds_str else 0
70
+
71
+ # Handle case where we have MM:SS (2 components)
72
+ if hours_str and not minutes_str:
73
+ # This means we actually have MM:SS, not HH:MM
74
+ minutes = hours
75
+ hours = 0
76
+
77
+ total_seconds = hours * 3600 + minutes * 60 + seconds
78
+ return total_seconds
79
+
80
+
81
+ def _format_seconds(seconds: float) -> str:
82
+ """
83
+ Format seconds as HH:MM:SS, MM:SS, or SS depending on the value.
84
+ Uses the most compact and natural representation.
85
+ """
86
+ total_seconds = int(seconds)
87
+ fractional_part = seconds - total_seconds
88
+
89
+ hours = total_seconds // 3600
90
+ minutes = (total_seconds % 3600) // 60
91
+ secs = total_seconds % 60
92
+
93
+ # Add fractional seconds if present
94
+ if fractional_part > 0:
95
+ secs += fractional_part
96
+
97
+ if hours > 0:
98
+ # HH:MM:SS format
99
+ if fractional_part > 0:
100
+ return f"{hours}:{minutes:02d}:{secs:04.1f}"
101
+ else:
102
+ return f"{hours}:{minutes:02d}:{secs:02d}"
103
+ elif minutes > 0:
104
+ # MM:SS format
105
+ if fractional_part > 0:
106
+ return f"{minutes}:{secs:04.1f}"
107
+ else:
108
+ return f"{minutes}:{secs:02d}"
109
+ else:
110
+ # Just seconds
111
+ if fractional_part > 0:
112
+ return f"{secs:.1f}"
113
+ else:
114
+ return str(int(secs))
115
+
116
+
117
def is_url_slice(url: Url) -> bool:
    """
    Report whether `parse_url_slice` finds a valid slice in the URL.
    """
    _, found = parse_url_slice(url)
    return found is not None
122
+
123
+
124
def parse_url_slice(url: Url) -> tuple[Url, Slice | None]:
    """
    Split a URL into its base part and an optional time slice.

    The slice is expected as a trailing `#~slice=START-END` marker, where
    START-END must be accepted by `Slice.parse`.

    Returns:
        `(base_url, slice)` when a valid slice is present, with `base_url`
        being everything before the marker. Otherwise `(url, None)` with the
        URL returned unchanged (no marker, empty slice text, or invalid slice).
    """
    marker = "#~slice="
    marker_at = url.find(marker)
    if marker_at < 0:
        return url, None

    # Everything after the marker is taken to be the slice specification.
    spec = url[marker_at + len(marker) :]
    if not spec:
        # Marker present but nothing follows it.
        return url, None

    try:
        parsed = Slice.parse(spec)
        stem = Url(url[:marker_at])
    except ValueError:
        # Not a valid slice: treat the whole thing as a regular URL.
        return url, None

    return stem, parsed
154
+
155
+
156
def add_slice_to_url(url: Url, slice: Slice) -> Url:
    """
    Return the URL with its fragment (if any) replaced by `#~slice=<slice>`.
    """
    hash_at = url.find("#")
    # Drop everything from the first "#" onward; an existing fragment or
    # earlier slice is discarded.
    stem = url if hash_at == -1 else Url(url[:hash_at])
    return Url(f"{stem}#~slice={slice}")
165
+
166
+
167
+ ## Tests
168
+
169
+
170
def test_url_slice_functionality():
    """Check Slice construction, validation, and parsing, then run the URL sub-tests."""

    # A well-formed slice keeps its bounds and renders compactly.
    ten_to_twenty = Slice(10.0, 20.0)
    assert ten_to_twenty.start_time == 10.0
    assert ten_to_twenty.end_time == 20.0
    assert str(ten_to_twenty) == "10-20"

    # Reversed, empty, negative-start, and all-zero ranges must be rejected.
    rejected_bounds = [(20.0, 10.0), (10.0, 10.0), (-5.0, 10.0), (0.0, 0.0)]
    for start, end in rejected_bounds:
        try:
            Slice(start, end)
        except ValueError as e:
            assert "Not a valid time slice" in str(e)
        else:
            raise AssertionError(f"Should have raised ValueError for start={start}, end={end}")

    # Parsing accepts plain seconds as well as clock-style times.
    # Each entry: (input_string, expected_start, expected_end)
    parse_cases = (
        ("10.5-25.7", 10.5, 25.7),  # decimal seconds
        ("30-60", 30.0, 60.0),  # integer seconds
        ("1:30-2:45", 90.0, 165.0),  # MM:SS format
        ("01:23:45-02:30:15", 5025.0, 9015.0),  # HH:MM:SS format
        ("00:01:30.5-00:02:45.25", 90.5, 165.25),  # HH:MM:SS with decimals
    )
    for text, want_start, want_end in parse_cases:
        got = Slice.parse(text)
        assert got.start_time == want_start
        assert got.end_time == want_end

    # Malformed strings must raise with the standard message.
    for invalid_input in ("invalid", "10_20", "1:2:3:4-5:6:7"):
        try:
            Slice.parse(invalid_input)
        except ValueError as e:
            assert "Not a valid time slice" in str(e)
        else:
            raise AssertionError(f"Should have raised ValueError for {invalid_input}")

    # Run all sub-tests
    test_url_slice_detection()
    test_url_slice_manipulation()
    test_parse_url_slice()
214
+
215
+
216
def test_url_slice_detection():
    """Check that slices are detected in, and extracted from, URL fragments."""

    plain = Url("https://example.com/video.mp4")
    with_slice = Url("https://example.com/video.mp4#~slice=10-30")
    with_chapter = Url("https://example.com/video.mp4#chapter1")

    # Detection: only the slice-marked URL counts.
    assert not is_url_slice(plain)
    assert is_url_slice(with_slice)
    assert not is_url_slice(with_chapter)

    # Extraction from a URL without a slice returns it untouched.
    stem, found = parse_url_slice(plain)
    assert stem == plain
    assert found is None

    # Extraction from a slice-marked URL yields the base URL and the bounds.
    stem, found = parse_url_slice(with_slice)
    assert stem == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 10.0
    assert found.end_time == 30.0

    # Clock-style bounds are converted to seconds.
    clock_url = Url("https://example.com/video.mp4#~slice=01:30-02:45")
    stem, found = parse_url_slice(clock_url)
    assert stem == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 90.0  # 1:30 in seconds
    assert found.end_time == 165.0  # 2:45 in seconds

    # Slice at the very end of the URL.
    trailing_url = Url("https://example.com/video.mp4#~slice=30-60")
    assert is_url_slice(trailing_url)
    stem, found = parse_url_slice(trailing_url)
    assert stem == "https://example.com/video.mp4"
    assert found is not None
    assert found.start_time == 30.0
    assert found.end_time == 60.0

    # An unparseable slice leaves the URL as-is.
    broken_url = Url("https://example.com/video.mp4#~slice=invalid-format")
    assert not is_url_slice(broken_url)
    stem, found = parse_url_slice(broken_url)
    assert stem == broken_url
    assert found is None

    # A misspelled marker is not recognized.
    misspelled_url = Url("https://example.com/video.mp4#~slic=10-30")
    assert not is_url_slice(misspelled_url)
269
+
270
+
271
def test_url_slice_manipulation():
    """Check that slices can be attached to URLs and read back."""

    video = Url("https://example.com/video.mp4")
    first_cut = Slice(10.0, 30.0)

    # Attaching a slice appends the marker fragment.
    tagged = add_slice_to_url(video, first_cut)
    assert tagged == "https://example.com/video.mp4#~slice=10-30"
    assert is_url_slice(tagged)

    # Round trip: parsing the tagged URL recovers both parts.
    recovered_base, recovered_cut = parse_url_slice(tagged)
    assert recovered_base == video
    assert recovered_cut is not None
    assert recovered_cut.start_time == 10.0
    assert recovered_cut.end_time == 30.0

    # Adding a slice to an already-sliced URL replaces the old one.
    retagged = add_slice_to_url(tagged, Slice(20.0, 40.0))
    assert retagged == "https://example.com/video.mp4#~slice=20-40"

    # Any other existing fragment is dropped in favor of the slice.
    chaptered = Url("https://example.com/video.mp4#chapter1")
    assert add_slice_to_url(chaptered, first_cut) == "https://example.com/video.mp4#~slice=10-30"
299
+
300
+
301
def test_parse_url_slice():
    """Check parse_url_slice against tables of accepted and rejected URLs."""

    # URLs that carry a valid slice, with the expected bounds in seconds.
    accepted = (
        ("https://example.com/video.mp4#~slice=10-30", 10.0, 30.0),
        ("https://example.com/video.mp4#~slice=1:30-2:45", 90.0, 165.0),
    )
    for text, want_start, want_end in accepted:
        _, found = parse_url_slice(Url(text))
        assert found is not None
        assert found.start_time == want_start
        assert found.end_time == want_end

    # URLs for which no slice should be reported.
    rejected = (
        "https://example.com/video.mp4",  # No fragment
        "https://example.com/video.mp4#chapter1",  # Fragment but no slice
        "https://example.com/video.mp4#~slice=",  # Empty slice
        "https://example.com/video.mp4#~slice=invalid",  # Invalid format
        "https://example.com/video.mp4#~slice=10",  # Missing end time
        "https://example.com/video.mp4#~slic=10-30",  # Wrong marker
    )
    for text in rejected:
        _, found = parse_url_slice(Url(text))
        assert found is None
@@ -16,7 +16,7 @@ def is_fullpage_html(content: str) -> bool:
16
16
  A full HTML document that is a full page (headers, footers, etc.) and
17
17
  so probably best rendered in a browser.
18
18
  """
19
- return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content[:2048], re.IGNORECASE))
19
+ return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content, re.IGNORECASE))
20
20
 
21
21
 
22
22
  _yaml_header_pattern = re.compile(r"^---\n\w+:", re.MULTILINE)