kash-shell 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. kash/actions/core/minify_html.py +41 -0
  2. kash/commands/base/files_command.py +2 -2
  3. kash/commands/base/show_command.py +11 -1
  4. kash/config/colors.py +20 -8
  5. kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
  6. kash/docs/markdown/topics/a2_installation.md +17 -30
  7. kash/docs/markdown/topics/a3_getting_started.md +5 -19
  8. kash/exec/action_exec.py +1 -1
  9. kash/exec/fetch_url_metadata.py +9 -0
  10. kash/exec/precondition_registry.py +3 -3
  11. kash/file_storage/file_store.py +18 -1
  12. kash/llm_utils/llm_features.py +5 -1
  13. kash/llm_utils/llms.py +18 -8
  14. kash/media_base/media_cache.py +48 -24
  15. kash/media_base/media_services.py +63 -14
  16. kash/media_base/services/local_file_media.py +9 -1
  17. kash/model/actions_model.py +2 -2
  18. kash/model/items_model.py +4 -5
  19. kash/model/media_model.py +9 -1
  20. kash/model/params_model.py +9 -3
  21. kash/utils/common/function_inspect.py +97 -1
  22. kash/utils/common/testing.py +58 -0
  23. kash/utils/common/url_slice.py +329 -0
  24. kash/utils/file_utils/file_formats.py +1 -1
  25. kash/utils/text_handling/markdown_utils.py +424 -16
  26. kash/web_gen/templates/base_styles.css.jinja +204 -25
  27. kash/web_gen/templates/base_webpage.html.jinja +48 -26
  28. kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
  29. kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
  30. kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
  31. kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
  32. kash/web_gen/templates/content_styles.css.jinja +13 -8
  33. kash/web_gen/templates/simple_webpage.html.jinja +59 -21
  34. kash/web_gen/templates/tabbed_webpage.html.jinja +4 -2
  35. kash/workspaces/workspaces.py +10 -1
  36. {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/METADATA +75 -72
  37. {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/RECORD +40 -33
  38. {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/WHEEL +0 -0
  39. {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/entry_points.txt +0 -0
  40. {kash_shell-0.3.16.dist-info → kash_shell-0.3.18.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,5 @@
1
1
  import re
2
+ from pathlib import Path
2
3
  from textwrap import dedent
3
4
  from typing import Any, TypeAlias
4
5
 
@@ -72,21 +73,40 @@ def _tree_links(element, include_internal=False):
72
73
  return links
73
74
 
74
75
 
75
- def extract_links(file_path: str, include_internal=False) -> list[str]:
76
+ def extract_links(content: str, include_internal=False) -> list[str]:
77
+ """
78
+ Extract all links from Markdown content.
79
+
80
+ Raises:
81
+ marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
82
+ """
83
+ document = marko.parse(content)
84
+ return _tree_links(document, include_internal)
85
+
86
+
87
+ def extract_file_links(file_path: Path, include_internal=False) -> list[str]:
76
88
  """
77
89
  Extract all links from a Markdown file. Future: Include textual and section context.
90
+
91
+ Returns an empty list if there are parsing errors.
78
92
  """
93
+ import logging
79
94
 
80
- with open(file_path) as file:
81
- content = file.read()
82
- document = marko.parse(content)
83
- return _tree_links(document, include_internal)
95
+ try:
96
+ content = file_path.read_text()
97
+ return extract_links(content, include_internal)
98
+ except Exception as e:
99
+ logging.warning(f"Failed to extract links from {file_path}: {e}")
100
+ return []
84
101
 
85
102
 
86
103
  def extract_first_header(content: str) -> str | None:
87
104
  """
88
105
  Extract the first header from markdown content if present.
89
106
  Also drops any formatting, so the result can be used as a document title.
107
+
108
+ Raises:
109
+ marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
90
110
  """
91
111
  document = marko.parse(content)
92
112
 
@@ -105,27 +125,77 @@ def _extract_text(element: Any) -> str:
105
125
  return ""
106
126
 
107
127
 
108
- def _tree_bullet_points(element: marko.block.Document) -> list[str]:
128
+ def _extract_list_item_markdown(element: Any) -> str:
129
+ """
130
+ Extract markdown from a list item, preserving all formatting.
131
+ """
132
+ from marko.block import BlankLine, List, Paragraph
133
+ from marko.inline import CodeSpan, Emphasis, Link, StrongEmphasis
134
+
135
+ if isinstance(element, str):
136
+ return element
137
+ elif isinstance(element, List):
138
+ # Skip nested lists
139
+ return ""
140
+ elif isinstance(element, BlankLine):
141
+ # Preserve paragraph breaks
142
+ return "\n\n"
143
+ elif isinstance(element, Paragraph):
144
+ # Extract content from paragraph
145
+ return "".join(_extract_list_item_markdown(child) for child in element.children)
146
+ elif isinstance(element, CodeSpan):
147
+ return f"`{''.join(_extract_list_item_markdown(child) for child in element.children)}`"
148
+ elif isinstance(element, Emphasis):
149
+ return f"*{''.join(_extract_list_item_markdown(child) for child in element.children)}*"
150
+ elif isinstance(element, StrongEmphasis):
151
+ return f"**{''.join(_extract_list_item_markdown(child) for child in element.children)}**"
152
+ elif isinstance(element, Link):
153
+ text = "".join(_extract_list_item_markdown(child) for child in element.children)
154
+ return f"[{text}]({element.dest})"
155
+ elif hasattr(element, "children"):
156
+ return "".join(_extract_list_item_markdown(child) for child in element.children)
157
+ else:
158
+ return ""
159
+
160
+
161
+ def extract_bullet_points(content: str, *, strict: bool = False) -> list[str]:
162
+ """
163
+ Extract list item values from a Markdown file, preserving all original formatting.
164
+
165
+ If no bullet points are found and `strict` is False, returns the entire content
166
+ as a single item (treating plain text as if it were the first bullet point).
167
+ If `strict` is True, only actual list items are returned.
168
+
169
+ Raises:
170
+ ValueError: If `strict` is True and no bullet points are found.
171
+ marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
172
+ """
173
+ document = marko.parse(content)
109
174
  bullet_points: list[str] = []
110
175
 
111
176
  def _find_bullet_points(element):
112
177
  if isinstance(element, ListItem):
113
- bullet_points.append(_extract_text(element).strip())
178
+ # Extract markdown from this list item, preserving formatting
179
+ bullet_points.append(_extract_list_item_markdown(element).strip())
180
+ # Then recursively process any nested lists within this item
181
+ if hasattr(element, "children"):
182
+ for child in element.children:
183
+ _find_bullet_points(child)
114
184
  elif hasattr(element, "children"):
115
185
  for child in element.children:
116
186
  _find_bullet_points(child)
117
187
 
118
- _find_bullet_points(element)
119
- return bullet_points
120
-
188
+ _find_bullet_points(document)
121
189
 
122
- def extract_bullet_points(content: str) -> list[str]:
123
- """
124
- Extract list item values from a Markdown file.
125
- """
190
+ # If no bullet points found
191
+ if not bullet_points:
192
+ if strict:
193
+ raise ValueError("No bullet points found in content")
194
+ elif content.strip():
195
+ # Not strict mode, treat as plain text
196
+ return [content.strip()]
126
197
 
127
- document = marko.parse(content)
128
- return _tree_bullet_points(document)
198
+ return bullet_points
129
199
 
130
200
 
131
201
  def _type_from_heading(heading: Heading) -> HTag:
@@ -180,6 +250,10 @@ def extract_headings(text: str) -> list[tuple[HTag, str]]:
180
250
  Returns a list of (tag, text) tuples:
181
251
  [("h1", "Main Title"), ("h2", "Subtitle")]
182
252
  where `#` corresponds to `h1`, `##` to `h2`, etc.
253
+
254
+ Raises:
255
+ marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
256
+ ValueError: If a heading with an unsupported level is encountered.
183
257
  """
184
258
  document = marko.parse(text)
185
259
  headings_list: list[tuple[HTag, str]] = []
@@ -202,6 +276,10 @@ def extract_headings(text: str) -> list[tuple[HTag, str]]:
202
276
  def first_heading(text: str, *, allowed_tags: tuple[HTag, ...] = ("h1", "h2")) -> str | None:
203
277
  """
204
278
  Find the text of the first heading. Returns first h1 if present, otherwise first h2, etc.
279
+
280
+ Raises:
281
+ marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
282
+ ValueError: If a heading with an unsupported level is encountered.
205
283
  """
206
284
  headings = extract_headings(text)
207
285
  for goal_tag in allowed_tags:
@@ -302,3 +380,333 @@ def test_extract_headings_and_first_header() -> None:
302
380
  formatted_header_md = "## *Formatted* _Header_ [link](#anchor)"
303
381
  assert extract_headings(formatted_header_md) == [("h2", "Formatted Header link")]
304
382
  assert first_heading(formatted_header_md, allowed_tags=("h2",)) == "Formatted Header link"
383
+
384
+
385
+ def test_extract_bullet_points() -> None:
386
+ # Empty content
387
+ assert extract_bullet_points("") == []
388
+
389
+ # No lists (strict mode)
390
+ try:
391
+ extract_bullet_points("Just some text without lists.", strict=True)
392
+ raise AssertionError("Expected ValueError for strict mode with no bullet points")
393
+ except ValueError as e:
394
+ assert "No bullet points found" in str(e)
395
+ # No lists (non-strict mode - should return as single item)
396
+ assert extract_bullet_points("Just some text without lists.") == [
397
+ "Just some text without lists."
398
+ ]
399
+
400
+ # Simple unordered list
401
+ content = dedent("""
402
+ - First item
403
+ - Second item
404
+ - Third item
405
+ """)
406
+ expected = ["First item", "Second item", "Third item"]
407
+ assert extract_bullet_points(content) == expected
408
+
409
+ # Simple ordered list
410
+ content = dedent("""
411
+ 1. First item
412
+ 2. Second item
413
+ 3. Third item
414
+ """)
415
+ expected = ["First item", "Second item", "Third item"]
416
+ assert extract_bullet_points(content) == expected
417
+
418
+ # Mixed list types (asterisk and dash)
419
+ content = dedent("""
420
+ * Item with asterisk
421
+ - Item with dash
422
+ + Item with plus
423
+ """)
424
+ expected = ["Item with asterisk", "Item with dash", "Item with plus"]
425
+ assert extract_bullet_points(content) == expected
426
+
427
+ # List items with formatting
428
+ content = dedent("""
429
+ - **Bold item**
430
+ - *Italic item*
431
+ - `Code item`
432
+ - [Link item](http://example.com)
433
+ - Item with _multiple_ **formats** and `code`
434
+ """)
435
+ expected = [
436
+ "**Bold item**",
437
+ "*Italic item*",
438
+ "`Code item`",
439
+ "[Link item](http://example.com)",
440
+ "Item with *multiple* **formats** and `code`",
441
+ ]
442
+ assert extract_bullet_points(content) == expected
443
+
444
+ # Nested lists
445
+ content = dedent("""
446
+ - Top level item 1
447
+ - Nested item 1.1
448
+ - Nested item 1.2
449
+ - Top level item 2
450
+ 1. Nested ordered 2.1
451
+ 2. Nested ordered 2.2
452
+ """)
453
+ expected = [
454
+ "Top level item 1",
455
+ "Nested item 1.1",
456
+ "Nested item 1.2",
457
+ "Top level item 2",
458
+ "Nested ordered 2.1",
459
+ "Nested ordered 2.2",
460
+ ]
461
+ assert extract_bullet_points(content) == expected
462
+
463
+ # Multi-line list items
464
+ content = dedent("""
465
+ - First item that spans
466
+ multiple lines with content
467
+ - Second item
468
+ that also spans multiple
469
+ lines
470
+ """)
471
+ expected = [
472
+ "First item that spans\nmultiple lines with content",
473
+ "Second item\nthat also spans multiple\nlines",
474
+ ]
475
+ assert extract_bullet_points(content) == expected
476
+
477
+ # Lists mixed with other content
478
+ content = dedent("""
479
+ # Header
480
+
481
+ Some text before the list.
482
+
483
+ - First item
484
+ - Second item
485
+
486
+ More text after the list.
487
+
488
+ 1. Another list item
489
+ 2. Final item
490
+
491
+ Conclusion text.
492
+ """)
493
+ expected = ["First item", "Second item", "Another list item", "Final item"]
494
+ assert extract_bullet_points(content) == expected
495
+
496
+ # List items with complex content
497
+ content = dedent("""
498
+ - Item with **bold** and *italic* and `inline code`
499
+ - Item with [external link](https://example.com) and [internal link](#section)
500
+ - Item with line breaks
501
+ and continued text
502
+ """)
503
+ expected = [
504
+ "Item with **bold** and *italic* and `inline code`",
505
+ "Item with [external link](https://example.com) and [internal link](#section)",
506
+ "Item with line breaks\nand continued text",
507
+ ]
508
+ assert extract_bullet_points(content) == expected
509
+
510
+ # Edge case: empty list items
511
+ content = dedent("""
512
+ -
513
+ - Non-empty item
514
+ -
515
+ """)
516
+ expected = ["", "Non-empty item", ""]
517
+ assert extract_bullet_points(content) == expected
518
+
519
+ # Plain text handling (default behavior - not strict)
520
+ plain_text = "This is just plain text without any lists."
521
+ expected = ["This is just plain text without any lists."]
522
+ assert extract_bullet_points(plain_text) == expected
523
+ assert extract_bullet_points(plain_text, strict=False) == expected
524
+
525
+ # Plain text handling (strict mode)
526
+ try:
527
+ extract_bullet_points(plain_text, strict=True)
528
+ raise AssertionError("Expected ValueError for strict mode with no bullet points")
529
+ except ValueError as e:
530
+ assert "No bullet points found" in str(e)
531
+
532
+ # Multi-line plain text handling
533
+ multiline_plain = dedent("""
534
+ This is a paragraph
535
+ with multiple lines
536
+ and no bullets.""").strip()
537
+ expected_multiline = ["This is a paragraph\nwith multiple lines\nand no bullets."]
538
+ assert extract_bullet_points(multiline_plain) == expected_multiline
539
+ try:
540
+ extract_bullet_points(multiline_plain, strict=True)
541
+ raise AssertionError("Expected ValueError for strict mode with no bullet points")
542
+ except ValueError as e:
543
+ assert "No bullet points found" in str(e)
544
+
545
+ # Mixed content with no lists in strict mode
546
+ mixed_no_lists = dedent("""
547
+ # Header
548
+ Some text here.
549
+ **Bold text** and *italic*.
550
+ """)
551
+ try:
552
+ extract_bullet_points(mixed_no_lists, strict=True)
553
+ raise AssertionError("Expected ValueError for strict mode with no bullet points")
554
+ except ValueError as e:
555
+ assert "No bullet points found" in str(e)
556
+ # Non-strict should return the content as single item
557
+ assert len(extract_bullet_points(mixed_no_lists, strict=False)) == 1
558
+
559
+
560
+ def test_extract_bullet_points_key_scenarios() -> None:
561
+ """Test key scenarios: plain text, multi-paragraph lists, and links in bullet text."""
562
+
563
+ # Plain text handling (the fundamental case)
564
+ plain_text = "This is just plain text without any markdown formatting."
565
+ assert extract_bullet_points(plain_text) == [plain_text]
566
+
567
+ # Multi-paragraph plain text
568
+ multiline_plain = dedent("""
569
+ This is a paragraph
570
+ with multiple lines
571
+ and no bullets at all.""").strip()
572
+ assert extract_bullet_points(multiline_plain) == [multiline_plain]
573
+
574
+ # Multi-paragraph bulleted lists with complex formatting
575
+ multi_paragraph_content = dedent("""
576
+ - First bullet point with **bold text** and a [link](https://example.com)
577
+
578
+ This is a continuation paragraph within the same bullet point.
579
+ It spans multiple lines and includes *italic text*.
580
+
581
+ - Second bullet point with `inline code` and another [internal link](#section)
582
+
583
+ Another paragraph here with more content.
584
+ Including **bold** and *italic* formatting.
585
+
586
+ - Third simple bullet
587
+ """)
588
+ expected_multi = [
589
+ "First bullet point with **bold text** and a [link](https://example.com)\n\nThis is a continuation paragraph within the same bullet point.\nIt spans multiple lines and includes *italic text*.",
590
+ "Second bullet point with `inline code` and another [internal link](#section)\n\nAnother paragraph here with more content.\nIncluding **bold** and *italic* formatting.",
591
+ "Third simple bullet",
592
+ ]
593
+ result_multi = extract_bullet_points(multi_paragraph_content)
594
+ assert result_multi == expected_multi
595
+
596
+ # Links inside bullet text (various types)
597
+ links_content = dedent("""
598
+ - Check out [this external link](https://google.com) for more info
599
+ - Visit [our docs](https://docs.example.com/api) and [FAQ](https://example.com/faq)
600
+ - Internal reference: [see section below](#implementation)
601
+ - Mixed: [external](https://test.com) and [internal](#ref) in one bullet
602
+ - Email link: [contact us](mailto:test@example.com)
603
+ - Link with **bold text**: [**Important Link**](https://critical.com)
604
+ """)
605
+ expected_links = [
606
+ "Check out [this external link](https://google.com) for more info",
607
+ "Visit [our docs](https://docs.example.com/api) and [FAQ](https://example.com/faq)",
608
+ "Internal reference: [see section below](#implementation)",
609
+ "Mixed: [external](https://test.com) and [internal](#ref) in one bullet",
610
+ "Email link: [contact us](mailto:test@example.com)",
611
+ "Link with **bold text**: [**Important Link**](https://critical.com)",
612
+ ]
613
+ result_links = extract_bullet_points(links_content)
614
+ assert result_links == expected_links
615
+
616
+ # Complex formatting combinations
617
+ complex_content = dedent("""
618
+ - **Bold** start with [link](https://example.com) and `code` end
619
+ - *Italic* with `inline code` and [another link](https://test.com) here
620
+ - Mixed: **bold _nested italic_** and `code with [link inside](https://nested.com)`
621
+ """)
622
+ expected_complex = [
623
+ "**Bold** start with [link](https://example.com) and `code` end",
624
+ "*Italic* with `inline code` and [another link](https://test.com) here",
625
+ "Mixed: **bold *nested italic*** and `code with [link inside](https://nested.com)`",
626
+ ]
627
+ result_complex = extract_bullet_points(complex_content)
628
+ assert result_complex == expected_complex
629
+
630
+
631
+ def test_markdown_structure_parsing() -> None:
632
+ """Test that demonstrates how markdown structure is parsed and preserved."""
633
+
634
+ # Test markdown structure preservation in list items
635
+ content = dedent("""
636
+ - First bullet with **bold text**
637
+
638
+ This is a continuation paragraph with *italic text*.
639
+ It spans multiple lines.
640
+
641
+ Another paragraph in the same list item.
642
+
643
+ - Second bullet with `code` and [link](https://example.com)
644
+ """)
645
+
646
+ result = extract_bullet_points(content)
647
+
648
+ # Verify we get exactly 2 bullet points
649
+ assert len(result) == 2
650
+
651
+ # Verify first bullet preserves all formatting and paragraph structure
652
+ expected_first = "First bullet with **bold text**\n\nThis is a continuation paragraph with *italic text*.\nIt spans multiple lines.\n\nAnother paragraph in the same list item."
653
+ assert result[0] == expected_first
654
+
655
+ # Verify second bullet preserves formatting
656
+ expected_second = "Second bullet with `code` and [link](https://example.com)"
657
+ assert result[1] == expected_second
658
+
659
+ # Test nested formatting combinations
660
+ nested_content = dedent("""
661
+ - Item with **bold containing *italic* text** and `code`
662
+ - Link with formatting: [**Bold Link Text**](https://example.com)
663
+ - Code with special chars: `function(param="value")`
664
+ """)
665
+
666
+ nested_result = extract_bullet_points(nested_content)
667
+ assert len(nested_result) == 3
668
+ assert nested_result[0] == "Item with **bold containing *italic* text** and `code`"
669
+ assert nested_result[1] == "Link with formatting: [**Bold Link Text**](https://example.com)"
670
+ assert nested_result[2] == 'Code with special chars: `function(param="value")`'
671
+
672
+
673
+ def test_markdown_utils_exceptions() -> None:
674
+ """Test exception handling for markdown utility functions."""
675
+ import tempfile
676
+
677
+ # Test extract_file_links with non-existent file
678
+ result = extract_file_links(Path("/non/existent/file.md"))
679
+ assert result == [] # Should return empty list for any error
680
+
681
+ # Test extract_file_links with empty file (should work fine)
682
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as tmp:
683
+ tmp.write("")
684
+ tmp_path = Path(tmp.name)
685
+
686
+ try:
687
+ result = extract_file_links(tmp_path)
688
+ assert result == [] # Empty file has no links
689
+ finally:
690
+ tmp_path.unlink()
691
+
692
+ # Test with invalid markdown formatting (markdown is very permissive)
693
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as tmp:
694
+ tmp.write("[incomplete link\n# Header\n- List item")
695
+ tmp_path = Path(tmp.name)
696
+
697
+ try:
698
+ result = extract_file_links(tmp_path)
699
+ # Should still work - marko is very permissive with markdown
700
+ assert isinstance(result, list)
701
+ finally:
702
+ tmp_path.unlink()
703
+
704
+ # Test extract_links with string content
705
+ content = "Check out [this link](https://example.com) and [internal](#section)"
706
+ result = extract_links(content)
707
+ assert "https://example.com" in result
708
+ assert "#section" not in result # Internal links excluded by default
709
+
710
+ result_with_internal = extract_links(content, include_internal=True)
711
+ assert "https://example.com" in result_with_internal
712
+ assert "#section" in result_with_internal