kash-shell 0.3.17__py3-none-any.whl → 0.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/minify_html.py +41 -0
- kash/commands/base/show_command.py +11 -1
- kash/config/colors.py +6 -2
- kash/docs/markdown/topics/a1_what_is_kash.md +52 -23
- kash/docs/markdown/topics/a2_installation.md +17 -30
- kash/docs/markdown/topics/a3_getting_started.md +5 -19
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_metadata.py +9 -0
- kash/exec/precondition_registry.py +3 -3
- kash/file_storage/file_store.py +18 -1
- kash/llm_utils/llm_features.py +5 -1
- kash/llm_utils/llms.py +18 -8
- kash/media_base/media_cache.py +48 -24
- kash/media_base/media_services.py +63 -14
- kash/media_base/services/local_file_media.py +9 -1
- kash/model/items_model.py +4 -5
- kash/model/media_model.py +9 -1
- kash/model/params_model.py +9 -3
- kash/utils/common/function_inspect.py +97 -1
- kash/utils/common/testing.py +58 -0
- kash/utils/common/url_slice.py +329 -0
- kash/utils/file_utils/file_formats.py +1 -1
- kash/utils/text_handling/markdown_utils.py +424 -16
- kash/web_gen/templates/base_styles.css.jinja +137 -15
- kash/web_gen/templates/base_webpage.html.jinja +13 -17
- kash/web_gen/templates/components/toc_scripts.js.jinja +319 -0
- kash/web_gen/templates/components/toc_styles.css.jinja +284 -0
- kash/web_gen/templates/components/tooltip_scripts.js.jinja +730 -0
- kash/web_gen/templates/components/tooltip_styles.css.jinja +482 -0
- kash/web_gen/templates/content_styles.css.jinja +13 -8
- kash/web_gen/templates/simple_webpage.html.jinja +15 -481
- kash/workspaces/workspaces.py +10 -1
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.18.dist-info}/METADATA +75 -72
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.18.dist-info}/RECORD +37 -30
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.18.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.18.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.17.dist-info → kash_shell-0.3.18.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
from textwrap import dedent
|
|
3
4
|
from typing import Any, TypeAlias
|
|
4
5
|
|
|
@@ -72,21 +73,40 @@ def _tree_links(element, include_internal=False):
|
|
|
72
73
|
return links
|
|
73
74
|
|
|
74
75
|
|
|
75
|
-
def extract_links(
|
|
76
|
+
def extract_links(content: str, include_internal=False) -> list[str]:
|
|
77
|
+
"""
|
|
78
|
+
Extract all links from Markdown content.
|
|
79
|
+
|
|
80
|
+
Raises:
|
|
81
|
+
marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
|
|
82
|
+
"""
|
|
83
|
+
document = marko.parse(content)
|
|
84
|
+
return _tree_links(document, include_internal)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_file_links(file_path: Path, include_internal=False) -> list[str]:
|
|
76
88
|
"""
|
|
77
89
|
Extract all links from a Markdown file. Future: Include textual and section context.
|
|
90
|
+
|
|
91
|
+
Returns an empty list if there are parsing errors.
|
|
78
92
|
"""
|
|
93
|
+
import logging
|
|
79
94
|
|
|
80
|
-
|
|
81
|
-
content =
|
|
82
|
-
|
|
83
|
-
|
|
95
|
+
try:
|
|
96
|
+
content = file_path.read_text()
|
|
97
|
+
return extract_links(content, include_internal)
|
|
98
|
+
except Exception as e:
|
|
99
|
+
logging.warning(f"Failed to extract links from {file_path}: {e}")
|
|
100
|
+
return []
|
|
84
101
|
|
|
85
102
|
|
|
86
103
|
def extract_first_header(content: str) -> str | None:
|
|
87
104
|
"""
|
|
88
105
|
Extract the first header from markdown content if present.
|
|
89
106
|
Also drops any formatting, so the result can be used as a document title.
|
|
107
|
+
|
|
108
|
+
Raises:
|
|
109
|
+
marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
|
|
90
110
|
"""
|
|
91
111
|
document = marko.parse(content)
|
|
92
112
|
|
|
@@ -105,27 +125,77 @@ def _extract_text(element: Any) -> str:
|
|
|
105
125
|
return ""
|
|
106
126
|
|
|
107
127
|
|
|
108
|
-
def
|
|
128
|
+
def _extract_list_item_markdown(element: Any) -> str:
|
|
129
|
+
"""
|
|
130
|
+
Extract markdown from a list item, preserving all formatting.
|
|
131
|
+
"""
|
|
132
|
+
from marko.block import BlankLine, List, Paragraph
|
|
133
|
+
from marko.inline import CodeSpan, Emphasis, Link, StrongEmphasis
|
|
134
|
+
|
|
135
|
+
if isinstance(element, str):
|
|
136
|
+
return element
|
|
137
|
+
elif isinstance(element, List):
|
|
138
|
+
# Skip nested lists
|
|
139
|
+
return ""
|
|
140
|
+
elif isinstance(element, BlankLine):
|
|
141
|
+
# Preserve paragraph breaks
|
|
142
|
+
return "\n\n"
|
|
143
|
+
elif isinstance(element, Paragraph):
|
|
144
|
+
# Extract content from paragraph
|
|
145
|
+
return "".join(_extract_list_item_markdown(child) for child in element.children)
|
|
146
|
+
elif isinstance(element, CodeSpan):
|
|
147
|
+
return f"`{''.join(_extract_list_item_markdown(child) for child in element.children)}`"
|
|
148
|
+
elif isinstance(element, Emphasis):
|
|
149
|
+
return f"*{''.join(_extract_list_item_markdown(child) for child in element.children)}*"
|
|
150
|
+
elif isinstance(element, StrongEmphasis):
|
|
151
|
+
return f"**{''.join(_extract_list_item_markdown(child) for child in element.children)}**"
|
|
152
|
+
elif isinstance(element, Link):
|
|
153
|
+
text = "".join(_extract_list_item_markdown(child) for child in element.children)
|
|
154
|
+
return f"[{text}]({element.dest})"
|
|
155
|
+
elif hasattr(element, "children"):
|
|
156
|
+
return "".join(_extract_list_item_markdown(child) for child in element.children)
|
|
157
|
+
else:
|
|
158
|
+
return ""
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def extract_bullet_points(content: str, *, strict: bool = False) -> list[str]:
|
|
162
|
+
"""
|
|
163
|
+
Extract list item values from a Markdown file, preserving all original formatting.
|
|
164
|
+
|
|
165
|
+
If no bullet points are found and `strict` is False, returns the entire content
|
|
166
|
+
as a single item (treating plain text as if it were the first bullet point).
|
|
167
|
+
If `strict` is True, only actual list items are returned.
|
|
168
|
+
|
|
169
|
+
Raises:
|
|
170
|
+
ValueError: If `strict` is True and no bullet points are found.
|
|
171
|
+
marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
|
|
172
|
+
"""
|
|
173
|
+
document = marko.parse(content)
|
|
109
174
|
bullet_points: list[str] = []
|
|
110
175
|
|
|
111
176
|
def _find_bullet_points(element):
|
|
112
177
|
if isinstance(element, ListItem):
|
|
113
|
-
|
|
178
|
+
# Extract markdown from this list item, preserving formatting
|
|
179
|
+
bullet_points.append(_extract_list_item_markdown(element).strip())
|
|
180
|
+
# Then recursively process any nested lists within this item
|
|
181
|
+
if hasattr(element, "children"):
|
|
182
|
+
for child in element.children:
|
|
183
|
+
_find_bullet_points(child)
|
|
114
184
|
elif hasattr(element, "children"):
|
|
115
185
|
for child in element.children:
|
|
116
186
|
_find_bullet_points(child)
|
|
117
187
|
|
|
118
|
-
_find_bullet_points(
|
|
119
|
-
return bullet_points
|
|
120
|
-
|
|
188
|
+
_find_bullet_points(document)
|
|
121
189
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
190
|
+
# If no bullet points found
|
|
191
|
+
if not bullet_points:
|
|
192
|
+
if strict:
|
|
193
|
+
raise ValueError("No bullet points found in content")
|
|
194
|
+
elif content.strip():
|
|
195
|
+
# Not strict mode, treat as plain text
|
|
196
|
+
return [content.strip()]
|
|
126
197
|
|
|
127
|
-
|
|
128
|
-
return _tree_bullet_points(document)
|
|
198
|
+
return bullet_points
|
|
129
199
|
|
|
130
200
|
|
|
131
201
|
def _type_from_heading(heading: Heading) -> HTag:
|
|
@@ -180,6 +250,10 @@ def extract_headings(text: str) -> list[tuple[HTag, str]]:
|
|
|
180
250
|
Returns a list of (tag, text) tuples:
|
|
181
251
|
[("h1", "Main Title"), ("h2", "Subtitle")]
|
|
182
252
|
where `#` corresponds to `h1`, `##` to `h2`, etc.
|
|
253
|
+
|
|
254
|
+
Raises:
|
|
255
|
+
marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
|
|
256
|
+
ValueError: If a heading with an unsupported level is encountered.
|
|
183
257
|
"""
|
|
184
258
|
document = marko.parse(text)
|
|
185
259
|
headings_list: list[tuple[HTag, str]] = []
|
|
@@ -202,6 +276,10 @@ def extract_headings(text: str) -> list[tuple[HTag, str]]:
|
|
|
202
276
|
def first_heading(text: str, *, allowed_tags: tuple[HTag, ...] = ("h1", "h2")) -> str | None:
|
|
203
277
|
"""
|
|
204
278
|
Find the text of the first heading. Returns first h1 if present, otherwise first h2, etc.
|
|
279
|
+
|
|
280
|
+
Raises:
|
|
281
|
+
marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
|
|
282
|
+
ValueError: If a heading with an unsupported level is encountered.
|
|
205
283
|
"""
|
|
206
284
|
headings = extract_headings(text)
|
|
207
285
|
for goal_tag in allowed_tags:
|
|
@@ -302,3 +380,333 @@ def test_extract_headings_and_first_header() -> None:
|
|
|
302
380
|
formatted_header_md = "## *Formatted* _Header_ [link](#anchor)"
|
|
303
381
|
assert extract_headings(formatted_header_md) == [("h2", "Formatted Header link")]
|
|
304
382
|
assert first_heading(formatted_header_md, allowed_tags=("h2",)) == "Formatted Header link"
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def test_extract_bullet_points() -> None:
|
|
386
|
+
# Empty content
|
|
387
|
+
assert extract_bullet_points("") == []
|
|
388
|
+
|
|
389
|
+
# No lists (strict mode)
|
|
390
|
+
try:
|
|
391
|
+
extract_bullet_points("Just some text without lists.", strict=True)
|
|
392
|
+
raise AssertionError("Expected ValueError for strict mode with no bullet points")
|
|
393
|
+
except ValueError as e:
|
|
394
|
+
assert "No bullet points found" in str(e)
|
|
395
|
+
# No lists (non-strict mode - should return as single item)
|
|
396
|
+
assert extract_bullet_points("Just some text without lists.") == [
|
|
397
|
+
"Just some text without lists."
|
|
398
|
+
]
|
|
399
|
+
|
|
400
|
+
# Simple unordered list
|
|
401
|
+
content = dedent("""
|
|
402
|
+
- First item
|
|
403
|
+
- Second item
|
|
404
|
+
- Third item
|
|
405
|
+
""")
|
|
406
|
+
expected = ["First item", "Second item", "Third item"]
|
|
407
|
+
assert extract_bullet_points(content) == expected
|
|
408
|
+
|
|
409
|
+
# Simple ordered list
|
|
410
|
+
content = dedent("""
|
|
411
|
+
1. First item
|
|
412
|
+
2. Second item
|
|
413
|
+
3. Third item
|
|
414
|
+
""")
|
|
415
|
+
expected = ["First item", "Second item", "Third item"]
|
|
416
|
+
assert extract_bullet_points(content) == expected
|
|
417
|
+
|
|
418
|
+
# Mixed list types (asterisk and dash)
|
|
419
|
+
content = dedent("""
|
|
420
|
+
* Item with asterisk
|
|
421
|
+
- Item with dash
|
|
422
|
+
+ Item with plus
|
|
423
|
+
""")
|
|
424
|
+
expected = ["Item with asterisk", "Item with dash", "Item with plus"]
|
|
425
|
+
assert extract_bullet_points(content) == expected
|
|
426
|
+
|
|
427
|
+
# List items with formatting
|
|
428
|
+
content = dedent("""
|
|
429
|
+
- **Bold item**
|
|
430
|
+
- *Italic item*
|
|
431
|
+
- `Code item`
|
|
432
|
+
- [Link item](http://example.com)
|
|
433
|
+
- Item with _multiple_ **formats** and `code`
|
|
434
|
+
""")
|
|
435
|
+
expected = [
|
|
436
|
+
"**Bold item**",
|
|
437
|
+
"*Italic item*",
|
|
438
|
+
"`Code item`",
|
|
439
|
+
"[Link item](http://example.com)",
|
|
440
|
+
"Item with *multiple* **formats** and `code`",
|
|
441
|
+
]
|
|
442
|
+
assert extract_bullet_points(content) == expected
|
|
443
|
+
|
|
444
|
+
# Nested lists
|
|
445
|
+
content = dedent("""
|
|
446
|
+
- Top level item 1
|
|
447
|
+
- Nested item 1.1
|
|
448
|
+
- Nested item 1.2
|
|
449
|
+
- Top level item 2
|
|
450
|
+
1. Nested ordered 2.1
|
|
451
|
+
2. Nested ordered 2.2
|
|
452
|
+
""")
|
|
453
|
+
expected = [
|
|
454
|
+
"Top level item 1",
|
|
455
|
+
"Nested item 1.1",
|
|
456
|
+
"Nested item 1.2",
|
|
457
|
+
"Top level item 2",
|
|
458
|
+
"Nested ordered 2.1",
|
|
459
|
+
"Nested ordered 2.2",
|
|
460
|
+
]
|
|
461
|
+
assert extract_bullet_points(content) == expected
|
|
462
|
+
|
|
463
|
+
# Multi-line list items
|
|
464
|
+
content = dedent("""
|
|
465
|
+
- First item that spans
|
|
466
|
+
multiple lines with content
|
|
467
|
+
- Second item
|
|
468
|
+
that also spans multiple
|
|
469
|
+
lines
|
|
470
|
+
""")
|
|
471
|
+
expected = [
|
|
472
|
+
"First item that spans\nmultiple lines with content",
|
|
473
|
+
"Second item\nthat also spans multiple\nlines",
|
|
474
|
+
]
|
|
475
|
+
assert extract_bullet_points(content) == expected
|
|
476
|
+
|
|
477
|
+
# Lists mixed with other content
|
|
478
|
+
content = dedent("""
|
|
479
|
+
# Header
|
|
480
|
+
|
|
481
|
+
Some text before the list.
|
|
482
|
+
|
|
483
|
+
- First item
|
|
484
|
+
- Second item
|
|
485
|
+
|
|
486
|
+
More text after the list.
|
|
487
|
+
|
|
488
|
+
1. Another list item
|
|
489
|
+
2. Final item
|
|
490
|
+
|
|
491
|
+
Conclusion text.
|
|
492
|
+
""")
|
|
493
|
+
expected = ["First item", "Second item", "Another list item", "Final item"]
|
|
494
|
+
assert extract_bullet_points(content) == expected
|
|
495
|
+
|
|
496
|
+
# List items with complex content
|
|
497
|
+
content = dedent("""
|
|
498
|
+
- Item with **bold** and *italic* and `inline code`
|
|
499
|
+
- Item with [external link](https://example.com) and [internal link](#section)
|
|
500
|
+
- Item with line breaks
|
|
501
|
+
and continued text
|
|
502
|
+
""")
|
|
503
|
+
expected = [
|
|
504
|
+
"Item with **bold** and *italic* and `inline code`",
|
|
505
|
+
"Item with [external link](https://example.com) and [internal link](#section)",
|
|
506
|
+
"Item with line breaks\nand continued text",
|
|
507
|
+
]
|
|
508
|
+
assert extract_bullet_points(content) == expected
|
|
509
|
+
|
|
510
|
+
# Edge case: empty list items
|
|
511
|
+
content = dedent("""
|
|
512
|
+
-
|
|
513
|
+
- Non-empty item
|
|
514
|
+
-
|
|
515
|
+
""")
|
|
516
|
+
expected = ["", "Non-empty item", ""]
|
|
517
|
+
assert extract_bullet_points(content) == expected
|
|
518
|
+
|
|
519
|
+
# Plain text handling (default behavior - not strict)
|
|
520
|
+
plain_text = "This is just plain text without any lists."
|
|
521
|
+
expected = ["This is just plain text without any lists."]
|
|
522
|
+
assert extract_bullet_points(plain_text) == expected
|
|
523
|
+
assert extract_bullet_points(plain_text, strict=False) == expected
|
|
524
|
+
|
|
525
|
+
# Plain text handling (strict mode)
|
|
526
|
+
try:
|
|
527
|
+
extract_bullet_points(plain_text, strict=True)
|
|
528
|
+
raise AssertionError("Expected ValueError for strict mode with no bullet points")
|
|
529
|
+
except ValueError as e:
|
|
530
|
+
assert "No bullet points found" in str(e)
|
|
531
|
+
|
|
532
|
+
# Multi-line plain text handling
|
|
533
|
+
multiline_plain = dedent("""
|
|
534
|
+
This is a paragraph
|
|
535
|
+
with multiple lines
|
|
536
|
+
and no bullets.""").strip()
|
|
537
|
+
expected_multiline = ["This is a paragraph\nwith multiple lines\nand no bullets."]
|
|
538
|
+
assert extract_bullet_points(multiline_plain) == expected_multiline
|
|
539
|
+
try:
|
|
540
|
+
extract_bullet_points(multiline_plain, strict=True)
|
|
541
|
+
raise AssertionError("Expected ValueError for strict mode with no bullet points")
|
|
542
|
+
except ValueError as e:
|
|
543
|
+
assert "No bullet points found" in str(e)
|
|
544
|
+
|
|
545
|
+
# Mixed content with no lists in strict mode
|
|
546
|
+
mixed_no_lists = dedent("""
|
|
547
|
+
# Header
|
|
548
|
+
Some text here.
|
|
549
|
+
**Bold text** and *italic*.
|
|
550
|
+
""")
|
|
551
|
+
try:
|
|
552
|
+
extract_bullet_points(mixed_no_lists, strict=True)
|
|
553
|
+
raise AssertionError("Expected ValueError for strict mode with no bullet points")
|
|
554
|
+
except ValueError as e:
|
|
555
|
+
assert "No bullet points found" in str(e)
|
|
556
|
+
# Non-strict should return the content as single item
|
|
557
|
+
assert len(extract_bullet_points(mixed_no_lists, strict=False)) == 1
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def test_extract_bullet_points_key_scenarios() -> None:
|
|
561
|
+
"""Test key scenarios: plain text, multi-paragraph lists, and links in bullet text."""
|
|
562
|
+
|
|
563
|
+
# Plain text handling (the fundamental case)
|
|
564
|
+
plain_text = "This is just plain text without any markdown formatting."
|
|
565
|
+
assert extract_bullet_points(plain_text) == [plain_text]
|
|
566
|
+
|
|
567
|
+
# Multi-paragraph plain text
|
|
568
|
+
multiline_plain = dedent("""
|
|
569
|
+
This is a paragraph
|
|
570
|
+
with multiple lines
|
|
571
|
+
and no bullets at all.""").strip()
|
|
572
|
+
assert extract_bullet_points(multiline_plain) == [multiline_plain]
|
|
573
|
+
|
|
574
|
+
# Multi-paragraph bulleted lists with complex formatting
|
|
575
|
+
multi_paragraph_content = dedent("""
|
|
576
|
+
- First bullet point with **bold text** and a [link](https://example.com)
|
|
577
|
+
|
|
578
|
+
This is a continuation paragraph within the same bullet point.
|
|
579
|
+
It spans multiple lines and includes *italic text*.
|
|
580
|
+
|
|
581
|
+
- Second bullet point with `inline code` and another [internal link](#section)
|
|
582
|
+
|
|
583
|
+
Another paragraph here with more content.
|
|
584
|
+
Including **bold** and *italic* formatting.
|
|
585
|
+
|
|
586
|
+
- Third simple bullet
|
|
587
|
+
""")
|
|
588
|
+
expected_multi = [
|
|
589
|
+
"First bullet point with **bold text** and a [link](https://example.com)\n\nThis is a continuation paragraph within the same bullet point.\nIt spans multiple lines and includes *italic text*.",
|
|
590
|
+
"Second bullet point with `inline code` and another [internal link](#section)\n\nAnother paragraph here with more content.\nIncluding **bold** and *italic* formatting.",
|
|
591
|
+
"Third simple bullet",
|
|
592
|
+
]
|
|
593
|
+
result_multi = extract_bullet_points(multi_paragraph_content)
|
|
594
|
+
assert result_multi == expected_multi
|
|
595
|
+
|
|
596
|
+
# Links inside bullet text (various types)
|
|
597
|
+
links_content = dedent("""
|
|
598
|
+
- Check out [this external link](https://google.com) for more info
|
|
599
|
+
- Visit [our docs](https://docs.example.com/api) and [FAQ](https://example.com/faq)
|
|
600
|
+
- Internal reference: [see section below](#implementation)
|
|
601
|
+
- Mixed: [external](https://test.com) and [internal](#ref) in one bullet
|
|
602
|
+
- Email link: [contact us](mailto:test@example.com)
|
|
603
|
+
- Link with **bold text**: [**Important Link**](https://critical.com)
|
|
604
|
+
""")
|
|
605
|
+
expected_links = [
|
|
606
|
+
"Check out [this external link](https://google.com) for more info",
|
|
607
|
+
"Visit [our docs](https://docs.example.com/api) and [FAQ](https://example.com/faq)",
|
|
608
|
+
"Internal reference: [see section below](#implementation)",
|
|
609
|
+
"Mixed: [external](https://test.com) and [internal](#ref) in one bullet",
|
|
610
|
+
"Email link: [contact us](mailto:test@example.com)",
|
|
611
|
+
"Link with **bold text**: [**Important Link**](https://critical.com)",
|
|
612
|
+
]
|
|
613
|
+
result_links = extract_bullet_points(links_content)
|
|
614
|
+
assert result_links == expected_links
|
|
615
|
+
|
|
616
|
+
# Complex formatting combinations
|
|
617
|
+
complex_content = dedent("""
|
|
618
|
+
- **Bold** start with [link](https://example.com) and `code` end
|
|
619
|
+
- *Italic* with `inline code` and [another link](https://test.com) here
|
|
620
|
+
- Mixed: **bold _nested italic_** and `code with [link inside](https://nested.com)`
|
|
621
|
+
""")
|
|
622
|
+
expected_complex = [
|
|
623
|
+
"**Bold** start with [link](https://example.com) and `code` end",
|
|
624
|
+
"*Italic* with `inline code` and [another link](https://test.com) here",
|
|
625
|
+
"Mixed: **bold *nested italic*** and `code with [link inside](https://nested.com)`",
|
|
626
|
+
]
|
|
627
|
+
result_complex = extract_bullet_points(complex_content)
|
|
628
|
+
assert result_complex == expected_complex
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def test_markdown_structure_parsing() -> None:
|
|
632
|
+
"""Test that demonstrates how markdown structure is parsed and preserved."""
|
|
633
|
+
|
|
634
|
+
# Test markdown structure preservation in list items
|
|
635
|
+
content = dedent("""
|
|
636
|
+
- First bullet with **bold text**
|
|
637
|
+
|
|
638
|
+
This is a continuation paragraph with *italic text*.
|
|
639
|
+
It spans multiple lines.
|
|
640
|
+
|
|
641
|
+
Another paragraph in the same list item.
|
|
642
|
+
|
|
643
|
+
- Second bullet with `code` and [link](https://example.com)
|
|
644
|
+
""")
|
|
645
|
+
|
|
646
|
+
result = extract_bullet_points(content)
|
|
647
|
+
|
|
648
|
+
# Verify we get exactly 2 bullet points
|
|
649
|
+
assert len(result) == 2
|
|
650
|
+
|
|
651
|
+
# Verify first bullet preserves all formatting and paragraph structure
|
|
652
|
+
expected_first = "First bullet with **bold text**\n\nThis is a continuation paragraph with *italic text*.\nIt spans multiple lines.\n\nAnother paragraph in the same list item."
|
|
653
|
+
assert result[0] == expected_first
|
|
654
|
+
|
|
655
|
+
# Verify second bullet preserves formatting
|
|
656
|
+
expected_second = "Second bullet with `code` and [link](https://example.com)"
|
|
657
|
+
assert result[1] == expected_second
|
|
658
|
+
|
|
659
|
+
# Test nested formatting combinations
|
|
660
|
+
nested_content = dedent("""
|
|
661
|
+
- Item with **bold containing *italic* text** and `code`
|
|
662
|
+
- Link with formatting: [**Bold Link Text**](https://example.com)
|
|
663
|
+
- Code with special chars: `function(param="value")`
|
|
664
|
+
""")
|
|
665
|
+
|
|
666
|
+
nested_result = extract_bullet_points(nested_content)
|
|
667
|
+
assert len(nested_result) == 3
|
|
668
|
+
assert nested_result[0] == "Item with **bold containing *italic* text** and `code`"
|
|
669
|
+
assert nested_result[1] == "Link with formatting: [**Bold Link Text**](https://example.com)"
|
|
670
|
+
assert nested_result[2] == 'Code with special chars: `function(param="value")`'
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def test_markdown_utils_exceptions() -> None:
|
|
674
|
+
"""Test exception handling for markdown utility functions."""
|
|
675
|
+
import tempfile
|
|
676
|
+
|
|
677
|
+
# Test extract_file_links with non-existent file
|
|
678
|
+
result = extract_file_links(Path("/non/existent/file.md"))
|
|
679
|
+
assert result == [] # Should return empty list for any error
|
|
680
|
+
|
|
681
|
+
# Test extract_file_links with empty file (should work fine)
|
|
682
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as tmp:
|
|
683
|
+
tmp.write("")
|
|
684
|
+
tmp_path = Path(tmp.name)
|
|
685
|
+
|
|
686
|
+
try:
|
|
687
|
+
result = extract_file_links(tmp_path)
|
|
688
|
+
assert result == [] # Empty file has no links
|
|
689
|
+
finally:
|
|
690
|
+
tmp_path.unlink()
|
|
691
|
+
|
|
692
|
+
# Test with invalid markdown formatting (markdown is very permissive)
|
|
693
|
+
with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as tmp:
|
|
694
|
+
tmp.write("[incomplete link\n# Header\n- List item")
|
|
695
|
+
tmp_path = Path(tmp.name)
|
|
696
|
+
|
|
697
|
+
try:
|
|
698
|
+
result = extract_file_links(tmp_path)
|
|
699
|
+
# Should still work - marko is very permissive with markdown
|
|
700
|
+
assert isinstance(result, list)
|
|
701
|
+
finally:
|
|
702
|
+
tmp_path.unlink()
|
|
703
|
+
|
|
704
|
+
# Test extract_links with string content
|
|
705
|
+
content = "Check out [this link](https://example.com) and [internal](#section)"
|
|
706
|
+
result = extract_links(content)
|
|
707
|
+
assert "https://example.com" in result
|
|
708
|
+
assert "#section" not in result # Internal links excluded by default
|
|
709
|
+
|
|
710
|
+
result_with_internal = extract_links(content, include_internal=True)
|
|
711
|
+
assert "https://example.com" in result_with_internal
|
|
712
|
+
assert "#section" in result_with_internal
|