kash-shell 0.3.33__py3-none-any.whl → 0.3.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,9 +13,7 @@ from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
13
13
  log = get_logger(__name__)
14
14
 
15
15
 
16
- @kash_action(
17
- precondition=is_url_resource | has_html_body, output_format=Format.markdown, mcp_tool=True
18
- )
16
+ @kash_action(precondition=is_url_resource | has_html_body, output_format=Format.markdown)
19
17
  def markdownify_html(item: Item) -> Item:
20
18
  """
21
19
  Converts raw HTML or the URL of an HTML page to Markdown, fetching with the content
@@ -47,7 +47,7 @@ llm_options = LLMOptions(
47
47
  )
48
48
 
49
49
 
50
- @kash_action(llm_options=llm_options, params=common_params("model"), mcp_tool=True)
50
+ @kash_action(llm_options=llm_options, params=common_params("model"))
51
51
  def summarize_as_bullets(item: Item, model: LLMName = LLM.default_standard) -> Item:
52
52
  """
53
53
  Summarize text as bullet points.
@@ -585,9 +585,9 @@ class Action(ABC):
585
585
  "type": "array",
586
586
  "items": {
587
587
  "type": "string",
588
- "description": "A path to a local file or a URL",
588
+ "description": "A URL or S3 URL or a workspace file path, e.g. https://example.com/some/file/path or s3://somebucket/some/file/path or some/file/path",
589
589
  },
590
- "description": f"Input items ({self.expected_args.as_str()})",
590
+ "description": f"A list of paths or URLs of input items ({self.expected_args.as_str()}). Use an array of length one for a single input.",
591
591
  }
592
592
 
593
593
  # Set min/max items.
kash/model/items_model.py CHANGED
@@ -7,6 +7,7 @@ from datetime import UTC, datetime
7
7
  from enum import Enum
8
8
  from pathlib import Path
9
9
  from typing import TYPE_CHECKING, Any, NotRequired, TypedDict, TypeVar, Unpack
10
+ from urllib.parse import urlparse
10
11
 
11
12
  from frontmatter_format import from_yaml_string, new_yaml
12
13
  from prettyfmt import (
@@ -570,12 +571,19 @@ class Item:
570
571
  from kash.file_storage.store_filenames import parse_item_filename
571
572
 
572
573
  # Prefer original to external, e.g. if we know the original but the external might
573
- # be a cache filename.
574
- path = self.store_path or self.original_filename or self.external_path
574
+ # be a cache filename. Also check the URL path as a last resort.
575
+ path = (
576
+ self.store_path
577
+ or self.original_filename
578
+ or self.external_path
579
+ or (self.url and urlparse(self.url).path)
580
+ or ""
581
+ ).strip()
575
582
  if path:
576
583
  path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
577
584
  else:
578
585
  path_name = None
586
+
579
587
  return path_name
580
588
 
581
589
  def slug_name(
@@ -607,6 +615,7 @@ class Item:
607
615
 
608
616
  slug = self.slug_name()
609
617
  full_suffix = self.get_full_suffix()
618
+
610
619
  return join_suffix(slug, full_suffix)
611
620
 
612
621
  def body_heading(self, allowed_tags: tuple[str, ...] = ("h1", "h2")) -> str | None:
@@ -1,7 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Callable, Iterable, Sequence
4
- from typing import Any, TypeVar
4
+ from dataclasses import dataclass
5
+ from typing import Any, Generic, TypeVar, cast
6
+
7
+ from strif import abbrev_list
5
8
 
6
9
  from kash.config.logger import get_logger
7
10
  from kash.config.settings import global_settings
@@ -14,8 +17,41 @@ T = TypeVar("T")
14
17
  log = get_logger(name=__name__)
15
18
 
16
19
 
20
@dataclass(frozen=True)
class MultitaskResult(Generic[T]):
    """
    Outcome of a `multitask_gather` call, kept in the original order.

    `raw_results` mixes returned values and raised exceptions; the properties
    below give filtered views over it (`.successes`, `.errors`, and the
    position-aligned `.successes_or_none`).
    """

    raw_results: list[T | BaseException]

    @property
    def successes_or_none(self) -> list[T | None]:
        """
        Same length and order as `raw_results`, with every exception replaced by None.
        """
        aligned: list[T | None] = []
        for outcome in self.raw_results:
            if isinstance(outcome, BaseException):
                aligned.append(None)
            else:
                aligned.append(cast(T, outcome))
        return aligned

    @property
    def successes(self) -> list[T]:
        """
        Only the non-exception entries, in order. May be shorter than `raw_results`.
        """
        values: list[T] = []
        for outcome in self.raw_results:
            if isinstance(outcome, BaseException):
                continue
            values.append(cast(T, outcome))
        return values

    @property
    def errors(self) -> list[BaseException]:
        """
        Only the exception entries, in order. May be shorter than `raw_results`.
        """
        failures: list[BaseException] = []
        for outcome in self.raw_results:
            if isinstance(outcome, BaseException):
                failures.append(outcome)
        return failures
51
+
52
+
17
53
  def _default_labeler(total: int) -> Callable[[int, Any], str]:
18
- def labeler(i: int, _spec: Any) -> str: # pyright: ignore[reportUnusedParameter]
54
+ def labeler(i: int, _spec: Any) -> str:
19
55
  return f"Task {i + 1}/{total}"
20
56
 
21
57
  return labeler
@@ -29,7 +65,7 @@ async def multitask_gather(
29
65
  bucket_limits: dict[str, Limit] | None = None,
30
66
  retry_settings: RetrySettings | None = None,
31
67
  show_progress: bool = True,
32
- ) -> list[T]:
68
+ ) -> MultitaskResult[T]:
33
69
  """
34
70
  Run many `FuncTask`s concurrently with shared progress UI and rate limits.
35
71
 
@@ -39,7 +75,8 @@ async def multitask_gather(
39
75
  - `labeler` can be omitted; a simple "Task X/Y" label will be used.
40
76
  - If `limit` is not provided, defaults are taken from `global_settings()`.
41
77
  - If `show_progress` is False, tasks are run without the status context.
42
- - By default, exceptions are returned as results rather than raised (return_exceptions=True).
78
+ - Exceptions are collected (using return_exceptions=True). Use properties on the
79
+ returned `MultitaskResult` to access `.successes` and `.errors`.
43
80
  """
44
81
 
45
82
  # Normalize tasks to a list for length and stable iteration
@@ -64,11 +101,34 @@ async def multitask_gather(
64
101
  log.warning("Running %d tasks (progress disabled)…", len(task_list))
65
102
 
66
103
  async with multitask_status(enabled=show_progress) as status:
67
- return await gather_limited_sync(
68
- *task_list,
69
- limit=effective_limit,
70
- bucket_limits=bucket_limits,
71
- status=status,
72
- labeler=effective_labeler,
73
- retry_settings=retry_settings,
104
+ raw_results = cast(
105
+ list[T | BaseException],
106
+ await gather_limited_sync(
107
+ *task_list,
108
+ limit=effective_limit,
109
+ bucket_limits=bucket_limits,
110
+ status=status,
111
+ labeler=effective_labeler,
112
+ retry_settings=retry_settings,
113
+ return_exceptions=True,
114
+ ),
115
+ )
116
+
117
+ result = MultitaskResult[T](raw_results=raw_results)
118
+
119
+ if result.errors:
120
+ log.warning(
121
+ "multitask_gather: had %d errors (out of %d tasks): %s",
122
+ len(result.errors),
123
+ len(task_list),
124
+ abbrev_list(result.errors),
125
+ )
126
+ log.error(
127
+ "multitask_gather: first error (full traceback):",
128
+ exc_info=(
129
+ type(result.errors[0]),
130
+ result.errors[0],
131
+ result.errors[0].__traceback__,
132
+ ),
74
133
  )
134
+ return result
@@ -72,6 +72,8 @@ RUNNING_SYMBOL = ""
72
72
  DEFAULT_LABEL_WIDTH = 40
73
73
  DEFAULT_PROGRESS_WIDTH = 20
74
74
 
75
+ MAX_DISPLAY_TASKS = 20
76
+
75
77
 
76
78
  # Calculate spinner width to maintain column alignment
77
79
  def _get_spinner_width(spinner_name: str) -> int:
@@ -101,6 +103,9 @@ class StatusSettings:
101
103
  transient: bool = True
102
104
  refresh_per_second: float = 10
103
105
  styles: StatusStyles = DEFAULT_STYLES
106
+ # Maximum number of tasks to keep visible in the live display.
107
+ # Older completed/skipped/failed tasks beyond this cap will be removed from the live view.
108
+ max_display_tasks: int = MAX_DISPLAY_TASKS
104
109
 
105
110
 
106
111
  class SpinnerStatusColumn(ProgressColumn):
@@ -298,6 +303,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
298
303
  self._task_info: dict[int, TaskInfo] = {}
299
304
  self._next_id: int = 1
300
305
  self._rich_task_ids: dict[int, TaskID] = {} # Map our IDs to Rich Progress IDs
306
+ # Track order of tasks added to the Progress so we can prune oldest completed ones
307
+ self._displayed_task_order: list[int] = []
308
+ # Track tasks pruned from the live display so we don't re-add them later
309
+ self._pruned_task_ids: set[int] = set()
301
310
 
302
311
  # Unified live integration
303
312
  self._unified_live: Any | None = None # Reference to the global unified live
@@ -442,6 +451,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
442
451
  progress_display=None,
443
452
  )
444
453
  self._rich_task_ids[task_id] = rich_task_id
454
+ self._displayed_task_order.append(task_id)
455
+
456
+ # Prune if too many tasks are visible (prefer removing completed ones)
457
+ self._prune_completed_tasks_if_needed()
445
458
 
446
459
  async def set_progress_display(self, task_id: int, display: RenderableType) -> None:
447
460
  """
@@ -536,18 +549,31 @@ class MultiTaskStatus(AbstractAsyncContextManager):
536
549
 
537
550
  # Complete the progress bar and stop spinner
538
551
  if rich_task_id is not None:
539
- total = self._progress.tasks[rich_task_id].total or 1
552
+ # Safely find the Task by id; Progress.tasks is a list, not a dict
553
+ task_obj = next((t for t in self._progress.tasks if t.id == rich_task_id), None)
554
+ if task_obj is not None and task_obj.total is not None:
555
+ total = task_obj.total
556
+ else:
557
+ total = task_info.steps_total or 1
540
558
  self._progress.update(rich_task_id, completed=total, task_info=task_info)
541
559
  else:
542
- # Task was never started, but we still need to add it to show completion
543
- rich_task_id = self._progress.add_task(
544
- "",
545
- total=task_info.steps_total,
546
- label=task_info.label,
547
- completed=task_info.steps_total,
548
- task_info=task_info,
549
- )
550
- self._rich_task_ids[task_id] = rich_task_id
560
+ # If this task was pruned from the live display, skip re-adding it
561
+ if task_id in self._pruned_task_ids:
562
+ pass
563
+ else:
564
+ # Task was never started; add a completed row so it appears once
565
+ rich_task_id = self._progress.add_task(
566
+ "",
567
+ total=task_info.steps_total,
568
+ label=task_info.label,
569
+ completed=task_info.steps_total,
570
+ task_info=task_info,
571
+ )
572
+ self._rich_task_ids[task_id] = rich_task_id
573
+ self._displayed_task_order.append(task_id)
574
+
575
+ # After finishing, prune completed tasks to respect max visible cap
576
+ self._prune_completed_tasks_if_needed()
551
577
 
552
578
  def get_task_info(self, task_id: int) -> TaskInfo | None:
553
579
  """Get additional task information."""
@@ -567,6 +593,54 @@ class MultiTaskStatus(AbstractAsyncContextManager):
567
593
  """Get console instance for additional output above progress."""
568
594
  return self._progress.console
569
595
 
596
def _prune_completed_tasks_if_needed(self) -> None:
    """
    Keep the live display at or below `settings.max_display_tasks` rows.

    Only tasks in a terminal state (completed, failed, skipped) are removed,
    oldest first by display order; tasks in any other state are left alone, so
    the display can stay above the cap when too few rows are terminal.
    A cap of zero or less disables pruning.

    Note: This method assumes it's called under self._lock.
    """
    cap = self.settings.max_display_tasks
    if cap <= 0:
        return

    # A task counts as visible while it still has a Rich task id.
    shown = sum(1 for task_id in self._displayed_task_order if task_id in self._rich_task_ids)
    overflow = shown - cap
    if overflow <= 0:
        return

    finished_states = (TaskState.COMPLETED, TaskState.FAILED, TaskState.SKIPPED)

    # Visible tasks that have finished, in the order they were displayed.
    prunable = []
    for task_id in self._displayed_task_order:
        if task_id in self._rich_task_ids:
            info = self._task_info.get(task_id)
            if info and info.state in finished_states:
                prunable.append(task_id)

    doomed = prunable[:overflow]

    for task_id in doomed:
        rich_id = self._rich_task_ids.pop(task_id, None)
        if rich_id is not None:
            # Drop the row from the Rich progress display.
            self._progress.remove_task(rich_id)
        # Remember the task so finishing it later does not add the row back.
        self._pruned_task_ids.add(task_id)

    self._displayed_task_order = [
        task_id for task_id in self._displayed_task_order if task_id not in doomed
    ]
643
+
570
644
 
571
645
  ## Tests
572
646
 
@@ -1,48 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
- import re
4
3
  from dataclasses import dataclass, field
5
4
  from typing import Any
6
5
 
7
- from flowmark import flowmark_markdown, line_wrap_by_sentence
8
6
  from marko import Markdown
7
+ from marko.block import Document
9
8
  from marko.ext import footnote
10
9
 
11
- from kash.utils.text_handling.markdown_utils import comprehensive_transform_tree
12
-
13
-
14
- def _normalize_footnotes_in_markdown(content: str) -> str:
15
- """
16
- Ensure blank lines between consecutive footnote definitions.
17
-
18
- Marko has a bug where consecutive footnotes without blank lines are parsed
19
- as a single footnote. This adds blank lines where needed.
20
- """
21
- lines = content.split("\n")
22
- result = []
23
- i = 0
24
-
25
- while i < len(lines):
26
- line = lines[i]
27
- result.append(line)
28
-
29
- # Check if this is a footnote definition
30
- if re.match(r"^\[\^[^\]]+\]:", line):
31
- # Look ahead to see if the next non-empty line is also a footnote
32
- j = i + 1
33
- while j < len(lines) and not lines[j].strip():
34
- result.append(lines[j])
35
- j += 1
36
-
37
- if j < len(lines) and re.match(r"^\[\^[^\]]+\]:", lines[j]):
38
- # Next non-empty line is also a footnote, add blank line
39
- result.append("")
40
-
41
- i = j
42
- else:
43
- i += 1
44
-
45
- return "\n".join(result)
10
+ from kash.utils.text_handling.markdown_utils import (
11
+ MARKDOWN as DEFAULT_MARKDOWN,
12
+ )
13
+ from kash.utils.text_handling.markdown_utils import (
14
+ comprehensive_transform_tree,
15
+ normalize_footnotes_in_markdown,
16
+ )
46
17
 
47
18
 
48
19
  @dataclass
@@ -81,15 +52,17 @@ class MarkdownFootnotes:
81
52
  MarkdownFootnotes instance with all footnotes indexed by ID
82
53
  """
83
54
  if markdown_parser is None:
84
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
55
+ markdown_parser = DEFAULT_MARKDOWN
85
56
 
86
57
  # Normalize to work around marko bug with consecutive footnotes
87
- normalized_content = _normalize_footnotes_in_markdown(content)
58
+ normalized_content = normalize_footnotes_in_markdown(content)
88
59
  document = markdown_parser.parse(normalized_content)
89
60
  return MarkdownFootnotes.from_document(document, markdown_parser)
90
61
 
91
62
  @staticmethod
92
- def from_document(document: Any, markdown_parser: Markdown | None = None) -> MarkdownFootnotes:
63
+ def from_document(
64
+ document: Document, markdown_parser: Markdown | None = None
65
+ ) -> MarkdownFootnotes:
93
66
  """
94
67
  Extract all footnotes from a parsed markdown document.
95
68
 
@@ -102,7 +75,7 @@ class MarkdownFootnotes:
102
75
  MarkdownFootnotes instance with all footnotes indexed by ID
103
76
  """
104
77
  if markdown_parser is None:
105
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
78
+ markdown_parser = DEFAULT_MARKDOWN
106
79
 
107
80
  footnotes_dict: dict[str, FootnoteInfo] = {}
108
81
 
@@ -206,9 +179,9 @@ def extract_footnote_references(content: str, markdown_parser: Markdown | None =
206
179
  List of unique footnote IDs that are referenced (with the ^)
207
180
  """
208
181
  if markdown_parser is None:
209
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
182
+ markdown_parser = DEFAULT_MARKDOWN
210
183
 
211
- normalized_content = _normalize_footnotes_in_markdown(content)
184
+ normalized_content = normalize_footnotes_in_markdown(content)
212
185
  document = markdown_parser.parse(normalized_content)
213
186
  references: list[str] = []
214
187
  seen: set[str] = set()
@@ -26,10 +26,48 @@ MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
26
26
  MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)
27
27
 
28
28
  # Use flowmark for Markdown parsing and rendering.
29
- # Replaces the single shard marko Markdown object.
29
+ # This replaces the single shared Markdown object that marko offers.
30
30
  MARKDOWN = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
31
31
 
32
32
 
33
# Regex for a markdown footnote definition line: "[^id]: ..."
FOOTNOTE_DEF_RE = re.compile(r"^\[\^[^\]]+\]:")


def normalize_footnotes_in_markdown(content: str) -> str:
    """
    Ensure a blank line precedes every footnote definition that directly
    follows another footnote.

    Some markdown parsers (marko) merge consecutive footnotes without blank
    lines into a single definition. A blank line is inserted only where one is
    missing, so already-separated definitions are left untouched and the
    function is idempotent. A definition that follows the continuation lines
    of a multi-line footnote is separated as well.

    Args:
        content: Markdown text.

    Returns:
        The text with blank lines inserted between adjacent footnote definitions.
    """
    result: list[str] = []
    # True from a footnote definition line until the next blank line.
    in_footnote = False

    for line in content.split("\n"):
        if FOOTNOTE_DEF_RE.match(line):
            if in_footnote:
                # Previous footnote has not been closed by a blank line yet.
                result.append("")
            in_footnote = True
        elif not line.strip():
            in_footnote = False
        result.append(line)

    return "\n".join(result)
69
+
70
+
33
71
  def escape_markdown(text: str) -> str:
34
72
  """
35
73
  Escape characters with special meaning in Markdown.
@@ -87,42 +125,49 @@ def comprehensive_transform_tree(element: Any, transformer: Callable[[Any], None
87
125
  comprehensive_transform_tree(child, transformer)
88
126
 
89
127
 
90
def _tree_links(element, include_internal=False) -> list[str]:
    """
    Collect the destination of every `Link` and `AutoLink` under `element`.

    Destinations starting with "#" are left out unless `include_internal` is
    true. Duplicates are kept; the order is the order in which
    `comprehensive_transform_tree` visits the nodes.
    """
    found: list[str] = []

    def _collect(node) -> None:
        if not isinstance(node, (Link, AutoLink)):
            return
        if not include_internal and node.dest.startswith("#"):
            return
        assert isinstance(node.dest, str)
        found.append(node.dest)

    comprehensive_transform_tree(element, _collect)
    return found
100
139
 
101
140
 
102
- def extract_links(content: str, include_internal=False) -> list[str]:
141
+ # TODO: Marko seems to include trailing parentheses on bare links.
142
+ # Fix this in flowmark
143
+ def _fix_link(url: str) -> str:
144
+ return url.rstrip(")")
145
+
146
+
147
def extract_urls(content: str, include_internal=False) -> list[Url]:
    """
    Extract all URLs from Markdown content. Deduplicates and preserves order.

    Each link is cleaned with `_fix_link` before deduplication, so two raw
    links that differ only in what `_fix_link` removes yield a single URL.
    Links whose destination starts with "#" are skipped unless
    `include_internal` is true.

    Raises:
        marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
    """
    content = normalize_footnotes_in_markdown(content)
    document = MARKDOWN.parse(content)
    all_links = _tree_links(document, include_internal)

    # Deduplicate on the cleaned form; dict keys keep first-seen order.
    unique_links = dict.fromkeys(_fix_link(link) for link in all_links)
    return [Url(link) for link in unique_links]
121
166
 
122
167
 
123
- def extract_file_links(file_path: Path, include_internal=False) -> list[str]:
168
+ def extract_file_urls(file_path: Path, include_internal=False) -> list[Url]:
124
169
  """
125
- Extract all links from a Markdown file. Future: Include textual and section context.
170
+ Extract all URLs from a Markdown file. Future: Include textual and section context.
126
171
 
127
172
  Returns an empty list if there are parsing errors.
128
173
  """
@@ -130,7 +175,7 @@ def extract_file_links(file_path: Path, include_internal=False) -> list[str]:
130
175
 
131
176
  try:
132
177
  content = file_path.read_text()
133
- return extract_links(content, include_internal)
178
+ return extract_urls(content, include_internal)
134
179
  except Exception as e:
135
180
  logging.warning(f"Failed to extract links from {file_path}: {e}")
136
181
  return []
@@ -156,6 +201,7 @@ def rewrite_urls(
156
201
  Raises:
157
202
  marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
158
203
  """
204
+ content = normalize_footnotes_in_markdown(content)
159
205
  document = MARKDOWN.parse(content)
160
206
  _rewrite_tree_urls(document, url_rewriter, element_types)
161
207
 
@@ -232,6 +278,7 @@ def extract_first_header(content: str) -> str | None:
232
278
  Raises:
233
279
  marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
234
280
  """
281
+ content = normalize_footnotes_in_markdown(content)
235
282
  document = MARKDOWN.parse(content)
236
283
 
237
284
  if document.children and isinstance(document.children[0], Heading):
@@ -282,18 +329,25 @@ def _extract_list_item_markdown(element: Any) -> str:
282
329
  return ""
283
330
 
284
331
 
285
- def extract_bullet_points(content: str, *, strict: bool = False) -> list[str]:
332
+ def extract_bullet_points(
333
+ content: str, *, strict: bool = False, allow_paragraphs: bool = False
334
+ ) -> list[str]:
286
335
  """
287
336
  Extract list item values from a Markdown file, preserving all original formatting.
288
337
 
289
338
  If no bullet points are found and `strict` is False, returns the entire content
290
339
  as a single item (treating plain text as if it were the first bullet point).
340
+
291
341
  If `strict` is True, only actual list items are returned.
292
342
 
343
+ If `allow_paragraphs` is True, if the content contains multiple paragraphs and no
344
+ bullet points are found, return the paragraphs as separate items.
345
+
293
346
  Raises:
294
347
  ValueError: If `strict` is True and no bullet points are found.
295
348
  marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
296
349
  """
350
+ content = normalize_footnotes_in_markdown(content)
297
351
  document = MARKDOWN.parse(content)
298
352
  bullet_points: list[str] = []
299
353
 
@@ -308,6 +362,8 @@ def extract_bullet_points(content: str, *, strict: bool = False) -> list[str]:
308
362
  if not bullet_points:
309
363
  if strict:
310
364
  raise ValueError("No bullet points found in content")
365
+ elif allow_paragraphs and "\n\n" in content:
366
+ return [p.strip() for p in content.split("\n\n")]
311
367
  elif content.strip():
312
368
  # Not strict mode, treat as plain text
313
369
  return [content.strip()]
@@ -372,6 +428,7 @@ def extract_headings(text: str) -> list[tuple[HTag, str]]:
372
428
  marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
373
429
  ValueError: If a heading with an unsupported level is encountered.
374
430
  """
431
+ text = normalize_footnotes_in_markdown(text)
375
432
  document = MARKDOWN.parse(text)
376
433
  headings_list: list[tuple[HTag, str]] = []
377
434
 
@@ -788,7 +845,7 @@ def test_markdown_utils_exceptions() -> None:
788
845
  import tempfile
789
846
 
790
847
  # Test extract_file_urls with non-existent file
791
- result = extract_file_links(Path("/non/existent/file.md"))
848
+ result = extract_file_urls(Path("/non/existent/file.md"))
792
849
  assert result == [] # Should return empty list for any error
793
850
 
794
851
  # Test extract_file_urls with empty file (should work fine)
@@ -797,7 +854,7 @@ def test_markdown_utils_exceptions() -> None:
797
854
  tmp_path = Path(tmp.name)
798
855
 
799
856
  try:
800
- result = extract_file_links(tmp_path)
857
+ result = extract_file_urls(tmp_path)
801
858
  assert result == [] # Empty file has no links
802
859
  finally:
803
860
  tmp_path.unlink()
@@ -808,7 +865,7 @@ def test_markdown_utils_exceptions() -> None:
808
865
  tmp_path = Path(tmp.name)
809
866
 
810
867
  try:
811
- result = extract_file_links(tmp_path)
868
+ result = extract_file_urls(tmp_path)
812
869
  # Should still work - marko is very permissive with markdown
813
870
  assert isinstance(result, list)
814
871
  finally:
@@ -816,11 +873,11 @@ def test_markdown_utils_exceptions() -> None:
816
873
 
817
874
  # Test extract_urls with string content
818
875
  content = "Check out [this link](https://example.com) and [internal](#section)"
819
- result = extract_links(content)
876
+ result = extract_urls(content)
820
877
  assert "https://example.com" in result
821
878
  assert "#section" not in result # Internal links excluded by default
822
879
 
823
- result_with_internal = extract_links(content, include_internal=True)
880
+ result_with_internal = extract_urls(content, include_internal=True)
824
881
  assert "https://example.com" in result_with_internal
825
882
  assert "#section" in result_with_internal
826
883
 
@@ -830,21 +887,21 @@ def test_extract_links_comprehensive() -> None:
830
887
 
831
888
  # Test regular markdown links
832
889
  regular_links = "Check out [this link](https://example.com) and [another](https://test.com)"
833
- result = extract_links(regular_links)
890
+ result = extract_urls(regular_links)
834
891
  assert "https://example.com" in result
835
892
  assert "https://test.com" in result
836
893
  assert len(result) == 2
837
894
 
838
895
  # Test bare/autolinks in angle brackets
839
896
  bare_links = "Visit <https://google.com> and also <https://github.com>"
840
- result_bare = extract_links(bare_links)
897
+ result_bare = extract_urls(bare_links)
841
898
  assert "https://google.com" in result_bare
842
899
  assert "https://github.com" in result_bare
843
900
  assert len(result_bare) == 2
844
901
 
845
902
  # Test autolinks without brackets (GFM extension enables auto-linking of plain URLs)
846
903
  auto_links = "Visit https://stackoverflow.com or http://reddit.com"
847
- result_auto = extract_links(auto_links)
904
+ result_auto = extract_urls(auto_links)
848
905
  assert "https://stackoverflow.com" in result_auto
849
906
  assert "http://reddit.com" in result_auto
850
907
  assert len(result_auto) == 2 # GFM auto-links plain URLs
@@ -855,7 +912,7 @@ def test_extract_links_comprehensive() -> None:
855
912
  - The Ko-Op, accessed June 28, 2025,
856
913
  <https://psychedelictherapists.co/blog/the-future-of-ketamine-assisted-psychotherapy/>
857
914
  """
858
- result_footnote = extract_links(footnote_content)
915
+ result_footnote = extract_urls(footnote_content)
859
916
  assert (
860
917
  "https://psychedelictherapists.co/blog/the-future-of-ketamine-assisted-psychotherapy/"
861
918
  in result_footnote
@@ -873,7 +930,7 @@ Auto link: https://auto-link.com
873
930
  [^1]: Footnote with [regular link](https://footnote-regular.com)
874
931
  [^2]: Footnote with bare link <https://footnote-bare.com>
875
932
  """
876
- result_mixed = extract_links(mixed_content)
933
+ result_mixed = extract_urls(mixed_content)
877
934
  expected_links = [
878
935
  "https://example.com", # Regular link
879
936
  "https://bare-link.com", # Bare link
@@ -889,7 +946,7 @@ Auto link: https://auto-link.com
889
946
  def test_extract_bare_links() -> None:
890
947
  """Test extraction of bare links in angle brackets."""
891
948
  content = "Visit <https://example.com> and <https://github.com/user/repo> for more info"
892
- result = extract_links(content)
949
+ result = extract_urls(content)
893
950
  assert "https://example.com" in result
894
951
  assert "https://github.com/user/repo" in result
895
952
  assert len(result) == 2
@@ -902,7 +959,7 @@ def test_extract_footnote_links() -> None:
902
959
 
903
960
  [^1]: This footnote has a [regular link](https://example.com) and <https://bare-link.com>
904
961
  """)
905
- result = extract_links(content)
962
+ result = extract_urls(content)
906
963
  assert "https://example.com" in result
907
964
  assert "https://bare-link.com" in result
908
965
  assert len(result) == 2
@@ -916,7 +973,7 @@ def test_extract_reference_style_links() -> None:
916
973
  [ref1]: https://example.com/article1
917
974
  [ref2]: https://example.com/article2
918
975
  """)
919
- result = extract_links(content)
976
+ result = extract_urls(content)
920
977
  assert "https://example.com/article1" in result
921
978
  assert "https://example.com/article2" in result
922
979
  assert len(result) == 2
@@ -931,14 +988,14 @@ def test_extract_links_and_dups() -> None:
931
988
  """)
932
989
 
933
990
  # Default behavior: exclude internal links
934
- result = extract_links(content)
991
+ result = extract_urls(content)
935
992
  assert "https://example.com" in result
936
993
  assert "#introduction" not in result
937
994
  assert "#conclusion" not in result
938
995
  assert len(result) == 1
939
996
 
940
997
  # Include internal links
941
- result_with_internal = extract_links(content, include_internal=True)
998
+ result_with_internal = extract_urls(content, include_internal=True)
942
999
  assert "https://example.com" in result_with_internal
943
1000
  assert "#introduction" in result_with_internal
944
1001
  assert "#conclusion" in result_with_internal
@@ -966,7 +1023,7 @@ def test_extract_links_mixed_real_world() -> None:
966
1023
  <https://psychedelictherapists.co/blog/the-future-of-ketamine-assisted-psychotherapy/>
967
1024
  """)
968
1025
 
969
- result = extract_links(content)
1026
+ result = extract_urls(content)
970
1027
  expected_links = [
971
1028
  "https://pubmed.ncbi.nlm.nih.gov",
972
1029
  "https://scholar.google.com",
@@ -1369,3 +1426,26 @@ def test_rewrite_urls_simplified_api() -> None:
1369
1426
  # Verify that relative URLs in angle brackets remain unchanged
1370
1427
  # (marko doesn't parse them as URL elements)
1371
1428
  assert "<./contact.html>" in result
1429
+
1430
+
1431
+ def test_extract_links_parentheses_adjacent() -> None:
1432
+ """URLs adjacent to closing parentheses should not include the parenthesis."""
1433
+ content = dedent(
1434
+ """
1435
+ [^res1]: Under 50 U.S.C. § 4531(c)(3), amounts in the Defense Production Act Fund (used
1436
+ for Title III) “shall remain available until expended,” meaning they do not expire
1437
+ at the end of a fiscal year (law text:
1438
+ https://www.law.cornell.edu/uscode/text/50/4531).
1439
+
1440
+ [^res2]: USAspending.gov’s federal account 097-0801 (Defense Production Act Purchases,
1441
+ Defense) provides official figures for obligations and unobligated balances by
1442
+ fiscal year drawn from Treasury data (https://www.usaspending.gov/account/097-0801).
1443
+ """
1444
+ )
1445
+
1446
+ links = extract_urls(content)
1447
+ assert "https://www.law.cornell.edu/uscode/text/50/4531" in links
1448
+ assert "https://www.law.cornell.edu/uscode/text/50/4531)" not in links
1449
+
1450
+ assert "https://www.usaspending.gov/account/097-0801" in links
1451
+ assert "https://www.usaspending.gov/account/097-0801)" not in links
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kash-shell
3
- Version: 0.3.33
3
+ Version: 0.3.34
4
4
  Summary: The knowledge agent shell (core)
5
5
  Project-URL: Repository, https://github.com/jlevy/kash-shell
6
6
  Author-email: Joshua Levy <joshua@cal.berkeley.edu>
@@ -20,7 +20,7 @@ Requires-Dist: aiolimiter>=1.2.1
20
20
  Requires-Dist: anyio>=4.8.0
21
21
  Requires-Dist: audioop-lts>=0.2.1; python_version >= '3.13'
22
22
  Requires-Dist: cachetools>=5.5.2
23
- Requires-Dist: chopdiff>=0.2.5
23
+ Requires-Dist: chopdiff>=0.2.6
24
24
  Requires-Dist: clideps>=0.1.4
25
25
  Requires-Dist: colour>=0.1.5
26
26
  Requires-Dist: cssselect>=1.2.0
@@ -41,7 +41,7 @@ Requires-Dist: litellm>=1.74.15.post1
41
41
  Requires-Dist: markdownify>=0.13.1
42
42
  Requires-Dist: mcp-proxy>=0.5.0
43
43
  Requires-Dist: mcp>=1.6.0
44
- Requires-Dist: openai>=1.66.3
44
+ Requires-Dist: openai==1.99.9
45
45
  Requires-Dist: pandas>=2.2.3
46
46
  Requires-Dist: patch-ng>=1.18.1
47
47
  Requires-Dist: pathspec>=0.12.1
@@ -6,14 +6,14 @@ kash/actions/core/chat.py,sha256=9_xh9cWwXjkC_SYme-ScOg6Miqeydv15ccrwHqQvgq8,272
6
6
  kash/actions/core/combine_docs.py,sha256=5bTU7n_ICavvTXfC7fs5BDMeZYn7Xh5FkU7DVQqDHAQ,1536
7
7
  kash/actions/core/concat_docs.py,sha256=Umx3VzFiHJGY-76AEs4ju_1HnB9SbQsBux03Mkeig24,1345
8
8
  kash/actions/core/format_markdown_template.py,sha256=ZJbtyTSypPo2ewLiGRSyIpVf711vQMhI_-Ng-FgCs80,2991
9
- kash/actions/core/markdownify_html.py,sha256=0ZPH4b7IUWbMGi1mi0RzDPQKlqpLIsOy6ax_Gn7SSyA,1770
9
+ kash/actions/core/markdownify_html.py,sha256=Oqpq9b9JgMItOwJwbC5b5rG8UR0pXhxernjsdHyVB-o,1749
10
10
  kash/actions/core/minify_html.py,sha256=TRhyn7Gvcowou8pzq9vzDTtcCFOA4eC5217pJ9rPuOw,1386
11
11
  kash/actions/core/readability.py,sha256=P1whiDanaAKTPw2KwHG15QNcjHzwpuTWne0s4LyUfuQ,990
12
12
  kash/actions/core/render_as_html.py,sha256=i0WgtDgEJAeTTpVLS_CxDloDCb1Mhkzrcvv0VmoOyQ8,1901
13
13
  kash/actions/core/save_sidematter_meta.py,sha256=fKLE5eWIorOdw_FW46AUivXACQ6cxWvKWllcEjT6mz8,1440
14
14
  kash/actions/core/show_webpage.py,sha256=2A8u29Wf-iWNbPRfnz7u6MUhcXk_b8B8ruUT825d_mA,978
15
15
  kash/actions/core/strip_html.py,sha256=FDLN_4CKB11q5cU4NixTf7PGrAq92AjQNbKAdvQDwCY,849
16
- kash/actions/core/summarize_as_bullets.py,sha256=Zwr8lNzL77pwpnW_289LQjNBijNDpTPANfFdOJA-PZ4,2070
16
+ kash/actions/core/summarize_as_bullets.py,sha256=bzEH43BwwdqMJCt6m01iIME8sfmVPylBtF1PNbDdrBw,2055
17
17
  kash/actions/core/tabbed_webpage_config.py,sha256=rIbzEhBTmnkbSiRZC-Rj46T1J6c0jOztiKE9Usa4nsc,980
18
18
  kash/actions/core/tabbed_webpage_generate.py,sha256=935HkDSuP4eZ1e0xf-LhjPOdicU3wI5Kuh79r61QCl8,988
19
19
  kash/actions/core/zip_sidematter.py,sha256=E7ae0g9Bz7uXApYdNY-a8GvSIIPoqXcD95mjMaKQlsM,1557
@@ -163,13 +163,13 @@ kash/media_base/transcription_format.py,sha256=rOVPTpwvW22c27BRwYF-Tc_xzqK_wOtUZ
163
163
  kash/media_base/transcription_whisper.py,sha256=GqvroW9kBAH4-gcbYkMgNCfs2MpMIgm1ip3NMWtJ0IE,1169
164
164
  kash/media_base/services/local_file_media.py,sha256=_NV-T90rShJ8ucUjQXMPCKKJ50GSFE9PyyVzhXp5z9w,5624
165
165
  kash/model/__init__.py,sha256=kFfBKb5N70NWYUfpRRxn_Sb9p_vXlB6BBaTCqWmSReo,2978
166
- kash/model/actions_model.py,sha256=-41qz76Z9NZZhRQcPDRopyHAGxHCrTvbMRDv8fnb7CQ,23185
166
+ kash/model/actions_model.py,sha256=D-q-eZO_yOug3fuvDUWB1AI_MNwhpElSomaPS7PybyU,23362
167
167
  kash/model/assistant_response_model.py,sha256=6eDfC27nyuBDFjv5nCYMa_Qb2mPbKwDzZy7uLOIyskI,2653
168
168
  kash/model/compound_actions_model.py,sha256=oYEtVKtQv-mA1abZkK7PvaM9xazVBUuk1z0geKBulak,6965
169
169
  kash/model/concept_model.py,sha256=we2qOcy9Mv1q7XPfkDLp_CyO_-8DwAUfUYlpgy_jrFs,1011
170
170
  kash/model/exec_model.py,sha256=3Su3NEmEtDoSuQSxvg75FYY_EdClSM5pwQK1i7_S88A,3131
171
171
  kash/model/graph_model.py,sha256=T034y0E9OJtITd1g9zp9vll5pLscdatq6JoT08KvPZE,2724
172
- kash/model/items_model.py,sha256=V7so_AWc7skRZGlByIK5m3ETUaHEw8IYx9OB9pmzNEA,39545
172
+ kash/model/items_model.py,sha256=--n5PzDHuX0zp0cleCAEhigqzj9g4uSIoF58iIpv2AI,39720
173
173
  kash/model/language_list.py,sha256=I3RIbxTseVmPdhExQimimEv18Gmy2ImMbpXe0-_t1Qw,450
174
174
  kash/model/llm_actions_model.py,sha256=a29uXVNfS2CiqvM7HPdC6H9A23rSQQihAideuBLMH8g,2110
175
175
  kash/model/media_model.py,sha256=ZnlZ-FkswbAIGpUAuNqLce1WDZK-WbnwHn2ipg8x7-0,3511
@@ -208,7 +208,7 @@ kash/utils/api_utils/api_retries.py,sha256=TtgxLxoMnXIzYMKbMUzsnVcPf-aKFm3cJ95zO
208
208
  kash/utils/api_utils/cache_requests_limited.py,sha256=TA5buZ9Dgbj4I1zHhwerTXre018i0TCACGsezsjX9Uc,3140
209
209
  kash/utils/api_utils/gather_limited.py,sha256=6K0Z3u_NeX9wBfFFk21wUQeSimaDIm53AHlGYRLD6LQ,33018
210
210
  kash/utils/api_utils/http_utils.py,sha256=Ou6QNiba5w7n71cgNmV168OFTLmMDNxWW5MM-XkFEME,1461
211
- kash/utils/api_utils/multitask_gather.py,sha256=iC1UlZXZV7YMevDD--mCi1eR0Rmd7wAWrOy-C_l0ACw,2594
211
+ kash/utils/api_utils/multitask_gather.py,sha256=LAylwWZ2APbv-O_l0kLwBfP762D0qswMBV8ID4eCOA0,4446
212
212
  kash/utils/api_utils/progress_protocol.py,sha256=6cT5URY6cScHYd6UZoTT_rHI0mbsE52joBf88regEN8,8816
213
213
  kash/utils/common/__init__.py,sha256=ggeWw1xmbl1mgCQD3c4CNN2h5WXFCsN2wXlCWurEUEI,161
214
214
  kash/utils/common/format_utils.py,sha256=83FhAwbMnOQIFudpnOGMuCqCiyoAlWGS6cc8q6xgZus,2072
@@ -244,15 +244,15 @@ kash/utils/lang_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
244
244
  kash/utils/lang_utils/capitalization.py,sha256=5XbqBvjkzlxsm1Ue5AQP3P1J1IG0PubMVmGnoKVTF-c,3903
245
245
  kash/utils/rich_custom/__init__.py,sha256=_g2F3Bqc1UnLTdAdCwkvzXmW7OvmqXrA8DpfT1dKy6w,75
246
246
  kash/utils/rich_custom/ansi_cell_len.py,sha256=oQlNrqWB0f6pmigkbRRyeK6oWlGHMPbV_YLO_qmDH5E,2356
247
- kash/utils/rich_custom/multitask_status.py,sha256=eOON62evEAOmmNyVBSjfYkh5y9OTejQrs02rc2L55VE,24375
247
+ kash/utils/rich_custom/multitask_status.py,sha256=3hMxXNAClxcQzzQdBA0rPDNp19Y_6gT8NRIuT8OkO7Q,27667
248
248
  kash/utils/rich_custom/rich_char_transform.py,sha256=3M89tViKM0y31VHsDoHi5eHFWlv5ME7F4p35IdDxnrw,2616
249
249
  kash/utils/rich_custom/rich_indent.py,sha256=nz72yNpUuYjOsaPNVmxM81oEQm-GKEfQkNsuWmv16G0,2286
250
250
  kash/utils/rich_custom/rich_markdown_fork.py,sha256=M_JRaSAyHrSg-wuLv9C9P7SkehSim3lwkqQPuMIFkVw,26551
251
251
  kash/utils/text_handling/doc_normalization.py,sha256=GsK8J8HSVINYYIeO2XQvWYK1ZSiQ6mX34mVb9UOjgG8,3029
252
252
  kash/utils/text_handling/escape_html_tags.py,sha256=8pC3JgoKRtdnbnOu8DiWrlvNR6GAqjwhGbQgl3jiFG4,6441
253
- kash/utils/text_handling/markdown_footnotes.py,sha256=4_ZOez-xHjiSn_XHyqXPk9MNbjts1hiHOh1ARs9vVZA,7494
253
+ kash/utils/text_handling/markdown_footnotes.py,sha256=TgS3un4h_qmZB1KnDUVKaOYLZWhljlUZ-QjLfL6gkgg,6480
254
254
  kash/utils/text_handling/markdown_render.py,sha256=LHPdJc__2ejBx7iwkp_P9wIePNmiVSgwu4-uhamVjms,3791
255
- kash/utils/text_handling/markdown_utils.py,sha256=Yf57dVljpbg8vuHbtcOSHZqz1PafOSBal6R8ESJz1Bs,49220
255
+ kash/utils/text_handling/markdown_utils.py,sha256=ufVYSBvBl9jRYP6Bfsoxhgv754SW3KDxo8rN67OK6a4,52274
256
256
  kash/utils/text_handling/markdownify_utils.py,sha256=fXl3uSUk9aHXL0PDqxdlvWvIvBXUQTOfQxnK9uicQcg,2964
257
257
  kash/utils/text_handling/unified_diffs.py,sha256=JfHSakISkT_GuBPBI4fTooHrp2aenWzDKiVvDewVfMk,2655
258
258
  kash/web_content/canon_url.py,sha256=Zv2q7xQdIHBFkxxwyJn3_ME-qqMFRi_fKxE_IgV2Z50,742
@@ -304,8 +304,8 @@ kash/xonsh_custom/xonsh_modern_tools.py,sha256=mj_b34LZXfE8MJe9EpDmp5JZ0tDM1biYN
304
304
  kash/xonsh_custom/xonsh_ranking_completer.py,sha256=ZRGiAfoEgqgnlq2-ReUVEaX5oOgW1DQ9WxIv2OJLuTo,5620
305
305
  kash/xontrib/fnm.py,sha256=V2tsOdmIDgbFbZSfMLpsvDIwwJJqiYnOkOySD1cXNXw,3700
306
306
  kash/xontrib/kash_extension.py,sha256=FLIMlgR3C_6A1fwKE-Ul0nmmpJSszVPbAriinUyQ8Zg,1896
307
- kash_shell-0.3.33.dist-info/METADATA,sha256=rEXEktz-jYfIDY2XI0fmEQbBVnLYoo1omDU6B0LQLhw,33547
308
- kash_shell-0.3.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
309
- kash_shell-0.3.33.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
310
- kash_shell-0.3.33.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
311
- kash_shell-0.3.33.dist-info/RECORD,,
307
+ kash_shell-0.3.34.dist-info/METADATA,sha256=ubXLWP8kfglDx7f2zvpr1TKaah6BZ6ye-B7hPYorfYc,33547
308
+ kash_shell-0.3.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
309
+ kash_shell-0.3.34.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
310
+ kash_shell-0.3.34.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
311
+ kash_shell-0.3.34.dist-info/RECORD,,