kash-shell 0.3.30__py3-none-any.whl → 0.3.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. kash/actions/core/chat.py +1 -0
  2. kash/actions/core/markdownify_html.py +1 -1
  3. kash/actions/core/readability.py +1 -4
  4. kash/actions/core/render_as_html.py +1 -0
  5. kash/actions/core/show_webpage.py +2 -0
  6. kash/actions/core/summarize_as_bullets.py +1 -1
  7. kash/config/logger.py +1 -1
  8. kash/config/text_styles.py +1 -1
  9. kash/docs/markdown/topics/a2_installation.md +3 -2
  10. kash/exec/action_decorators.py +5 -3
  11. kash/exec/action_exec.py +50 -5
  12. kash/exec/fetch_url_items.py +4 -2
  13. kash/exec/llm_transforms.py +14 -5
  14. kash/exec/runtime_settings.py +2 -0
  15. kash/file_storage/file_store.py +50 -92
  16. kash/file_storage/item_id_index.py +128 -0
  17. kash/mcp/mcp_server_routes.py +42 -12
  18. kash/model/actions_model.py +18 -7
  19. kash/model/exec_model.py +3 -0
  20. kash/model/items_model.py +54 -12
  21. kash/utils/api_utils/gather_limited.py +2 -0
  22. kash/utils/api_utils/multitask_gather.py +134 -0
  23. kash/utils/common/s3_utils.py +108 -0
  24. kash/utils/common/url.py +16 -4
  25. kash/utils/rich_custom/multitask_status.py +84 -10
  26. kash/utils/text_handling/markdown_footnotes.py +16 -43
  27. kash/utils/text_handling/markdown_utils.py +108 -28
  28. kash/web_content/web_fetch.py +2 -1
  29. {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/METADATA +5 -5
  30. {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/RECORD +33 -30
  31. {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/WHEEL +0 -0
  32. {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/entry_points.txt +0 -0
  33. {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ from pathlib import Path
6
+
7
+ from sidematter_format.sidematter_format import Sidematter
8
+
9
+ from kash.utils.common.url import Url, is_s3_url, parse_s3_url
10
+
11
+
12
def check_aws_cli() -> None:
    """
    Verify that the AWS CLI is installed and reachable.

    Raises:
        RuntimeError: if no `aws` executable can be located on the PATH.
    """
    aws_executable = shutil.which("aws")
    if not aws_executable:
        raise RuntimeError(
            "AWS CLI not found in PATH. Please install 'awscli' and ensure 'aws' is available."
        )
20
+
21
+
22
def get_s3_parent_folder(url: Url) -> Url | None:
    """
    Get the parent folder of an S3 URL, or None if not an S3 URL.
    """
    if not is_s3_url(url):
        return None

    bucket, key = parse_s3_url(url)
    # NOTE(review): for a root-level key, Path(key).parent is ".", which yields
    # "s3://<bucket>/." — confirm callers never pass root-level keys.
    parent_key = Path(key).parent
    return Url(f"s3://{bucket}/{parent_key}")
34
+
35
+
36
def s3_sync_to_folder(
    src_path: str | Path,
    s3_dest_parent: Url,
    *,
    include_sidematter: bool = False,
) -> list[Url]:
    """
    Sync a local file or directory to an S3 "parent" folder using the AWS CLI.
    Set `include_sidematter` to include sidematter files alongside the source files.

    Returns a list of S3 URLs that were the top-level sync targets:
    - For a single file: the file URL (and sidematter file/dir URLs if included).
    - For a directory: the destination parent prefix URL (non-recursive reporting).

    Raises:
        ValueError: if the source path is missing or the destination is not s3://.
        RuntimeError: if the AWS CLI is not installed.
        subprocess.CalledProcessError: if an `aws s3 sync` invocation fails.
    """
    src_path = Path(src_path)
    if not src_path.exists():
        raise ValueError(f"Source path does not exist: {src_path}")
    if not is_s3_url(s3_dest_parent):
        raise ValueError(f"Destination must be an s3:// URL: {s3_dest_parent}")

    check_aws_cli()

    dest_prefix = str(s3_dest_parent).rstrip("/") + "/"
    synced: list[Url] = []

    if not src_path.is_file():
        # Directory mode: sync the whole directory tree under the destination prefix.
        subprocess.run(
            [
                "aws",
                "s3",
                "sync",
                str(src_path),
                dest_prefix,
            ],
            check=True,
        )
        synced.append(Url(dest_prefix))
        return synced

    # File mode: optionally expand to the full sidematter path list for this file.
    if include_sidematter:
        resolved = Sidematter(src_path).resolve(parse_meta=False, use_frontmatter=False)
        paths_to_sync: list[Path] = resolved.path_list
    else:
        paths_to_sync = [src_path]

    for path in paths_to_sync:
        if path.is_file():
            # Sync the parent dir filtered down to just this file, so the AWS
            # CLI's default change detection can skip unchanged uploads.
            subprocess.run(
                [
                    "aws",
                    "s3",
                    "sync",
                    str(path.parent),
                    dest_prefix,
                    "--exclude",
                    "*",
                    "--include",
                    path.name,
                ],
                check=True,
            )
            synced.append(Url(dest_prefix + path.name))
        elif path.is_dir():
            dir_dest = dest_prefix + path.name + "/"
            subprocess.run(["aws", "s3", "sync", str(path), dir_dest], check=True)
            synced.append(Url(dir_dest))

    return synced
kash/utils/common/url.py CHANGED
@@ -26,6 +26,7 @@ A string that may not be resolved to a URL or path.
26
26
 
27
27
  HTTP_ONLY = ["http", "https"]
28
28
  HTTP_OR_FILE = HTTP_ONLY + ["file"]
29
+ HTTP_OR_FILE_OR_S3 = HTTP_OR_FILE + ["s3"]
29
30
 
30
31
 
31
32
  def check_if_url(
@@ -36,7 +37,8 @@ def check_if_url(
36
37
  the `urlparse.ParseResult`.
37
38
 
38
39
  Also returns false for Paths, so that it's easy to use local paths and URLs
39
- (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE` to
40
+ (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE`
41
+ or `HTTP_OR_FILE_OR_S3` to
40
42
  restrict to only certain schemes.
41
43
  """
42
44
  if isinstance(text, Path):
@@ -69,6 +71,13 @@ def is_file_url(url: str | Url) -> bool:
69
71
  return url.startswith("file://")
70
72
 
71
73
 
74
def is_s3_url(url: str | Url) -> bool:
    """
    Is URL an S3 URL?
    """
    s3_scheme_prefix = "s3://"
    return url.startswith(s3_scheme_prefix)
79
+
80
+
72
81
  def parse_http_url(url: str | Url) -> ParseResult:
73
82
  """
74
83
  Parse an http/https URL and return the parsed result, raising ValueError if
@@ -118,7 +127,7 @@ def as_file_url(path: str | Path) -> Url:
118
127
 
119
128
  def normalize_url(
120
129
  url: Url,
121
- check_schemes: list[str] | None = HTTP_OR_FILE,
130
+ check_schemes: list[str] | None = HTTP_OR_FILE_OR_S3,
122
131
  drop_fragment: bool = True,
123
132
  resolve_local_paths: bool = True,
124
133
  ) -> Url:
@@ -238,7 +247,10 @@ def test_normalize_url():
238
247
  normalize_url(url=Url("/not/a/URL"))
239
248
  raise AssertionError()
240
249
  except ValueError as e:
241
- assert str(e) == "Scheme '' not in allowed schemes: ['http', 'https', 'file']: /not/a/URL"
250
+ assert (
251
+ str(e)
252
+ == "Scheme '' not in allowed schemes: ['http', 'https', 'file', 's3']: /not/a/URL"
253
+ )
242
254
 
243
255
  try:
244
256
  normalize_url(Url("ftp://example.com"))
@@ -246,7 +258,7 @@ def test_normalize_url():
246
258
  except ValueError as e:
247
259
  assert (
248
260
  str(e)
249
- == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
261
+ == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file', 's3']: ftp://example.com"
250
262
  )
251
263
 
252
264
 
@@ -72,6 +72,8 @@ RUNNING_SYMBOL = ""
72
72
  DEFAULT_LABEL_WIDTH = 40
73
73
  DEFAULT_PROGRESS_WIDTH = 20
74
74
 
75
+ MAX_DISPLAY_TASKS = 20
76
+
75
77
 
76
78
  # Calculate spinner width to maintain column alignment
77
79
  def _get_spinner_width(spinner_name: str) -> int:
@@ -101,6 +103,9 @@ class StatusSettings:
101
103
  transient: bool = True
102
104
  refresh_per_second: float = 10
103
105
  styles: StatusStyles = DEFAULT_STYLES
106
+ # Maximum number of tasks to keep visible in the live display.
107
+ # Older completed/skipped/failed tasks beyond this cap will be removed from the live view.
108
+ max_display_tasks: int = MAX_DISPLAY_TASKS
104
109
 
105
110
 
106
111
  class SpinnerStatusColumn(ProgressColumn):
@@ -298,6 +303,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
298
303
  self._task_info: dict[int, TaskInfo] = {}
299
304
  self._next_id: int = 1
300
305
  self._rich_task_ids: dict[int, TaskID] = {} # Map our IDs to Rich Progress IDs
306
+ # Track order of tasks added to the Progress so we can prune oldest completed ones
307
+ self._displayed_task_order: list[int] = []
308
+ # Track tasks pruned from the live display so we don't re-add them later
309
+ self._pruned_task_ids: set[int] = set()
301
310
 
302
311
  # Unified live integration
303
312
  self._unified_live: Any | None = None # Reference to the global unified live
@@ -442,6 +451,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
442
451
  progress_display=None,
443
452
  )
444
453
  self._rich_task_ids[task_id] = rich_task_id
454
+ self._displayed_task_order.append(task_id)
455
+
456
+ # Prune if too many tasks are visible (prefer removing completed ones)
457
+ self._prune_completed_tasks_if_needed()
445
458
 
446
459
  async def set_progress_display(self, task_id: int, display: RenderableType) -> None:
447
460
  """
@@ -536,18 +549,31 @@ class MultiTaskStatus(AbstractAsyncContextManager):
536
549
 
537
550
  # Complete the progress bar and stop spinner
538
551
  if rich_task_id is not None:
539
- total = self._progress.tasks[rich_task_id].total or 1
552
+ # Safely find the Task by id; Progress.tasks is a list, not a dict
553
+ task_obj = next((t for t in self._progress.tasks if t.id == rich_task_id), None)
554
+ if task_obj is not None and task_obj.total is not None:
555
+ total = task_obj.total
556
+ else:
557
+ total = task_info.steps_total or 1
540
558
  self._progress.update(rich_task_id, completed=total, task_info=task_info)
541
559
  else:
542
- # Task was never started, but we still need to add it to show completion
543
- rich_task_id = self._progress.add_task(
544
- "",
545
- total=task_info.steps_total,
546
- label=task_info.label,
547
- completed=task_info.steps_total,
548
- task_info=task_info,
549
- )
550
- self._rich_task_ids[task_id] = rich_task_id
560
+ # If this task was pruned from the live display, skip re-adding it
561
+ if task_id in self._pruned_task_ids:
562
+ pass
563
+ else:
564
+ # Task was never started; add a completed row so it appears once
565
+ rich_task_id = self._progress.add_task(
566
+ "",
567
+ total=task_info.steps_total,
568
+ label=task_info.label,
569
+ completed=task_info.steps_total,
570
+ task_info=task_info,
571
+ )
572
+ self._rich_task_ids[task_id] = rich_task_id
573
+ self._displayed_task_order.append(task_id)
574
+
575
+ # After finishing, prune completed tasks to respect max visible cap
576
+ self._prune_completed_tasks_if_needed()
551
577
 
552
578
  def get_task_info(self, task_id: int) -> TaskInfo | None:
553
579
  """Get additional task information."""
@@ -567,6 +593,54 @@ class MultiTaskStatus(AbstractAsyncContextManager):
567
593
  """Get console instance for additional output above progress."""
568
594
  return self._progress.console
569
595
 
596
+ def _prune_completed_tasks_if_needed(self) -> None:
597
+ """
598
+ Ensure at most `max_display_tasks` tasks are visible by removing the oldest
599
+ completed/skipped/failed tasks first. Running or waiting tasks are never
600
+ removed by this method.
601
+ Note: This method assumes it's called under self._lock.
602
+ """
603
+ max_visible = self.settings.max_display_tasks
604
+
605
+ # Nothing to prune or unlimited
606
+ if max_visible <= 0:
607
+ return
608
+
609
+ # Count visible tasks (those with a Rich task id present)
610
+ visible_task_ids = [tid for tid in self._displayed_task_order if tid in self._rich_task_ids]
611
+ excess = len(visible_task_ids) - max_visible
612
+ if excess <= 0:
613
+ return
614
+
615
+ # Build list of terminal tasks that can be pruned (oldest first)
616
+ terminal_tasks = []
617
+ for tid in self._displayed_task_order:
618
+ if tid not in self._rich_task_ids:
619
+ continue
620
+ info = self._task_info.get(tid)
621
+ if info and info.state in (
622
+ TaskState.COMPLETED,
623
+ TaskState.FAILED,
624
+ TaskState.SKIPPED,
625
+ ):
626
+ terminal_tasks.append(tid)
627
+
628
+ # Remove the oldest terminal tasks up to the excess count
629
+ tasks_to_remove = terminal_tasks[:excess]
630
+
631
+ for tid in tasks_to_remove:
632
+ rich_tid = self._rich_task_ids.pop(tid, None)
633
+ if rich_tid is not None:
634
+ # Remove from Rich progress display
635
+ self._progress.remove_task(rich_tid)
636
+ # Mark as pruned so we don't re-add on finish
637
+ self._pruned_task_ids.add(tid)
638
+
639
+ # Efficiently rebuild the displayed task order without the removed tasks
640
+ self._displayed_task_order = [
641
+ tid for tid in self._displayed_task_order if tid not in tasks_to_remove
642
+ ]
643
+
570
644
 
571
645
  ## Tests
572
646
 
@@ -1,48 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
- import re
4
3
  from dataclasses import dataclass, field
5
4
  from typing import Any
6
5
 
7
- from flowmark import flowmark_markdown, line_wrap_by_sentence
8
6
  from marko import Markdown
7
+ from marko.block import Document
9
8
  from marko.ext import footnote
10
9
 
11
- from kash.utils.text_handling.markdown_utils import comprehensive_transform_tree
12
-
13
-
14
- def _normalize_footnotes_in_markdown(content: str) -> str:
15
- """
16
- Ensure blank lines between consecutive footnote definitions.
17
-
18
- Marko has a bug where consecutive footnotes without blank lines are parsed
19
- as a single footnote. This adds blank lines where needed.
20
- """
21
- lines = content.split("\n")
22
- result = []
23
- i = 0
24
-
25
- while i < len(lines):
26
- line = lines[i]
27
- result.append(line)
28
-
29
- # Check if this is a footnote definition
30
- if re.match(r"^\[\^[^\]]+\]:", line):
31
- # Look ahead to see if the next non-empty line is also a footnote
32
- j = i + 1
33
- while j < len(lines) and not lines[j].strip():
34
- result.append(lines[j])
35
- j += 1
36
-
37
- if j < len(lines) and re.match(r"^\[\^[^\]]+\]:", lines[j]):
38
- # Next non-empty line is also a footnote, add blank line
39
- result.append("")
40
-
41
- i = j
42
- else:
43
- i += 1
44
-
45
- return "\n".join(result)
10
+ from kash.utils.text_handling.markdown_utils import (
11
+ MARKDOWN as DEFAULT_MARKDOWN,
12
+ )
13
+ from kash.utils.text_handling.markdown_utils import (
14
+ comprehensive_transform_tree,
15
+ normalize_footnotes_in_markdown,
16
+ )
46
17
 
47
18
 
48
19
  @dataclass
@@ -81,15 +52,17 @@ class MarkdownFootnotes:
81
52
  MarkdownFootnotes instance with all footnotes indexed by ID
82
53
  """
83
54
  if markdown_parser is None:
84
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
55
+ markdown_parser = DEFAULT_MARKDOWN
85
56
 
86
57
  # Normalize to work around marko bug with consecutive footnotes
87
- normalized_content = _normalize_footnotes_in_markdown(content)
58
+ normalized_content = normalize_footnotes_in_markdown(content)
88
59
  document = markdown_parser.parse(normalized_content)
89
60
  return MarkdownFootnotes.from_document(document, markdown_parser)
90
61
 
91
62
  @staticmethod
92
- def from_document(document: Any, markdown_parser: Markdown | None = None) -> MarkdownFootnotes:
63
+ def from_document(
64
+ document: Document, markdown_parser: Markdown | None = None
65
+ ) -> MarkdownFootnotes:
93
66
  """
94
67
  Extract all footnotes from a parsed markdown document.
95
68
 
@@ -102,7 +75,7 @@ class MarkdownFootnotes:
102
75
  MarkdownFootnotes instance with all footnotes indexed by ID
103
76
  """
104
77
  if markdown_parser is None:
105
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
78
+ markdown_parser = DEFAULT_MARKDOWN
106
79
 
107
80
  footnotes_dict: dict[str, FootnoteInfo] = {}
108
81
 
@@ -206,9 +179,9 @@ def extract_footnote_references(content: str, markdown_parser: Markdown | None =
206
179
  List of unique footnote IDs that are referenced (with the ^)
207
180
  """
208
181
  if markdown_parser is None:
209
- markdown_parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))
182
+ markdown_parser = DEFAULT_MARKDOWN
210
183
 
211
- normalized_content = _normalize_footnotes_in_markdown(content)
184
+ normalized_content = normalize_footnotes_in_markdown(content)
212
185
  document = markdown_parser.parse(normalized_content)
213
186
  references: list[str] = []
214
187
  seen: set[str] = set()