kash-shell 0.3.30__py3-none-any.whl → 0.3.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/chat.py +1 -0
- kash/actions/core/markdownify_html.py +1 -1
- kash/actions/core/readability.py +1 -4
- kash/actions/core/render_as_html.py +1 -0
- kash/actions/core/show_webpage.py +2 -0
- kash/actions/core/summarize_as_bullets.py +1 -1
- kash/config/logger.py +1 -1
- kash/config/text_styles.py +1 -1
- kash/docs/markdown/topics/a2_installation.md +3 -2
- kash/exec/action_decorators.py +5 -3
- kash/exec/action_exec.py +50 -5
- kash/exec/fetch_url_items.py +4 -2
- kash/exec/llm_transforms.py +14 -5
- kash/exec/runtime_settings.py +2 -0
- kash/file_storage/file_store.py +50 -92
- kash/file_storage/item_id_index.py +128 -0
- kash/mcp/mcp_server_routes.py +42 -12
- kash/model/actions_model.py +18 -7
- kash/model/exec_model.py +3 -0
- kash/model/items_model.py +54 -12
- kash/utils/api_utils/gather_limited.py +2 -0
- kash/utils/api_utils/multitask_gather.py +134 -0
- kash/utils/common/s3_utils.py +108 -0
- kash/utils/common/url.py +16 -4
- kash/utils/rich_custom/multitask_status.py +84 -10
- kash/utils/text_handling/markdown_footnotes.py +16 -43
- kash/utils/text_handling/markdown_utils.py +108 -28
- kash/web_content/web_fetch.py +2 -1
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/METADATA +5 -5
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/RECORD +33 -30
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/licenses/LICENSE +0 -0
kash/utils/common/s3_utils.py
ADDED

```diff
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+
+from sidematter_format.sidematter_format import Sidematter
+
+from kash.utils.common.url import Url, is_s3_url, parse_s3_url
+
+
+def check_aws_cli() -> None:
+    """
+    Check if the AWS CLI is installed and available.
+    """
+    if shutil.which("aws") is None:
+        raise RuntimeError(
+            "AWS CLI not found in PATH. Please install 'awscli' and ensure 'aws' is available."
+        )
+
+
+def get_s3_parent_folder(url: Url) -> Url | None:
+    """
+    Get the parent folder of an S3 URL, or None if not an S3 URL.
+    """
+    if is_s3_url(url):
+        s3_bucket, s3_key = parse_s3_url(url)
+        s3_parent_folder = Path(s3_key).parent
+
+        return Url(f"s3://{s3_bucket}/{s3_parent_folder}")
+
+    else:
+        return None
+
+
+def s3_sync_to_folder(
+    src_path: str | Path,
+    s3_dest_parent: Url,
+    *,
+    include_sidematter: bool = False,
+) -> list[Url]:
+    """
+    Sync a local file or directory to an S3 "parent" folder using the AWS CLI.
+    Set `include_sidematter` to include sidematter files alongside the source files.
+
+    Returns a list of S3 URLs that were the top-level sync targets:
+    - For a single file: the file URL (and sidematter file/dir URLs if included).
+    - For a directory: the destination parent prefix URL (non-recursive reporting).
+    """
+
+    src_path = Path(src_path)
+    if not src_path.exists():
+        raise ValueError(f"Source path does not exist: {src_path}")
+    if not is_s3_url(s3_dest_parent):
+        raise ValueError(f"Destination must be an s3:// URL: {s3_dest_parent}")
+
+    check_aws_cli()
+
+    dest_prefix = str(s3_dest_parent).rstrip("/") + "/"
+    targets: list[Url] = []
+
+    if src_path.is_file():
+        # Build the list of paths to sync using Sidematter's resolved path_list if requested.
+        sync_paths: list[Path]
+        if include_sidematter:
+            resolved = Sidematter(src_path).resolve(parse_meta=False, use_frontmatter=False)
+            sync_paths = resolved.path_list
+        else:
+            sync_paths = [src_path]
+
+        for p in sync_paths:
+            if p.is_file():
+                # Use sync with include/exclude to leverage default short-circuiting
+                subprocess.run(
+                    [
+                        "aws",
+                        "s3",
+                        "sync",
+                        str(p.parent),
+                        dest_prefix,
+                        "--exclude",
+                        "*",
+                        "--include",
+                        p.name,
+                    ],
+                    check=True,
+                )
+                targets.append(Url(dest_prefix + p.name))
+            elif p.is_dir():
+                dest_dir = dest_prefix + p.name + "/"
+                subprocess.run(["aws", "s3", "sync", str(p), dest_dir], check=True)
+                targets.append(Url(dest_dir))
+
+        return targets
+    else:
+        # Directory mode: sync whole directory.
+        subprocess.run(
+            [
+                "aws",
+                "s3",
+                "sync",
+                str(src_path),
+                dest_prefix,
+            ],
+            check=True,
+        )
+        targets.append(Url(dest_prefix))
+        return targets
```
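Since the module shells out to the AWS CLI rather than using boto3, syncs inherit the CLI's credential chain and its default short-circuiting (unchanged files are skipped). A minimal usage sketch; the bucket, paths, and printed output are hypothetical:

```python
from pathlib import Path

from kash.utils.common.s3_utils import get_s3_parent_folder, s3_sync_to_folder
from kash.utils.common.url import Url

# Hypothetical destination prefix; substitute a bucket you control.
dest = Url("s3://my-bucket/exports")

# Sync one file (and its sidematter files, if any) under the prefix.
targets = s3_sync_to_folder(Path("notes/report.md"), dest, include_sidematter=True)
print(targets)  # e.g. [Url('s3://my-bucket/exports/report.md'), ...]

# Recover the parent prefix from any resulting S3 URL.
print(get_s3_parent_folder(targets[0]))  # s3://my-bucket/exports
```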
kash/utils/common/url.py
CHANGED
```diff
@@ -26,6 +26,7 @@ A string that may not be resolved to a URL or path.
 
 HTTP_ONLY = ["http", "https"]
 HTTP_OR_FILE = HTTP_ONLY + ["file"]
+HTTP_OR_FILE_OR_S3 = HTTP_OR_FILE + ["s3"]
 
 
 def check_if_url(
@@ -36,7 +37,8 @@ def check_if_url(
     the `urlparse.ParseResult`.
 
     Also returns false for Paths, so that it's easy to use local paths and URLs
-    (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE`
+    (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE`
+    or `HTTP_OR_FILE_OR_S3` to restrict to only certain schemes.
     restrict to only certain schemes.
     """
     if isinstance(text, Path):
@@ -69,6 +71,13 @@ def is_file_url(url: str | Url) -> bool:
     return url.startswith("file://")
 
 
+def is_s3_url(url: str | Url) -> bool:
+    """
+    Is URL an S3 URL?
+    """
+    return url.startswith("s3://")
+
+
 def parse_http_url(url: str | Url) -> ParseResult:
     """
     Parse an http/https URL and return the parsed result, raising ValueError if
@@ -118,7 +127,7 @@ def as_file_url(path: str | Path) -> Url:
 
 def normalize_url(
     url: Url,
-    check_schemes: list[str] | None = HTTP_OR_FILE,
+    check_schemes: list[str] | None = HTTP_OR_FILE_OR_S3,
     drop_fragment: bool = True,
     resolve_local_paths: bool = True,
 ) -> Url:
@@ -238,7 +247,10 @@ def test_normalize_url():
         normalize_url(url=Url("/not/a/URL"))
         raise AssertionError()
     except ValueError as e:
-        assert str(e) == "Scheme '' not in allowed schemes: ['http', 'https', 'file']: /not/a/URL"
+        assert (
+            str(e)
+            == "Scheme '' not in allowed schemes: ['http', 'https', 'file', 's3']: /not/a/URL"
+        )
 
     try:
         normalize_url(Url("ftp://example.com"))
@@ -246,7 +258,7 @@ def test_normalize_url():
     except ValueError as e:
         assert (
             str(e)
-            == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
+            == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file', 's3']: ftp://example.com"
         )
 
 
```
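The practical effect of the new `HTTP_OR_FILE_OR_S3` default is that `s3://` URLs pass the same checks as `http(s)://` and `file://` URLs. A short sketch (the URLs are illustrative, and the error text follows the pattern asserted in the tests above):

```python
from kash.utils.common.url import HTTP_ONLY, Url, is_s3_url, normalize_url

assert is_s3_url("s3://bucket/key.txt")
assert not is_s3_url("https://example.com/key.txt")

# s3:// now passes the default scheme check rather than raising ValueError.
url = normalize_url(Url("s3://bucket/key.txt"))

# Callers can still opt into stricter checking.
try:
    normalize_url(Url("s3://bucket/key.txt"), check_schemes=HTTP_ONLY)
except ValueError as e:
    print(e)  # Scheme 's3' not in allowed schemes: ['http', 'https']: ...
```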
|
@@ -72,6 +72,8 @@ RUNNING_SYMBOL = ""
|
|
|
72
72
|
DEFAULT_LABEL_WIDTH = 40
|
|
73
73
|
DEFAULT_PROGRESS_WIDTH = 20
|
|
74
74
|
|
|
75
|
+
MAX_DISPLAY_TASKS = 20
|
|
76
|
+
|
|
75
77
|
|
|
76
78
|
# Calculate spinner width to maintain column alignment
|
|
77
79
|
def _get_spinner_width(spinner_name: str) -> int:
|
|
@@ -101,6 +103,9 @@ class StatusSettings:
|
|
|
101
103
|
transient: bool = True
|
|
102
104
|
refresh_per_second: float = 10
|
|
103
105
|
styles: StatusStyles = DEFAULT_STYLES
|
|
106
|
+
# Maximum number of tasks to keep visible in the live display.
|
|
107
|
+
# Older completed/skipped/failed tasks beyond this cap will be removed from the live view.
|
|
108
|
+
max_display_tasks: int = MAX_DISPLAY_TASKS
|
|
104
109
|
|
|
105
110
|
|
|
106
111
|
class SpinnerStatusColumn(ProgressColumn):
|
|
@@ -298,6 +303,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
298
303
|
self._task_info: dict[int, TaskInfo] = {}
|
|
299
304
|
self._next_id: int = 1
|
|
300
305
|
self._rich_task_ids: dict[int, TaskID] = {} # Map our IDs to Rich Progress IDs
|
|
306
|
+
# Track order of tasks added to the Progress so we can prune oldest completed ones
|
|
307
|
+
self._displayed_task_order: list[int] = []
|
|
308
|
+
# Track tasks pruned from the live display so we don't re-add them later
|
|
309
|
+
self._pruned_task_ids: set[int] = set()
|
|
301
310
|
|
|
302
311
|
# Unified live integration
|
|
303
312
|
self._unified_live: Any | None = None # Reference to the global unified live
|
|
@@ -442,6 +451,10 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
442
451
|
progress_display=None,
|
|
443
452
|
)
|
|
444
453
|
self._rich_task_ids[task_id] = rich_task_id
|
|
454
|
+
self._displayed_task_order.append(task_id)
|
|
455
|
+
|
|
456
|
+
# Prune if too many tasks are visible (prefer removing completed ones)
|
|
457
|
+
self._prune_completed_tasks_if_needed()
|
|
445
458
|
|
|
446
459
|
async def set_progress_display(self, task_id: int, display: RenderableType) -> None:
|
|
447
460
|
"""
|
|
@@ -536,18 +549,31 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
536
549
|
|
|
537
550
|
# Complete the progress bar and stop spinner
|
|
538
551
|
if rich_task_id is not None:
|
|
539
|
-
|
|
552
|
+
# Safely find the Task by id; Progress.tasks is a list, not a dict
|
|
553
|
+
task_obj = next((t for t in self._progress.tasks if t.id == rich_task_id), None)
|
|
554
|
+
if task_obj is not None and task_obj.total is not None:
|
|
555
|
+
total = task_obj.total
|
|
556
|
+
else:
|
|
557
|
+
total = task_info.steps_total or 1
|
|
540
558
|
self._progress.update(rich_task_id, completed=total, task_info=task_info)
|
|
541
559
|
else:
|
|
542
|
-
#
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
560
|
+
# If this task was pruned from the live display, skip re-adding it
|
|
561
|
+
if task_id in self._pruned_task_ids:
|
|
562
|
+
pass
|
|
563
|
+
else:
|
|
564
|
+
# Task was never started; add a completed row so it appears once
|
|
565
|
+
rich_task_id = self._progress.add_task(
|
|
566
|
+
"",
|
|
567
|
+
total=task_info.steps_total,
|
|
568
|
+
label=task_info.label,
|
|
569
|
+
completed=task_info.steps_total,
|
|
570
|
+
task_info=task_info,
|
|
571
|
+
)
|
|
572
|
+
self._rich_task_ids[task_id] = rich_task_id
|
|
573
|
+
self._displayed_task_order.append(task_id)
|
|
574
|
+
|
|
575
|
+
# After finishing, prune completed tasks to respect max visible cap
|
|
576
|
+
self._prune_completed_tasks_if_needed()
|
|
551
577
|
|
|
552
578
|
def get_task_info(self, task_id: int) -> TaskInfo | None:
|
|
553
579
|
"""Get additional task information."""
|
|
@@ -567,6 +593,54 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
567
593
|
"""Get console instance for additional output above progress."""
|
|
568
594
|
return self._progress.console
|
|
569
595
|
|
|
596
|
+
def _prune_completed_tasks_if_needed(self) -> None:
|
|
597
|
+
"""
|
|
598
|
+
Ensure at most `max_display_tasks` tasks are visible by removing the oldest
|
|
599
|
+
completed/skipped/failed tasks first. Running or waiting tasks are never
|
|
600
|
+
removed by this method.
|
|
601
|
+
Note: This method assumes it's called under self._lock.
|
|
602
|
+
"""
|
|
603
|
+
max_visible = self.settings.max_display_tasks
|
|
604
|
+
|
|
605
|
+
# Nothing to prune or unlimited
|
|
606
|
+
if max_visible <= 0:
|
|
607
|
+
return
|
|
608
|
+
|
|
609
|
+
# Count visible tasks (those with a Rich task id present)
|
|
610
|
+
visible_task_ids = [tid for tid in self._displayed_task_order if tid in self._rich_task_ids]
|
|
611
|
+
excess = len(visible_task_ids) - max_visible
|
|
612
|
+
if excess <= 0:
|
|
613
|
+
return
|
|
614
|
+
|
|
615
|
+
# Build list of terminal tasks that can be pruned (oldest first)
|
|
616
|
+
terminal_tasks = []
|
|
617
|
+
for tid in self._displayed_task_order:
|
|
618
|
+
if tid not in self._rich_task_ids:
|
|
619
|
+
continue
|
|
620
|
+
info = self._task_info.get(tid)
|
|
621
|
+
if info and info.state in (
|
|
622
|
+
TaskState.COMPLETED,
|
|
623
|
+
TaskState.FAILED,
|
|
624
|
+
TaskState.SKIPPED,
|
|
625
|
+
):
|
|
626
|
+
terminal_tasks.append(tid)
|
|
627
|
+
|
|
628
|
+
# Remove the oldest terminal tasks up to the excess count
|
|
629
|
+
tasks_to_remove = terminal_tasks[:excess]
|
|
630
|
+
|
|
631
|
+
for tid in tasks_to_remove:
|
|
632
|
+
rich_tid = self._rich_task_ids.pop(tid, None)
|
|
633
|
+
if rich_tid is not None:
|
|
634
|
+
# Remove from Rich progress display
|
|
635
|
+
self._progress.remove_task(rich_tid)
|
|
636
|
+
# Mark as pruned so we don't re-add on finish
|
|
637
|
+
self._pruned_task_ids.add(tid)
|
|
638
|
+
|
|
639
|
+
# Efficiently rebuild the displayed task order without the removed tasks
|
|
640
|
+
self._displayed_task_order = [
|
|
641
|
+
tid for tid in self._displayed_task_order if tid not in tasks_to_remove
|
|
642
|
+
]
|
|
643
|
+
|
|
570
644
|
|
|
571
645
|
## Tests
|
|
572
646
|
|
|
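A sketch of configuring the new cap, assuming `StatusSettings` can be constructed with keyword arguments and its remaining fields are defaulted (as the fields shown above are):

```python
from kash.utils.rich_custom.multitask_status import StatusSettings

# Keep up to 50 rows in the live view. Only tasks in a terminal state
# (completed, failed, skipped) are pruned, oldest first; running and
# waiting tasks always stay visible.
settings = StatusSettings(max_display_tasks=50)

# Per _prune_completed_tasks_if_needed, a value <= 0 disables pruning.
unlimited = StatusSettings(max_display_tasks=0)
```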
kash/utils/text_handling/markdown_footnotes.py
CHANGED

```diff
@@ -1,48 +1,19 @@
 from __future__ import annotations
 
-import re
 from dataclasses import dataclass, field
 from typing import Any
 
-from flowmark import flowmark_markdown, line_wrap_by_sentence
 from marko import Markdown
+from marko.block import Document
 from marko.ext import footnote
 
-from kash.utils.text_handling.markdown_utils import
-
-
-
-
-
-    Marko has a bug where consecutive footnotes without blank lines are parsed
-    as a single footnote. This adds blank lines where needed.
-    """
-    lines = content.split("\n")
-    result = []
-    i = 0
-
-    while i < len(lines):
-        line = lines[i]
-        result.append(line)
-
-        # Check if this is a footnote definition
-        if re.match(r"^\[\^[^\]]+\]:", line):
-            # Look ahead to see if the next non-empty line is also a footnote
-            j = i + 1
-            while j < len(lines) and not lines[j].strip():
-                result.append(lines[j])
-                j += 1
-
-            if j < len(lines) and re.match(r"^\[\^[^\]]+\]:", lines[j]):
-                # Next non-empty line is also a footnote, add blank line
-                result.append("")
-
-            i = j
-        else:
-            i += 1
-
-    return "\n".join(result)
+from kash.utils.text_handling.markdown_utils import (
+    MARKDOWN as DEFAULT_MARKDOWN,
+)
+from kash.utils.text_handling.markdown_utils import (
+    comprehensive_transform_tree,
+    normalize_footnotes_in_markdown,
+)
 
 
 @dataclass
@@ -81,15 +52,17 @@ class MarkdownFootnotes:
         MarkdownFootnotes instance with all footnotes indexed by ID
         """
         if markdown_parser is None:
-            markdown_parser =
+            markdown_parser = DEFAULT_MARKDOWN
 
         # Normalize to work around marko bug with consecutive footnotes
-        normalized_content =
+        normalized_content = normalize_footnotes_in_markdown(content)
         document = markdown_parser.parse(normalized_content)
         return MarkdownFootnotes.from_document(document, markdown_parser)
 
     @staticmethod
-    def from_document(
+    def from_document(
+        document: Document, markdown_parser: Markdown | None = None
+    ) -> MarkdownFootnotes:
         """
         Extract all footnotes from a parsed markdown document.
 
@@ -102,7 +75,7 @@ class MarkdownFootnotes:
         MarkdownFootnotes instance with all footnotes indexed by ID
         """
         if markdown_parser is None:
-            markdown_parser =
+            markdown_parser = DEFAULT_MARKDOWN
 
         footnotes_dict: dict[str, FootnoteInfo] = {}
 
@@ -206,9 +179,9 @@ def extract_footnote_references(content: str, markdown_parser: Markdown | None =
     List of unique footnote IDs that are referenced (with the ^)
     """
     if markdown_parser is None:
-        markdown_parser =
+        markdown_parser = DEFAULT_MARKDOWN
 
-    normalized_content =
+    normalized_content = normalize_footnotes_in_markdown(content)
     document = markdown_parser.parse(normalized_content)
     references: list[str] = []
     seen: set[str] = set()
```