kash-shell 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- kash/actions/core/combine_docs.py +52 -0
- kash/actions/core/concat_docs.py +47 -0
- kash/commands/workspace/workspace_commands.py +2 -2
- kash/config/logger.py +3 -2
- kash/config/settings.py +8 -0
- kash/docs/markdown/topics/a2_installation.md +2 -2
- kash/embeddings/embeddings.py +1 -1
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_items.py +52 -16
- kash/file_storage/file_store.py +3 -3
- kash/llm_utils/llm_completion.py +1 -1
- kash/mcp/mcp_cli.py +2 -2
- kash/utils/api_utils/api_retries.py +348 -14
- kash/utils/api_utils/gather_limited.py +366 -512
- kash/utils/api_utils/http_utils.py +46 -0
- kash/utils/api_utils/progress_protocol.py +49 -56
- kash/utils/rich_custom/multitask_status.py +70 -21
- kash/utils/text_handling/markdown_utils.py +14 -3
- kash/web_content/web_extract.py +13 -9
- kash/web_content/web_fetch.py +289 -60
- kash/web_content/web_page_model.py +5 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/METADATA +5 -3
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/RECORD +26 -23
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/licenses/LICENSE +0 -0
kash/actions/core/combine_docs.py
ADDED
@@ -0,0 +1,52 @@
+from chopdiff.html.html_in_md import div_wrapper
+
+from kash.config.logger import get_logger
+from kash.exec import kash_action
+from kash.model import ONE_OR_MORE_ARGS, ActionInput, ActionResult, Param
+from kash.utils.errors import InvalidInput
+
+log = get_logger(__name__)
+
+
+@kash_action(
+    expected_args=ONE_OR_MORE_ARGS,
+    params=(
+        Param(
+            "class_name",
+            "CSS class name to use for wrapping each document in a div.",
+            type=str,
+            default_value="doc",
+        ),
+    ),
+)
+def combine_docs(input: ActionInput, class_name: str = "page") -> ActionResult:
+    """
+    Combine multiple text items into a single document, wrapping each piece
+    in a div with the specified CSS class.
+    """
+    items = input.items
+
+    if not items:
+        raise InvalidInput("No items provided for combination")
+
+    # Create wrapper function
+    wrapper = div_wrapper(class_name=class_name)
+
+    # Collect and wrap all bodies
+    wrapped_bodies = []
+    for item in items:
+        if not item.body:
+            raise InvalidInput(f"Item has no body: {item.store_path}")
+        wrapped_bodies.append(wrapper(item.body))
+
+    # Concatenate with double newlines
+    combined_body = "\n\n".join(wrapped_bodies)
+
+    # Create title
+    count = len(items)
+    title = f"Combined ({count} doc{'s' if count != 1 else ''})"
+
+    # Create result item based on first item
+    result_item = items[0].derived_copy(body=combined_body, title=title, original_filename=None)
+
+    return ActionResult([result_item])
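Note: if `div_wrapper` behaves as its name suggests, returning a function that wraps text in a `<div>` carrying the given class, the combined output of two docs would look roughly like the sketch below. The markup here is an illustration, not necessarily chopdiff's exact rendering.

```python
docs = ["First page text.", "Second page text."]

# Approximate the wrapper: a <div> with the given CSS class around each body.
wrapped = [f'<div class="doc">\n\n{body}\n\n</div>' for body in docs]
print("\n\n".join(wrapped))
# <div class="doc">
#
# First page text.
#
# </div>
#
# <div class="doc">
#
# Second page text.
#
# </div>
```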
kash/actions/core/concat_docs.py
ADDED
@@ -0,0 +1,47 @@
+from kash.config.logger import get_logger
+from kash.exec import kash_action
+from kash.model import ONE_OR_MORE_ARGS, ActionInput, ActionResult, Param
+from kash.utils.errors import InvalidInput
+
+log = get_logger(__name__)
+
+
+@kash_action(
+    expected_args=ONE_OR_MORE_ARGS,
+    params=(
+        Param(
+            "separator",
+            "String to use between concatenated items.",
+            type=str,
+            default_value="\n\n",
+        ),
+    ),
+)
+def concat_docs(input: ActionInput, separator: str = "\n\n") -> ActionResult:
+    """
+    Concatenate multiple text items into a single document with the specified
+    separator between each piece.
+    """
+    items = input.items
+
+    if not items:
+        raise InvalidInput("No items provided for concatenation")
+
+    # Collect all bodies
+    bodies = []
+    for item in items:
+        if not item.body:
+            raise InvalidInput(f"Item has no body: {item.store_path}")
+        bodies.append(item.body)
+
+    # Concatenate with the specified separator
+    concat_body = separator.join(bodies)
+
+    # Create title
+    count = len(items)
+    title = f"Concat ({count} doc{'s' if count != 1 else ''})"
+
+    # Create result item based on first item
+    result_item = items[0].derived_copy(body=concat_body, title=title, original_filename=None)
+
+    return ActionResult([result_item])
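By contrast, `concat_docs` adds no markup at all; the join is plain string concatenation with the chosen separator:

```python
docs = ["First page text.", "Second page text."]
print("\n\n".join(docs))  # default separator: one blank line between docs
# First page text.
#
# Second page text.
```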
kash/commands/workspace/workspace_commands.py
CHANGED
@@ -474,8 +474,8 @@ def fetch_url(*files_or_urls: str, refetch: bool = False) -> ShellResult:
     store_paths = []
     for locator in locators:
         try:
-
-            store_paths.append(
+            fetch_result = fetch_url_item(locator, refetch=refetch)
+            store_paths.append(fetch_result.item.store_path)
         except InvalidInput as e:
             log.warning(
                 "Not a URL or URL resource, will not fetch metadata: %s: %s", fmt_loc(locator), e
kash/config/logger.py
CHANGED
@@ -254,9 +254,10 @@ def _do_logging_setup(log_settings: LogSettings):
     _console_handler = basic_stderr_handler(log_settings.log_console_level)
 
     # Manually adjust logging for a few packages, removing previous verbose default handlers.
-
+    # Set root logger to most permissive level so handlers can do the filtering
+    root_level = min(log_settings.log_console_level.value, log_settings.log_file_level.value)
     log_levels = {
-        None:
+        None: root_level,
         "LiteLLM": INFO,
         "LiteLLM Router": INFO,
         "LiteLLM Proxy": INFO,
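This follows the standard Python `logging` pattern: a record is dropped unless it first passes the logger's own level, so when handlers have different levels, the root logger must sit at the most permissive (lowest) of them and let each handler do its own filtering. A minimal stdlib sketch of the same idea, with illustrative names rather than kash's setup code:

```python
import logging

console = logging.StreamHandler()
console.setLevel(logging.WARNING)      # quiet console

file_handler = logging.FileHandler("app.log")
file_handler.setLevel(logging.DEBUG)   # verbose log file

root = logging.getLogger()
# Most permissive of the two, so DEBUG records still reach the file handler.
root.setLevel(min(console.level, file_handler.level))
root.addHandler(console)
root.addHandler(file_handler)

root.debug("goes to app.log only")
root.warning("goes to both console and app.log")
```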
kash/config/settings.py
CHANGED
@@ -210,6 +210,12 @@ class Settings:
     use_nerd_icons: bool
     """If true, use Nerd Icons in file listings. Requires a compatible font."""
 
+    limit_rps: float
+    """Default rate limit for API calls."""
+
+    limit_concurrency: int
+    """Default concurrency limit for API calls."""
+
 
     ws_root_dir = Path("~/Kash").expanduser()
 

@@ -276,6 +282,8 @@ def _read_settings():
         mcp_server_port=DEFAULT_MCP_SERVER_PORT,
         use_kerm_codes=False,
         use_nerd_icons=True,
+        limit_rps=5.0,
+        limit_concurrency=10,
     )
 
 
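These two defaults pair a requests-per-second cap with a concurrency cap, the combination the reworked `gather_limited` module enforces. A rough sketch of how such a pair of limits can compose, using illustrative names rather than kash's actual API:

```python
import asyncio
import time

class RateLimiter:
    """Spaces out calls so they average at most `rps` per second."""

    def __init__(self, rps: float):
        self.min_interval = 1.0 / rps
        self._next_time = 0.0
        self._lock = asyncio.Lock()

    async def wait(self) -> None:
        async with self._lock:
            now = time.monotonic()
            delay = self._next_time - now
            if delay > 0:
                await asyncio.sleep(delay)
            self._next_time = max(now, self._next_time) + self.min_interval


async def gather_with_limits(coros, rps: float = 5.0, concurrency: int = 10):
    """Run coroutines under both an RPS cap and a concurrency cap."""
    limiter = RateLimiter(rps)
    sem = asyncio.Semaphore(concurrency)

    async def run(coro):
        async with sem:          # at most `concurrency` in flight
            await limiter.wait() # and at most `rps` starts per second
            return await coro

    return await asyncio.gather(*(run(c) for c in coros))

# Usage: await gather_with_limits([fetch(url) for url in urls])
```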
kash/docs/markdown/topics/a2_installation.md
CHANGED
@@ -124,7 +124,7 @@ These are for `kash-media` but you can use a `kash-shell` for a more basic setup
 
 You can use kash from your MCP client (such as Anthropic Desktop or Cursor).
 
-You do this by running the the `
+You do this by running the the `kash-mcp` binary to make kash actions available as MCP
 tools.
 
 For Claude Desktop, my config looks like this:

@@ -133,7 +133,7 @@ For Claude Desktop, my config looks like this:
 {
   "mcpServers": {
     "kash": {
-      "command": "/Users/levy/.local/bin/
+      "command": "/Users/levy/.local/bin/kash-mcp",
       "args": ["--proxy"]
     }
   }
kash/embeddings/embeddings.py
CHANGED
kash/exec/action_exec.py
CHANGED
@@ -55,7 +55,7 @@ def prepare_action_input(*input_args: CommandArg, refetch: bool = False) -> Acti
     if input_items:
         log.message("Assembling metadata for input items:\n%s", fmt_lines(input_items))
         input_items = [
-            fetch_url_item_content(item, refetch=refetch) if is_url_resource(item) else item
+            fetch_url_item_content(item, refetch=refetch).item if is_url_resource(item) else item
            for item in input_items
        ]
kash/exec/fetch_url_items.py
CHANGED
@@ -1,19 +1,42 @@
+from dataclasses import dataclass
+
 from kash.config.logger import get_logger
 from kash.exec.preconditions import is_url_resource
-from kash.media_base.media_services import get_media_metadata
 from kash.model.items_model import Item, ItemType
 from kash.model.paths_model import StorePath
 from kash.utils.common.format_utils import fmt_loc
 from kash.utils.common.url import Url, is_url
 from kash.utils.common.url_slice import add_slice_to_url, parse_url_slice
 from kash.utils.errors import InvalidInput
+from kash.web_content.web_page_model import WebPageData
 
 log = get_logger(__name__)
 
 
+@dataclass(frozen=True)
+class FetchItemResult:
+    """
+    Result of fetching a URL item.
+    """
+
+    item: Item
+
+    was_cached: bool
+    """Whether this item was already present in cache (or if we skipped the fetch
+    because we already had the data)."""
+
+    page_data: WebPageData | None = None
+    """If the item was fetched from a URL via the web content cache,
+    this will hold additional metadata whether it was cached."""
+
+
 def fetch_url_item(
-    locator: Url | StorePath,
-
+    locator: Url | StorePath,
+    *,
+    save_content: bool = True,
+    refetch: bool = False,
+    cache: bool = True,
+) -> FetchItemResult:
     from kash.workspaces import current_ws
 
     ws = current_ws()

@@ -28,44 +51,51 @@ def fetch_url_item(
     else:
         raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
 
-    return fetch_url_item_content(item, save_content=save_content, refetch=refetch)
+    return fetch_url_item_content(item, save_content=save_content, refetch=refetch, cache=cache)
 
 
-def fetch_url_item_content(
+def fetch_url_item_content(
+    item: Item, *, save_content: bool = True, refetch: bool = False, cache: bool = True
+) -> FetchItemResult:
     """
     Fetch content and metadata for a URL using a media service if we
     recognize the URL as a known media service. Otherwise, fetch and extract the
     metadata and content from the web page and save it to the URL item.
 
-    If `save_content` is true, a copy of the content is also saved
-    a resource item.
+    If `save_content` is true, a copy of the content is also saved to the workspace
+    as a resource item.
+
+    If `cache` is true, the content is also cached in the local file cache.
 
     The content item is returned if content was saved. Otherwise, the updated
     URL item is returned.
     """
+    from kash.media_base.media_services import get_media_metadata
     from kash.web_content.canon_url import canonicalize_url
     from kash.web_content.web_extract import fetch_page_content
     from kash.workspaces import current_ws
 
     ws = current_ws()
     if not refetch and item.title and item.description and item.body:
-        log.
+        log.info(
             "Already have title, description, and body, will not fetch: %s",
             item.fmt_loc(),
         )
-        return item
+        return FetchItemResult(item, was_cached=True)
 
     if not item.url:
         raise InvalidInput(f"No URL for item: {item.fmt_loc()}")
 
     url = canonicalize_url(item.url)
-    log.
+    log.info("No metadata for URL, will fetch: %s", url)
 
     # Prefer fetching metadata from media using the media service if possible.
     # Data is cleaner and YouTube for example often blocks regular scraping.
     media_metadata = get_media_metadata(url)
     url_item: Item | None = None
     content_item: Item | None = None
+    page_data: WebPageData | None = None
+
     if media_metadata:
         url_item = Item.from_media_metadata(media_metadata)
         # Preserve and canonicalize any slice suffix on the URL.

@@ -73,12 +103,12 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
         if slice:
             new_url = add_slice_to_url(media_metadata.url, slice)
             if new_url != item.url:
-                log.
+                log.info("Updated URL from metadata and added slice: %s", new_url)
                 url_item.url = new_url
 
         url_item = item.merged_copy(url_item)
     else:
-        page_data = fetch_page_content(url, refetch=refetch, cache=
+        page_data = fetch_page_content(url, refetch=refetch, cache=cache)
         url_item = item.new_copy_with(
             title=page_data.title or item.title,
             description=page_data.description or item.description,

@@ -93,7 +123,6 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
             original_filename=item.get_filename(),
             format=page_data.format_info.format,
         )
-        ws.save(content_item)
 
     if not url_item.title:
         log.warning("Failed to fetch page data: title is missing: %s", item.url)

@@ -104,8 +133,15 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
     if content_item:
         ws.save(content_item)
         assert content_item.store_path
-        log.info(
+        log.info(
+            "Saved both URL and content item: %s, %s",
+            url_item.fmt_loc(),
+            content_item.fmt_loc(),
+        )
     else:
-        log.info("Saved URL item: %s", url_item.fmt_loc())
+        log.info("Saved URL item (no content): %s", url_item.fmt_loc())
 
-
+    was_cached = bool(
+        not page_data or (page_data.cache_result and page_data.cache_result.was_cached)
+    )
+    return FetchItemResult(content_item or url_item, was_cached=was_cached, page_data=page_data)
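Callers now unpack the result rather than receiving an `Item` directly, as the `workspace_commands.py` and `action_exec.py` hunks above show. A rough usage sketch, assuming a workspace is active (the URL is just an example):

```python
from kash.exec.fetch_url_items import fetch_url_item
from kash.utils.common.url import Url

result = fetch_url_item(Url("https://example.com/article"), refetch=False)

print(result.item.store_path)  # the saved content (or URL) item
print(result.was_cached)       # True if the fetch was skipped or served from cache
if result.page_data:
    # Extra page metadata is available when the page was fetched via the web cache.
    print(result.page_data.title)
```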
kash/file_storage/file_store.py
CHANGED
@@ -289,7 +289,7 @@ class FileStore(Workspace):
         if self.exists(default_path):
             old_item = self.load(default_path)
             if old_item.item_id() == item_id:
-                log.
+                log.info(
                     "Item with the same id already saved (disk check):\n%s",
                     fmt_lines([fmt_loc(default_path), item_id]),
                 )

@@ -297,7 +297,7 @@ class FileStore(Workspace):
             self.id_map[item_id] = default_path
             return default_path
         if store_path and self.exists(store_path):
-            log.
+            log.info(
                 "Item with the same id already saved (disk check):\n%s",
                 fmt_lines([fmt_loc(store_path), item_id]),
             )

@@ -536,7 +536,7 @@ class FileStore(Workspace):
         item = Item(item_type, url=url, format=Format.url)
         previous_store_path = self.find_by_id(item)
         if previous_store_path and not reimport:
-            log.
+            log.info(
                 "Workspace already has this URL:\n%s",
                 fmt_lines([fmt_loc(previous_store_path), url]),
             )
kash/llm_utils/llm_completion.py
CHANGED
@@ -107,7 +107,7 @@ def llm_completion(
 
     total_input_len = sum(len(m["content"]) for m in messages)
     speed = len(content) / elapsed
-    log.
+    log.info(
         f"{EMOJI_TIMING} LLM completion from {model.litellm_name} in {format_duration(elapsed)}: "
         f"input {total_input_len} chars in {len(messages)} messages, output {len(content)} chars "
         f"({speed:.0f} char/s)"
kash/mcp/mcp_cli.py
CHANGED
@@ -114,10 +114,10 @@ def main():
     args = build_parser().parse_args()
 
     if args.list_tools or args.tool_help:
-        kash_setup(rich_logging=True,
+        kash_setup(rich_logging=True, log_level=LogLevel.warning)
         show_tool_info(args.tool_help)
     else:
-        kash_setup(rich_logging=False,
+        kash_setup(rich_logging=False, log_level=LogLevel.info)
         run_server(args)
 
 