kash-shell 0.3.30__py3-none-any.whl → 0.3.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/chat.py +1 -0
- kash/actions/core/markdownify_html.py +1 -1
- kash/actions/core/readability.py +1 -4
- kash/actions/core/render_as_html.py +1 -0
- kash/actions/core/show_webpage.py +2 -0
- kash/actions/core/summarize_as_bullets.py +1 -1
- kash/config/logger.py +1 -1
- kash/config/text_styles.py +1 -1
- kash/docs/markdown/topics/a2_installation.md +3 -2
- kash/exec/action_decorators.py +5 -3
- kash/exec/action_exec.py +50 -5
- kash/exec/fetch_url_items.py +4 -2
- kash/exec/llm_transforms.py +14 -5
- kash/exec/runtime_settings.py +2 -0
- kash/file_storage/file_store.py +50 -92
- kash/file_storage/item_id_index.py +128 -0
- kash/mcp/mcp_server_routes.py +42 -12
- kash/model/actions_model.py +18 -7
- kash/model/exec_model.py +3 -0
- kash/model/items_model.py +54 -12
- kash/utils/api_utils/gather_limited.py +2 -0
- kash/utils/api_utils/multitask_gather.py +134 -0
- kash/utils/common/s3_utils.py +108 -0
- kash/utils/common/url.py +16 -4
- kash/utils/rich_custom/multitask_status.py +84 -10
- kash/utils/text_handling/markdown_footnotes.py +16 -43
- kash/utils/text_handling/markdown_utils.py +108 -28
- kash/web_content/web_fetch.py +2 -1
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/METADATA +5 -5
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/RECORD +33 -30
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/licenses/LICENSE +0 -0

kash/file_storage/item_id_index.py
ADDED
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import threading
+from collections.abc import Callable
+
+from prettyfmt import fmt_lines, fmt_path
+
+from kash.config.logger import get_logger
+from kash.file_storage.store_filenames import join_suffix, parse_item_filename
+from kash.model.items_model import Item, ItemId
+from kash.model.paths_model import StorePath
+from kash.utils.common.uniquifier import Uniquifier
+from kash.utils.errors import InvalidFilename, SkippableError
+
+log = get_logger(__name__)
+
+
+class ItemIdIndex:
+    """
+    Index of item identities and historical filenames within a workspace.
+
+    - Tracks a mapping of `ItemId -> StorePath` for quick lookups
+    - Tracks historical slugs via `Uniquifier` to generate unique names consistently
+
+    TODO: Should add a file system watcher to make this always consistent with disk state.
+    """
+
+    def __init__(self) -> None:
+        self._lock = threading.RLock()
+        self.uniquifier = Uniquifier()
+        self.id_map: dict[ItemId, StorePath] = {}
+
+    def reset(self) -> None:
+        """
+        Clear all index state.
+        """
+        with self._lock:
+            log.info("ItemIdIndex: reset")
+            self.uniquifier = Uniquifier()
+            self.id_map.clear()
+
+    def __len__(self) -> int:
+        """
+        Number of unique names tracked.
+        """
+        with self._lock:
+            return len(self.uniquifier)
+
+    def uniquify_slug(self, slug: str, full_suffix: str) -> tuple[str, list[str]]:
+        """
+        Return a unique slug and historic slugs for the given suffix.
+        """
+        with self._lock:
+            # This updates internal history as a side-effect. Log for consistency.
+            log.info("ItemIdIndex: uniquify slug '%s' with suffix '%s'", slug, full_suffix)
+            return self.uniquifier.uniquify_historic(slug, full_suffix)
+
+    def index_item(
+        self, store_path: StorePath, load_item: Callable[[StorePath], Item]
+    ) -> StorePath | None:
+        """
+        Update the index with an item at `store_path`.
+        Returns store path of any duplicate item with the same id, otherwise None.
+        """
+        name, item_type, _format, file_ext = parse_item_filename(store_path)
+        if not file_ext:
+            log.debug(
+                "Skipping file with unrecognized name or extension: %s",
+                fmt_path(store_path),
+            )
+            return None
+
+        with self._lock:
+            full_suffix = join_suffix(item_type.name, file_ext.name) if item_type else file_ext.name
+            # Track unique name history
+            self.uniquifier.add(name, full_suffix)
+
+            log.info("ItemIdIndex: indexing %s", fmt_path(store_path))
+
+        # Load item outside the lock to avoid holding it during potentially slow I/O
+        try:
+            item = load_item(store_path)
+        except (ValueError, SkippableError) as e:
+            log.warning(
+                "ItemIdIndex: could not index file, skipping: %s: %s",
+                fmt_path(store_path),
+                e,
+            )
+            return None
+
+        dup_path: StorePath | None = None
+        with self._lock:
+            item_id = item.item_id()
+            if item_id:
+                old_path = self.id_map.get(item_id)
+                if old_path and old_path != store_path:
+                    dup_path = old_path
+                    log.info(
+                        "ItemIdIndex: duplicate id detected %s:\n%s",
+                        item_id,
+                        fmt_lines([old_path, store_path]),
+                    )
+                self.id_map[item_id] = store_path
+                log.info("ItemIdIndex: set id %s -> %s", item_id, fmt_path(store_path))
+
+        return dup_path
+
+    def unindex_item(self, store_path: StorePath, load_item: Callable[[StorePath], Item]) -> None:
+        """
+        Remove an item from the id index.
+        """
+        try:
+            # Load item outside the lock to avoid holding it during potentially slow I/O
+            item = load_item(store_path)
+            item_id = item.item_id()
+            if item_id:
+                with self._lock:
+                    try:
+                        self.id_map.pop(item_id, None)
+                        log.info("ItemIdIndex: removed id %s for %s", item_id, fmt_path(store_path))
+                    except KeyError:
+                        pass
+        except (FileNotFoundError, InvalidFilename):
+            pass
+
+    def find_store_path_by_id(self, item_id: ItemId) -> StorePath | None:
+        with self._lock:
+            return self.id_map.get(item_id)
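
The new `ItemIdIndex` gives each workspace a thread-safe map from item identity to store path, plus slug history for stable unique naming. A minimal usage sketch (the loader and path below are hypothetical; in the package, `FileStore` supplies the real `load_item` callable):

```python
# Hypothetical sketch of using the new ItemIdIndex; not code from the diff.
from kash.file_storage.item_id_index import ItemIdIndex
from kash.model.items_model import Item
from kash.model.paths_model import StorePath


def load_item(store_path: StorePath) -> Item:
    # Stand-in loader: the real one in FileStore reads and parses the file.
    raise NotImplementedError


index = ItemIdIndex()

# index_item returns the store path of a previously indexed item that has
# the same ItemId, letting the caller detect and handle duplicates.
dup = index.index_item(StorePath("docs/example_doc.doc.md"), load_item)
if dup is not None:
    print(f"Duplicate of already-indexed item at: {dup}")
```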
kash/mcp/mcp_server_routes.py
CHANGED
@@ -6,8 +6,10 @@ from dataclasses import dataclass
 
 from funlog import log_calls
 from mcp.server.lowlevel import Server
+from mcp.server.lowlevel.server import StructuredContent, UnstructuredContent
 from mcp.types import Prompt, Resource, TextContent, Tool
 from prettyfmt import fmt_path
+from pydantic import BaseModel
 from strif import AtomicVar
 
 from kash.config.capture_output import CapturedOutput, captured_output
@@ -20,6 +22,7 @@ from kash.model.actions_model import Action, ActionResult
 from kash.model.exec_model import ExecContext
 from kash.model.params_model import TypedParamValues
 from kash.model.paths_model import StorePath
+from kash.utils.common.url import Url
 
 log = get_logger(__name__)
 
@@ -109,6 +112,22 @@ def get_published_tools() -> list[Tool]:
     return []
 
 
+class StructuredActionResult(BaseModel):
+    """
+    Error from an MCP tool call.
+    """
+
+    s3_paths: list[Url] | None = None
+    """If the tool created an S3 item, the S3 paths of the created items."""
+
+    error: str | None = None
+    """If the tool had an error, the error message."""
+
+    # TODO: Include other metadata.
+    # metadata: dict[str, Any] | None = None
+    # """Metadata about the action result."""
+
+
 @dataclass(frozen=True)
 class ToolResult:
     """
@@ -119,6 +138,7 @@ class ToolResult:
     captured_output: CapturedOutput
     action_result: ActionResult
     result_store_paths: list[StorePath]
+    result_s3_paths: list[Url]
     error: Exception | None = None
 
     @property
@@ -168,12 +188,13 @@ class ToolResult:
         # TODO: Add more info on how to find the logs.
         return "Check kash logs for details."
 
-    def
+    def as_mcp_content(self) -> tuple[UnstructuredContent, StructuredContent]:
         """
-        Convert the tool result to content for the client
+        Convert the tool result to content for the MCP client.
         """
+        structured = StructuredActionResult()
         if self.error:
-
+            unstructured = [
                 TextContent(
                     text=f"The tool `{self.action.name}` had an error: {self.error}.\n\n"
                     + self.check_logs_message,
@@ -194,7 +215,7 @@ class ToolResult:
         if not chat_result:
             chat_result = "No result. Check kash logs for details."
 
-
+        unstructured = [
             TextContent(
                 text=f"{self.output_summary}\n\n"
                 f"{self.output_content}\n\n"
@@ -202,10 +223,15 @@ class ToolResult:
                 type="text",
             ),
         ]
+        structured = StructuredActionResult(s3_paths=self.result_s3_paths)
+
+        return unstructured, structured.model_dump()
 
 
 @log_calls(level="info")
-def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
+def run_mcp_tool(
+    action_name: str, arguments: dict
+) -> tuple[UnstructuredContent, StructuredContent]:
     """
     Run the action as a tool.
     """
@@ -222,6 +248,7 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
         refetch=False,  # Using the file caches.
         # Keeping all transient files for now, but maybe make transient?
         override_state=None,
+        sync_to_s3=True,  # Enable S3 syncing for MCP tools.
     ) as exec_settings:
         action_cls = look_up_action_class(action_name)
 
@@ -237,9 +264,9 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
         context = ExecContext(action=action, settings=exec_settings)
         action_input = prepare_action_input(*input_items)
 
-
-
-
+        result_with_paths = run_action_with_caching(context, action_input)
+        result = result_with_paths.result
+        result_store_paths = result_with_paths.result_paths
 
         # Return final result, formatted for the LLM to understand.
         return ToolResult(
@@ -247,8 +274,9 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
             captured_output=capture.output,
             action_result=result,
             result_store_paths=result_store_paths,
+            result_s3_paths=result_with_paths.s3_paths,
             error=None,
-        ).
+        ).as_mcp_content()
 
     except Exception as e:
         log.exception("Error running mcp tool")
@@ -258,7 +286,7 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
                 + "Check kash logs for details.",
                 type="text",
             )
-        ]
+        ], StructuredActionResult(error=str(e)).model_dump()
 
 
 def create_base_server() -> Server:
@@ -288,7 +316,9 @@ def create_base_server() -> Server:
        return []
 
     @app.call_tool()
-    async def handle_tool(
+    async def handle_tool(
+        name: str, arguments: dict
+    ) -> tuple[UnstructuredContent, StructuredContent]:
         try:
             if name not in _mcp_published_actions.copy():
                 log.error(f"Unknown tool requested: {name}")
@@ -303,6 +333,6 @@ def create_base_server() -> Server:
                     text=f"Error executing tool {name}: {e}",
                     type="text",
                 )
-            ]
+            ], StructuredActionResult(error=str(e)).model_dump()
 
     return app
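
MCP tool calls now return a pair: the familiar unstructured `TextContent` list plus a structured payload. A self-contained sketch of the new shape (the model is redefined inline with plain `str` paths so the snippet runs standalone; the package uses its own `Url` type):

```python
# Sketch of the (unstructured, structured) pair MCP tools now return.
from mcp.types import TextContent
from pydantic import BaseModel


class StructuredActionResult(BaseModel):  # simplified copy for illustration
    s3_paths: list[str] | None = None
    error: str | None = None


# Success: readable text for the model, machine-readable paths for clients.
unstructured = [TextContent(text="Created 1 item.", type="text")]
structured = StructuredActionResult(s3_paths=["s3://somebucket/some/file/path"])
response = (unstructured, structured.model_dump())
# -> ([TextContent(...)], {'s3_paths': ['s3://somebucket/some/file/path'], 'error': None})

# Error: mirrors the except branches in the diff.
error_payload = StructuredActionResult(error="tool failed").model_dump()
```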
kash/model/actions_model.py
CHANGED
@@ -246,7 +246,17 @@ class Action(ABC):
 
     output_type: ItemType = ItemType.doc
     """
-    The type of the output item(s)
+    The type of the output item(s). If an action returns multiple output types,
+    this will be the output type of the first output.
+    This is mainly used for preassembly for the cache check if an output already exists.
+    """
+
+    output_format: Format | None = None
+    """
+    The format of the output item(s). The default is to assume it is the same
+    format as the input. If an action returns multiple output formats,
+    this will be the format of the first output.
+    This is mainly used for preassembly for the cache check if an output already exists.
     """
 
     expected_outputs: ArgCount = ONE_ARG
@@ -540,7 +550,7 @@ class Action(ABC):
         """
         can_preassemble = self.cacheable and self.expected_outputs == ONE_ARG
         log.info(
-            "Preassemble check for `%s
+            "Preassemble check for `%s`: can_preassemble=%s (expected_outputs=%s, cacheable=%s)",
             self.name,
             can_preassemble,
             self.expected_outputs,
@@ -549,9 +559,10 @@ class Action(ABC):
         if can_preassemble:
             # Using first input to determine the output title.
             primary_input = context.action_input.items[0]
-            # In this case we only expect one output.
-
-
+            # In this case we only expect one output, of the type specified by the action.
+            primary_output = primary_input.derived_copy(context, 0, type=context.action.output_type)
+            log.info("Preassembled output: source %s, %s", primary_output.source, primary_output)
+            return ActionResult([primary_output])
         else:
             # Caching disabled.
             return None
@@ -574,9 +585,9 @@ class Action(ABC):
                 "type": "array",
                 "items": {
                     "type": "string",
-                    "description": "A
+                    "description": "A URL or S3 URL or a workspace file path, e.g. https://example.com/some/file/path or s3://somebucket/some/file/path or some/file/path",
                 },
-                "description": f"
+                "description": f"A list of paths or URLs of input items ({self.expected_args.as_str()}). Use an array of length one for a single input.",
            }
 
            # Set min/max items.
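
With `output_format` declared alongside `output_type`, preassembly can construct the expected output item up front and check the cache before running the action. A sketch of how a subclass might declare these fields (the class name, the `Format.markdown` member, and the import locations for `ItemType`/`Format` are assumptions, not package code):

```python
# Illustrative sketch: declaring the new output_format on an Action subclass.
from kash.model.actions_model import Action
from kash.model.items_model import Format, ItemType  # assumed import locations


class HtmlToMarkdownAction(Action):  # hypothetical action
    name: str = "html_to_markdown"
    output_type: ItemType = ItemType.doc
    # New in 0.3.34: declare the output format when it differs from the input,
    # so preassembly can predict the output item for the cache check.
    output_format: Format | None = Format.markdown  # assumed enum member
```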
kash/model/exec_model.py
CHANGED
@@ -43,6 +43,9 @@ class RuntimeSettings:
     no_format: bool = False
     """If True, will not normalize the output item's body text formatting (for Markdown)."""
 
+    sync_to_s3: bool = True
+    """If True, will sync output items to S3 if input was from S3."""
+
     @property
     def workspace(self) -> FileStore:
         from kash.workspaces.workspaces import get_ws
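
The effect of the new flag, in pseudologic (a hypothetical helper, not package code): outputs are pushed back to S3 only when syncing is enabled and the inputs came from S3.

```python
# Hypothetical sketch of the gating this flag implies; not package code.
from collections.abc import Callable


def maybe_sync_outputs(sync_to_s3: bool, input_was_s3: bool, push: Callable[[], None]) -> bool:
    """Push outputs back to S3 only if the flag is set and input came from S3."""
    if sync_to_s3 and input_was_s3:
        push()
        return True
    return False
```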
kash/model/items_model.py
CHANGED
@@ -7,6 +7,7 @@ from datetime import UTC, datetime
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, NotRequired, TypedDict, TypeVar, Unpack
+from urllib.parse import urlparse
 
 from frontmatter_format import from_yaml_string, new_yaml
 from prettyfmt import (
@@ -203,6 +204,15 @@ class ItemId:
        # If we got here, the item has no identity.
        item_id = None
 
+        log.debug(
+            "item_id is %s for type=%s, format=%s, url=%s, title=%s, source=%s",
+            item_id,
+            item.type,
+            item.format,
+            item.url,
+            item.title,
+            item.source,
+        )
        return item_id
 
 
@@ -561,12 +571,19 @@ class Item:
         from kash.file_storage.store_filenames import parse_item_filename
 
         # Prefer original to external, e.g. if we know the original but the external might
-        # be a cache filename.
-        path =
+        # be a cache filename. Also check
+        path = (
+            self.store_path
+            or self.original_filename
+            or self.external_path
+            or (self.url and urlparse(self.url).path)
+            or ""
+        ).strip()
         if path:
             path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
         else:
             path_name = None
+
         return path_name
 
     def slug_name(
@@ -598,6 +615,7 @@ class Item:
 
         slug = self.slug_name()
         full_suffix = self.get_full_suffix()
+
         return join_suffix(slug, full_suffix)
 
     def body_heading(self, allowed_tags: tuple[str, ...] = ("h1", "h2")) -> str | None:
@@ -835,7 +853,9 @@ class Item:
         the type and the body.
 
         Same as `new_copy_with` but also updates the `derived_from` relation. If we also
-        have an action context, then use
+        have an action context, then use that to fill some fields, in particular `title_template`
+        to derive a new title and `output_type` and `output_format` to set the output type
+        and format
         """
 
         # Get derived_from relation if possible.
@@ -869,20 +889,38 @@ class Item:
         if "external_path" not in updates:
             updates["external_path"] = None
 
+        action_context = action_context or self.context
+
+        if action_context:
+            # Default the output item type and format to the action's declared output_type
+            # and format if not explicitly set.
+            if "type" not in updates:
+                updates["type"] = action_context.action.output_type
+            # If we were not given a format override, we leave the output type the same.
+            elif action_context.action.output_format:
+                # Check an overridden format and then our own format.
+                new_output_format = updates.get("format", self.format)
+                if new_output_format and action_context.action.output_format != new_output_format:
+                    log.warning(
+                        "Output item format `%s` does not match declared output format `%s` for action `%s`",
+                        new_output_format,
+                        action_context.action.output_format,
+                        action_context.action.name,
+                    )
+
         new_item = self.new_copy_with(update_timestamp=True, **updates)
         if derived_from:
             new_item.update_relations(derived_from=derived_from)
 
-        action_context = action_context or self.context
-
         # Record the history.
         if action_context:
-
-
-
-
+            new_item.update_source(
+                Source(
+                    operation=action_context.operation,
+                    output_num=output_num,
+                    cacheable=action_context.action.cacheable,
+                )
             )
-            self.add_to_history(self.source.operation.summary())
             action = action_context.action
         else:
             action = None
@@ -911,9 +949,10 @@ class Item:
             setattr(self.relations, key, list(value))
         return self.relations
 
-    def
+    def update_source(self, source: Source) -> None:
         """
-        Update the history of the item
+        Update the source and the history of the item to indicate it was created
+        by the given operation. For convenience, this is idempotent.
         """
         self.source = source
         self.add_to_history(source.operation.summary())
@@ -945,6 +984,9 @@ class Item:
         return metadata_matches and body_matches
 
     def add_to_history(self, operation_summary: OperationSummary):
+        """
+        For convenience, this is idempotent.
+        """
         if not self.history:
             self.history = []
         # Don't add duplicates to the history.
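
The new defaulting logic in `derived_copy` can be summarized as a small decision flow (a simplified restatement of the diff above, using plain arguments rather than the real `Item` and `ExecContext` types):

```python
# Simplified restatement of the new type/format defaulting in derived_copy.
from typing import Any


def resolve_output_type(updates: dict[str, Any], item_format: Any, action: Any) -> None:
    if "type" not in updates:
        # No explicit type override: default to the action's declared output_type.
        updates["type"] = action.output_type
    elif action.output_format:
        # Explicit type given: warn if the effective format contradicts the
        # action's declared output_format.
        new_format = updates.get("format", item_format)
        if new_format and new_format != action.output_format:
            print(f"warning: format {new_format!r} != declared {action.output_format!r}")
```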
kash/utils/api_utils/gather_limited.py
CHANGED
@@ -542,6 +542,8 @@ async def gather_limited_sync(
             # Mark as failed
             if status and task_id is not None:
                 await status.finish(task_id, TaskState.FAILED, str(e))
+
+            log.warning("Task failed: %s: %s", label, e, exc_info=True)
             raise
 
     return await _gather_with_interrupt_handling(
kash/utils/api_utils/multitask_gather.py
ADDED
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
+from typing import Any, Generic, TypeVar, cast
+
+from strif import abbrev_list
+
+from kash.config.logger import get_logger
+from kash.config.settings import global_settings
+from kash.shell.output.shell_output import multitask_status
+from kash.utils.api_utils.api_retries import RetrySettings
+from kash.utils.api_utils.gather_limited import FuncTask, Limit, gather_limited_sync
+
+T = TypeVar("T")
+
+log = get_logger(name=__name__)
+
+
+@dataclass(frozen=True)
+class MultitaskResult(Generic[T]):
+    """
+    Container for results from multitask_gather preserving original order.
+    Access `.successes` and `.errors` to get partitioned views.
+    """
+
+    raw_results: list[T | BaseException]
+
+    @property
+    def successes_or_none(self) -> list[T | None]:
+        """
+        Return a list of successes or None, aligned with the original order.
+        """
+        return [
+            None if isinstance(item, BaseException) else cast(T, item) for item in self.raw_results
+        ]
+
+    @property
+    def successes(self) -> list[T]:
+        """
+        Return a list of successes only. May be shorter than the original list.
+        """
+        return [cast(T, item) for item in self.raw_results if not isinstance(item, BaseException)]
+
+    @property
+    def errors(self) -> list[BaseException]:
+        """
+        Return a list of errors only. May be shorter than the original list.
+        """
+        return [item for item in self.raw_results if isinstance(item, BaseException)]
+
+
+def _default_labeler(total: int) -> Callable[[int, Any], str]:
+    def labeler(i: int, _spec: Any) -> str:
+        return f"Task {i + 1}/{total}"
+
+    return labeler
+
+
+async def multitask_gather(
+    tasks: Iterable[FuncTask[T]] | Sequence[FuncTask[T]],
+    *,
+    labeler: Callable[[int, Any], str] | None = None,
+    limit: Limit | None = None,
+    bucket_limits: dict[str, Limit] | None = None,
+    retry_settings: RetrySettings | None = None,
+    show_progress: bool = True,
+) -> MultitaskResult[T]:
+    """
+    Run many `FuncTask`s concurrently with shared progress UI and rate limits.
+
+    This wraps the standard pattern of creating a status context, providing a labeler,
+    and calling `gather_limited_sync` with common options.
+
+    - `labeler` can be omitted; a simple "Task X/Y" label will be used.
+    - If `limit` is not provided, defaults are taken from `global_settings()`.
+    - If `show_progress` is False, tasks are run without the status context.
+    - Exceptions are collected (using return_exceptions=True). Use properties on the
+      returned `MultitaskResult` to access `.successes` and `.errors`.
+    """
+
+    # Normalize tasks to a list for length and stable iteration
+    task_list: list[FuncTask[T]] = list(tasks)
+
+    # Provide a default labeler if none is supplied
+    effective_labeler: Callable[[int, Any], str] = (
+        labeler if labeler is not None else _default_labeler(len(task_list))
+    )
+
+    # Provide sensible default rate limits if none are supplied
+    effective_limit: Limit = (
+        limit
+        if limit is not None
+        else Limit(
+            rps=global_settings().limit_rps,
+            concurrency=global_settings().limit_concurrency,
+        )
+    )
+
+    if not show_progress:
+        log.warning("Running %d tasks (progress disabled)…", len(task_list))
+
+    async with multitask_status(enabled=show_progress) as status:
+        raw_results = cast(
+            list[T | BaseException],
+            await gather_limited_sync(
+                *task_list,
+                limit=effective_limit,
+                bucket_limits=bucket_limits,
+                status=status,
+                labeler=effective_labeler,
+                retry_settings=retry_settings,
+                return_exceptions=True,
+            ),
+        )
+
+    result = MultitaskResult[T](raw_results=raw_results)
+
+    if result.errors:
+        log.warning(
+            "multitask_gather: had %d errors (out of %d tasks): %s",
+            len(result.errors),
+            len(task_list),
+            abbrev_list(result.errors),
+        )
+        log.error(
+            "multitask_gather: first error (full traceback):",
+            exc_info=(
+                type(result.errors[0]),
+                result.errors[0],
+                result.errors[0].__traceback__,
+            ),
+        )
+    return result
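
A usage sketch for the new helper (assumptions: `FuncTask` wraps a sync callable plus its positional args, as it is passed through to `gather_limited_sync`; the fetch function and URLs are illustrative):

```python
# Hypothetical usage of multitask_gather; FuncTask construction is an assumption.
import asyncio

from kash.utils.api_utils.gather_limited import FuncTask
from kash.utils.api_utils.multitask_gather import multitask_gather


def fetch_title(url: str) -> str:
    # Stand-in for real sync work (an HTTP fetch, an LLM call, etc.).
    return f"title of {url}"


async def main() -> None:
    urls = ["https://example.com/a", "https://example.com/b"]
    tasks = [FuncTask(fetch_title, (url,)) for url in urls]
    result = await multitask_gather(tasks)
    print(result.successes)          # results in original order, failures dropped
    print(result.successes_or_none)  # aligned with inputs, None where a task failed
    print(result.errors)             # the exceptions themselves


asyncio.run(main())
```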