kash-shell 0.3.30__py3-none-any.whl → 0.3.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/chat.py +1 -0
- kash/actions/core/markdownify_html.py +1 -1
- kash/actions/core/readability.py +1 -4
- kash/actions/core/render_as_html.py +1 -0
- kash/actions/core/show_webpage.py +2 -0
- kash/actions/core/summarize_as_bullets.py +1 -1
- kash/config/logger.py +1 -1
- kash/config/text_styles.py +1 -1
- kash/docs/markdown/topics/a2_installation.md +3 -2
- kash/exec/action_decorators.py +5 -3
- kash/exec/action_exec.py +50 -5
- kash/exec/fetch_url_items.py +4 -2
- kash/exec/llm_transforms.py +14 -5
- kash/exec/runtime_settings.py +2 -0
- kash/file_storage/file_store.py +50 -92
- kash/file_storage/item_id_index.py +128 -0
- kash/mcp/mcp_server_routes.py +42 -12
- kash/model/actions_model.py +18 -7
- kash/model/exec_model.py +3 -0
- kash/model/items_model.py +54 -12
- kash/utils/api_utils/gather_limited.py +2 -0
- kash/utils/api_utils/multitask_gather.py +134 -0
- kash/utils/common/s3_utils.py +108 -0
- kash/utils/common/url.py +16 -4
- kash/utils/rich_custom/multitask_status.py +84 -10
- kash/utils/text_handling/markdown_footnotes.py +16 -43
- kash/utils/text_handling/markdown_utils.py +108 -28
- kash/web_content/web_fetch.py +2 -1
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/METADATA +5 -5
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/RECORD +33 -30
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/licenses/LICENSE +0 -0

kash/file_storage/item_id_index.py
ADDED
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import threading
+from collections.abc import Callable
+
+from prettyfmt import fmt_lines, fmt_path
+
+from kash.config.logger import get_logger
+from kash.file_storage.store_filenames import join_suffix, parse_item_filename
+from kash.model.items_model import Item, ItemId
+from kash.model.paths_model import StorePath
+from kash.utils.common.uniquifier import Uniquifier
+from kash.utils.errors import InvalidFilename, SkippableError
+
+log = get_logger(__name__)
+
+
+class ItemIdIndex:
+    """
+    Index of item identities and historical filenames within a workspace.
+
+    - Tracks a mapping of `ItemId -> StorePath` for quick lookups
+    - Tracks historical slugs via `Uniquifier` to generate unique names consistently
+
+    TODO: Should add a file system watcher to make this always consistent with disk state.
+    """
+
+    def __init__(self) -> None:
+        self._lock = threading.RLock()
+        self.uniquifier = Uniquifier()
+        self.id_map: dict[ItemId, StorePath] = {}
+
+    def reset(self) -> None:
+        """
+        Clear all index state.
+        """
+        with self._lock:
+            log.info("ItemIdIndex: reset")
+            self.uniquifier = Uniquifier()
+            self.id_map.clear()
+
+    def __len__(self) -> int:
+        """
+        Number of unique names tracked.
+        """
+        with self._lock:
+            return len(self.uniquifier)
+
+    def uniquify_slug(self, slug: str, full_suffix: str) -> tuple[str, list[str]]:
+        """
+        Return a unique slug and historic slugs for the given suffix.
+        """
+        with self._lock:
+            # This updates internal history as a side-effect. Log for consistency.
+            log.info("ItemIdIndex: uniquify slug '%s' with suffix '%s'", slug, full_suffix)
+            return self.uniquifier.uniquify_historic(slug, full_suffix)
+
+    def index_item(
+        self, store_path: StorePath, load_item: Callable[[StorePath], Item]
+    ) -> StorePath | None:
+        """
+        Update the index with an item at `store_path`.
+        Returns store path of any duplicate item with the same id, otherwise None.
+        """
+        name, item_type, _format, file_ext = parse_item_filename(store_path)
+        if not file_ext:
+            log.debug(
+                "Skipping file with unrecognized name or extension: %s",
+                fmt_path(store_path),
+            )
+            return None
+
+        with self._lock:
+            full_suffix = join_suffix(item_type.name, file_ext.name) if item_type else file_ext.name
+            # Track unique name history
+            self.uniquifier.add(name, full_suffix)
+
+            log.info("ItemIdIndex: indexing %s", fmt_path(store_path))
+
+        # Load item outside the lock to avoid holding it during potentially slow I/O
+        try:
+            item = load_item(store_path)
+        except (ValueError, SkippableError) as e:
+            log.warning(
+                "ItemIdIndex: could not index file, skipping: %s: %s",
+                fmt_path(store_path),
+                e,
+            )
+            return None
+
+        dup_path: StorePath | None = None
+        with self._lock:
+            item_id = item.item_id()
+            if item_id:
+                old_path = self.id_map.get(item_id)
+                if old_path and old_path != store_path:
+                    dup_path = old_path
+                    log.info(
+                        "ItemIdIndex: duplicate id detected %s:\n%s",
+                        item_id,
+                        fmt_lines([old_path, store_path]),
+                    )
+                self.id_map[item_id] = store_path
+                log.info("ItemIdIndex: set id %s -> %s", item_id, fmt_path(store_path))
+
+        return dup_path
+
+    def unindex_item(self, store_path: StorePath, load_item: Callable[[StorePath], Item]) -> None:
+        """
+        Remove an item from the id index.
+        """
+        try:
+            # Load item outside the lock to avoid holding it during potentially slow I/O
+            item = load_item(store_path)
+            item_id = item.item_id()
+            if item_id:
+                with self._lock:
+                    try:
+                        self.id_map.pop(item_id, None)
+                        log.info("ItemIdIndex: removed id %s for %s", item_id, fmt_path(store_path))
+                    except KeyError:
+                        pass
+        except (FileNotFoundError, InvalidFilename):
+            pass
+
+    def find_store_path_by_id(self, item_id: ItemId) -> StorePath | None:
+        with self._lock:
+            return self.id_map.get(item_id)
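
The new `ItemIdIndex` gives each workspace a thread-safe map from item identity to store path, plus slug history for stable unique naming. A minimal usage sketch (the loader and path below are hypothetical; in the package, `FileStore` supplies the real `load_item` callable):

```python
# Hypothetical sketch of using the new ItemIdIndex; not code from the diff.
from kash.file_storage.item_id_index import ItemIdIndex
from kash.model.items_model import Item
from kash.model.paths_model import StorePath


def load_item(store_path: StorePath) -> Item:
    # Stand-in loader: the real one in FileStore reads and parses the file.
    raise NotImplementedError


index = ItemIdIndex()

# index_item returns the store path of a previously indexed item that has
# the same ItemId, letting the caller detect and handle duplicates.
dup = index.index_item(StorePath("docs/example_doc.doc.md"), load_item)
if dup is not None:
    print(f"Duplicate of already-indexed item at: {dup}")
```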
kash/mcp/mcp_server_routes.py
CHANGED
@@ -6,8 +6,10 @@ from dataclasses import dataclass
 
 from funlog import log_calls
 from mcp.server.lowlevel import Server
+from mcp.server.lowlevel.server import StructuredContent, UnstructuredContent
 from mcp.types import Prompt, Resource, TextContent, Tool
 from prettyfmt import fmt_path
+from pydantic import BaseModel
 from strif import AtomicVar
 
 from kash.config.capture_output import CapturedOutput, captured_output
@@ -20,6 +22,7 @@ from kash.model.actions_model import Action, ActionResult
 from kash.model.exec_model import ExecContext
 from kash.model.params_model import TypedParamValues
 from kash.model.paths_model import StorePath
+from kash.utils.common.url import Url
 
 log = get_logger(__name__)
 
@@ -109,6 +112,22 @@ def get_published_tools() -> list[Tool]:
     return []
 
 
+class StructuredActionResult(BaseModel):
+    """
+    Error from an MCP tool call.
+    """
+
+    s3_paths: list[Url] | None = None
+    """If the tool created an S3 item, the S3 paths of the created items."""
+
+    error: str | None = None
+    """If the tool had an error, the error message."""
+
+    # TODO: Include other metadata.
+    # metadata: dict[str, Any] | None = None
+    # """Metadata about the action result."""
+
+
 @dataclass(frozen=True)
 class ToolResult:
     """
@@ -119,6 +138,7 @@ class ToolResult:
     captured_output: CapturedOutput
     action_result: ActionResult
     result_store_paths: list[StorePath]
+    result_s3_paths: list[Url]
     error: Exception | None = None
 
     @property
@@ -168,12 +188,13 @@ class ToolResult:
         # TODO: Add more info on how to find the logs.
         return "Check kash logs for details."
 
-    def
+    def as_mcp_content(self) -> tuple[UnstructuredContent, StructuredContent]:
         """
-        Convert the tool result to content for the client
+        Convert the tool result to content for the MCP client.
         """
+        structured = StructuredActionResult()
         if self.error:
-
+            unstructured = [
                 TextContent(
                     text=f"The tool `{self.action.name}` had an error: {self.error}.\n\n"
                     + self.check_logs_message,
@@ -194,7 +215,7 @@ class ToolResult:
         if not chat_result:
             chat_result = "No result. Check kash logs for details."
 
-
+        unstructured = [
             TextContent(
                 text=f"{self.output_summary}\n\n"
                 f"{self.output_content}\n\n"
@@ -202,10 +223,15 @@ class ToolResult:
                 type="text",
             ),
         ]
+        structured = StructuredActionResult(s3_paths=self.result_s3_paths)
+
+        return unstructured, structured.model_dump()
 
 
 @log_calls(level="info")
-def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
+def run_mcp_tool(
+    action_name: str, arguments: dict
+) -> tuple[UnstructuredContent, StructuredContent]:
     """
     Run the action as a tool.
     """
@@ -222,6 +248,7 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
         refetch=False,  # Using the file caches.
         # Keeping all transient files for now, but maybe make transient?
         override_state=None,
+        sync_to_s3=True,  # Enable S3 syncing for MCP tools.
     ) as exec_settings:
         action_cls = look_up_action_class(action_name)
 
@@ -237,9 +264,9 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
         context = ExecContext(action=action, settings=exec_settings)
         action_input = prepare_action_input(*input_items)
 
-
-
-
+        result_with_paths = run_action_with_caching(context, action_input)
+        result = result_with_paths.result
+        result_store_paths = result_with_paths.result_paths
 
         # Return final result, formatted for the LLM to understand.
         return ToolResult(
@@ -247,8 +274,9 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
             captured_output=capture.output,
             action_result=result,
             result_store_paths=result_store_paths,
+            result_s3_paths=result_with_paths.s3_paths,
             error=None,
-        ).
+        ).as_mcp_content()
 
     except Exception as e:
         log.exception("Error running mcp tool")
@@ -258,7 +286,7 @@ def run_mcp_tool(action_name: str, arguments: dict) -> list[TextContent]:
                 + "Check kash logs for details.",
                 type="text",
             )
-        ]
+        ], StructuredActionResult(error=str(e)).model_dump()
 
 
 def create_base_server() -> Server:
@@ -288,7 +316,9 @@ def create_base_server() -> Server:
        return []
 
     @app.call_tool()
-    async def handle_tool(
+    async def handle_tool(
+        name: str, arguments: dict
+    ) -> tuple[UnstructuredContent, StructuredContent]:
         try:
             if name not in _mcp_published_actions.copy():
                 log.error(f"Unknown tool requested: {name}")
@@ -303,6 +333,6 @@ def create_base_server() -> Server:
                     text=f"Error executing tool {name}: {e}",
                     type="text",
                 )
-            ]
+            ], StructuredActionResult(error=str(e)).model_dump()
 
     return app
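
MCP tool calls now return a pair: the familiar unstructured `TextContent` list plus a structured payload. A self-contained sketch of the new shape (the model is redefined inline with plain `str` paths so the snippet runs standalone; the package uses its own `Url` type):

```python
# Sketch of the (unstructured, structured) pair MCP tools now return.
from mcp.types import TextContent
from pydantic import BaseModel


class StructuredActionResult(BaseModel):  # simplified copy for illustration
    s3_paths: list[str] | None = None
    error: str | None = None


# Success: readable text for the model, machine-readable paths for clients.
unstructured = [TextContent(text="Created 1 item.", type="text")]
structured = StructuredActionResult(s3_paths=["s3://somebucket/some/file/path"])
response = (unstructured, structured.model_dump())
# -> ([TextContent(...)], {'s3_paths': ['s3://somebucket/some/file/path'], 'error': None})

# Error: mirrors the except branches in the diff.
error_payload = StructuredActionResult(error="tool failed").model_dump()
```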
kash/model/actions_model.py
CHANGED
@@ -246,7 +246,17 @@ class Action(ABC):
 
     output_type: ItemType = ItemType.doc
     """
-    The type of the output item(s)
+    The type of the output item(s). If an action returns multiple output types,
+    this will be the output type of the first output.
+    This is mainly used for preassembly for the cache check if an output already exists.
+    """
+
+    output_format: Format | None = None
+    """
+    The format of the output item(s). The default is to assume it is the same
+    format as the input. If an action returns multiple output formats,
+    this will be the format of the first output.
+    This is mainly used for preassembly for the cache check if an output already exists.
     """
 
     expected_outputs: ArgCount = ONE_ARG
@@ -540,7 +550,7 @@ class Action(ABC):
         """
         can_preassemble = self.cacheable and self.expected_outputs == ONE_ARG
         log.info(
-            "Preassemble check for `%s
+            "Preassemble check for `%s`: can_preassemble=%s (expected_outputs=%s, cacheable=%s)",
             self.name,
             can_preassemble,
             self.expected_outputs,
@@ -549,9 +559,10 @@ class Action(ABC):
         if can_preassemble:
             # Using first input to determine the output title.
             primary_input = context.action_input.items[0]
-            # In this case we only expect one output.
-
-
+            # In this case we only expect one output, of the type specified by the action.
+            primary_output = primary_input.derived_copy(context, 0, type=context.action.output_type)
+            log.info("Preassembled output: source %s, %s", primary_output.source, primary_output)
+            return ActionResult([primary_output])
         else:
             # Caching disabled.
             return None
@@ -574,9 +585,9 @@ class Action(ABC):
                 "type": "array",
                 "items": {
                     "type": "string",
-                    "description": "A
+                    "description": "A URL or S3 URL or a workspace file path, e.g. https://example.com/some/file/path or s3://somebucket/some/file/path or some/file/path",
                 },
-                "description": f"
+                "description": f"A list of paths or URLs of input items ({self.expected_args.as_str()}). Use an array of length one for a single input.",
            }
 
            # Set min/max items.
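
With `output_format` declared alongside `output_type`, preassembly can construct the expected output item up front and check the cache before running the action. A sketch of how a subclass might declare these fields (the class name, the `Format.markdown` member, and the import locations for `ItemType`/`Format` are assumptions, not package code):

```python
# Illustrative sketch: declaring the new output_format on an Action subclass.
from kash.model.actions_model import Action
from kash.model.items_model import Format, ItemType  # assumed import locations


class HtmlToMarkdownAction(Action):  # hypothetical action
    name: str = "html_to_markdown"
    output_type: ItemType = ItemType.doc
    # New in 0.3.34: declare the output format when it differs from the input,
    # so preassembly can predict the output item for the cache check.
    output_format: Format | None = Format.markdown  # assumed enum member
```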
kash/model/exec_model.py
CHANGED
@@ -43,6 +43,9 @@ class RuntimeSettings:
     no_format: bool = False
     """If True, will not normalize the output item's body text formatting (for Markdown)."""
 
+    sync_to_s3: bool = True
+    """If True, will sync output items to S3 if input was from S3."""
+
     @property
     def workspace(self) -> FileStore:
         from kash.workspaces.workspaces import get_ws
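
The effect of the new flag, in pseudologic (a hypothetical helper, not package code): outputs are pushed back to S3 only when syncing is enabled and the inputs came from S3.

```python
# Hypothetical sketch of the gating this flag implies; not package code.
from collections.abc import Callable


def maybe_sync_outputs(sync_to_s3: bool, input_was_s3: bool, push: Callable[[], None]) -> bool:
    """Push outputs back to S3 only if the flag is set and input came from S3."""
    if sync_to_s3 and input_was_s3:
        push()
        return True
    return False
```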
kash/model/items_model.py
CHANGED
@@ -7,6 +7,7 @@ from datetime import UTC, datetime
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, NotRequired, TypedDict, TypeVar, Unpack
+from urllib.parse import urlparse
 
 from frontmatter_format import from_yaml_string, new_yaml
 from prettyfmt import (
@@ -203,6 +204,15 @@ class ItemId:
        # If we got here, the item has no identity.
        item_id = None
 
+        log.debug(
+            "item_id is %s for type=%s, format=%s, url=%s, title=%s, source=%s",
+            item_id,
+            item.type,
+            item.format,
+            item.url,
+            item.title,
+            item.source,
+        )
        return item_id
 
 
@@ -561,12 +571,19 @@ class Item:
         from kash.file_storage.store_filenames import parse_item_filename
 
         # Prefer original to external, e.g. if we know the original but the external might
-        # be a cache filename.
-        path =
+        # be a cache filename. Also check
+        path = (
+            self.store_path
+            or self.original_filename
+            or self.external_path
+            or (self.url and urlparse(self.url).path)
+            or ""
+        ).strip()
         if path:
             path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
         else:
             path_name = None
+
         return path_name
 
     def slug_name(
@@ -598,6 +615,7 @@ class Item:
 
         slug = self.slug_name()
         full_suffix = self.get_full_suffix()
+
         return join_suffix(slug, full_suffix)
 
     def body_heading(self, allowed_tags: tuple[str, ...] = ("h1", "h2")) -> str | None:
@@ -835,7 +853,9 @@ class Item:
         the type and the body.
 
         Same as `new_copy_with` but also updates the `derived_from` relation. If we also
-        have an action context, then use
+        have an action context, then use that to fill some fields, in particular `title_template`
+        to derive a new title and `output_type` and `output_format` to set the output type
+        and format
         """
 
         # Get derived_from relation if possible.
@@ -869,20 +889,38 @@ class Item:
         if "external_path" not in updates:
             updates["external_path"] = None
 
+        action_context = action_context or self.context
+
+        if action_context:
+            # Default the output item type and format to the action's declared output_type
+            # and format if not explicitly set.
+            if "type" not in updates:
+                updates["type"] = action_context.action.output_type
+            # If we were not given a format override, we leave the output type the same.
+            elif action_context.action.output_format:
+                # Check an overridden format and then our own format.
+                new_output_format = updates.get("format", self.format)
+                if new_output_format and action_context.action.output_format != new_output_format:
+                    log.warning(
+                        "Output item format `%s` does not match declared output format `%s` for action `%s`",
+                        new_output_format,
+                        action_context.action.output_format,
+                        action_context.action.name,
+                    )
+
         new_item = self.new_copy_with(update_timestamp=True, **updates)
         if derived_from:
             new_item.update_relations(derived_from=derived_from)
 
-        action_context = action_context or self.context
-
         # Record the history.
         if action_context:
-
-
-
-
+            new_item.update_source(
+                Source(
+                    operation=action_context.operation,
+                    output_num=output_num,
+                    cacheable=action_context.action.cacheable,
+                )
             )
-            self.add_to_history(self.source.operation.summary())
             action = action_context.action
         else:
             action = None
@@ -911,9 +949,10 @@ class Item:
             setattr(self.relations, key, list(value))
         return self.relations
 
-    def
+    def update_source(self, source: Source) -> None:
         """
-        Update the history of the item
+        Update the source and the history of the item to indicate it was created
+        by the given operation. For convenience, this is idempotent.
         """
         self.source = source
         self.add_to_history(source.operation.summary())
@@ -945,6 +984,9 @@ class Item:
         return metadata_matches and body_matches
 
     def add_to_history(self, operation_summary: OperationSummary):
+        """
+        For convenience, this is idempotent.
+        """
         if not self.history:
             self.history = []
         # Don't add duplicates to the history.
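
The new defaulting logic in `derived_copy` can be summarized as a small decision flow (a simplified restatement of the diff above, using plain arguments rather than the real `Item` and `ExecContext` types):

```python
# Simplified restatement of the new type/format defaulting in derived_copy.
from typing import Any


def resolve_output_type(updates: dict[str, Any], item_format: Any, action: Any) -> None:
    if "type" not in updates:
        # No explicit type override: default to the action's declared output_type.
        updates["type"] = action.output_type
    elif action.output_format:
        # Explicit type given: warn if the effective format contradicts the
        # action's declared output_format.
        new_format = updates.get("format", item_format)
        if new_format and new_format != action.output_format:
            print(f"warning: format {new_format!r} != declared {action.output_format!r}")
```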
kash/utils/api_utils/gather_limited.py
CHANGED
@@ -542,6 +542,8 @@ async def gather_limited_sync(
             # Mark as failed
             if status and task_id is not None:
                 await status.finish(task_id, TaskState.FAILED, str(e))
+
+            log.warning("Task failed: %s: %s", label, e, exc_info=True)
             raise
 
     return await _gather_with_interrupt_handling(
kash/utils/api_utils/multitask_gather.py
ADDED
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
+from typing import Any, Generic, TypeVar, cast
+
+from strif import abbrev_list
+
+from kash.config.logger import get_logger
+from kash.config.settings import global_settings
+from kash.shell.output.shell_output import multitask_status
+from kash.utils.api_utils.api_retries import RetrySettings
+from kash.utils.api_utils.gather_limited import FuncTask, Limit, gather_limited_sync
+
+T = TypeVar("T")
+
+log = get_logger(name=__name__)
+
+
+@dataclass(frozen=True)
+class MultitaskResult(Generic[T]):
+    """
+    Container for results from multitask_gather preserving original order.
+    Access `.successes` and `.errors` to get partitioned views.
+    """
+
+    raw_results: list[T | BaseException]
+
+    @property
+    def successes_or_none(self) -> list[T | None]:
+        """
+        Return a list of successes or None, aligned with the original order.
+        """
+        return [
+            None if isinstance(item, BaseException) else cast(T, item) for item in self.raw_results
+        ]
+
+    @property
+    def successes(self) -> list[T]:
+        """
+        Return a list of successes only. May be shorter than the original list.
+        """
+        return [cast(T, item) for item in self.raw_results if not isinstance(item, BaseException)]
+
+    @property
+    def errors(self) -> list[BaseException]:
+        """
+        Return a list of errors only. May be shorter than the original list.
+        """
+        return [item for item in self.raw_results if isinstance(item, BaseException)]
+
+
+def _default_labeler(total: int) -> Callable[[int, Any], str]:
+    def labeler(i: int, _spec: Any) -> str:
+        return f"Task {i + 1}/{total}"
+
+    return labeler
+
+
+async def multitask_gather(
+    tasks: Iterable[FuncTask[T]] | Sequence[FuncTask[T]],
+    *,
+    labeler: Callable[[int, Any], str] | None = None,
+    limit: Limit | None = None,
+    bucket_limits: dict[str, Limit] | None = None,
+    retry_settings: RetrySettings | None = None,
+    show_progress: bool = True,
+) -> MultitaskResult[T]:
+    """
+    Run many `FuncTask`s concurrently with shared progress UI and rate limits.
+
+    This wraps the standard pattern of creating a status context, providing a labeler,
+    and calling `gather_limited_sync` with common options.
+
+    - `labeler` can be omitted; a simple "Task X/Y" label will be used.
+    - If `limit` is not provided, defaults are taken from `global_settings()`.
+    - If `show_progress` is False, tasks are run without the status context.
+    - Exceptions are collected (using return_exceptions=True). Use properties on the
+      returned `MultitaskResult` to access `.successes` and `.errors`.
+    """
+
+    # Normalize tasks to a list for length and stable iteration
+    task_list: list[FuncTask[T]] = list(tasks)
+
+    # Provide a default labeler if none is supplied
+    effective_labeler: Callable[[int, Any], str] = (
+        labeler if labeler is not None else _default_labeler(len(task_list))
+    )
+
+    # Provide sensible default rate limits if none are supplied
+    effective_limit: Limit = (
+        limit
+        if limit is not None
+        else Limit(
+            rps=global_settings().limit_rps,
+            concurrency=global_settings().limit_concurrency,
+        )
+    )
+
+    if not show_progress:
+        log.warning("Running %d tasks (progress disabled)…", len(task_list))
+
+    async with multitask_status(enabled=show_progress) as status:
+        raw_results = cast(
+            list[T | BaseException],
+            await gather_limited_sync(
+                *task_list,
+                limit=effective_limit,
+                bucket_limits=bucket_limits,
+                status=status,
+                labeler=effective_labeler,
+                retry_settings=retry_settings,
+                return_exceptions=True,
+            ),
+        )
+
+    result = MultitaskResult[T](raw_results=raw_results)
+
+    if result.errors:
+        log.warning(
+            "multitask_gather: had %d errors (out of %d tasks): %s",
+            len(result.errors),
+            len(task_list),
+            abbrev_list(result.errors),
+        )
+        log.error(
+            "multitask_gather: first error (full traceback):",
+            exc_info=(
+                type(result.errors[0]),
+                result.errors[0],
+                result.errors[0].__traceback__,
+            ),
+        )
+    return result
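
A usage sketch for the new helper (assumptions: `FuncTask` wraps a sync callable plus its positional args, as it is passed through to `gather_limited_sync`; the fetch function and URLs are illustrative):

```python
# Hypothetical usage of multitask_gather; FuncTask construction is an assumption.
import asyncio

from kash.utils.api_utils.gather_limited import FuncTask
from kash.utils.api_utils.multitask_gather import multitask_gather


def fetch_title(url: str) -> str:
    # Stand-in for real sync work (an HTTP fetch, an LLM call, etc.).
    return f"title of {url}"


async def main() -> None:
    urls = ["https://example.com/a", "https://example.com/b"]
    tasks = [FuncTask(fetch_title, (url,)) for url in urls]
    result = await multitask_gather(tasks)
    print(result.successes)          # results in original order, failures dropped
    print(result.successes_or_none)  # aligned with inputs, None where a task failed
    print(result.errors)             # the exceptions themselves


asyncio.run(main())
```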