PyPI - kash-shell - Versions diffs - 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl - Mend

kash-shell 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

kash/actions/core/markdownify.py +12 -8
kash/actions/core/readability.py +8 -7
kash/actions/core/render_as_html.py +8 -6
kash/actions/core/show_webpage.py +2 -2
kash/commands/base/basic_file_commands.py +3 -0
kash/commands/base/diff_commands.py +38 -3
kash/commands/base/reformat_command.py +1 -1
kash/commands/base/show_command.py +1 -1
kash/commands/workspace/selection_commands.py +1 -1
kash/commands/workspace/workspace_commands.py +92 -29
kash/docs/load_source_code.py +1 -1
kash/exec/action_exec.py +6 -8
kash/exec/fetch_url_metadata.py +8 -5
kash/exec/importing.py +4 -4
kash/exec/llm_transforms.py +1 -1
kash/exec/preconditions.py +30 -10
kash/file_storage/file_store.py +105 -43
kash/file_storage/item_file_format.py +1 -1
kash/file_storage/store_filenames.py +2 -1
kash/help/help_embeddings.py +2 -2
kash/llm_utils/clean_headings.py +1 -1
kash/{text_handling → llm_utils}/custom_sliding_transforms.py +0 -3
kash/llm_utils/llm_completion.py +1 -1
kash/local_server/__init__.py +1 -1
kash/local_server/local_server_commands.py +2 -1
kash/mcp/__init__.py +1 -1
kash/mcp/mcp_server_commands.py +8 -2
kash/media_base/media_cache.py +10 -3
kash/model/actions_model.py +3 -0
kash/model/items_model.py +78 -44
kash/model/operations_model.py +14 -0
kash/shell/ui/shell_results.py +2 -1
kash/shell/utils/native_utils.py +2 -2
kash/utils/common/format_utils.py +0 -8
kash/utils/common/import_utils.py +46 -18
kash/utils/common/url.py +80 -3
kash/utils/file_utils/file_formats.py +3 -2
kash/utils/file_utils/file_formats_model.py +47 -45
kash/utils/file_utils/filename_parsing.py +41 -16
kash/{text_handling → utils/text_handling}/doc_normalization.py +10 -8
kash/utils/text_handling/escape_html_tags.py +156 -0
kash/{text_handling → utils/text_handling}/markdown_utils.py +0 -3
kash/utils/text_handling/markdownify_utils.py +87 -0
kash/{text_handling → utils/text_handling}/unified_diffs.py +1 -44
kash/web_content/file_cache_utils.py +42 -34
kash/web_content/local_file_cache.py +53 -13
kash/web_content/web_extract.py +1 -1
kash/web_content/web_extract_readabilipy.py +4 -2
kash/web_content/web_fetch.py +42 -7
kash/web_content/web_page_model.py +2 -1
kash/web_gen/simple_webpage.py +1 -1
kash/web_gen/templates/base_styles.css.jinja +134 -16
kash/web_gen/templates/simple_webpage.html.jinja +1 -1
kash/workspaces/selections.py +2 -2
kash/workspaces/workspace_output.py +2 -2
kash/xonsh_custom/load_into_xonsh.py +4 -2
{kash_shell-0.3.12.dist-info → kash_shell-0.3.14.dist-info}/METADATA +1 -1
{kash_shell-0.3.12.dist-info → kash_shell-0.3.14.dist-info}/RECORD +62 -62
kash/utils/common/inflection.py +0 -22
kash/workspaces/workspace_importing.py +0 -56
kash/{text_handling → utils/text_handling}/markdown_render.py +0 -0
{kash_shell-0.3.12.dist-info → kash_shell-0.3.14.dist-info}/WHEEL +0 -0
{kash_shell-0.3.12.dist-info → kash_shell-0.3.14.dist-info}/entry_points.txt +0 -0
{kash_shell-0.3.12.dist-info → kash_shell-0.3.14.dist-info}/licenses/LICENSE +0 -0

kash/model/items_model.py CHANGED Viewed

@@ -24,13 +24,14 @@ from kash.model.concept_model import canonicalize_concept
 from kash.model.media_model import MediaMetadata
 from kash.model.operations_model import OperationSummary, Source
 from kash.model.paths_model import StorePath, fmt_store_path
-from kash.text_handling.markdown_render import markdown_to_html
-from kash.text_handling.markdown_utils import first_heading
 from kash.utils.common.format_utils import fmt_loc, html_to_plaintext, plaintext_to_html
 from kash.utils.common.url import Locator, Url
 from kash.utils.errors import FileFormatError
 from kash.utils.file_formats.chat_format import ChatHistory
+from kash.utils.file_utils.file_formats import MimeType
 from kash.utils.file_utils.file_formats_model import FileExt, Format
+from kash.utils.text_handling.markdown_render import markdown_to_html
+from kash.utils.text_handling.markdown_utils import first_heading
 if TYPE_CHECKING:
     from kash.model.exec_model import ExecContext
@@ -180,7 +181,7 @@ class ItemId:
             item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
         elif item.type == ItemType.concept and item.title:
             item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
-        elif item.source and item.source.cacheable:
+        elif item.source and item.source.cacheable and item.source.operation.has_known_inputs:
             # We know the source of this and if the action was cacheable, we can create
             # an identity based on the source.
             item_id = ItemId(item.type, IdType.source, item.source.as_str())
@@ -359,20 +360,27 @@ class Item:
         cls,
         path: Path | str,
         item_type: ItemType | None = None,
+        *,
         title: str | None = None,
+        original_filename: str | None = None,
+        url: Url | None = None,
+        mime_type: MimeType | None = None,
     ) -> Item:
         """
         Create a resource Item for a file with a format inferred from the file extension
         or the content. Only sets basic metadata. Does not read the content. Will set
         `format` and `file_ext` if possible but will leave them as None if unrecognized.
+        If `mime_type` is provided, it can help determine the file extension if the
+        extension isn't recognized from the filename or URL.
         """
         from kash.file_storage.store_filenames import parse_item_filename
-        from kash.utils.file_utils.file_formats_model import detect_file_format
+        from kash.utils.file_utils.file_formats_model import file_format_info
         # Will raise error for unrecognized file ext.
         _name, filename_item_type, format, file_ext = parse_item_filename(path)
+        format_info = file_format_info(path, suggested_mime_type=mime_type)
         if not format:
-            format = detect_file_format(path)
+            format = format_info.format
         if not item_type and filename_item_type:
             item_type = filename_item_type
         if not item_type:
@@ -380,12 +388,19 @@ class Item:
             item_type = (
                 ItemType.doc if format and format.supports_frontmatter else ItemType.resource
             )
+        # Try to determine a good file extension if it's not already on the filename.
+        if not file_ext:
+            file_ext = format_info.suggested_file_ext
         item = cls(
             type=item_type,
             title=title,
             file_ext=file_ext,
             format=format,
             external_path=str(path),
+            original_filename=original_filename,
+            url=url,
         )
         # Update modified time from the file system.
@@ -507,17 +522,43 @@ class Item:
         return item_dict
-    def display_title(self) -> str:
+    def filename_stem(self) -> str | None:
         """
-        A display title for this item. Same as abbrev_title() but will fall back
-        to the filename if it is available.
+        If the item has an existing or previous filename, return its stem,
+        for use in picking new filenames.
         """
-        display_title = self.title
-        if not display_title and self.store_path:
-            display_title = Path(self.store_path).name
-        if not display_title:
-            display_title = self.abbrev_title()
-        return display_title
+        from kash.file_storage.store_filenames import parse_item_filename
+        # Prefer original to external, e.g. if we know the original but the external might
+        # be a cache filename.
+        path = self.store_path or self.original_filename or self.external_path
+        if path:
+            path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
+        else:
+            path_name = None
+        return path_name
+    def slug_name(self, max_len: int = SLUG_MAX_LEN, prefer_title: bool = False) -> str:
+        """
+        Get a readable slugified name for this item, either from a previous filename
+        or from slugifying the title or content. May not be unique.
+        """
+        filename_stem = self.filename_stem()
+        if filename_stem and not prefer_title:
+            return slugify_snake(filename_stem)
+        else:
+            return slugify_snake(self.abbrev_title(max_len=max_len, add_ops_suffix=True))
+    def default_filename(self) -> str:
+        """
+        Get the default filename for an item based on slugifying its title or other
+        metadata. May not be unique.
+        """
+        from kash.file_storage.store_filenames import join_suffix
+        slug = self.slug_name()
+        full_suffix = self.get_full_suffix()
+        return join_suffix(slug, full_suffix)
     def abbrev_title(
         self,
@@ -527,12 +568,10 @@ class Item:
         pull_body_heading: bool = False,
     ) -> str:
         """
-        Get or infer a title for this item, falling back to the filename, URL,
-        description, or finally body text.
-        Optionally, include the last operation as a parenthetical at the end of the title.
+        Get or infer a title for this item, falling back to the filename, URL, description, or
+        finally body text. Optionally, include the last operation as a parenthetical at the end
+        of the title. Will use "Untitled" if all else fails.
         """
-        from kash.file_storage.store_filenames import parse_item_filename
         # First special case: if we are pulling the title from the body header, check
         # that.
         if not self.title and pull_body_heading:
@@ -544,18 +583,12 @@ class Item:
         if not self.title and self.url:
             return abbrev_str(self.url, max_len)
-        # Special case for filenames with no title.
-        # Use stem to drop suffix like .resource.docx etc in a title.
-        path = self.store_path or self.external_path or self.original_filename
-        if path:
-            path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
-        else:
-            path_name = None
+        filename_stem = self.filename_stem()
         # Use the title or the path if possible, falling back to description or even body text.
         title_raw_text = (
             self.title
-            or path_name
+            or filename_stem
             or self.description
             or (not self.is_binary and self.abbrev_body(max_len))
             or UNTITLED
@@ -586,6 +619,24 @@ class Item:
         return final_text
+    def display_title(self) -> str:
+        """
+        A display title for this item. Same as abbrev_title() but will fall back
+        to the filename if it is available.
+        """
+        display_title = self.title
+        if not display_title and self.store_path:
+            display_title = Path(self.store_path).name
+        if not display_title:
+            display_title = self.abbrev_title()
+        return display_title
+    def abbrev_description(self, max_len: int = 1000) -> str:
+        """
+        Get or infer description.
+        """
+        return abbrev_on_words(html_to_plaintext(self.description or self.body or ""), max_len)
     def body_heading(self) -> str | None:
         """
         Get the first h1 or h2 heading from the body text, if present.
@@ -620,21 +671,6 @@ class Item:
         """
         return bool(self.body and self.body.strip())
-    def slug_name(self, max_len: int = SLUG_MAX_LEN) -> str:
-        """
-        Get a readable slugified version of the title or filename or content
-        appropriate for this item. May not be unique.
-        """
-        title = self.abbrev_title(max_len=max_len, add_ops_suffix=True)
-        slug = slugify_snake(title)
-        return slug
-    def abbrev_description(self, max_len: int = 1000) -> str:
-        """
-        Get or infer description.
-        """
-        return abbrev_on_words(html_to_plaintext(self.description or self.body or ""), max_len)
     def read_as_config(self) -> Any:
         """
         If it is a config Item, return the parsed YAML.
@@ -653,8 +689,6 @@ class Item:
         """
         if self.file_ext:
             return self.file_ext
-        if self.is_binary and not self.file_ext:
-            raise ValueError(f"Binary Items must have a file extension: {self}")
         inferred_ext = self.format and self.format.file_ext
         if not inferred_ext:
             raise ValueError(f"Cannot infer file extension for Item: {self}")

kash/model/operations_model.py CHANGED Viewed

@@ -66,6 +66,13 @@ class Input:
         else:
             return "[input info missing]"
+    @property
+    def is_known(self) -> bool:
+        """
+        Whether the input is known, i.e. we had saved inputs with hashes.
+        """
+        return bool(self.path and self.hash)
     # Inputs are equal if the hashes match (even if the paths have changed).
     def __hash__(self):
@@ -117,6 +124,13 @@ class Operation:
         return d
+    @property
+    def has_known_inputs(self) -> bool:
+        """
+        Whether the operation has known inputs, i.e. all inputs have hashes.
+        """
+        return all(arg.is_known for arg in self.arguments)
     def summary(self) -> OperationSummary:
         return OperationSummary(self.action_name)

kash/shell/ui/shell_results.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from typing import Any
+from prettyfmt import fmt_count_items
 from rich.box import SQUARE
 from rich.panel import Panel
 from rich.table import Table
@@ -10,7 +11,7 @@ from kash.config.text_styles import COLOR_SELECTION, STYLE_HINT
 from kash.exec.command_exec import run_command_or_action
 from kash.exec_model.shell_model import ShellResult
 from kash.shell.output.shell_output import PrintHooks, console_pager, cprint, print_result
-from kash.utils.common.format_utils import fmt_count_items, fmt_loc
+from kash.utils.common.format_utils import fmt_loc
 from kash.utils.errors import is_fatal
 from kash.workspaces import SelectionHistory

kash/shell/utils/native_utils.py CHANGED Viewed

@@ -23,7 +23,7 @@ from kash.shell.output.shell_output import cprint
 from kash.utils.common.format_utils import fmt_loc
 from kash.utils.common.url import as_file_url, is_file_url, is_url
 from kash.utils.errors import FileNotFound, SetupError
-from kash.utils.file_utils.file_formats import is_full_html_page, read_partial_text
+from kash.utils.file_utils.file_formats import is_fullpage_html, read_partial_text
 from kash.utils.file_utils.file_formats_model import file_format_info
 log = get_logger(__name__)
@@ -88,7 +88,7 @@ def _detect_view_mode(file_or_url: str) -> ViewMode:
     path = Path(file_or_url)
     if path.is_file():  # File or symlink.
         content = read_partial_text(path)
-        if content and is_full_html_page(content):
+        if content and is_fullpage_html(content):
             return ViewMode.browser
         info = file_format_info(path)

kash/utils/common/format_utils.py CHANGED Viewed

@@ -4,7 +4,6 @@ from pathlib import Path
 from prettyfmt import fmt_path
-from kash.utils.common.inflection import plural
 from kash.utils.common.url import Locator, is_url
@@ -44,13 +43,6 @@ def fmt_loc(locator: str | Locator, resolve: bool = True) -> str:
         return fmt_path(locator, resolve=resolve)
-def fmt_count_items(count: int, name: str = "item") -> str:
-    """
-    Format a count and a name as a pluralized phrase, e.g. "1 item" or "2 items".
-    """
-    return f"{count} {plural(name, count)}"  # pyright: ignore
 ## Tests

kash/utils/common/import_utils.py CHANGED Viewed

@@ -12,36 +12,64 @@ log = logging.getLogger(__name__)
 Tallies: TypeAlias = dict[str, int]
-def import_subdirs(
+def import_recursive(
     parent_package_name: str,
     parent_dir: Path,
-    subdir_names: list[str] | None = None,
+    resource_names: list[str] | None = None,
     tallies: Tallies | None = None,
 ):
     """
-    Import all files in the given subdirectories of a single parent directory.
-    Wraps `pkgutil.iter_modules` to iterate over all modules in the subdirectories.
-    If `subdir_names` is `None`, will import all subdirectories.
+    Import modules from subdirectories or individual Python modules within a parent package.
+    Each resource in `resource_names` can be:
+    - A directory name (all modules within it will be imported)
+    - A module name with or without '.py' extension (a single module will be imported)
+    - "." to import all modules in the parent_dir
+    If `resource_names` is `None`, imports all modules directly in parent_dir.
+    Simply a convenience wrapper for `importlib.import_module` and
+    `pkgutil.iter_modules` to iterate over all modules in the subdirectories.
+    If `tallies` is provided, it will be updated with the number of modules imported
+    for each package.
     """
     if tallies is None:
         tallies = {}
-    if not subdir_names:
-        subdir_names = ["."]
+    if not resource_names:
+        resource_names = ["."]
-    for subdir_name in subdir_names:
-        if subdir_name == ".":
+    for name in resource_names:
+        if name == ".":
             full_path = parent_dir
             package_name = parent_package_name
         else:
-            full_path = parent_dir / subdir_name
-            package_name = f"{parent_package_name}.{subdir_name}"
-        if not full_path.is_dir():
-            raise FileNotFoundError(f"Subdirectory not found: {full_path}")
-        for _module_finder, module_name, _is_pkg in pkgutil.iter_modules(path=[str(full_path)]):
-            importlib.import_module(f"{package_name}.{module_name}")  # Propagate import errors
-            tallies[package_name] = tallies.get(package_name, 0) + 1
+            full_path = parent_dir / name
+            package_name = f"{parent_package_name}.{name}"
+        # Check if it's a directory
+        if full_path.is_dir():
+            # Import all modules in the directory
+            for _, module_name, _ in pkgutil.iter_modules(path=[str(full_path)]):
+                importlib.import_module(f"{package_name}.{module_name}")
+                tallies[package_name] = tallies.get(package_name, 0) + 1
+        else:
+            # Not a directory, try as a module file
+            module_path = full_path
+            module_name = name
+            # Handle with or without .py extension
+            if not module_path.is_file() and module_path.suffix != ".py":
+                module_path = parent_dir / f"{name}.py"
+                module_name = name
+            elif module_path.suffix == ".py":
+                module_name = module_path.stem
+            if module_path.is_file() and module_name != "__init__":
+                importlib.import_module(f"{parent_package_name}.{module_name}")
+                tallies[parent_package_name] = tallies.get(parent_package_name, 0) + 1
+            else:
+                raise FileNotFoundError(f"Path not found or not importable: {full_path}")
     return tallies

kash/utils/common/url.py CHANGED Viewed

@@ -47,7 +47,9 @@ def check_if_url(
         if only_schemes:
             return result if result.scheme in only_schemes else None
         else:
-            return result if result.scheme != "" else None
+            # Consider it a URL if the scheme is present and longer than a single character.
+            # This helps avoid misinterpreting Windows drive letters (e.g., "C:\foo") as schemes.
+            return result if result.scheme and len(result.scheme) > 1 else None
     except ValueError:
         return None
@@ -145,6 +147,41 @@ def normalize_url(
     return Url(normalized_url)
+def is_valid_path(text: UnresolvedLocator) -> bool:
+    """
+    Sanity check if the input is plausibly a file path, i.e. not a URL or malformed in
+    an obvious way. Does not check for existence or OS-specific naming restrictions.
+    For a more thorough check there are other more complex options like:
+    https://github.com/thombashi/pathvalidate
+    """
+    if isinstance(text, Path):
+        return True
+    elif isinstance(text, str):
+        path_str = text
+    else:
+        return False
+    # Check for empty or whitespace-only strings or null characters
+    # (never acceptable paths).
+    if not path_str or path_str.isspace():
+        return False
+    if "\0" in path_str:
+        return False
+    # Explicitly disallow URLs.
+    if is_url(path_str):
+        return False
+    # As a final lightweight check, ensure it can be instantiated as a Path object
+    # This doesn't validate existence or character restrictions.
+    try:
+        _ = Path(path_str)
+    except (TypeError, ValueError):
+        return False
+    return True
 ## Tests
@@ -155,13 +192,19 @@ def test_is_url():
     assert is_url("ftp://example.com") == True
     assert is_url("file:///path/to/file") == True
     assert is_url("file://hostname/path/to/file") == True
-    assert is_url("invalid-url") == False
-    assert is_url("www.example.com") == False
     assert is_url("http://example.com", only_schemes=HTTP_ONLY) == True
     assert is_url("https://example.com", only_schemes=HTTP_ONLY) == True
+    assert is_url("invalid-url") == False
+    assert is_url("www.example.com") == False
     assert is_url("ftp://example.com", only_schemes=HTTP_ONLY) == False
     assert is_url("file:///path/to/file", only_schemes=HTTP_ONLY) == False
+    assert is_url("www.example.com") is False
+    assert is_url("c:\\path\\to\\file") is False
+    assert is_url("/foo/bar") is False
+    assert is_url("//foo") is False
 def test_as_file_url():
     assert as_file_url("file:///path/to/file") == "file:///path/to/file"
@@ -205,3 +248,37 @@ def test_normalize_url():
             str(e)
             == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
         )
+def test_is_path():
+    assert is_valid_path("foo/bar") is True
+    assert is_valid_path("/foo/bar") is True
+    assert is_valid_path("./foo/bar") is True
+    assert is_valid_path("../foo/bar") is True
+    assert is_valid_path("foo.txt") is True
+    assert is_valid_path(Path("foo/bar")) is True
+    assert is_valid_path(Path()) is True
+    assert is_valid_path(".") is True
+    assert is_valid_path("..") is True
+    assert is_valid_path("C:\\Users\\name") is True  # Windows-style
+    assert is_valid_path("file_with:colon.txt") is True  # Valid on POSIX
+    assert is_valid_path(Url("relative/path")) is True  # Url type with relative content
+    assert is_valid_path("http://example.com") is False
+    assert is_valid_path("https://example.com/path") is False
+    assert is_valid_path("file:///path/to/file") is False
+    assert is_valid_path(Url("http://example.com")) is False
+    assert is_valid_path("") is False
+    assert is_valid_path("  ") is False
+    assert is_valid_path("foo\0bar.txt") is False
+    assert is_valid_path(None) is False  # pyright: ignore
+    assert is_valid_path(123) is False  # pyright: ignore
+    # Edge cases
+    assert is_valid_path("www.example.com") is True  # No scheme
+    assert str(Path("")) == "."
+    assert str(Path(" ")) == " "
+    assert is_valid_path(Path(" ")) is True  # A bad idea but allowed
+    assert is_valid_path(Path("")) is True
+    assert is_valid_path(" ") is False
+    assert is_valid_path("") is False

kash/utils/file_utils/file_formats.py CHANGED Viewed

@@ -11,9 +11,10 @@ from kash.config.logger import get_logger
 log = get_logger(__name__)
-def is_full_html_page(content: str) -> bool:
+def is_fullpage_html(content: str) -> bool:
     """
-    A full HTML document that is probably best rendered in a browser.
+    A full HTML document that is a full page (headers, footers, etc.) and
+    so probably best rendered in a browser.
     """
     return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content[:2048], re.IGNORECASE))

kash-shell 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl

kash-shell 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl