kash-shell 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. kash/actions/__init__.py +4 -4
  2. kash/actions/core/markdownify.py +5 -2
  3. kash/actions/core/readability.py +5 -2
  4. kash/actions/core/render_as_html.py +18 -0
  5. kash/actions/core/webpage_config.py +12 -4
  6. kash/commands/__init__.py +8 -20
  7. kash/commands/base/basic_file_commands.py +15 -0
  8. kash/commands/base/debug_commands.py +15 -2
  9. kash/commands/base/general_commands.py +27 -18
  10. kash/commands/base/logs_commands.py +1 -4
  11. kash/commands/base/model_commands.py +8 -8
  12. kash/commands/base/search_command.py +3 -2
  13. kash/commands/base/show_command.py +5 -3
  14. kash/commands/extras/parse_uv_lock.py +186 -0
  15. kash/commands/help/doc_commands.py +2 -31
  16. kash/commands/help/welcome.py +33 -0
  17. kash/commands/workspace/selection_commands.py +11 -6
  18. kash/commands/workspace/workspace_commands.py +19 -16
  19. kash/config/colors.py +2 -0
  20. kash/config/env_settings.py +72 -0
  21. kash/config/init.py +2 -2
  22. kash/config/logger.py +61 -59
  23. kash/config/logger_basic.py +12 -5
  24. kash/config/server_config.py +6 -6
  25. kash/config/settings.py +117 -67
  26. kash/config/setup.py +35 -9
  27. kash/config/suppress_warnings.py +30 -12
  28. kash/config/text_styles.py +3 -13
  29. kash/docs/load_api_docs.py +2 -1
  30. kash/docs/markdown/topics/a2_installation.md +7 -3
  31. kash/docs/markdown/topics/a3_getting_started.md +3 -2
  32. kash/docs/markdown/warning.md +3 -8
  33. kash/docs/markdown/welcome.md +4 -0
  34. kash/docs_base/load_recipe_snippets.py +1 -1
  35. kash/docs_base/recipes/{general_system_commands.ksh → general_system_commands.sh} +1 -1
  36. kash/{concepts → embeddings}/cosine.py +2 -1
  37. kash/embeddings/text_similarity.py +57 -0
  38. kash/exec/__init__.py +20 -3
  39. kash/exec/action_decorators.py +18 -4
  40. kash/exec/action_exec.py +41 -23
  41. kash/exec/action_registry.py +13 -48
  42. kash/exec/command_registry.py +2 -1
  43. kash/exec/fetch_url_metadata.py +4 -6
  44. kash/exec/importing.py +56 -0
  45. kash/exec/llm_transforms.py +6 -6
  46. kash/exec/precondition_registry.py +2 -1
  47. kash/exec/preconditions.py +16 -1
  48. kash/exec/shell_callable_action.py +33 -19
  49. kash/file_storage/file_store.py +23 -14
  50. kash/file_storage/item_file_format.py +13 -3
  51. kash/file_storage/metadata_dirs.py +11 -2
  52. kash/help/assistant.py +2 -2
  53. kash/help/assistant_instructions.py +2 -1
  54. kash/help/help_embeddings.py +2 -2
  55. kash/help/help_printing.py +14 -10
  56. kash/help/tldr_help.py +5 -3
  57. kash/llm_utils/clean_headings.py +1 -1
  58. kash/llm_utils/llm_api_keys.py +4 -4
  59. kash/llm_utils/llm_completion.py +2 -2
  60. kash/llm_utils/llm_features.py +68 -0
  61. kash/llm_utils/llm_messages.py +1 -2
  62. kash/llm_utils/llm_names.py +1 -1
  63. kash/llm_utils/llms.py +17 -12
  64. kash/local_server/__init__.py +5 -2
  65. kash/local_server/local_server.py +56 -46
  66. kash/local_server/local_server_commands.py +15 -15
  67. kash/local_server/local_server_routes.py +2 -2
  68. kash/local_server/local_url_formatters.py +1 -1
  69. kash/mcp/__init__.py +5 -2
  70. kash/mcp/mcp_cli.py +54 -17
  71. kash/mcp/mcp_server_commands.py +5 -6
  72. kash/mcp/mcp_server_routes.py +14 -11
  73. kash/mcp/mcp_server_sse.py +61 -34
  74. kash/mcp/mcp_server_stdio.py +0 -8
  75. kash/media_base/audio_processing.py +81 -7
  76. kash/media_base/media_cache.py +18 -18
  77. kash/media_base/media_services.py +1 -1
  78. kash/media_base/media_tools.py +6 -6
  79. kash/media_base/services/local_file_media.py +2 -2
  80. kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -109
  81. kash/media_base/transcription_format.py +73 -0
  82. kash/media_base/transcription_whisper.py +38 -0
  83. kash/model/__init__.py +73 -5
  84. kash/model/actions_model.py +38 -4
  85. kash/model/concept_model.py +30 -0
  86. kash/model/items_model.py +56 -13
  87. kash/model/params_model.py +24 -0
  88. kash/shell/completions/completion_scoring.py +37 -5
  89. kash/shell/output/kerm_codes.py +1 -2
  90. kash/shell/output/shell_formatting.py +14 -4
  91. kash/shell/shell_main.py +2 -2
  92. kash/shell/utils/exception_printing.py +6 -0
  93. kash/shell/utils/native_utils.py +26 -20
  94. kash/text_handling/custom_sliding_transforms.py +12 -4
  95. kash/text_handling/doc_normalization.py +6 -2
  96. kash/text_handling/markdown_render.py +117 -0
  97. kash/text_handling/markdown_utils.py +204 -0
  98. kash/utils/common/import_utils.py +12 -3
  99. kash/utils/common/type_utils.py +0 -29
  100. kash/utils/common/url.py +80 -28
  101. kash/utils/errors.py +6 -0
  102. kash/utils/file_utils/{dir_size.py → dir_info.py} +25 -4
  103. kash/utils/file_utils/file_ext.py +2 -3
  104. kash/utils/file_utils/file_formats.py +28 -2
  105. kash/utils/file_utils/file_formats_model.py +50 -19
  106. kash/utils/file_utils/filename_parsing.py +10 -4
  107. kash/web_content/dir_store.py +1 -2
  108. kash/web_content/file_cache_utils.py +37 -10
  109. kash/web_content/file_processing.py +68 -0
  110. kash/web_content/local_file_cache.py +12 -9
  111. kash/web_content/web_extract.py +8 -3
  112. kash/web_content/web_fetch.py +12 -4
  113. kash/web_gen/tabbed_webpage.py +5 -2
  114. kash/web_gen/templates/base_styles.css.jinja +120 -14
  115. kash/web_gen/templates/base_webpage.html.jinja +60 -13
  116. kash/web_gen/templates/content_styles.css.jinja +4 -2
  117. kash/web_gen/templates/item_view.html.jinja +2 -2
  118. kash/web_gen/templates/tabbed_webpage.html.jinja +1 -2
  119. kash/workspaces/__init__.py +15 -2
  120. kash/workspaces/selections.py +18 -3
  121. kash/workspaces/source_items.py +4 -2
  122. kash/workspaces/workspace_output.py +11 -4
  123. kash/workspaces/workspaces.py +5 -11
  124. kash/xonsh_custom/command_nl_utils.py +40 -19
  125. kash/xonsh_custom/custom_shell.py +44 -12
  126. kash/xonsh_custom/customize_prompt.py +39 -21
  127. kash/xonsh_custom/load_into_xonsh.py +26 -27
  128. kash/xonsh_custom/shell_load_commands.py +2 -2
  129. kash/xonsh_custom/xonsh_completers.py +2 -249
  130. kash/xonsh_custom/xonsh_keybindings.py +282 -0
  131. kash/xonsh_custom/xonsh_modern_tools.py +3 -3
  132. kash/xontrib/kash_extension.py +5 -6
  133. {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/METADATA +26 -12
  134. {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/RECORD +140 -140
  135. {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/entry_points.txt +1 -1
  136. kash/concepts/concept_formats.py +0 -23
  137. kash/concepts/text_similarity.py +0 -112
  138. kash/shell/clideps/api_keys.py +0 -99
  139. kash/shell/clideps/dotenv_setup.py +0 -114
  140. kash/shell/clideps/dotenv_utils.py +0 -89
  141. kash/shell/clideps/pkg_deps.py +0 -232
  142. kash/shell/clideps/platforms.py +0 -11
  143. kash/shell/clideps/terminal_features.py +0 -56
  144. kash/shell/utils/osc_utils.py +0 -95
  145. kash/shell/utils/terminal_images.py +0 -133
  146. kash/text_handling/markdown_util.py +0 -167
  147. kash/utils/common/atomic_var.py +0 -158
  148. kash/utils/common/string_replace.py +0 -93
  149. kash/utils/common/string_template.py +0 -101
  150. /kash/docs_base/recipes/{python_dev_commands.ksh → python_dev_commands.sh} +0 -0
  151. /kash/docs_base/recipes/{tldr_standard_commands.ksh → tldr_standard_commands.sh} +0 -0
  152. /kash/{concepts → embeddings}/embeddings.py +0 -0
  153. {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/WHEEL +0 -0
  154. {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,117 @@
1
+ from textwrap import dedent
2
+
3
+ import marko
4
+ import regex
5
+ from marko.block import HTMLBlock
6
+ from marko.ext.gfm import GFM
7
+ from marko.helpers import MarkoExtension
8
+
9
+
10
# Renderer mixin that makes HTML <div> blocks render as standalone paragraphs,
# surrounded by blank lines, so downstream wrapping (flowmark etc.) is not broken.
class CustomHTMLBlockMixin:
    div_pattern = regex.compile(r"^\s*<div\b", regex.IGNORECASE)

    def render_html_block(self, element: HTMLBlock) -> str:
        # Delegate to the next renderer in the MRO first (GFM filtering).
        rendered = super().render_html_block(element)  # pyright: ignore

        if not self.div_pattern.match(element.body.strip()):
            # Not a div: pass the GFM-filtered body through unchanged.
            return rendered

        # A div: isolate the filtered result on its own lines.
        return f"\n{rendered.strip()}\n"
26
+
27
+
28
# Shared converter: GFM plus footnotes, with our div-handling mixin layered on
# top. Extensions later in this list are earlier in the MRO.
_div_extension = MarkoExtension(renderer_mixins=[CustomHTMLBlockMixin])
MARKO_GFM = marko.Markdown(extensions=["footnote", GFM, _div_extension])
33
+
34
+
35
# Glyph used to replace the default footnote backref arrow (&#8617;).
FOOTNOTE_UP_ARROW = "&nbsp;↑&nbsp;"

# Old and new footnote backref markup, precomputed once.
_BACKREF_OLD = """class="footnote">&#8617;</a>"""
_BACKREF_NEW = f"""class="footnote">{FOOTNOTE_UP_ARROW}</a>"""


def html_postprocess(html: str) -> str:
    """
    Final tweaks to the HTML: swap the default footnote backref arrow for a
    padded up arrow.
    """
    return html.replace(_BACKREF_OLD, _BACKREF_NEW)
46
+
47
+
48
def markdown_to_html(markdown: str, converter: marko.Markdown = MARKO_GFM) -> str:
    """
    Convert Markdown to HTML.

    Wraps div blocks with newlines for better Markdown compatibility.

    Output passes through raw HTML! Note per GFM, unsafe script tags etc
    are [allowed in some cases](https://github.github.com/gfm/#example-140) so
    additional sanitization is needed if input isn't trusted.
    """
    html = converter.convert(markdown)
    # Apply final HTML tweaks (footnote backref arrow replacement).
    # (Removed an unreachable duplicate `return html` that followed this return.)
    return html_postprocess(html)
61
+
62
+
63
## Tests


def test_markdown_to_html():
    source_markdown = dedent(
        """
        # Heading

        This is a paragraph and a [link](https://example.com).

        - Item 1
        - Item 2

        ## Subheading

        This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        <iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>

        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )

    expected_html = dedent(
        """
        <h1>Heading</h1>
        <p>This is a paragraph and a <a href="https://example.com">link</a>.</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>Subheading</h2>
        <p>This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.</p>

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        &lt;iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>
        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )

    # Convert once, echo the output for debugging, then compare.
    actual_html = markdown_to_html(source_markdown)
    print(actual_html)
    assert actual_html.strip() == expected_html.strip()
@@ -0,0 +1,204 @@
1
+ import re
2
+ from typing import Any
3
+
4
+ import marko
5
+ import regex
6
+ from marko.block import Heading, ListItem
7
+ from marko.inline import Link
8
+
9
+ from kash.config.logger import get_logger
10
+ from kash.utils.common.url import Url
11
+
12
+ log = get_logger(__name__)
13
+
14
# Characters that commonly need escaping in Markdown inline text.
MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)


def escape_markdown(text: str) -> str:
    """
    Backslash-escape every character with special meaning in Markdown.
    """
    return MARKDOWN_ESCAPE_RE.sub(lambda m: "\\" + m.group(1), text)
24
+
25
+
26
def as_bullet_points(values: list[Any]) -> str:
    """
    Convert a list of values to a Markdown bullet-point list. If a value is a string,
    it is treated like Markdown. If it's something else it's converted to a string
    and also escaped for Markdown.
    """
    points: list[str] = []
    for value in values:
        # Branch on type *before* touching string methods: the previous code
        # called value.replace() first, crashing with AttributeError on any
        # non-string value and leaving the escape branch unreachable.
        if isinstance(value, str):
            text = value
        else:
            text = escape_markdown(str(value))
        # Bullet items must be single-line, so collapse newlines.
        points.append(text.replace("\n", " ").strip())

    return "\n\n".join(f"- {point}" for point in points)
41
+
42
+
43
def markdown_link(text: str, url: str | Url) -> str:
    """
    Build a Markdown inline link, escaping square brackets in the link text.
    """
    escaped_text = text.replace("[", "\\[").replace("]", "\\]")
    return f"[{escaped_text}]({url})"
49
+
50
+
51
def is_markdown_header(markdown: str) -> bool:
    """
    Is the start of this content a Markdown header?
    """
    # One or more '#' followed by a space at the very start of the content.
    return re.match(r"^#+ ", markdown) is not None
56
+
57
+
58
def _tree_links(element, include_internal=False):
    """
    Collect link destinations from a parsed Markdown tree, depth-first.
    Internal anchor links ("#...") are skipped unless `include_internal` is set.
    """
    found: list = []

    def _walk(node):
        if isinstance(node, Link):
            # Links are leaves for our purposes; record the destination.
            if include_internal or not node.dest.startswith("#"):
                found.append(node.dest)
        elif hasattr(node, "children"):
            for child in node.children:
                _walk(child)

    _walk(element)
    return found
73
+
74
+
75
def extract_links(file_path: str, include_internal=False) -> list[str]:
    """
    Extract all links from a Markdown file. Future: Include textual and section context.
    """
    with open(file_path) as fh:
        content = fh.read()
    return _tree_links(marko.parse(content), include_internal)
84
+
85
+
86
+ def _extract_text(element: Any) -> str:
87
+ if isinstance(element, str):
88
+ return element
89
+ elif hasattr(element, "children"):
90
+ return "".join(_extract_text(child) for child in element.children)
91
+ else:
92
+ return ""
93
+
94
+
95
def _tree_bullet_points(element: marko.block.Document) -> list[str]:
    """
    Collect the stripped text of every list item in a parsed Markdown tree,
    in document order.
    """
    items: list[str] = []

    def _walk(node) -> None:
        if isinstance(node, ListItem):
            # List items are recorded whole; no recursion into their children.
            items.append(_extract_text(node).strip())
            return
        for child in getattr(node, "children", []):
            _walk(child)

    _walk(element)
    return items
107
+
108
+
109
def extract_bullet_points(content: str) -> list[str]:
    """
    Extract list item values from a Markdown file.
    """
    return _tree_bullet_points(marko.parse(content))
116
+
117
+
118
def _type_from_heading(heading: Heading) -> str:
    """
    Map a Markdown heading node to its HTML tag name ("h1".."h6").
    Raises ValueError for any other level.
    """
    level = heading.level
    if level not in (1, 2, 3, 4, 5, 6):
        raise ValueError(f"Unsupported heading: {heading}: level {heading.level}")
    return f"h{level}"
123
+
124
+
125
+ def _last_unescaped_bracket(text: str, index: int) -> str | None:
126
+ escaped = False
127
+ for i in range(index - 1, -1, -1):
128
+ ch = text[i]
129
+ if ch == "\\":
130
+ escaped = not escaped # Toggle escaping chain
131
+ continue
132
+ if ch in "[]":
133
+ if not escaped:
134
+ return ch
135
+ # Reset escape status after any non‑backslash char
136
+ escaped = False
137
+ return None
138
+
139
+
140
def find_markdown_text(
    pattern: re.Pattern[str], text: str, *, start_pos: int = 0
) -> re.Match[str] | None:
    """
    Return first regex `pattern` match in `text` not inside an existing link.

    A match is considered inside a link when the most recent unescaped square
    bracket preceding the match start is an opening bracket "[".
    """
    pos = start_pos
    while (match := pattern.search(text, pos)) is not None:
        if _last_unescaped_bracket(text, match.start()) != "[":
            return match
        # Inside link text: resume the search just past this occurrence.
        pos = match.end()
    return None
162
+
163
+
164
## Tests


def test_escape_markdown() -> None:
    # Table-driven: each pair is (raw input, expected escaped output).
    cases = [
        ("", ""),
        ("Hello world", "Hello world"),
        ("`code`", "\\`code\\`"),
        ("*italic*", "\\*italic\\*"),
        ("_bold_", "\\_bold\\_"),
        ("{braces}", "\\{braces\\}"),
        ("# header", "\\# header"),
        ("1. item", "1\\. item"),
        ("line+break", "line\\+break"),
        ("dash-", "dash\\-"),
        ("!bang", "\\!bang"),
        ("backslash\\", "backslash\\\\"),
        (
            "Multiple *special* chars [here](#anchor).",
            "Multiple \\*special\\* chars \\[here\\]\\(\\#anchor\\)\\.",
        ),
    ]
    for raw, expected in cases:
        assert escape_markdown(raw) == expected
183
+
184
+
185
def test_find_markdown_text() -> None:  # pragma: no cover
    # A plain occurrence outside any link is matched directly.
    match = find_markdown_text(re.compile("Foo Bar", re.IGNORECASE), "Foo bar baz")
    assert match is not None and match.group(0) == "Foo bar"

    # An occurrence inside link text is skipped; the next one outside is returned.
    text = "[Foo](http://example.com) something Foo"
    match = find_markdown_text(re.compile("Foo", re.IGNORECASE), text)
    assert match is not None
    assert match.start() > text.index(") ")
    assert text[match.start() : match.end()] == "Foo"

    # When every occurrence is inside a link, nothing is returned.
    text = "prefix [bar](http://example.com) suffix"
    match = find_markdown_text(re.compile("bar", re.IGNORECASE), text)
    assert match is None
@@ -15,21 +15,30 @@ Tallies: TypeAlias = dict[str, int]
15
15
  def import_subdirs(
16
16
  parent_package_name: str,
17
17
  parent_dir: Path,
18
- subdir_names: list[str],
18
+ subdir_names: list[str] | None = None,
19
19
  tallies: Tallies | None = None,
20
20
  ):
21
21
  """
22
22
  Import all files in the given subdirectories of a single parent directory.
23
+ Wraps `pkgutil.iter_modules` to iterate over all modules in the subdirectories.
24
+ If `subdir_names` is `None`, will import all subdirectories.
23
25
  """
24
26
  if tallies is None:
25
27
  tallies = {}
28
+ if not subdir_names:
29
+ subdir_names = ["."]
26
30
 
27
31
  for subdir_name in subdir_names:
28
- full_path = parent_dir / subdir_name
32
+ if subdir_name == ".":
33
+ full_path = parent_dir
34
+ package_name = parent_package_name
35
+ else:
36
+ full_path = parent_dir / subdir_name
37
+ package_name = f"{parent_package_name}.{subdir_name}"
38
+
29
39
  if not full_path.is_dir():
30
40
  raise FileNotFoundError(f"Subdirectory not found: {full_path}")
31
41
 
32
- package_name = f"{parent_package_name}.{subdir_name}"
33
42
  for _module_finder, module_name, _is_pkg in pkgutil.iter_modules(path=[str(full_path)]):
34
43
  importlib.import_module(f"{package_name}.{module_name}") # Propagate import errors
35
44
  tallies[package_name] = tallies.get(package_name, 0) + 1
@@ -15,35 +15,6 @@ def not_none(value: T | None, message: str | None = None) -> T:
15
15
  return value
16
16
 
17
17
 
18
- def is_truthy(value: Any, strict: bool = True) -> bool:
19
- """
20
- True for all common string and non-string values for true. Useful for parsing
21
- string values or command line arguments.
22
- """
23
- truthy_values = {"true", "1", "yes", "on", "y"}
24
- falsy_values = {"false", "0", "no", "off", "n", ""}
25
-
26
- if value is None:
27
- return False
28
- elif isinstance(value, str):
29
- value = value.strip().lower()
30
- if value in truthy_values:
31
- return True
32
- elif value in falsy_values:
33
- return False
34
- elif isinstance(value, (int, float)):
35
- return value != 0
36
- elif isinstance(value, bool):
37
- return value
38
- elif isinstance(value, (list, tuple, set, dict)):
39
- return len(value) > 0
40
-
41
- if strict:
42
- raise ValueError(f"Could not convert type {type(value)} to boolean: {repr(value)}")
43
-
44
- return bool(value)
45
-
46
-
47
18
  def as_dataclass(dict_data: dict[str, Any], dataclass_type: type[T]) -> T:
48
19
  """
49
20
  Convert a dict recursively to dataclass object, raising an error if the data does
kash/utils/common/url.py CHANGED
@@ -1,11 +1,12 @@
1
1
  """
2
2
  A simple `Url` type and basic URL handling with no dependencies.
3
+ Simply a few convenience types and functions around `urllib`.
3
4
  """
4
5
 
5
6
  import re
6
7
  from pathlib import Path
7
8
  from typing import NewType
8
- from urllib.parse import urlparse, urlsplit, urlunsplit
9
+ from urllib.parse import ParseResult, urlparse, urlsplit, urlunsplit
9
10
 
10
11
  Url = NewType("Url", str)
11
12
  """
@@ -23,41 +24,82 @@ UnresolvedLocator = str | Locator
23
24
  A string that may not be resolved to a URL or path.
24
25
  """
25
26
 
27
+ HTTP_ONLY = ["http", "https"]
28
+ HTTP_OR_FILE = HTTP_ONLY + ["file"]
26
29
 
27
- def is_url(text: UnresolvedLocator, http_only: bool = False) -> bool:
30
+
31
+ def check_if_url(
32
+ text: UnresolvedLocator, only_schemes: list[str] | None = None
33
+ ) -> ParseResult | None:
28
34
  """
29
- Check if a string is a URL. For convenience, also returns false for
30
- Paths, so that it's easy to use local paths and URLs interchangeably.
35
+ Convenience function to check if a string or Path is a URL and if so return
36
+ the `urlparse.ParseResult`.
37
+
38
+ Also returns false for Paths, so that it's easy to use local paths and URLs
39
+ (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE` to
40
+ restrict to only certain schemes.
31
41
  """
32
42
  if isinstance(text, Path):
33
- return False
43
+ return None
34
44
  text = str(text) # Handle paths or anything else unexpected.
35
45
  try:
36
46
  result = urlparse(text)
37
- if http_only:
38
- return result.scheme in ["http", "https"]
47
+ if only_schemes:
48
+ return result if result.scheme in only_schemes else None
39
49
  else:
40
- return result.scheme != ""
50
+ return result if result.scheme != "" else None
41
51
  except ValueError:
42
- return False
52
+ return None
53
+
54
+
55
+ def is_url(text: UnresolvedLocator, only_schemes: list[str] | None = None) -> bool:
56
+ """
57
+ Check if a string is a URL. For convenience, also returns false for
58
+ Paths, so that it's easy to use local paths and URLs interchangeably.
59
+ """
60
+ return check_if_url(text, only_schemes) is not None
43
61
 
44
62
 
45
63
  def is_file_url(url: str | Url) -> bool:
46
64
  """
47
- Is URL a file:// URL?
65
+ Is URL a file:// URL? Does not check for local file paths.
48
66
  """
49
67
  return url.startswith("file://")
50
68
 
51
69
 
52
- def parse_file_url(url: str | Url) -> Path | None:
70
+ def parse_http_url(url: str | Url) -> ParseResult:
53
71
  """
54
- Parse a file URL and return the path, or None if not a file URL.
72
+ Parse an http/https URL and return the parsed result, raising ValueError if
73
+ not an http/https URL.
74
+ """
75
+ parsed_url = urlparse(url)
76
+ if parsed_url.scheme in ("http", "https"):
77
+ return parsed_url
78
+ else:
79
+ raise ValueError(f"Not an http/https URL: {url}")
80
+
81
+
82
+ def parse_file_url(url: str | Url) -> Path:
83
+ """
84
+ Parse a file URL and return the path, raising ValueError if not a file URL.
55
85
  """
56
86
  parsed_url = urlparse(url)
57
87
  if parsed_url.scheme == "file":
58
88
  return Path(parsed_url.path)
59
89
  else:
60
- return None
90
+ raise ValueError(f"Not a file URL: {url}")
91
+
92
+
93
+ def parse_s3_url(url: str | Url) -> tuple[str, str]:
94
+ """
95
+ Parse an S3 URL and return the bucket and key, raising ValueError if not an
96
+ S3 URL.
97
+ """
98
+ parsed_url = urlparse(url)
99
+ if parsed_url.scheme == "s3":
100
+ return parsed_url.netloc, parsed_url.path.lstrip("/")
101
+ else:
102
+ raise ValueError(f"Not an S3 URL: {url}")
61
103
 
62
104
 
63
105
  def as_file_url(path: str | Path) -> Url:
@@ -73,24 +115,24 @@ def as_file_url(path: str | Path) -> Url:
73
115
 
74
116
 
75
117
  def normalize_url(
76
- url: Url, http_or_file_only=True, drop_fragment=True, resolve_local_paths=True
118
+ url: Url,
119
+ check_schemes: list[str] | None = HTTP_OR_FILE,
120
+ drop_fragment: bool = True,
121
+ resolve_local_paths: bool = True,
77
122
  ) -> Url:
78
123
  """
79
124
  Minimal URL normalization. By default also enforces http/https/file URLs and
80
- removes fragment.
125
+ removes fragment. By default enforces http/https/file URLs but this can be
126
+ adjusted with `check_schemes`.
81
127
  """
82
- # urlsplit is too forgiving.
83
- if (
84
- http_or_file_only
85
- and not url.startswith("http://")
86
- and not url.startswith("https://")
87
- and not is_file_url(url)
88
- ):
89
- raise ValueError(f"Expected http:// or https:// or file:// URL but found: {url}")
90
128
 
91
129
  fragment: str | None
92
130
  scheme, netloc, path, query, fragment = urlsplit(url)
93
131
 
132
+ # urlsplit is too forgiving.
133
+ if check_schemes and scheme not in check_schemes:
134
+ raise ValueError(f"Scheme {scheme!r} not in allowed schemes: {check_schemes!r}: {url}")
135
+
94
136
  if drop_fragment:
95
137
  fragment = None
96
138
  if path == "/":
@@ -115,10 +157,10 @@ def test_is_url():
115
157
  assert is_url("file://hostname/path/to/file") == True
116
158
  assert is_url("invalid-url") == False
117
159
  assert is_url("www.example.com") == False
118
- assert is_url("http://example.com", http_only=True) == True
119
- assert is_url("https://example.com", http_only=True) == True
120
- assert is_url("ftp://example.com", http_only=True) == False
121
- assert is_url("file:///path/to/file", http_only=True) == False
160
+ assert is_url("http://example.com", only_schemes=HTTP_ONLY) == True
161
+ assert is_url("https://example.com", only_schemes=HTTP_ONLY) == True
162
+ assert is_url("ftp://example.com", only_schemes=HTTP_ONLY) == False
163
+ assert is_url("file:///path/to/file", only_schemes=HTTP_ONLY) == False
122
164
 
123
165
 
124
166
  def test_as_file_url():
@@ -148,8 +190,18 @@ def test_normalize_url():
148
190
  normalize_url(Url("file:///path/to/file#fragment"), drop_fragment=False)
149
191
  == "file:///path/to/file#fragment"
150
192
  )
193
+
194
+ try:
195
+ normalize_url(url=Url("/not/a/URL"))
196
+ raise AssertionError()
197
+ except ValueError as e:
198
+ assert str(e) == "Scheme '' not in allowed schemes: ['http', 'https', 'file']: /not/a/URL"
199
+
151
200
  try:
152
201
  normalize_url(Url("ftp://example.com"))
153
202
  raise AssertionError()
154
203
  except ValueError as e:
155
- assert str(e) == "Expected http:// or https:// or file:// URL but found: ftp://example.com"
204
+ assert (
205
+ str(e)
206
+ == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
207
+ )
kash/utils/errors.py CHANGED
@@ -139,6 +139,12 @@ class FileFormatError(ContentError):
139
139
  pass
140
140
 
141
141
 
142
+ class ApiError(KashRuntimeError):
143
+ """Raised when an API call returns something unexpected."""
144
+
145
+ pass
146
+
147
+
142
148
  def _nonfatal_exceptions() -> tuple[type[Exception], ...]:
143
149
  exceptions: list[type[Exception]] = [SelfExplanatoryError, FileNotFoundError, IOError]
144
150
  try:
@@ -1,14 +1,18 @@
1
+ from collections import defaultdict
1
2
  from dataclasses import dataclass
2
3
  from pathlib import Path
3
4
 
5
+ from kash.utils.file_utils.file_formats_model import file_format_info
6
+
4
7
 
5
8
  @dataclass(frozen=True)
6
- class SizeInfo:
9
+ class DirInfo:
7
10
  total_size: int
8
11
  file_count: int
9
12
  dir_count: int
10
13
  symlink_count: int
11
14
  other_count: int
15
+ format_tallies: dict[str, int] | None = None
12
16
 
13
17
  @property
14
18
  def total_count(self) -> int:
@@ -18,7 +22,7 @@ class SizeInfo:
18
22
  return self.file_count == 0 and self.dir_count == 0 and self.other_count == 0
19
23
 
20
24
 
21
- def get_dir_size(path: Path) -> SizeInfo:
25
+ def get_dir_info(path: Path, tally_formats: bool = False) -> DirInfo:
22
26
  """
23
27
  Get tallies of all files, directories, and other items in the given directory.
24
28
  """
@@ -29,10 +33,15 @@ def get_dir_size(path: Path) -> SizeInfo:
29
33
  symlink_count = 0
30
34
  other_count = 0
31
35
 
36
+ format_tallies: dict[str, int] = defaultdict(int)
37
+
32
38
  for file_path in path.rglob("*"):
33
39
  if file_path.is_file():
34
40
  file_count += 1
35
41
  total_size += file_path.stat().st_size
42
+ if tally_formats:
43
+ file_info = file_format_info(file_path)
44
+ format_tallies[file_info.as_str()] += 1
36
45
  elif file_path.is_dir():
37
46
  dir_count += 1
38
47
  elif file_path.is_symlink():
@@ -40,9 +49,21 @@ def get_dir_size(path: Path) -> SizeInfo:
40
49
  else:
41
50
  other_count += 1
42
51
 
43
- return SizeInfo(total_size, file_count, dir_count, symlink_count, other_count)
52
+ if format_tallies:
53
+ sorted_format_tallies = {k: format_tallies[k] for k in sorted(format_tallies)}
54
+ else:
55
+ sorted_format_tallies = None
56
+
57
+ return DirInfo(
58
+ total_size,
59
+ file_count,
60
+ dir_count,
61
+ symlink_count,
62
+ other_count,
63
+ sorted_format_tallies,
64
+ )
44
65
 
45
66
 
46
67
  def is_nonempty_dir(path: str | Path) -> bool:
47
68
  path = Path(path)
48
- return path.is_dir() and get_dir_size(path).file_count > 0
69
+ return path.is_dir() and get_dir_info(path).file_count > 0
@@ -24,12 +24,12 @@ class FileExt(Enum):
24
24
  log = "log"
25
25
  py = "py"
26
26
  sh = "sh"
27
- ksh = "ksh"
28
27
  xsh = "xsh"
29
28
  pdf = "pdf"
30
29
  docx = "docx"
31
30
  jpg = "jpg"
32
31
  png = "png"
32
+ gif = "gif"
33
33
  svg = "svg"
34
34
  mp3 = "mp3"
35
35
  m4a = "m4a"
@@ -49,13 +49,12 @@ class FileExt(Enum):
49
49
  self.json,
50
50
  self.py,
51
51
  self.sh,
52
- self.ksh,
53
52
  self.xsh,
54
53
  ]
55
54
 
56
55
  @property
57
56
  def is_image(self) -> bool:
58
- return self in [self.jpg, self.png]
57
+ return self in [self.jpg, self.png, self.gif, self.svg]
59
58
 
60
59
  @classmethod
61
60
  def parse(cls, ext_str: str) -> FileExt | None: