kash-shell 0.3.34__py3-none-any.whl → 0.3.35__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,13 +1,19 @@
  from __future__ import annotations

+ import os
  import shutil
  import subprocess
+ from logging import getLogger
  from pathlib import Path

+ from dotenv import find_dotenv, load_dotenv
  from sidematter_format.sidematter_format import Sidematter
+ from strif import abbrev_str

  from kash.utils.common.url import Url, is_s3_url, parse_s3_url

+ log = getLogger(__name__)
+

  def check_aws_cli() -> None:
      """
@@ -19,6 +25,54 @@ def check_aws_cli() -> None:
          )


+ def run_aws_command(cmd: list[str]) -> subprocess.CompletedProcess[str]:
+     """
+     Run an AWS CLI command and capture output.
+     Raises a RuntimeError with stdout/stderr on failure.
+     """
+     result = subprocess.run(
+         cmd,
+         capture_output=True,
+         text=True,
+         env=os.environ,
+     )
+
+     if result.returncode != 0:
+         # Build a detailed error message
+         error_parts = [f"AWS command failed with exit code {result.returncode}"]
+         error_parts.append(f"Command: {' '.join(cmd)}")
+
+         if result.stdout:
+             error_parts.append(f"stdout: {result.stdout}")
+         if result.stderr:
+             error_parts.append(f"stderr: {result.stderr}")
+
+         raise RuntimeError("\n".join(error_parts))
+
+     return result
+
+
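For illustration, a call like the following (bucket name hypothetical) either returns the completed process or raises a RuntimeError carrying the exit code, command line, and any captured output:

    # Hypothetical usage; on failure the RuntimeError includes full context.
    result = run_aws_command(["aws", "s3", "ls", "s3://my-bucket/"])
    print(result.stdout)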
+ def reload_aws_env_vars() -> None:
+     """
+     Fresh reload of AWS env vars from .env.local.
+     """
+
+     def aws_creds() -> set[tuple[str, str]]:
+         return {(k, abbrev_str(v, 5)) for k, v in os.environ.items() if k.startswith("AWS_")}
+
+     if len(aws_creds()) == 0:
+         dotenv_path = find_dotenv(".env.local", usecwd=True) or find_dotenv(".env", usecwd=True)
+         load_dotenv(dotenv_path, override=True)
+         if len(aws_creds()) > 0:
+             log.info(
+                 "Loaded %s, found AWS credentials: %s",
+                 dotenv_path,
+                 aws_creds(),
+             )
+         else:
+             log.warning("No AWS credentials found in env or .env files")
+
+
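As a sketch of what this picks up: when no AWS_* variables are set, a `.env.local` (or `.env`) found by searching upward from the working directory is loaded with override. Illustrative file contents (values hypothetical):

    # .env.local (example values, hypothetical)
    AWS_ACCESS_KEY_ID=AKIA...
    AWS_SECRET_ACCESS_KEY=...
    AWS_DEFAULT_REGION=us-east-1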
  def get_s3_parent_folder(url: Url) -> Url | None:
      """
      Get the parent folder of an S3 URL, or None if not an S3 URL.
@@ -47,6 +101,7 @@ def s3_sync_to_folder(
      - For a single file: the file URL (and sidematter file/dir URLs if included).
      - For a directory: the destination parent prefix URL (non-recursive reporting).
      """
+     reload_aws_env_vars()

      src_path = Path(src_path)
      if not src_path.exists():
@@ -71,7 +126,7 @@
          for p in sync_paths:
              if p.is_file():
                  # Use sync with include/exclude to leverage default short-circuiting
-                 subprocess.run(
+                 run_aws_command(
                      [
                          "aws",
                          "s3",
@@ -82,27 +137,54 @@
                          "*",
                          "--include",
                          p.name,
-                     ],
-                     check=True,
+                     ]
                  )
                  targets.append(Url(dest_prefix + p.name))
              elif p.is_dir():
                  dest_dir = dest_prefix + p.name + "/"
-                 subprocess.run(["aws", "s3", "sync", str(p), dest_dir], check=True)
+                 run_aws_command(["aws", "s3", "sync", str(p), dest_dir])
                  targets.append(Url(dest_dir))

          return targets
      else:
          # Directory mode: sync whole directory.
-         subprocess.run(
+         run_aws_command(
              [
                  "aws",
                  "s3",
                  "sync",
                  str(src_path),
                  dest_prefix,
-             ],
-             check=True,
+             ]
          )
          targets.append(Url(dest_prefix))
          return targets
+
+
+ def s3_download_file(s3_url: Url, target_path: str | Path) -> None:
+     """
+     Download a file from S3 to a local path using the AWS CLI.
+
+     Args:
+         s3_url: The S3 URL to download from (s3://bucket/path/to/file)
+         target_path: The local path to save the file to
+     """
+     reload_aws_env_vars()
+
+     if not is_s3_url(s3_url):
+         raise ValueError(f"Source must be an s3:// URL: {s3_url}")
+
+     check_aws_cli()
+
+     target_path = Path(target_path)
+
+     # Use aws s3 cp to download the file
+     run_aws_command(
+         [
+             "aws",
+             "s3",
+             "cp",
+             str(s3_url),
+             str(target_path),
+         ]
+     )
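A minimal usage sketch (bucket and paths hypothetical); non-s3:// URLs raise ValueError, and CLI failures surface as the RuntimeError raised by run_aws_command:

    # Hypothetical usage of the new helper.
    s3_download_file(Url("s3://my-bucket/reports/summary.md"), "/tmp/summary.md")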
@@ -26,7 +26,6 @@ def fetch_page_content(

      Force re-fetching and updating the cache by setting `refetch` to true.

-
      For HTML and other text files, uses the `text_extractor` to extract
      clean text and page metadata.
      """
@@ -1,6 +1,8 @@
  from __future__ import annotations

  import logging
+ import ssl
+ from collections.abc import Iterable
  from dataclasses import dataclass
  from enum import Enum
  from functools import cache, cached_property
@@ -12,17 +14,145 @@ from cachetools import TTLCache
  from strif import atomic_output_file, copyfile_atomic

  from kash.config.env_settings import KashEnv
+ from kash.utils.common.s3_utils import s3_download_file
  from kash.utils.common.url import Url
  from kash.utils.file_utils.file_formats import MimeType

+ log = logging.getLogger(__name__)
+
+
+ def _httpx_verify_context() -> ssl.SSLContext | bool:
+     """
+     Return an SSLContext that uses the system trust store via truststore, if available.
+     Falls back to certifi bundle; otherwise True to use httpx defaults.
+     """
+     try:
+         import truststore
+
+         return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+     except Exception:
+         try:
+             import certifi
+
+             return ssl.create_default_context(cafile=certifi.where())
+         except Exception:
+             return True
+
+
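Whatever this returns is passed straight through as httpx's verify argument (httpx accepts either an SSLContext or a bool), as the helpers below do. An illustrative one-liner:

    # Sketch: truststore context if available, else certifi, else httpx defaults.
    client = httpx.Client(verify=_httpx_verify_context())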
+ def _stream_to_file(
+     target_filename: str | Path,
+     response_iterator: Iterable[bytes],
+     total_size: int,
+     show_progress: bool,
+ ) -> None:
+     with atomic_output_file(target_filename, make_parents=True) as temp_filename:
+         with open(temp_filename, "wb") as f:
+             if not show_progress:
+                 for chunk in response_iterator:
+                     if chunk:
+                         f.write(chunk)
+             else:
+                 from tqdm import tqdm
+
+                 with tqdm(
+                     total=total_size,
+                     unit="B",
+                     unit_scale=True,
+                     desc=f"Downloading {Path(str(target_filename)).name}",
+                 ) as progress:
+                     for chunk in response_iterator:
+                         if chunk:
+                             f.write(chunk)
+                             progress.update(len(chunk))
+
+
+ def _httpx_fetch(
+     url: Url,
+     *,
+     timeout: int,
+     auth: Any | None,
+     headers: dict[str, str] | None,
+     mode: ClientMode,
+     log_label: str,
+ ):
+     import httpx
+
+     req_headers = _get_req_headers(mode, headers)
+     parsed_url = urlparse(str(url))
+     with httpx.Client(
+         follow_redirects=True,
+         timeout=timeout,
+         auth=auth,
+         headers=req_headers,
+         verify=_httpx_verify_context(),
+     ) as client:
+         log.debug("fetch_url (%s): using headers: %s", log_label, client.headers)
+         if mode is ClientMode.BROWSER_HEADERS:
+             _prime_host(parsed_url.netloc, client, timeout)
+         response = client.get(url)
+         log.info(
+             "Fetched (%s): %s (%s bytes): %s",
+             log_label,
+             response.status_code,
+             len(response.content),
+             url,
+         )
+         response.raise_for_status()
+         return response
+
+
+ def _httpx_download(
+     url: Url,
+     target_filename: str | Path,
+     *,
+     show_progress: bool,
+     timeout: int,
+     auth: Any | None,
+     headers: dict[str, str] | None,
+     mode: ClientMode,
+     log_label: str,
+ ) -> dict[str, str]:
+     import httpx
+
+     req_headers = _get_req_headers(mode, headers)
+     parsed_url = urlparse(str(url))
+     with httpx.Client(
+         follow_redirects=True,
+         timeout=timeout,
+         headers=req_headers,
+         verify=_httpx_verify_context(),
+     ) as client:
+         if mode is ClientMode.BROWSER_HEADERS:
+             _prime_host(parsed_url.netloc, client, timeout)
+         log.debug("download_url (%s): using headers: %s", log_label, client.headers)
+         with client.stream("GET", url, auth=auth, follow_redirects=True) as response:
+             response.raise_for_status()
+             response_headers = dict(response.headers)
+             total = int(response.headers.get("content-length", "0"))
+             _stream_to_file(target_filename, response.iter_bytes(), total, show_progress)
+             return response_headers
+
+
+ def _is_tls_cert_error(exc: Exception) -> bool:
+     """
+     Heuristic detection of TLS/certificate verification errors coming from curl_cffi/libcurl.
+     """
+     s = str(exc).lower()
+     if "curl: (60)" in s:
+         return True
+     if "certificate verify failed" in s:
+         return True
+     if "ssl" in s and ("certificate" in s or "cert" in s or "handshake" in s):
+         return True
+     return False
+
+
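Illustrative inputs (error messages paraphrased from typical libcurl/OpenSSL wording, not captured output):

    _is_tls_cert_error(RuntimeError("curl: (60) SSL certificate problem"))  # True: libcurl code 60
    _is_tls_cert_error(RuntimeError("certificate verify failed"))           # True: OpenSSL failure text
    _is_tls_cert_error(RuntimeError("connection timed out"))                # False: unrelated errors pass through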
  if TYPE_CHECKING:
      from curl_cffi.requests import Response as CurlCffiResponse
      from curl_cffi.requests import Session as CurlCffiSession
      from httpx import Client as HttpxClient
      from httpx import Response as HttpxResponse

- log = logging.getLogger(__name__)
-

  DEFAULT_TIMEOUT = 30
  CURL_CFFI_IMPERSONATE_VERSION = "chrome120"
@@ -199,54 +329,57 @@ def fetch_url(

          from curl_cffi.requests import Session

-         with Session() as client:
-             # Set headers on the session - they will be sent with all requests
-             client.headers.update(req_headers)
-             _prime_host(
-                 parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
+         exc: Exception | None = None
+         try:
+             with Session() as client:
+                 # Set headers on the session - they will be sent with all requests
+                 client.headers.update(req_headers)
+                 _prime_host(
+                     parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
+                 )
+                 log.debug("fetch_url (curl_cffi): using session headers: %s", client.headers)
+                 response = client.get(
+                     url,
+                     impersonate=CURL_CFFI_IMPERSONATE_VERSION,
+                     timeout=timeout,
+                     auth=auth,
+                     allow_redirects=True,
+                 )
+                 log.info(
+                     "Fetched (curl_cffi): %s (%s bytes): %s",
+                     response.status_code,
+                     len(response.content),
+                     url,
+                 )
+                 response.raise_for_status()
+                 return response
+         except Exception as e:
+             exc = e
+
+         if exc and _is_tls_cert_error(exc):
+             log.warning(
+                 "TLS/SSL verification failed with curl_cffi for %s: %s; falling back to httpx",
+                 url,
+                 exc,
              )
-             log.debug("fetch_url (curl_cffi): using session headers: %s", client.headers)
-             response = client.get(
+             # Fallback to httpx with browser-like headers (uses system trust if available)
+             return _httpx_fetch(
                  url,
-                 impersonate=CURL_CFFI_IMPERSONATE_VERSION,
                  timeout=timeout,
                  auth=auth,
-                 allow_redirects=True,
-             )
-             log.info(
-                 "Fetched (curl_cffi): %s (%s bytes): %s",
-                 response.status_code,
-                 len(response.content),
-                 url,
+                 headers=headers,
+                 mode=ClientMode.BROWSER_HEADERS,
+                 log_label="httpx fallback",
              )
-             response.raise_for_status()
-             return response
+
+         if exc:
+             raise exc

      # Handle httpx modes
      else:
-         import httpx
-
-         with httpx.Client(
-             follow_redirects=True,
-             timeout=timeout,
-             auth=auth,
-             headers=req_headers,
-         ) as client:
-             log.debug("fetch_url (httpx): using headers: %s", client.headers)
-
-             # Cookie priming only makes sense for the browser-like mode
-             if mode is ClientMode.BROWSER_HEADERS:
-                 _prime_host(parsed_url.netloc, client, timeout)
-
-             response = client.get(url)
-             log.info(
-                 "Fetched (httpx): %s (%s bytes): %s",
-                 response.status_code,
-                 len(response.content),
-                 url,
-             )
-             response.raise_for_status()
-             return response
+         return _httpx_fetch(
+             url, timeout=timeout, auth=auth, headers=headers, mode=mode, log_label="httpx"
+         )


  @dataclass(frozen=True)
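The control flow above amounts to the following pattern (a simplified sketch with hypothetical helper names): curl_cffi is tried first, only TLS verification failures divert to httpx with the system trust store, and all other errors re-raise unchanged.

    # Simplified sketch of the fallback logic, not the actual function.
    try:
        return fetch_with_curl_cffi(url)
    except Exception as e:
        if _is_tls_cert_error(e):
            return fetch_with_httpx(url)  # verify via truststore/certifi context
        raise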
@@ -289,38 +422,13 @@ def download_url(
          copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename, make_parents=True)
          return None
      elif parsed_url.scheme == "s3":
-         import boto3  # pyright: ignore
-
-         s3 = boto3.resource("s3")
-         s3_path = parsed_url.path.lstrip("/")
          with atomic_output_file(target_filename, make_parents=True) as temp_filename:
-             s3.Bucket(parsed_url.netloc).download_file(s3_path, temp_filename)
+             s3_download_file(url, temp_filename)
          return None

      req_headers = _get_req_headers(mode, headers)
      response_headers = None

-     def stream_to_file(response_iterator, total_size):
-         with atomic_output_file(target_filename, make_parents=True) as temp_filename:
-             with open(temp_filename, "wb") as f:
-                 if not show_progress:
-                     for chunk in response_iterator:
-                         if chunk:  # Skip empty chunks
-                             f.write(chunk)
-                 else:
-                     from tqdm import tqdm
-
-                     with tqdm(
-                         total=total_size,
-                         unit="B",
-                         unit_scale=True,
-                         desc=f"Downloading {Path(target_filename).name}",
-                     ) as progress:
-                         for chunk in response_iterator:
-                             if chunk:  # Skip empty chunks
-                                 f.write(chunk)
-                                 progress.update(len(chunk))
-
      # Handle curl_cffi mode
      if mode is ClientMode.CURL_CFFI:
          if not _have_curl_cffi():
@@ -328,47 +436,111 @@ def download_url(

          from curl_cffi.requests import Session

-         with Session() as client:
-             # Set headers on the session; they will be sent with all requests
-             client.headers.update(req_headers)
-             _prime_host(
-                 parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
-             )
-             log.debug("download_url (curl_cffi): using session headers: %s", client.headers)
+         exc: Exception | None = None
+         try:
+             with Session() as client:
+                 # Set headers on the session; they will be sent with all requests
+                 client.headers.update(req_headers)
+                 _prime_host(
+                     parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
+                 )
+                 log.debug("download_url (curl_cffi): using session headers: %s", client.headers)
+                 response = client.get(
+                     url,
+                     impersonate=CURL_CFFI_IMPERSONATE_VERSION,
+                     timeout=timeout,
+                     auth=auth,
+                     allow_redirects=True,
+                     stream=True,
+                 )
+                 response.raise_for_status()
+                 response_headers = dict(response.headers)
+                 total = int(response.headers.get("content-length", "0"))
+
+                 # Use iter_content for streaming; this is the standard method for curl_cffi
+                 chunk_iterator = response.iter_content(chunk_size=8192)
+                 _stream_to_file(target_filename, chunk_iterator, total, show_progress)
+         except Exception as e:
+             exc = e

-             response = client.get(
+         if exc and _is_tls_cert_error(exc):
+             log.warning(
+                 "TLS/SSL verification failed with curl_cffi for %s: %s; falling back to httpx",
                  url,
-                 impersonate=CURL_CFFI_IMPERSONATE_VERSION,
+                 exc,
+             )
+             # Fallback to httpx streaming with browser-like headers (system trust store if available)
+             response_headers = _httpx_download(
+                 url,
+                 target_filename,
+                 show_progress=show_progress,
                  timeout=timeout,
                  auth=auth,
-                 allow_redirects=True,
-                 stream=True,
+                 headers=headers,
+                 mode=ClientMode.BROWSER_HEADERS,
+                 log_label="httpx fallback",
              )
-             response.raise_for_status()
-             response_headers = dict(response.headers)
-             total = int(response.headers.get("content-length", "0"))
-
-             # Use iter_content for streaming; this is the standard method for curl_cffi
-             chunk_iterator = response.iter_content(chunk_size=8192)
-             stream_to_file(chunk_iterator, total)
+         elif exc:
+             raise exc

      # Handle httpx modes
      else:
-         import httpx
-
-         with httpx.Client(follow_redirects=True, timeout=timeout, headers=req_headers) as client:
-             if mode is ClientMode.BROWSER_HEADERS:
-                 _prime_host(parsed_url.netloc, client, timeout)
-
-             log.debug("download_url (httpx): using headers: %s", client.headers)
-             with client.stream("GET", url, auth=auth, follow_redirects=True) as response:
-                 response.raise_for_status()
-                 response_headers = dict(response.headers)
-                 total = int(response.headers.get("content-length", "0"))
-                 stream_to_file(response.iter_bytes(), total)
+         response_headers = _httpx_download(
+             url,
+             target_filename,
+             show_progress=show_progress,
+             timeout=timeout,
+             auth=auth,
+             headers=headers,
+             mode=mode,
+             log_label="httpx",
+         )

      # Filter out None values from headers for HttpHeaders type compatibility
      if response_headers:
          clean_headers = {k: v for k, v in response_headers.items() if v is not None}
          return HttpHeaders(clean_headers)
      return None
+
+
+ def main() -> None:
+     """
+     Simple CLI test harness for fetch and download.
+
+     Usage examples:
+         uv run python -m kash.web_content.web_fetch
+         uv run python -m kash.web_content.web_fetch https://www.example.com
+     """
+     import sys
+     import traceback
+
+     # Try to use the system trust store for TLS like command-line curl
+     try:
+         import truststore  # type: ignore
+
+         truststore.inject_into_ssl()
+         log.warning("truststore initialized for test harness: using system TLS trust store")
+     except Exception as exc:
+         log.info("truststore not available for test harness; using default TLS trust (%s)", exc)
+
+     urls = [
+         "https://www.example.com",
+         "https://www.businessdefense.gov/ibr/mceip/dpai/dpat3/index.html",
+     ]
+
+     args = [a for a in sys.argv[1:] if a and a.strip()]
+     if args:
+         urls = args
+
+     for u in urls:
+         try:
+             log.warning("Testing fetch_url: %s", u)
+             r = fetch_url(Url(u))
+             log.warning("fetch_url OK: %s -> %s bytes", u, len(r.content))
+         except Exception as exc:
+             log.exception("fetch_url FAILED for %s: %s", u, exc)
+             traceback.print_exc()
+
+
+ if __name__ == "__main__":
+     main()
@@ -95,6 +95,8 @@ def get_ws(name_or_path: str | Path, auto_init: bool = True) -> FileStore:
      Get a workspace by name or path. Adds to the in-memory registry so we reuse it.
      With `auto_init` true, will initialize the workspace if it is not already initialized.
      """
+     if isinstance(name_or_path, Path):
+         name_or_path = name_or_path.expanduser().absolute()
      name = Path(name_or_path).name
      name = check_strict_workspace_name(name)
      info = resolve_ws(name_or_path)
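A sketch of the effect (paths hypothetical): Path inputs are now normalized before the workspace name is derived, so tilde and relative forms resolve consistently:

    # Hypothetical: both are normalized to absolute paths before lookup.
    get_ws(Path("~/workspaces/myws"))  # ~ expanded, then made absolute
    get_ws(Path("./myws"))             # resolved against the current directory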
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kash-shell
- Version: 0.3.34
+ Version: 0.3.35
  Summary: The knowledge agent shell (core)
  Project-URL: Repository, https://github.com/jlevy/kash-shell
  Author-email: Joshua Levy <joshua@cal.berkeley.edu>
@@ -72,6 +72,7 @@ Requires-Dist: thefuzz>=0.22.1
  Requires-Dist: tiktoken>=0.9.0
  Requires-Dist: tldr>=3.3.0
  Requires-Dist: tminify>=0.1.6
+ Requires-Dist: truststore>=0.10.4
  Requires-Dist: typing-extensions>=4.12.2
  Requires-Dist: uvicorn>=0.34.0
  Requires-Dist: xonsh>=0.19.3