PyPI - ostruct-cli - Versions diffs - 0.8.29__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

ostruct-cli 0.8.29__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

ostruct/cli/__init__.py +3 -15
ostruct/cli/attachment_processor.py +455 -0
ostruct/cli/attachment_template_bridge.py +973 -0
ostruct/cli/cli.py +157 -33
ostruct/cli/click_options.py +775 -692
ostruct/cli/code_interpreter.py +195 -12
ostruct/cli/commands/__init__.py +0 -3
ostruct/cli/commands/run.py +289 -62
ostruct/cli/config.py +23 -22
ostruct/cli/constants.py +89 -0
ostruct/cli/errors.py +175 -5
ostruct/cli/explicit_file_processor.py +0 -15
ostruct/cli/file_info.py +97 -15
ostruct/cli/file_list.py +43 -1
ostruct/cli/file_search.py +68 -2
ostruct/cli/help_json.py +235 -0
ostruct/cli/mcp_integration.py +13 -16
ostruct/cli/params.py +217 -0
ostruct/cli/plan_assembly.py +335 -0
ostruct/cli/plan_printing.py +385 -0
ostruct/cli/progress_reporting.py +8 -56
ostruct/cli/quick_ref_help.py +128 -0
ostruct/cli/rich_config.py +299 -0
ostruct/cli/runner.py +397 -190
ostruct/cli/security/__init__.py +2 -0
ostruct/cli/security/allowed_checker.py +41 -0
ostruct/cli/security/normalization.py +13 -9
ostruct/cli/security/security_manager.py +558 -17
ostruct/cli/security/types.py +15 -0
ostruct/cli/template_debug.py +283 -261
ostruct/cli/template_debug_help.py +233 -142
ostruct/cli/template_env.py +46 -5
ostruct/cli/template_filters.py +415 -8
ostruct/cli/template_processor.py +240 -619
ostruct/cli/template_rendering.py +49 -73
ostruct/cli/template_validation.py +2 -1
ostruct/cli/token_validation.py +35 -15
ostruct/cli/types.py +15 -19
ostruct/cli/unicode_compat.py +283 -0
ostruct/cli/upload_manager.py +448 -0
ostruct/cli/validators.py +255 -54
{ostruct_cli-0.8.29.dist-info → ostruct_cli-1.0.1.dist-info}/METADATA +231 -128
ostruct_cli-1.0.1.dist-info/RECORD +80 -0
ostruct/cli/commands/quick_ref.py +0 -54
ostruct/cli/template_optimizer.py +0 -478
ostruct_cli-0.8.29.dist-info/RECORD +0 -71
{ostruct_cli-0.8.29.dist-info → ostruct_cli-1.0.1.dist-info}/LICENSE +0 -0
{ostruct_cli-0.8.29.dist-info → ostruct_cli-1.0.1.dist-info}/WHEEL +0 -0
{ostruct_cli-0.8.29.dist-info → ostruct_cli-1.0.1.dist-info}/entry_points.txt +0 -0

ostruct/cli/file_list.py CHANGED Viewed

@@ -30,6 +30,9 @@ class FileInfoList(List[FileInfo]):
     handling of multi-file scenarios through indexing (files[0].content) or the
     |single filter (files|single.content).
+    Implements the file-sequence protocol by being iterable (already inherits from list)
+    and providing a .first property for uniform access patterns.
     This class is thread-safe. All operations that access or modify the internal list
     are protected by a reentrant lock (RLock). This allows nested method calls while
     holding the lock, preventing deadlocks in cases like:
@@ -49,6 +52,12 @@ class FileInfoList(List[FileInfo]):
             content = files[0].content  # Access first file explicitly
             content = files|single.content  # Use |single filter for validation
+        Uniform iteration (file-sequence protocol):
+            for file in files:  # Works for both single and multiple files
+                print(file.content)
+            first_file = files.first  # Get first file uniformly
     Properties:
         content: File content - only for single file from file mapping (not directory)
         path: File path - only for single file from file mapping
@@ -56,6 +65,8 @@ class FileInfoList(List[FileInfo]):
         size: File size in bytes - only for single file from file mapping
         name: Filename without directory path - only for single file from file mapping
         names: Always returns list of all filenames (safe for multi-file access)
+        first: Returns the first FileInfo object (uniform access)
+        is_collection: Always returns True (indicates this is a collection)
     Raises:
         ValueError: When accessing scalar properties on empty list, multiple files, or directory mappings
@@ -85,6 +96,37 @@ class FileInfoList(List[FileInfo]):
         self._from_dir = from_dir
         self._var_alias = var_alias
+    @property
+    def first(self) -> FileInfo:
+        """Return the first file held by this collection.
+        Offers the same ``.first`` accessor as ``FileInfo.first`` so templates
+        can use it without knowing whether they hold one file or many.
+        Returns:
+            The FileInfo stored at index 0
+        Raises:
+            ValueError: If the collection contains no files
+        """
+        # The emptiness check and the index read happen under the same lock.
+        with self._lock:
+            if self:
+                return self[0]
+            label = self._var_alias or "file_list"
+            raise ValueError(
+                f"No files in '{label}'. Cannot access .first property."
+            )
+    @property
+    def is_collection(self) -> bool:
+        """Report that this object represents a collection of files.
+        The value is a constant; it does not depend on how many files the
+        list currently holds.
+        Returns:
+            Always True for FileInfoList
+        """
+        return True
     @property
     def content(self) -> str:
         """Get the content of a single file.
@@ -443,7 +485,7 @@ class FileInfoList(List[FileInfo]):
             if not self:
                 return "FileInfoList([])"
-            # For single file from file mapping (--fta, -ft, etc.)
+            # For single file from file mapping (--file alias, etc.)
             if len(self) == 1 and not self._from_dir:
                 var_name = self._var_alias or "file_var"
                 return f"[File '{self[0].path}' - Use {{ {var_name}.content }} to access file content]"

ostruct/cli/file_search.py CHANGED Viewed

@@ -10,25 +10,34 @@ import logging
 import os
 import time
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 from openai import AsyncOpenAI
+if TYPE_CHECKING:
+    from .upload_manager import SharedUploadManager
 logger = logging.getLogger(__name__)
 class FileSearchManager:
     """Manager for File Search vector store operations with retry logic."""
-    def __init__(self, client: AsyncOpenAI):
+    def __init__(
+        self,
+        client: AsyncOpenAI,
+        upload_manager: Optional["SharedUploadManager"] = None,
+    ) -> None:
         """Initialize File Search manager.
         Args:
             client: AsyncOpenAI client instance
+            upload_manager: Optional shared upload manager for deduplication
         """
         self.client = client
         self.uploaded_file_ids: List[str] = []
         self.created_vector_stores: List[str] = []
+        self.upload_manager = upload_manager
     async def create_vector_store_with_retry(
         self,
@@ -321,6 +330,63 @@ class FileSearchManager:
             "vector_store_ids": [vector_store_id],
         }
+    async def create_vector_store_from_shared_manager(
+        self,
+        vector_store_name: str = "ostruct_vector_store",
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ) -> str:
+        """Build a vector store from the shared upload manager's file-search files.
+        When no shared upload manager is configured, or it reports no files for
+        "file-search", an empty vector store is created instead.
+        Args:
+            vector_store_name: Name given to the new vector store
+            max_retries: Retry budget forwarded to the create/add helpers
+            retry_delay: Delay between retries forwarded to the create/add helpers
+        Returns:
+            ID of the created vector store
+        Raises:
+            Exception: If the upload, the vector store creation, or adding files fails
+        """
+        manager = self.upload_manager
+        if not manager:
+            logger.warning("No shared upload manager available")
+            # Nothing to populate from: hand back an empty store.
+            return await self.create_vector_store_with_retry(
+                vector_store_name, max_retries, retry_delay
+            )
+        # Ask the shared manager to upload for file-search, then read back its IDs.
+        await manager.upload_for_tool("file-search")
+        shared_ids = manager.get_files_for_tool("file-search")
+        if shared_ids:
+            store_id = await self.create_vector_store_with_retry(
+                vector_store_name, max_retries, retry_delay
+            )
+            await self._add_files_to_vector_store_with_retry(
+                store_id, shared_ids, max_retries, retry_delay
+            )
+            # Record the IDs on this manager's uploaded-file list for cleanup.
+            self.uploaded_file_ids.extend(shared_ids)
+            logger.debug(
+                f"Created vector store {store_id} with {len(shared_ids)} files from shared manager"
+            )
+            return store_id
+        logger.debug(
+            "No files for file-search, creating empty vector store"
+        )
+        return await self.create_vector_store_with_retry(
+            vector_store_name, max_retries, retry_delay
+        )
     async def cleanup_resources(self) -> None:
         """Clean up uploaded files and created vector stores.

ostruct/cli/help_json.py ADDED Viewed

@@ -0,0 +1,235 @@
+"""Unified JSON help system for ostruct CLI."""
+import json
+from typing import Any, Dict, List
+import click
+from .. import __version__
+def generate_attachment_system_info() -> Dict[str, Any]:
+    """Describe the attachment syntax, its routing targets and usage examples.
+    Target names and their aliases are derived from ``TARGET_NORMALISE`` in
+    ``.params``; the examples are fixed.
+    Returns:
+        Dict with the keys ``format_spec``, ``targets`` and ``examples``
+    """
+    # Function-scope import of the alias table.
+    from .params import TARGET_NORMALISE
+    # Group every non-canonical spelling under the canonical name it maps to.
+    alias_map: Dict[str, List[str]] = {}
+    for spelling, canonical_name in TARGET_NORMALISE.items():
+        if spelling != canonical_name:
+            alias_map.setdefault(canonical_name, []).append(spelling)
+    # One entry per canonical target, in sorted order.
+    targets = {
+        name: {
+            "canonical_name": name,
+            "aliases": sorted(alias_map.get(name, [])),
+            "description": _get_target_description(name),
+            "type": "file_routing_target",
+        }
+        for name in sorted(set(TARGET_NORMALISE.values()))
+    }
+    examples = [
+        {
+            "syntax": "--file data file.txt",
+            "targets": ["prompt"],
+            "description": "Template access only (default target)",
+        },
+        {
+            "syntax": "--file ci:analysis data.csv",
+            "targets": ["code-interpreter"],
+            "description": "Code execution & analysis",
+        },
+        {
+            "syntax": "--dir fs:docs ./documentation",
+            "targets": ["file-search"],
+            "description": "Document search & retrieval",
+        },
+        {
+            "syntax": "--file ci,fs:shared data.json",
+            "targets": ["code-interpreter", "file-search"],
+            "description": "Multi-target routing",
+        },
+    ]
+    return {
+        "format_spec": "[targets:]alias path",
+        "targets": targets,
+        "examples": examples,
+    }
+def _get_target_description(target: str) -> str:
+    """Return the human-readable description of a canonical target name.
+    A name without a known description yields ``Unknown target: <name>``.
+    """
+    known = {
+        "prompt": "Template access only (default)",
+        "code-interpreter": "Code execution & analysis",
+        "file-search": "Document search & retrieval",
+    }
+    if target in known:
+        return known[target]
+    return f"Unknown target: {target}"
+def generate_json_output_modes() -> Dict[str, Any]:
+    """Return a static description of the CLI's three JSON output modes.
+    Returns:
+        Dict keyed by ``help_json``, ``dry_run_json`` and ``run_summary_json``;
+        each value describes where that mode writes and whether it exits
+    """
+    help_json_mode = {
+        "description": "Output command help in JSON format",
+        "output_destination": "stdout",
+        "exit_behavior": "exits_after_output",
+        "scope": "single_command_or_full_cli",
+    }
+    dry_run_mode = {
+        "description": "Output execution plan as JSON with --dry-run",
+        "output_destination": "stdout",
+        "requires": ["--dry-run"],
+        "exit_behavior": "exits_after_output",
+        "scope": "execution_plan",
+    }
+    run_summary_mode = {
+        "description": "Output run summary as JSON to stderr after execution",
+        "output_destination": "stderr",
+        "requires": [],
+        "conflicts_with": ["--dry-run"],
+        "exit_behavior": "continues_execution",
+        "scope": "execution_summary",
+    }
+    return {
+        "help_json": help_json_mode,
+        "dry_run_json": dry_run_mode,
+        "run_summary_json": run_summary_mode,
+    }
+def enhance_param_info(
+    param_info: Dict[str, Any], param: click.Parameter
+) -> Dict[str, Any]:
+    """Add dynamic-choice metadata to the info dict of the ``model`` parameter.
+    Only a parameter named ``model`` whose type is a ``ModelChoice`` is
+    enriched; every other parameter's dict is returned untouched.
+    Args:
+        param_info: Info dict for the parameter; modified in place
+        param: The Click parameter the dict describes
+    Returns:
+        The same ``param_info`` dict that was passed in
+    """
+    # Function-scope import to avoid circular imports.
+    try:
+        from .click_options import ModelChoice
+    except ImportError:
+        # Without ModelChoice no parameter can be recognised as the model option.
+        return param_info
+    if param.name != "model" or not isinstance(param.type, ModelChoice):
+        return param_info
+    param_info["dynamic_choices"] = True
+    param_info["choices_source"] = "openai_model_registry"
+    # Registry details are best-effort: any failure is reported as unavailable.
+    try:
+        from openai_model_registry import ModelRegistry
+        registry = ModelRegistry.get_instance()
+        structured_count = len(list(param.type.choices))
+        metadata = {
+            "total_models": len(list(registry.models)),
+            "structured_output_models": structured_count,
+            "registry_path": str(
+                getattr(registry.config, "registry_path", "unknown")
+            ),
+        }
+    except Exception:
+        metadata = {"status": "unavailable"}
+    param_info["registry_metadata"] = metadata
+    return param_info
+def generate_usage_patterns_from_commands(
+    commands: Dict[str, Any],
+) -> Dict[str, str]:
+    """Return example invocations for the commands that are present.
+    Only the ``run`` command has patterns; they are fixed strings rather than
+    being derived from the command definitions.
+    Args:
+        commands: Mapping keyed by command name; only the keys are inspected
+    Returns:
+        Pattern name -> example command line, or an empty dict without ``run``
+    """
+    if "run" not in commands:
+        return {}
+    # Ideally these would be generated by inspecting the actual commands.
+    return {
+        "basic_template": "ostruct run TEMPLATE.j2 SCHEMA.json -V name=value",
+        "file_attachment": "ostruct run TEMPLATE.j2 SCHEMA.json --file ci:data DATA.csv --file fs:docs DOCS.pdf",
+        "mcp_integration": "ostruct run TEMPLATE.j2 SCHEMA.json --mcp-server label@https://server.com/sse",
+        "dry_run": "ostruct run TEMPLATE.j2 SCHEMA.json --dry-run",
+        "json_output": "ostruct run TEMPLATE.j2 SCHEMA.json --dry-run-json",
+    }
+def print_command_help_json(
+    ctx: click.Context, param: click.Parameter, value: Any
+) -> None:
+    """Click callback that prints the current command's help as JSON and exits.
+    Does nothing when the flag value is falsy or Click is parsing resiliently.
+    Args:
+        ctx: Active Click context; source of the info dict and the parameters
+        param: The option that triggered the callback (unused)
+        value: The option's value; output happens only when it is truthy
+    """
+    flag_requested = bool(value) and not ctx.resilient_parsing
+    if not flag_requested:
+        return
+    # Start from Click's own structured description of the context.
+    help_data = ctx.to_info_dict()  # type: ignore[attr-defined]
+    if "command" in help_data and "params" in help_data["command"]:
+        for entry in help_data["command"]["params"]:
+            entry_name = entry.get("name")
+            if not entry_name:
+                continue
+            # Pair the info entry with the first Click parameter of that name.
+            matching = next(
+                (p for p in ctx.command.params if p.name == entry_name),
+                None,
+            )
+            if matching is not None:
+                enhance_param_info(entry, matching)
+    # ostruct-specific metadata is layered on top of Click's data.
+    extra = {
+        "ostruct_version": __version__,
+        "help_type": "single_command",
+        "attachment_system": generate_attachment_system_info(),
+        "json_output_modes": generate_json_output_modes(),
+    }
+    help_data.update(extra)
+    click.echo(json.dumps(help_data, indent=2))
+    ctx.exit(0)
+def print_full_cli_help_json(
+    ctx: click.Context, param: click.Parameter, value: Any
+) -> None:
+    """Click callback that prints help for the group and all subcommands as JSON.
+    Does nothing when the flag value is falsy or Click is parsing resiliently.
+    A subcommand whose info dict cannot be built is represented by its name,
+    its help text and the error message instead of aborting the whole output.
+    Args:
+        ctx: Active Click context of the main group
+        param: The option that triggered the callback (unused)
+        value: The option's value; output happens only when it is truthy
+    """
+    flag_requested = bool(value) and not ctx.resilient_parsing
+    if not flag_requested:
+        return
+    main_help = ctx.to_info_dict()  # type: ignore[attr-defined]
+    commands_help = {}
+    # Commands without a ``commands`` mapping simply contribute no subcommands.
+    for cmd_name, cmd in getattr(ctx.command, "commands", {}).items():
+        try:
+            # An empty argument list with resilient parsing builds a context
+            # for introspection only.
+            sub_ctx = cmd.make_context(
+                cmd_name, [], parent=ctx, resilient_parsing=True
+            )
+            commands_help[cmd_name] = sub_ctx.to_info_dict()
+        except Exception as e:
+            fallback_help = getattr(cmd, "help", None) or getattr(
+                cmd, "short_help", None
+            )
+            commands_help[cmd_name] = {
+                "name": cmd_name,
+                "help": fallback_help,
+                "error": f"Could not generate full help: {str(e)}",
+            }
+    full_help = {
+        "ostruct_version": __version__,
+        "help_type": "full_cli",
+        "main_command": main_help,
+        "commands": commands_help,
+        "usage_patterns": generate_usage_patterns_from_commands(commands_help),
+        "attachment_system": generate_attachment_system_info(),
+        "json_output_modes": generate_json_output_modes(),
+    }
+    click.echo(json.dumps(full_help, indent=2))
+    ctx.exit(0)

ostruct/cli/mcp_integration.py CHANGED Viewed

@@ -7,7 +7,7 @@ with the OpenAI Responses API for enhanced functionality in ostruct.
 import logging
 import re
 import time
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
 from urllib.parse import urlparse
 # Import requests for HTTP functionality (used in production)
@@ -16,6 +16,11 @@ try:
 except ImportError:
     requests = None  # type: ignore[assignment]
+try:
+    import bleach  # type: ignore[import-untyped]
+except ImportError:
+    bleach = None  # type: ignore[assignment]
 if TYPE_CHECKING:
     from .services import ServiceHealth
@@ -161,23 +166,15 @@ class MCPClient:
             if not isinstance(text, str):
                 return text
-            # Remove script tags
-            text = re.sub(
-                r"<script[^>]*>.*?</script>",
-                "",
-                text,
-                flags=re.IGNORECASE | re.DOTALL,
-            )
-            text = re.sub(r"<script[^>]*>", "", text, flags=re.IGNORECASE)
+            if bleach:
+                # Use bleach to strip all HTML tags, attributes, and styles.
+                # This is the safest way to prevent XSS.
+                text = bleach.clean(text, tags=[], attributes={}, strip=True)
-            # Remove javascript: URLs
+            # bleach.clean doesn't handle javascript: URIs that are not in an
+            # href attribute, so we remove them explicitly as a safeguard.
             text = re.sub(r"javascript:", "", text, flags=re.IGNORECASE)
-            # Remove other dangerous patterns
-            text = re.sub(
-                r"on\w+\s*=", "", text, flags=re.IGNORECASE
-            )  # Event handlers
             return text
         def sanitize_dict(data: Any) -> Any:
@@ -193,7 +190,7 @@ class MCPClient:
             else:
                 return data
-        return sanitize_dict(response)  # type: ignore[no-any-return]
+        return cast(Dict[str, Any], sanitize_dict(response))
     def _check_rate_limit(self) -> None:
         """Check and enforce rate limiting."""

ostruct/cli/params.py ADDED Viewed

@@ -0,0 +1,217 @@
+"""Parameter handling and validation for CLI attachment syntax."""
+from typing import Any, Dict, Optional, Set, Tuple, TypedDict, Union
+import click
+# Target mapping with explicit aliases.
+# Keys are the spellings accepted on the command line (looked up after
+# lower-casing); values are the canonical target names. A canonical name maps
+# to itself, so "ci"/"fs" are the only real aliases.
+TARGET_NORMALISE = {
+    "prompt": "prompt",
+    "code-interpreter": "code-interpreter",
+    "ci": "code-interpreter",
+    "file-search": "file-search",
+    "fs": "file-search",
+}
+class AttachmentSpec(TypedDict):
+    """Type definition for attachment specifications.
+    Describes the dict shape produced when an attachment option is parsed.
+    """
+    # Name given to the attachment on the command line.
+    alias: str
+    # Plain path string, or the pair ("@", "filelist.txt") for collect.
+    path: Union[
+        str, Tuple[str, str]
+    ]  # str or ("@", "filelist.txt") for collect
+    # Canonical target names the attachment is routed to.
+    targets: Set[str]
+    # Whether the attachment is processed recursively.
+    recursive: bool
+    # Optional pattern; None when no pattern is set.
+    pattern: Optional[str]
+def normalise_targets(raw: str) -> Set[str]:
+    """Turn a comma-separated target list into a set of canonical target names.
+    Tokens are stripped and lower-cased before being looked up in
+    ``TARGET_NORMALISE``; empty tokens are ignored.
+    Args:
+        raw: Comma-separated string of targets (e.g., "prompt,ci,fs")
+    Returns:
+        Set of canonical target names; ``{"prompt"}`` when ``raw`` holds no tokens
+    Raises:
+        click.BadParameter: If any token is not a known target or alias
+    Examples:
+        "prompt" gives {"prompt"}
+        "ci,fs" gives {"code-interpreter", "file-search"}
+        "" gives {"prompt"}
+    """
+    cleaned = (part.strip().lower() for part in raw.split(","))
+    tokens = [token for token in cleaned if token]
+    if not tokens:
+        # Blank input, or nothing but separators: fall back to the default target.
+        return {"prompt"}
+    unknown = {token for token in tokens if token not in TARGET_NORMALISE}
+    if unknown:
+        valid_targets = ", ".join(sorted(TARGET_NORMALISE.keys()))
+        raise click.BadParameter(
+            f"Unknown target(s): {', '.join(sorted(unknown))}. "
+            f"Valid targets: {valid_targets}"
+        )
+    return {TARGET_NORMALISE[token] for token in tokens}
+def validate_attachment_alias(alias: str) -> str:
+    """Validate an attachment alias and return it without surrounding whitespace.
+    Args:
+        alias: The attachment alias to validate
+    Returns:
+        The alias with leading and trailing whitespace stripped
+    Raises:
+        click.BadParameter: If the alias is empty or blank, contains any
+            whitespace character, or is longer than 64 characters
+    """
+    if not alias or not alias.strip():
+        raise click.BadParameter("Attachment alias cannot be empty")
+    alias = alias.strip()
+    # Reject every kind of interior whitespace (newline, carriage return,
+    # non-breaking space, ...), not only the space and tab characters.
+    if any(ch.isspace() for ch in alias):
+        raise click.BadParameter("Attachment alias cannot contain whitespace")
+    if len(alias) > 64:
+        raise click.BadParameter(
+            "Attachment alias too long (max 64 characters)"
+        )
+    return alias
+class AttachParam(click.ParamType):
+    """Click parameter type that parses ``[targets:]alias path`` attachment pairs.
+    Only the space form is accepted, so the option must deliver a 2-tuple.
+    Examples:
+        --attach data ./file.txt
+        --attach ci:analysis ./data.csv
+        --collect ci,fs:mixed @file-list.txt
+    """
+    name = "attach-spec"
+    def __init__(self, multi: bool = False) -> None:
+        """Create the parameter type.
+        Args:
+            multi: When True, a path starting with ``@`` is parsed as a
+                filelist reference (collect operations)
+        """
+        self.multi = multi
+    def convert(
+        self,
+        value: Any,
+        param: Optional[click.Parameter],
+        ctx: Optional[click.Context],
+    ) -> Dict[str, Any]:
+        """Parse a ``(spec, path)`` pair into an attachment specification dict.
+        Args:
+            value: Parameter value from Click (a 2-tuple for nargs=2)
+            param: Click parameter object
+            ctx: Click context
+        Returns:
+            Dict with the keys ``alias``, ``path``, ``targets``, ``recursive``
+            and ``pattern``
+        Raises:
+            click.BadParameter: If the value is not a 2-tuple, a target is
+                unknown, the alias is invalid, or a filelist path is empty
+        """
+        is_pair = isinstance(value, tuple) and len(value) == 2
+        if not is_pair:
+            self._fail_with_usage_examples(
+                "Attachment must use space form syntax", param, ctx
+            )
+        spec, path = value
+        # A bare drive letter such as "C:" is kept whole as the alias rather
+        # than being split into a target prefix and an empty alias.
+        has_target_prefix = ":" in spec and not (
+            len(spec) == 2 and spec[1] == ":" and spec[0].isalpha()
+        )
+        if has_target_prefix:
+            prefix, _, alias = spec.partition(":")
+        else:
+            prefix, alias = "prompt", spec
+        try:
+            targets = normalise_targets(prefix)
+        except click.BadParameter:
+            # Report the failure again, this time with usage examples attached.
+            self._fail_with_usage_examples(
+                f"Invalid target(s) in '{prefix}'. Use comma-separated valid targets",
+                param,
+                ctx,
+            )
+        try:
+            alias = validate_attachment_alias(alias)
+        except click.BadParameter as e:
+            self._fail_with_usage_examples(str(e), param, ctx)
+        # Collect mode: "@name" becomes the pair ("@", "name").
+        if self.multi and path.startswith("@"):
+            filelist_path = path[1:]
+            if not filelist_path:
+                self._fail_with_usage_examples(
+                    "Filelist path cannot be empty after @", param, ctx
+                )
+            path = ("@", filelist_path)
+        attachment = {
+            "alias": alias,
+            "path": path,
+            "targets": targets,
+            "recursive": False,  # Set by flag processing
+            "pattern": None,  # Set by flag processing
+        }
+        return attachment
+    def _fail_with_usage_examples(
+        self,
+        message: str,
+        param: Optional[click.Parameter],
+        ctx: Optional[click.Context],
+    ) -> None:
+        """Fail the conversion with ``message`` followed by usage examples.
+        Delegates to ``self.fail``; the collect example is listed only when
+        ``multi`` is set.
+        """
+        examples = [
+            "--file data ./file.txt",
+            "--file ci:analysis ./data.csv",
+            "--dir fs:docs ./documentation",
+        ]
+        if self.multi:
+            examples.append("--collect ci,fs:mixed @file-list.txt")
+        rendered = "\n".join(f"  {ex}" for ex in examples)
+        self.fail(f"{message}\n\nExamples:\n" + rendered, param, ctx)

ostruct-cli 0.8.29__py3-none-any.whl → 1.0.1__py3-none-any.whl

ostruct-cli 0.8.29__py3-none-any.whl → 1.0.1__py3-none-any.whl