PyPI - ostruct-cli - Versions diffs - 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl - Mend

ostruct-cli 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

ostruct/cli/cli.py +116 -70
ostruct/cli/errors.py +61 -54
ostruct/cli/model_creation.py +67 -94
ostruct/cli/security/errors.py +1 -1
ostruct/cli/security/normalization.py +1 -1
ostruct/cli/security/security_manager.py +48 -7
ostruct/cli/template_extensions.py +32 -1
ostruct/cli/template_utils.py +175 -16
ostruct/cli/utils.py +3 -1
ostruct/cli/validators.py +6 -2
{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/METADATA +39 -177
{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/RECORD +15 -15
{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/LICENSE +0 -0
{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/WHEEL +0 -0
{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/entry_points.txt +0 -0

ostruct/cli/security/security_manager.py CHANGED Viewed

@@ -39,10 +39,16 @@ class SecurityManager:
     The security model is based on:
     1. A base directory that serves as the root for all file operations
+       (typically set to the current working directory by higher-level functions)
     2. A set of explicitly allowed directories that can be accessed outside the base directory
     3. Special handling for temporary directories that are always allowed
     4. Case-sensitive or case-insensitive path handling based on platform
+    Note:
+        While the SecurityManager class itself requires base_dir to be explicitly provided,
+        higher-level functions in the CLI layer (like validate_security_manager and file_utils)
+        will automatically use the current working directory as the base_dir if none is specified.
     Example:
         >>> sm = SecurityManager("/base/dir")
         >>> sm.add_allowed_directory("/tmp")
@@ -62,7 +68,9 @@ class SecurityManager:
         """Initialize the SecurityManager.
         Args:
-            base_dir: The root directory for file operations.
+            base_dir: The root directory for file operations. While this parameter is required here,
+                     note that higher-level functions in the CLI layer will automatically use the
+                     current working directory if no base_dir is specified.
             allowed_dirs: Additional directories allowed for access.
             allow_temp_paths: Whether to allow temporary directory paths.
             max_symlink_depth: Maximum depth for symlink resolution.
@@ -234,20 +242,53 @@ class SecurityManager:
                     context={"reason": SecurityErrorReasons.SYMLINK_ERROR},
                 ) from e
-        # For non-symlinks, just check if the normalized path is allowed
+        # Check for directory traversal attempts
+        if ".." in str(norm_path):
+            logger.error("Directory traversal attempt detected: %s", path)
+            raise PathSecurityError(
+                "Directory traversal attempt blocked",
+                path=str(path),
+                context={
+                    "reason": SecurityErrorReasons.PATH_TRAVERSAL,
+                    "base_dir": str(self._base_dir),
+                    "allowed_dirs": [str(d) for d in self._allowed_dirs],
+                },
+            )
+        # Check for suspicious Unicode characters
+        if any(
+            c in str(norm_path)
+            for c in [
+                "\u2024",
+                "\u2025",
+                "\u2026",
+                "\u0085",
+                "\u2028",
+                "\u2029",
+            ]
+        ):
+            logger.error("Suspicious Unicode characters detected: %s", path)
+            raise PathSecurityError(
+                "Suspicious characters detected in path",
+                path=str(path),
+                context={
+                    "reason": SecurityErrorReasons.UNSAFE_UNICODE,
+                    "base_dir": str(self._base_dir),
+                    "allowed_dirs": [str(d) for d in self._allowed_dirs],
+                },
+            )
+        # For non-symlinks, check if the normalized path is allowed
         logger.debug("Checking if path is allowed: %s", norm_path)
         if not self.is_path_allowed(norm_path):
             logger.error(
-                "Security violation: Path %s is outside allowed directories (base_dir=%s, allowed_dirs=%s)",
+                "Path outside allowed directories: %s (base_dir=%s, allowed_dirs=%s)",
                 path,
                 self._base_dir,
                 self._allowed_dirs,
             )
             raise PathSecurityError(
-                (
-                    f"Access denied: {os.path.basename(str(path))} is outside "
-                    "base directory and not in allowed directories"
-                ),
+                "Path outside allowed directories",
                 path=str(path),
                 context={
                     "reason": SecurityErrorReasons.PATH_OUTSIDE_ALLOWED,

ostruct/cli/template_extensions.py CHANGED Viewed

@@ -16,6 +16,26 @@ class CommentExtension(Extension):
     1. Contents of comment blocks are completely ignored during parsing
     2. Variables inside comments are not validated or processed
     3. Comments are stripped from the output
+    4. Nested comments are not allowed (will raise a syntax error)
+    Example:
+        Valid usage:
+        ```jinja
+        {% comment %}
+            This is a comment
+            {{ some_var }}  # This variable will be ignored
+        {% endcomment %}
+        ```
+        Invalid usage (will raise error):
+        ```jinja
+        {% comment %}
+            Outer comment
+            {% comment %}  # Error: Nested comments are not allowed
+                Inner comment
+            {% endcomment %}
+        {% endcomment %}
+        ```
     """
     tags = {"comment"}
@@ -23,6 +43,9 @@ class CommentExtension(Extension):
     def parse(self, parser: Parser) -> nodes.Node:
         """Parse a comment block, ignoring its contents.
+        Nested comments are not allowed and will raise a syntax error.
+        This keeps the template syntax simpler and more predictable.
         Args:
             parser: The Jinja2 parser instance
@@ -31,6 +54,7 @@ class CommentExtension(Extension):
         Raises:
             TemplateSyntaxError: If the comment block is not properly closed
+                                or if a nested comment is found
         """
         # Get the line number for error reporting
         lineno = parser.stream.current.lineno
@@ -38,10 +62,17 @@ class CommentExtension(Extension):
         # Skip the opening comment tag
         next(parser.stream)
-        # Skip until we find {% endcomment %}
+        # Skip until we find {% endcomment %}, rejecting nested comments
         while not parser.stream.current.test("name:endcomment"):
             if parser.stream.current.type == "eof":
                 raise parser.fail("Unclosed comment block", lineno)
+            # Explicitly reject nested comments
+            if parser.stream.current.test("name:comment"):
+                raise parser.fail(
+                    "Nested comments are not allowed. Use separate comment blocks instead.",
+                    parser.stream.current.lineno,
+                )
             next(parser.stream)
         # Skip the endcomment tag

ostruct/cli/template_utils.py CHANGED Viewed

@@ -47,30 +47,189 @@ class TemplateMetadataError(TaskTemplateError):
 def validate_json_schema(schema: Dict[str, Any]) -> None:
-    """Validate that a dictionary follows JSON Schema structure.
-    This function checks that the provided dictionary is a valid JSON Schema,
-    following the JSON Schema specification.
+    """Validate a JSON schema.
     Args:
-        schema: Dictionary to validate as a JSON Schema
+        schema: The schema to validate
     Raises:
         SchemaValidationError: If the schema is invalid
     """
     try:
-        # Get the validator class for the schema
-        validator_cls = jsonschema.validators.validator_for(schema)
-        # Check schema itself is valid
-        validator_cls.check_schema(schema)
-        # Create validator instance
-        validator_cls(schema)
-    except jsonschema.exceptions.SchemaError as e:
-        raise SchemaValidationError(f"Invalid JSON Schema: {e}")
+        # 1. Quick structural validation
+        if not isinstance(schema, dict):
+            raise SchemaValidationError(
+                "Invalid JSON Schema: Schema must be a JSON object",
+                context={
+                    "validation_type": "schema",
+                    "found": type(schema).__name__,
+                    "tips": ["Ensure your schema is a valid JSON object"],
+                },
+            )
+        # 2. Extract and validate schema wrapper
+        schema_to_validate = schema.get("schema", schema)
+        if not isinstance(schema_to_validate, dict):
+            raise SchemaValidationError(
+                "Invalid JSON Schema: Inner schema must be a JSON object",
+                context={
+                    "validation_type": "schema",
+                    "found": type(schema_to_validate).__name__,
+                    "tips": [
+                        "If using a schema wrapper, ensure the inner schema is a valid JSON object"
+                    ],
+                },
+            )
+        # 3. Check for circular references with enhanced detection
+        def resolve_ref(ref: str, root: Dict[str, Any]) -> Dict[str, Any]:
+            """Resolve a JSON reference to its target object."""
+            if not ref.startswith("#/"):
+                raise SchemaValidationError(
+                    "Invalid JSON Schema: Only local references are supported",
+                    context={
+                        "validation_type": "schema",
+                        "ref": ref,
+                        "tips": [
+                            "Use only local references (starting with #/)"
+                        ],
+                    },
+                )
+            parts = ref[2:].split("/")
+            current = root
+            for part in parts:
+                if part not in current:
+                    raise SchemaValidationError(
+                        f"Invalid JSON Schema: Reference {ref} not found",
+                        context={
+                            "validation_type": "schema",
+                            "ref": ref,
+                            "tips": [
+                                "Check that all references point to existing definitions"
+                            ],
+                        },
+                    )
+                current = current[part]
+            return current
+        def check_refs(
+            obj: Any,
+            path: List[str],
+            seen_refs: List[str],
+            root: Dict[str, Any],
+        ) -> None:
+            """Check for circular references in the schema."""
+            if isinstance(obj, dict):
+                if "$ref" in obj:
+                    ref = obj["$ref"]
+                    if ref in seen_refs:
+                        raise SchemaValidationError(
+                            "Invalid JSON Schema: Circular reference found",
+                            context={
+                                "validation_type": "schema",
+                                "path": "/".join(path),
+                                "ref": ref,
+                                "found": "circular reference",
+                                "tips": [
+                                    "Remove circular references in your schema",
+                                    "Use unique identifiers instead of nested references",
+                                    "Consider flattening your schema structure",
+                                ],
+                            },
+                        )
+                    # Resolve the reference and check its contents
+                    seen_refs.append(ref)
+                    try:
+                        resolved = resolve_ref(ref, root)
+                        check_refs(resolved, path, seen_refs.copy(), root)
+                    except SchemaValidationError:
+                        raise
+                    except Exception as e:
+                        raise SchemaValidationError(
+                            f"Invalid JSON Schema: Failed to resolve reference {ref}",
+                            context={
+                                "validation_type": "schema",
+                                "path": "/".join(path),
+                                "ref": ref,
+                                "error": str(e),
+                                "tips": [
+                                    "Check that all references are properly formatted"
+                                ],
+                            },
+                        )
+                for key, value in obj.items():
+                    if key != "$ref":  # Skip checking the reference itself
+                        check_refs(value, path + [key], seen_refs.copy(), root)
+            elif isinstance(obj, list):
+                for i, value in enumerate(obj):
+                    check_refs(value, path + [str(i)], seen_refs.copy(), root)
+        check_refs(schema_to_validate, [], [], schema_to_validate)
+        # 4. Check required root properties
+        if "type" not in schema_to_validate:
+            raise SchemaValidationError(
+                "Invalid JSON Schema: Missing required 'type' property",
+                context={
+                    "validation_type": "schema",
+                    "tips": ["Add a 'type' property to your schema root"],
+                },
+            )
+        # 5. Check for required fields not defined in properties
+        if schema_to_validate.get("type") == "object":
+            required_fields = schema_to_validate.get("required", [])
+            properties = schema_to_validate.get("properties", {})
+            missing_fields = [
+                field for field in required_fields if field not in properties
+            ]
+            if missing_fields:
+                raise SchemaValidationError(
+                    "Invalid JSON Schema: Required fields must be defined in properties",
+                    context={
+                        "validation_type": "schema",
+                        "missing_fields": missing_fields,
+                        "tips": [
+                            "Add the following fields to 'properties':",
+                            *[f"  - {field}" for field in missing_fields],
+                            "Or remove them from 'required' if they are not needed",
+                        ],
+                    },
+                )
+        # 6. Validate against JSON Schema meta-schema
+        try:
+            validator = jsonschema.validators.validator_for(schema_to_validate)
+            validator.check_schema(schema_to_validate)
+        except jsonschema.exceptions.SchemaError as e:
+            raise SchemaValidationError(
+                f"Invalid JSON Schema: {str(e)}",
+                context={
+                    "validation_type": "schema",
+                    "path": "/".join(str(p) for p in e.path),
+                    "details": e.message,
+                    "tips": [
+                        "Ensure your schema follows JSON Schema specification",
+                        "Check property types and formats",
+                        "Validate schema structure",
+                    ],
+                },
+            )
+    except SchemaValidationError:
+        raise  # Re-raise SchemaValidationError without wrapping
     except Exception as e:
-        raise SchemaValidationError(f"Schema validation error: {e}")
+        raise SchemaValidationError(
+            f"Invalid JSON Schema: {str(e)}",
+            context={
+                "validation_type": "schema",
+                "error": str(e),
+                "tips": ["Check schema syntax", "Validate JSON structure"],
+            },
+        )
 def validate_response(

ostruct/cli/utils.py CHANGED Viewed

@@ -12,7 +12,7 @@ def parse_mapping(mapping: str) -> Tuple[str, str]:
         mapping: Mapping string in format 'name=value'
     Returns:
-        Tuple of (name, value)
+        Tuple of (name, value) with whitespace stripped from both parts
     Raises:
         ValueError: If mapping format is invalid
@@ -23,6 +23,8 @@ def parse_mapping(mapping: str) -> Tuple[str, str]:
         raise ValueError("Invalid mapping format")
     name, value = mapping.split("=", 1)
+    name = name.strip()
+    value = value.strip()
     if not name:
         raise VariableNameError("Empty name in mapping")
     if not value:

ostruct/cli/validators.py CHANGED Viewed

@@ -49,7 +49,7 @@ def validate_variable(
         value: List of "name=value" strings
     Returns:
-        List of validated (name, value) tuples
+        List of validated (name, value) tuples with whitespace stripped from both parts
     Raises:
         click.BadParameter: If validation fails
@@ -64,6 +64,8 @@ def validate_variable(
                 f"Variable must be in format name=value: {var}"
             )
         name, val = var.split("=", 1)
+        name = name.strip()
+        val = val.strip()
         if not name.isidentifier():
             raise click.BadParameter(f"Invalid variable name: {name}")
         result.append((name, val))
@@ -81,7 +83,7 @@ def validate_json_variable(
         value: List of "name=json_string" values
     Returns:
-        List of validated (name, parsed_json) tuples
+        List of validated (name, parsed_json) tuples with whitespace stripped from name
     Raises:
         click.BadParameter: If validation fails
@@ -96,6 +98,8 @@ def validate_json_variable(
                 f'JSON variable must be in format name=\'{"json":"value"}\': {var}'
             )
         name, json_str = var.split("=", 1)
+        name = name.strip()
+        json_str = json_str.strip()
         if not name.isidentifier():
             raise VariableNameError(f"Invalid variable name: {name}")
         try:

{ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ostruct-cli
-Version: 0.6.1
+Version: 0.6.2
 Summary: CLI for OpenAI Structured Output
 Author: Yaniv Golan
 Author-email: yaniv@golan.name
@@ -33,7 +33,9 @@ Description-Content-Type: text/markdown
 [![CI](https://github.com/yaniv-golan/ostruct/actions/workflows/ci.yml/badge.svg)](https://github.com/yaniv-golan/ostruct/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-Command-line interface for working with OpenAI models and structured output, powered by the [openai-structured](https://github.com/yaniv-golan/openai-structured) library.
+ostruct transforms unstructured inputs into structured, usable JSON output using OpenAI APIs.
+ostruct will process a set of plain text files (data, source code, CSV, etc.), input variables, a dynamic prompt template, and a JSON schema specifying the desired output format, and will produce the result in JSON format.
 ## Features
@@ -43,6 +45,10 @@ Command-line interface for working with OpenAI models and structured output, pow
 - Streaming support for real-time output
 - Secure handling of sensitive data
+## Requirements
+- Python 3.10 or higher
 ## Installation
 ### For Users
@@ -209,196 +215,52 @@ The command will output:
 }
 ```
-### Example 3: Processing Multiple Files
+## System Prompt Handling
-1. Create a template file `extract_from_profiles.j2`:
+ostruct-cli provides three ways to specify a system prompt, with a clear precedence order:
-```jinja
-Extract information about the people from this data:
+1. Command-line option (`--sys-prompt` or `--sys-file`):
-{% for profile in profiles %}
-== {{ profile.name }}
+   ```bash
+   # Direct string
+   ostruct run template.j2 schema.json --sys-prompt "You are an expert analyst"
-{{ profile.content }}
+   # From file
+   ostruct run template.j2 schema.json --sys-file system_prompt.txt
+   ```
-{% endfor %}
-```
+2. Template frontmatter:
-2. Use the same schema file `schema.json` as above, but updated for multiple people:
+   ```jinja
+   ---
+   system_prompt: You are an expert analyst
+   ---
+   Extract information from: {{ text }}
+   ```
-```json
-{
-  "type": "object",
-  "properties": {
-    "people": {
-      "type": "array",
-      "items": {
-        "type": "object",
-        "properties": {
-          "name": {
-            "type": "string",
-            "description": "The person's full name"
-          },
-          "age": {
-            "type": "integer",
-            "description": "The person's age"
-          },
-          "occupation": {
-            "type": "string",
-            "description": "The person's job or profession"
-          }
-        },
-        "required": ["name", "age", "occupation"],
-        "additionalProperties": false
-      }
-    }
-  },
-  "required": ["people"],
-  "additionalProperties": false
-}
-```
+3. Default system prompt (built into the CLI)
-3. Run the CLI:
+### Precedence Rules
-```bash
-# Basic usage
-ostruct run extract_from_profiles.j2 schema.json -p profiles "profiles/*.txt"
+When multiple system prompts are provided, they are resolved in this order:
-# With advanced options
-ostruct run extract_from_profiles.j2 schema.json \
-  -p profiles "profiles/*.txt" \
-  --model gpt-4o \
-  --sys-prompt "Extract precise information about the person" \
-  --temperature 0.5
-```
+1. Command-line options take highest precedence:
+   - If both `--sys-prompt` and `--sys-file` are provided, `--sys-prompt` wins
+   - Use `--ignore-task-sysprompt` to ignore template frontmatter
-The command will output:
+2. Template frontmatter is used if:
+   - No command-line options are provided
+   - `--ignore-task-sysprompt` is not set
-```json
-{
-  "people": [
-    {
-      "name": "John Smith",
-      "age": 35,
-      "occupation": "software engineer"
-    },
-    {
-      "name": "Jane Doe",
-      "age": 28,
-      "occupation": "data scientist"
-    }
-  ]
-}
-```
-### About Template Files
-Template files use the `.j2` extension to indicate they contain Jinja2 template syntax. This convention:
-- Enables proper syntax highlighting in most editors
-- Makes it clear the file contains template logic
-- Follows industry standards for Jinja2 templates
+3. Default system prompt is used only if no other prompts are provided
-## CLI Options
-The CLI revolves around a single subcommand called `run`. Basic usage:
+Example combining multiple sources:
 ```bash
-ostruct run <TASK_TEMPLATE> <SCHEMA_FILE> [OPTIONS]
-```
-Common options include:
-- File & Directory Inputs:
-  - `-f <NAME> <PATH>`: Map a single file to a variable name
-  - `-d <NAME> <DIR>`: Map a directory to a variable name
-  - `-p <NAME> <PATTERN>`: Map files matching a glob pattern to a variable name
-  - `-R, --recursive`: Enable recursive directory/pattern scanning
+# Command-line prompt will override template frontmatter
+ostruct run template.j2 schema.json --sys-prompt "Override prompt"
-- Variables:
-  - `-V name=value`: Define a simple string variable
-  - `-J name='{"key":"value"}'`: Define a JSON variable
-- Model Parameters:
-  - `-m, --model MODEL`: Select the OpenAI model (supported: gpt-4o, o1, o3-mini)
-  - `--temperature FLOAT`: Set sampling temperature (0.0-2.0)
-  - `--max-output-tokens INT`: Set maximum output tokens
-  - `--top-p FLOAT`: Set top-p sampling parameter (0.0-1.0)
-  - `--frequency-penalty FLOAT`: Adjust frequency penalty (-2.0-2.0)
-  - `--presence-penalty FLOAT`: Adjust presence penalty (-2.0-2.0)
-  - `--reasoning-effort [low|medium|high]`: Control model reasoning effort
-- System Prompt:
-  - `--sys-prompt TEXT`: Provide system prompt directly
-  - `--sys-file FILE`: Load system prompt from file
-  - `--ignore-task-sysprompt`: Ignore system prompt in template frontmatter
-- API Configuration:
-  - `--api-key KEY`: OpenAI API key (defaults to OPENAI_API_KEY env var)
-  - `--timeout FLOAT`: API timeout in seconds (default: 60.0)
-## Debug Options
-- `--debug-validation`: Show detailed schema validation debugging
-- `--debug-openai-stream`: Enable low-level debug output for OpenAI streaming
-- `--progress-level {none,basic,detailed}`: Set progress reporting level
-  - `none`: No progress indicators
-  - `basic`: Show key operation steps (default)
-  - `detailed`: Show all steps with additional info
-- `--show-model-schema`: Display the generated Pydantic model schema
-- `--verbose`: Enable verbose logging
-- `--dry-run`: Validate and render template without making API calls
-- `--no-progress`: Disable all progress indicators
-All debug and error logs are written to:
-- `~/.ostruct/logs/ostruct.log`: General application logs
-- `~/.ostruct/logs/openai_stream.log`: OpenAI streaming operations logs
-For more detailed documentation and examples, visit our [documentation](https://ostruct.readthedocs.io/).
-## Development
-To contribute or report issues, please visit our [GitHub repository](https://github.com/yaniv-golan/ostruct).
-## Development Setup
-1. Clone the repository:
-```bash
-git clone https://github.com/yanivgolan/ostruct.git
-cd ostruct
-```
-2. Install Poetry if you haven't already:
-```bash
-curl -sSL https://install.python-poetry.org | python3 -
-```
-3. Install dependencies:
-```bash
-poetry install
-```
-4. Install openai-structured in editable mode:
-```bash
-poetry add --editable ../openai-structured  # Adjust path as needed
+# Ignore template frontmatter and use default
+ostruct run template.j2 schema.json --ignore-task-sysprompt
 ```
-5. Run tests:
-```bash
-poetry run pytest
-```
-## Contributing
-Contributions are welcome! Please feel free to submit a Pull Request.
-## License
-This project is licensed under the MIT License - see the LICENSE file for details.

ostruct-cli 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl

ostruct-cli 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl