PyPI - ostruct-cli - Versions diffs - 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

ostruct-cli 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

ostruct/cli/cli.py +119 -374
ostruct/cli/errors.py +63 -18
ostruct/cli/model_creation.py +507 -0
ostruct/cli/schema_validation.py +213 -0
{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/METADATA +211 -32
{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/RECORD +9 -7
{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/WHEEL +1 -1
{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/LICENSE +0 -0
{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/entry_points.txt +0 -0

ostruct/cli/schema_validation.py ADDED Viewed

@@ -0,0 +1,213 @@
+from enum import IntEnum
+from typing import Any, Dict, List, Optional
+from .errors import SchemaValidationError
+class SchemaLimits(IntEnum):
+    """Limits for OpenAI schema validation.
+    Integer thresholds that ``validate_openai_schema`` compares against; a
+    schema is rejected only when a measured value is strictly greater than
+    the corresponding limit.
+    """
+    # Maximum length of the property/"items" path leading to a schema node.
+    MAX_NESTING_DEPTH = 5
+    # Maximum number of entries in one object's "properties" mapping.
+    MAX_PROPERTIES = 100
+    # Maximum number of values in a single "enum" list.
+    MAX_ENUM_VALUES = 500
+    # Enums with more values than this also get the total-length check below.
+    MAX_ENUM_VALUES_CHAR_CHECK = 250
+    # Maximum summed len(str(value)) over the values of such a large enum.
+    MAX_ENUM_TOTAL_CHARS = 7500
+# Validates the schema against OpenAI's structured output requirements.
+# https://platform.openai.com/docs/guides/structured-outputs
+def validate_openai_schema(
+    schema: Dict[str, Any], path: Optional[List[str]] = None
+) -> None:
+    """Validate schema against OpenAI's structured output requirements.
+    Args:
+        schema: The JSON schema to validate
+        path: Current path in schema for nested validation
+    Raises:
+        SchemaValidationError: If schema violates any OpenAI requirements
+    """
+    path = path or []
+    current_path = "/".join(path) or "<root>"
+    # Root level validation
+    if not path:  # Only check at root
+        if schema.get("type") != "object":
+            raise SchemaValidationError(
+                "Root schema must be type 'object'",
+                context={
+                    "path": current_path,
+                    "found": schema.get("type"),
+                    "tips": [
+                        "The root of your schema must be an object type",
+                        "If you have an array, wrap it in an object property:",
+                        {
+                            "type": "object",
+                            "properties": {
+                                "items": {
+                                    "type": "array",
+                                    "items": "...your array schema...",
+                                }
+                            },
+                            "required": ["items"],
+                            "additionalProperties": False,
+                        },
+                    ],
+                },
+            )
+        if schema.get("additionalProperties") is not False:
+            raise SchemaValidationError(
+                "Root schema must set additionalProperties: false",
+                context={
+                    "path": current_path,
+                    "tips": [
+                        "Add 'additionalProperties: false' to your root schema",
+                        "This ensures only defined properties are allowed",
+                    ],
+                },
+            )
+        # Validate required properties
+        root_properties = set(schema.get("properties", {}).keys())
+        required = set(schema.get("required", []))
+        if not root_properties:
+            raise SchemaValidationError(
+                "Root schema must define at least one property",
+                context={
+                    "path": current_path,
+                    "tips": [
+                        "Add properties to your schema",
+                        "Each property should define its type and any constraints",
+                    ],
+                },
+            )
+        if required != root_properties:
+            missing = root_properties - required
+            extra = required - root_properties
+            tips = []
+            if missing:
+                tips.append(
+                    f"Add these properties to 'required': {list(missing)}"
+                )
+            if extra:
+                tips.append(
+                    f"Remove these from 'required' as they aren't defined: {list(extra)}"
+                )
+            raise SchemaValidationError(
+                "All properties must be required in root schema",
+                context={
+                    "path": current_path,
+                    "missing_required": list(missing),
+                    "extra_required": list(extra),
+                    "tips": tips,
+                },
+            )
+    # Structural validation
+    if len(path) > SchemaLimits.MAX_NESTING_DEPTH:
+        raise SchemaValidationError(
+            f"Schema exceeds maximum nesting depth of {SchemaLimits.MAX_NESTING_DEPTH} levels",
+            context={
+                "path": current_path,
+                "tips": [
+                    "Flatten your schema structure",
+                    "Consider combining nested objects",
+                    "Move complex structures to root level properties",
+                ],
+            },
+        )
+    # Property count validation
+    if schema.get("type") == "object":
+        obj_properties: Dict[str, Any] = schema.get("properties", {})
+        if len(obj_properties) > SchemaLimits.MAX_PROPERTIES:
+            raise SchemaValidationError(
+                f"Schema exceeds maximum of {SchemaLimits.MAX_PROPERTIES} properties",
+                context={
+                    "path": current_path,
+                    "count": len(obj_properties),
+                    "tips": [
+                        "Reduce the number of properties",
+                        "Consider grouping related properties into sub-objects",
+                        "Remove any unused or optional properties",
+                    ],
+                },
+            )
+        # Validate each property
+        for prop_name, prop_schema in obj_properties.items():
+            validate_openai_schema(prop_schema, path + [prop_name])
+    # Array validation
+    elif schema.get("type") == "array":
+        if "items" in schema:
+            validate_openai_schema(schema["items"], path + ["items"])
+    # Enum validation
+    if "enum" in schema:
+        enum_values = schema["enum"]
+        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES:
+            raise SchemaValidationError(
+                f"Enum exceeds maximum of {SchemaLimits.MAX_ENUM_VALUES} values",
+                context={
+                    "path": current_path,
+                    "count": len(enum_values),
+                    "tips": [
+                        "Reduce the number of enum values",
+                        "Consider using a different type or structure",
+                        "Split into multiple smaller enums if possible",
+                    ],
+                },
+            )
+        # Check enum string length for large enums
+        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES_CHAR_CHECK:
+            total_chars = sum(len(str(v)) for v in enum_values)
+            if total_chars > SchemaLimits.MAX_ENUM_TOTAL_CHARS:
+                raise SchemaValidationError(
+                    f"Enum values exceed maximum total length of {SchemaLimits.MAX_ENUM_TOTAL_CHARS} characters",
+                    context={
+                        "path": current_path,
+                        "total_chars": total_chars,
+                        "tips": [
+                            "Reduce the length of enum values",
+                            "Consider using shorter identifiers",
+                            "Split into multiple smaller enums",
+                        ],
+                    },
+                )
+    # Prohibited keywords by type
+    type_prohibited = {
+        "object": ["patternProperties", "minProperties"],
+        "array": ["minItems", "maxItems", "uniqueItems"],
+        "string": ["pattern", "format", "minLength", "maxLength"],
+        "number": ["minimum", "maximum", "multipleOf"],
+        "integer": ["exclusiveMinimum", "exclusiveMaximum"],
+    }
+    schema_type = schema.get("type")
+    if schema_type in type_prohibited:
+        prohibited = set(type_prohibited[schema_type])
+        used_prohibited = prohibited.intersection(schema.keys())
+        if used_prohibited:
+            raise SchemaValidationError(
+                f"Schema uses prohibited keywords for type '{schema_type}'",
+                context={
+                    "path": current_path,
+                    "type": schema_type,
+                    "prohibited_used": list(used_prohibited),
+                    "tips": [
+                        f"Remove these prohibited keywords: {list(used_prohibited)}",
+                        "OpenAI structured output has limited keyword support",
+                        "Use only basic type constraints",
+                    ],
+                },
+            )

{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ostruct-cli
-Version: 0.5.0
+Version: 0.6.1
 Summary: CLI for OpenAI Structured Output
 Author: Yaniv Golan
 Author-email: yaniv@golan.name
@@ -19,7 +19,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: openai-structured (>=2.0.0,<3.0.0)
 Requires-Dist: pydantic (>=2.6.3,<3.0.0)
 Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
-Requires-Dist: tiktoken (>=0.9.0,<0.10.0)
+Requires-Dist: tiktoken (==0.9.0)
 Requires-Dist: tomli (>=2.0.1,<3.0.0) ; python_version < "3.11"
 Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
 Requires-Dist: werkzeug (>=3.1.3,<4.0.0)
@@ -37,19 +37,62 @@ Command-line interface for working with OpenAI models and structured output, pow
 ## Features
-- Generate structured output from natural language using OpenAI models
-- Rich template system for defining output schemas
+- Generate structured JSON output from natural language using OpenAI models and a JSON schema
+- Rich template system for defining prompts (Jinja2-based)
 - Automatic token counting and context window management
 - Streaming support for real-time output
-- Caching system for cost optimization
 - Secure handling of sensitive data
 ## Installation
+### For Users
+To install the latest stable version from PyPI:
 ```bash
 pip install ostruct-cli
 ```
+### For Developers
+If you plan to contribute to the project, see the [Development Setup](#development-setup) section below for instructions on setting up the development environment with Poetry.
+## Shell Completion
+ostruct-cli supports shell completion for Bash, Zsh, and Fish shells. To enable it:
+### Bash
+Add this to your `~/.bashrc`:
+```bash
+eval "$(_OSTRUCT_COMPLETE=bash_source ostruct)"
+```
+### Zsh
+Add this to your `~/.zshrc`:
+```bash
+eval "$(_OSTRUCT_COMPLETE=zsh_source ostruct)"
+```
+### Fish
+Add this to your `~/.config/fish/completions/ostruct.fish`:
+```fish
+eval (env _OSTRUCT_COMPLETE=fish_source ostruct)
+```
+After adding the appropriate line, restart your shell or source the configuration file.
+Shell completion will help you with:
+- Command options and their arguments
+- File paths for template and schema files
+- Directory paths for `-d` and `--base-dir` options
+- And more!
 ## Quick Start
 1. Set your OpenAI API key:
@@ -58,57 +101,193 @@ pip install ostruct-cli
 export OPENAI_API_KEY=your-api-key
 ```
-2. Create a task template file `task.j2`:
+### Example 1: Using stdin (Simplest)
-```
-Extract information about the person: {{ stdin }}
+1. Create a template file `extract_person.j2`:
+```jinja
+Extract information about the person from this text: {{ stdin }}
 ```
-3. Create a schema file `schema.json`:
+2. Create a schema file `schema.json`:
 ```json
 {
   "type": "object",
   "properties": {
-    "name": {
-      "type": "string",
-      "description": "The person's full name"
-    },
-    "age": {
-      "type": "integer",
-      "description": "The person's age"
-    },
-    "occupation": {
-      "type": "string",
-      "description": "The person's job or profession"
+    "person": {
+      "type": "object",
+      "properties": {
+        "name": {
+          "type": "string",
+          "description": "The person's full name"
+        },
+        "age": {
+          "type": "integer",
+          "description": "The person's age"
+        },
+        "occupation": {
+          "type": "string",
+          "description": "The person's job or profession"
+        }
+      },
+      "required": ["name", "age", "occupation"],
+      "additionalProperties": false
     }
   },
-  "required": ["name", "age", "occupation"]
+  "required": ["person"],
+  "additionalProperties": false
+}
+```
+3. Run the CLI:
+```bash
+# Basic usage
+echo "John Smith is a 35-year-old software engineer" | ostruct run extract_person.j2 schema.json
+# For longer text using heredoc
+cat << EOF | ostruct run extract_person.j2 schema.json
+John Smith is a 35-year-old software engineer
+working at Tech Corp. He has been programming
+for over 10 years.
+EOF
+# With advanced options
+echo "John Smith is a 35-year-old software engineer" | \
+  ostruct run extract_person.j2 schema.json \
+  --model gpt-4o \
+  --sys-prompt "Extract precise information about the person" \
+  --temperature 0.7
+```
+The command will output:
+```json
+{
+  "person": {
+    "name": "John Smith",
+    "age": 35,
+    "occupation": "software engineer"
+  }
 }
 ```
-4. Run the CLI:
+### Example 2: Processing a Single File
+1. Create a template file `extract_from_file.j2`:
+```jinja
+Extract information about the person from this text: {{ text.content }}
+```
+2. Use the same schema file `schema.json` as above.
+3. Run the CLI:
 ```bash
-ostruct run task.j2 schema.json
+# Basic usage
+ostruct run extract_from_file.j2 schema.json -f text input.txt
+# With advanced options
+ostruct run extract_from_file.j2 schema.json \
+  -f text input.txt \
+  --model gpt-4o \
+  --max-output-tokens 1000 \
+  --temperature 0.7
 ```
-Or with more options:
+The command will output:
+```json
+{
+  "person": {
+    "name": "John Smith",
+    "age": 35,
+    "occupation": "software engineer"
+  }
+}
+```
+### Example 3: Processing Multiple Files
+1. Create a template file `extract_from_profiles.j2`:
+```jinja
+Extract information about the people from this data:
+{% for profile in profiles %}
+== {{ profile.name }}
+{{ profile.content }}
+{% endfor %}
+```
+2. Use the same schema file `schema.json` as above, but updated for multiple people:
+```json
+{
+  "type": "object",
+  "properties": {
+    "people": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "The person's full name"
+          },
+          "age": {
+            "type": "integer",
+            "description": "The person's age"
+          },
+          "occupation": {
+            "type": "string",
+            "description": "The person's job or profession"
+          }
+        },
+        "required": ["name", "age", "occupation"],
+        "additionalProperties": false
+      }
+    }
+  },
+  "required": ["people"],
+  "additionalProperties": false
+}
+```
+3. Run the CLI:
 ```bash
-ostruct run task.j2 schema.json \
-  -f content input.txt \
-  -m gpt-4o \
-  --sys-prompt "You are an expert content analyzer"
+# Basic usage
+ostruct run extract_from_profiles.j2 schema.json -p profiles "profiles/*.txt"
+# With advanced options
+ostruct run extract_from_profiles.j2 schema.json \
+  -p profiles "profiles/*.txt" \
+  --model gpt-4o \
+  --sys-prompt "Extract precise information about the person" \
+  --temperature 0.5
 ```
-Output:
+The command will output:
 ```json
 {
-  "name": "John Smith",
-  "age": 35,
-  "occupation": "software engineer"
+  "people": [
+    {
+      "name": "John Smith",
+      "age": 35,
+      "occupation": "software engineer"
+    },
+    {
+      "name": "Jane Doe",
+      "age": 28,
+      "occupation": "data scientist"
+    }
+  ]
 }
 ```

{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/RECORD RENAMED Viewed

@@ -2,15 +2,17 @@ ostruct/__init__.py,sha256=X6zo6V7ZNMv731Wi388aTVQngD1410ExGwGx4J6lpyo,187
 ostruct/cli/__init__.py,sha256=sYHKT6o1kFy1acbXejzAvVm8Cy8U91Yf1l4DlzquHKg,409
 ostruct/cli/base_errors.py,sha256=S1cQxoiALbXKPxzgLo6XdSWpzPRb7RKz0QARmu9Zt4g,5987
 ostruct/cli/cache_manager.py,sha256=ej3KrRfkKKZ_lEp2JswjbJ5bW2ncsvna9NeJu81cqqs,5192
-ostruct/cli/cli.py,sha256=R9k4eHpREmvQJb-JLY1VRiWZJO8fJcer1QgnaDX0RrY,74011
+ostruct/cli/cli.py,sha256=lagB4j8G1hg2NmAYvWEarA24qYuY2w-cuRWiqUzoWik,65105
 ostruct/cli/click_options.py,sha256=WbRJdB9sO63ChN3fnCP7XWs73DHKl0C1ervfwL11am0,11371
-ostruct/cli/errors.py,sha256=Muc4PygxON7M4bdZJ7-apztK9MrF252PXLPVNEogUv0,13322
+ostruct/cli/errors.py,sha256=zJdJ-AyzjCE8glVKbJGAcB-Mz1J1SlzTDJDmhqAVFYc,14930
 ostruct/cli/exit_codes.py,sha256=uNjvQeUGwU1mlUJYIDrExAn7YlwOXZo603yLAwpqIwk,338
 ostruct/cli/file_info.py,sha256=ilpT8IuckfhadLF1QQAPLXJp7p8kVpffDEEJ2erHPZU,14485
 ostruct/cli/file_list.py,sha256=jLuCd1ardoAXX8FNwPgIqEM-ixzr1xP5ZSqXo2lmrj0,11270
 ostruct/cli/file_utils.py,sha256=J3-6fbEGQ7KD_bU81pAxueHLv9XV0X7f8FSMt_0AJGQ,22537
+ostruct/cli/model_creation.py,sha256=TmqJVdnZOYtTctNihOlxWIbyAfX-zfxehP9rp2t6P2c,17586
 ostruct/cli/path_utils.py,sha256=j44q1OoLkqMErgK-qEuhuIZ1VyzqRIvNgxR1et9PoXA,4813
 ostruct/cli/progress.py,sha256=rj9nVEco5UeZORMbzd7mFJpFGJjbH9KbBFh5oTE5Anw,3415
+ostruct/cli/schema_validation.py,sha256=ohEuxJ0KF93qphj0JSZDnrxDn0C2ZU37g-U2JY03onM,8154
 ostruct/cli/security/__init__.py,sha256=CQpkCgTFYlA1p6atpQeNgIKtE4LZGUKt4EbytbGKpCs,846
 ostruct/cli/security/allowed_checker.py,sha256=N5UXlpjdj5zAbKk-lRDlHiHV3KtQHtJNhtZI_qGB4zw,1638
 ostruct/cli/security/base.py,sha256=q9YUdHEj2eg5w8GEw5403E9OQKIjZbEiaWsvYFnCGLw,1359
@@ -35,8 +37,8 @@ ostruct/cli/token_utils.py,sha256=r4KPEO3Sec18Q6mU0aClK6XGShvusgUggXEQgEPPlaA,13
 ostruct/cli/utils.py,sha256=1UCl4rHjBWKR5EKugvlVGHiHjO3XXmqvkgeAUSyIPDU,831
 ostruct/cli/validators.py,sha256=BYFZeebCPZObTUjO1TaAMpsD6h7ROkYAFn9C7uf1Q68,2992
 ostruct/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ostruct_cli-0.5.0.dist-info/LICENSE,sha256=QUOY6QCYVxAiH8vdrUTDqe3i9hQ5bcNczppDSVpLTjk,1068
-ostruct_cli-0.5.0.dist-info/METADATA,sha256=1SaOOVJvTKEqaheLw2MZzdwtinTgw2x_ms3xerwuCKA,6533
-ostruct_cli-0.5.0.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
-ostruct_cli-0.5.0.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
-ostruct_cli-0.5.0.dist-info/RECORD,,
+ostruct_cli-0.6.1.dist-info/LICENSE,sha256=QUOY6QCYVxAiH8vdrUTDqe3i9hQ5bcNczppDSVpLTjk,1068
+ostruct_cli-0.6.1.dist-info/METADATA,sha256=2D0_QCNb3xN2Y_K1pMB5WmZBcU8KkN2rqS9qwZMa-pc,10426
+ostruct_cli-0.6.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+ostruct_cli-0.6.1.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
+ostruct_cli-0.6.1.dist-info/RECORD,,

{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.0
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any

{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

ostruct-cli 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

ostruct-cli 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl