ostruct-cli 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/cli.py +70 -62
- ostruct/cli/errors.py +63 -18
- ostruct/cli/schema_validation.py +213 -0
- {ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.0.dist-info}/METADATA +211 -32
- {ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.0.dist-info}/RECORD +8 -7
- {ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.0.dist-info}/WHEEL +1 -1
- {ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.0.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.5.0.dist-info → ostruct_cli-0.6.0.dist-info}/entry_points.txt +0 -0
ostruct/cli/cli.py
CHANGED
@@ -106,6 +106,9 @@ class CLIParams(TypedDict, total=False):
|
|
106
106
|
dir: List[
|
107
107
|
Tuple[str, str]
|
108
108
|
] # List of (name, dir) tuples from Click's nargs=2
|
109
|
+
patterns: List[
|
110
|
+
Tuple[str, str]
|
111
|
+
] # List of (name, pattern) tuples from Click's nargs=2
|
109
112
|
allowed_dirs: List[str]
|
110
113
|
base_dir: str
|
111
114
|
allowed_dir_file: Optional[str]
|
@@ -796,7 +799,7 @@ def validate_schema_file(
|
|
796
799
|
if not isinstance(schema, dict):
|
797
800
|
msg = f"Schema in {path} must be a JSON object"
|
798
801
|
logger.error(msg)
|
799
|
-
raise SchemaValidationError(msg,
|
802
|
+
raise SchemaValidationError(msg, context={"path": path})
|
800
803
|
|
801
804
|
# Validate schema structure
|
802
805
|
if "schema" in schema:
|
@@ -806,7 +809,7 @@ def validate_schema_file(
|
|
806
809
|
if not isinstance(inner_schema, dict):
|
807
810
|
msg = f"Inner schema in {path} must be a JSON object"
|
808
811
|
logger.error(msg)
|
809
|
-
raise SchemaValidationError(msg,
|
812
|
+
raise SchemaValidationError(msg, context={"path": path})
|
810
813
|
if verbose:
|
811
814
|
logger.debug("Inner schema validated successfully")
|
812
815
|
logger.debug(
|
@@ -821,7 +824,7 @@ def validate_schema_file(
|
|
821
824
|
if "type" not in schema.get("schema", schema):
|
822
825
|
msg = f"Schema in {path} must specify a type"
|
823
826
|
logger.error(msg)
|
824
|
-
raise SchemaValidationError(msg,
|
827
|
+
raise SchemaValidationError(msg, context={"path": path})
|
825
828
|
|
826
829
|
# Return the full schema including wrapper
|
827
830
|
return schema
|
@@ -845,20 +848,22 @@ def collect_template_files(
|
|
845
848
|
ValueError: If file mappings are invalid or files cannot be accessed
|
846
849
|
"""
|
847
850
|
try:
|
848
|
-
# Get files and
|
851
|
+
# Get files, directories, and patterns from args - they are already tuples from Click's nargs=2
|
849
852
|
files = list(
|
850
853
|
args.get("files", [])
|
851
854
|
) # List of (name, path) tuples from Click
|
852
855
|
dirs = args.get("dir", []) # List of (name, dir) tuples from Click
|
856
|
+
patterns = args.get(
|
857
|
+
"patterns", []
|
858
|
+
) # List of (name, pattern) tuples from Click
|
853
859
|
|
854
|
-
# Collect files from directories
|
860
|
+
# Collect files from directories and patterns
|
855
861
|
dir_files = collect_files(
|
856
|
-
file_mappings=cast(
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
), # Cast to correct type
|
862
|
+
file_mappings=cast(List[Tuple[str, Union[str, Path]]], files),
|
863
|
+
dir_mappings=cast(List[Tuple[str, Union[str, Path]]], dirs),
|
864
|
+
pattern_mappings=cast(
|
865
|
+
List[Tuple[str, Union[str, Path]]], patterns
|
866
|
+
),
|
862
867
|
dir_recursive=args.get("recursive", False),
|
863
868
|
security_manager=security_manager,
|
864
869
|
)
|
@@ -984,22 +989,7 @@ def create_template_context(
|
|
984
989
|
security_manager: Optional[SecurityManager] = None,
|
985
990
|
stdin_content: Optional[str] = None,
|
986
991
|
) -> Dict[str, Any]:
|
987
|
-
"""Create template context from
|
988
|
-
|
989
|
-
Args:
|
990
|
-
files: Optional dictionary mapping names to FileInfoList objects
|
991
|
-
variables: Optional dictionary of simple string variables
|
992
|
-
json_variables: Optional dictionary of JSON variables
|
993
|
-
security_manager: Optional security manager for path validation
|
994
|
-
stdin_content: Optional content to use for stdin
|
995
|
-
|
996
|
-
Returns:
|
997
|
-
Template context dictionary
|
998
|
-
|
999
|
-
Raises:
|
1000
|
-
PathSecurityError: If any file paths violate security constraints
|
1001
|
-
VariableError: If variable mappings are invalid
|
1002
|
-
"""
|
992
|
+
"""Create template context from files and variables."""
|
1003
993
|
context: Dict[str, Any] = {}
|
1004
994
|
|
1005
995
|
# Add file variables
|
@@ -1413,11 +1403,33 @@ async def stream_structured_output(
|
|
1413
1403
|
):
|
1414
1404
|
yield chunk
|
1415
1405
|
|
1406
|
+
except APIResponseError as e:
|
1407
|
+
if "Invalid schema for response_format" in str(
|
1408
|
+
e
|
1409
|
+
) and 'type: "array"' in str(e):
|
1410
|
+
error_msg = (
|
1411
|
+
"OpenAI API Schema Error: The schema must have a root type of 'object', not 'array'. "
|
1412
|
+
"To fix this:\n"
|
1413
|
+
"1. Wrap your array in an object property, e.g.:\n"
|
1414
|
+
" {\n"
|
1415
|
+
' "type": "object",\n'
|
1416
|
+
' "properties": {\n'
|
1417
|
+
' "items": {\n'
|
1418
|
+
' "type": "array",\n'
|
1419
|
+
' "items": { ... your array items schema ... }\n'
|
1420
|
+
" }\n"
|
1421
|
+
" }\n"
|
1422
|
+
" }\n"
|
1423
|
+
"2. Make sure to update your template to handle the wrapper object."
|
1424
|
+
)
|
1425
|
+
logger.error(error_msg)
|
1426
|
+
raise InvalidResponseFormatError(error_msg)
|
1427
|
+
logger.error(f"API error: {e}")
|
1428
|
+
raise
|
1416
1429
|
except (
|
1417
1430
|
StreamInterruptedError,
|
1418
1431
|
StreamBufferError,
|
1419
1432
|
StreamParseError,
|
1420
|
-
APIResponseError,
|
1421
1433
|
EmptyResponseError,
|
1422
1434
|
InvalidResponseFormatError,
|
1423
1435
|
) as e:
|
@@ -1763,8 +1775,7 @@ async def execute_model(
|
|
1763
1775
|
user_prompt=user_prompt,
|
1764
1776
|
output_schema=output_model,
|
1765
1777
|
output_file=args.get("output_file"),
|
1766
|
-
|
1767
|
-
on_log=log_callback, # Pass logging callback separately
|
1778
|
+
on_log=log_callback,
|
1768
1779
|
):
|
1769
1780
|
output_buffer.append(response)
|
1770
1781
|
|
@@ -1949,15 +1960,13 @@ def create_dynamic_model(
|
|
1949
1960
|
|
1950
1961
|
Raises:
|
1951
1962
|
ModelValidationError: If the schema is invalid
|
1963
|
+
SchemaValidationError: If the schema violates OpenAI requirements
|
1952
1964
|
"""
|
1953
1965
|
if debug_validation:
|
1954
1966
|
logger.info("Creating dynamic model from schema:")
|
1955
1967
|
logger.info(json.dumps(schema, indent=2))
|
1956
1968
|
|
1957
1969
|
try:
|
1958
|
-
# Extract required fields
|
1959
|
-
required: Set[str] = set(schema.get("required", []))
|
1960
|
-
|
1961
1970
|
# Handle our wrapper format if present
|
1962
1971
|
if "schema" in schema:
|
1963
1972
|
if debug_validation:
|
@@ -1980,32 +1989,15 @@ def create_dynamic_model(
|
|
1980
1989
|
logger.info(json.dumps(inner_schema, indent=2))
|
1981
1990
|
schema = inner_schema
|
1982
1991
|
|
1983
|
-
#
|
1984
|
-
|
1985
|
-
if debug_validation:
|
1986
|
-
logger.info("Schema missing type field, assuming object type")
|
1987
|
-
schema["type"] = "object"
|
1992
|
+
# Validate against OpenAI requirements
|
1993
|
+
from .schema_validation import validate_openai_schema
|
1988
1994
|
|
1989
|
-
|
1990
|
-
if schema["type"] != "object":
|
1991
|
-
if debug_validation:
|
1992
|
-
logger.info(
|
1993
|
-
"Converting non-object root schema to object wrapper"
|
1994
|
-
)
|
1995
|
-
schema = {
|
1996
|
-
"type": "object",
|
1997
|
-
"properties": {"value": schema},
|
1998
|
-
"required": ["value"],
|
1999
|
-
}
|
1995
|
+
validate_openai_schema(schema)
|
2000
1996
|
|
2001
1997
|
# Create model configuration
|
2002
1998
|
config = ConfigDict(
|
2003
1999
|
title=schema.get("title", base_name),
|
2004
|
-
extra=
|
2005
|
-
"forbid"
|
2006
|
-
if schema.get("additionalProperties") is False
|
2007
|
-
else "allow"
|
2008
|
-
),
|
2000
|
+
extra="forbid", # OpenAI requires additionalProperties: false
|
2009
2001
|
validate_default=True,
|
2010
2002
|
use_enum_values=True,
|
2011
2003
|
arbitrary_types_allowed=True,
|
@@ -2115,24 +2107,38 @@ def create_dynamic_model(
|
|
2115
2107
|
try:
|
2116
2108
|
model.model_json_schema()
|
2117
2109
|
except ValidationError as e:
|
2118
|
-
if debug_validation:
|
2119
|
-
logger.error("Schema validation failed:")
|
2120
|
-
logger.error(" Error type: %s", type(e).__name__)
|
2121
|
-
logger.error(" Error message: %s", str(e))
|
2122
2110
|
validation_errors = (
|
2123
2111
|
[str(err) for err in e.errors()]
|
2124
2112
|
if hasattr(e, "errors")
|
2125
2113
|
else [str(e)]
|
2126
2114
|
)
|
2115
|
+
if debug_validation:
|
2116
|
+
logger.error("Schema validation failed:")
|
2117
|
+
logger.error(" Error type: %s", type(e).__name__)
|
2118
|
+
logger.error(" Error message: %s", str(e))
|
2127
2119
|
raise ModelValidationError(base_name, validation_errors)
|
2128
2120
|
|
2129
2121
|
return model
|
2130
2122
|
|
2123
|
+
except SchemaValidationError as e:
|
2124
|
+
# Always log basic error info
|
2125
|
+
logger.error("Schema validation error: %s", str(e))
|
2126
|
+
|
2127
|
+
# Log additional debug info if requested
|
2128
|
+
if debug_validation:
|
2129
|
+
logger.error(" Error type: %s", type(e).__name__)
|
2130
|
+
logger.error(" Error details: %s", str(e))
|
2131
|
+
# Always raise schema validation errors directly
|
2132
|
+
raise
|
2133
|
+
|
2131
2134
|
except Exception as e:
|
2135
|
+
# Always log basic error info
|
2136
|
+
logger.error("Model creation error: %s", str(e))
|
2137
|
+
|
2138
|
+
# Log additional debug info if requested
|
2132
2139
|
if debug_validation:
|
2133
|
-
logger.error("Failed to create model:")
|
2134
2140
|
logger.error(" Error type: %s", type(e).__name__)
|
2135
|
-
logger.error(" Error
|
2141
|
+
logger.error(" Error details: %s", str(e))
|
2136
2142
|
if hasattr(e, "__cause__"):
|
2137
2143
|
logger.error(" Caused by: %s", str(e.__cause__))
|
2138
2144
|
if hasattr(e, "__context__"):
|
@@ -2144,9 +2150,11 @@ def create_dynamic_model(
|
|
2144
2150
|
" Traceback:\n%s",
|
2145
2151
|
"".join(traceback.format_tb(e.__traceback__)),
|
2146
2152
|
)
|
2153
|
+
# Always wrap other errors as ModelCreationError
|
2147
2154
|
raise ModelCreationError(
|
2148
|
-
f"Failed to create model
|
2149
|
-
|
2155
|
+
f"Failed to create model {base_name}",
|
2156
|
+
context={"error": str(e)},
|
2157
|
+
) from e
|
2150
2158
|
|
2151
2159
|
|
2152
2160
|
# Validation functions
|
ostruct/cli/errors.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
"""Custom error classes for CLI error handling."""
|
2
2
|
|
3
|
+
import json
|
3
4
|
import logging
|
4
5
|
from typing import Any, Dict, List, Optional
|
5
6
|
|
@@ -323,31 +324,54 @@ class SchemaFileError(CLIError):
|
|
323
324
|
|
324
325
|
|
325
326
|
class SchemaValidationError(CLIError):
    """Error raised when a schema fails validation.

    The final message is built from ``message`` plus optional details taken
    from ``context``: the schema location, any recognised detail values, and
    a "How to fix" section assembled from ``context["tips"]``.
    """

    def __init__(
        self,
        message: str,
        context: Optional[Dict[str, Any]] = None,
    ):
        """Build the formatted message and initialise the base error.

        Args:
            message: Short description of the validation failure.
            context: Optional details. Recognised keys are ``path``,
                ``found``, ``count``, ``total_chars``, ``missing_required``,
                ``extra_required``, ``prohibited_used`` and ``tips``. String
                tips are rendered as bullet points; dict tips are rendered
                as an indented JSON example.
        """
        context = context or {}

        # Format error message with tips
        formatted_message = [message]

        if "path" in context:
            formatted_message.append(f"\nLocation: {context['path']}")

        # Detail lines, emitted in this order when the key is present.
        # "total_chars" is supplied by the enum-length check in
        # validate_openai_schema and was previously dropped from the output.
        detail_labels = (
            ("found", "Found"),
            ("count", "Count"),
            ("total_chars", "Total characters"),
            ("missing_required", "Missing required"),
            ("extra_required", "Extra required"),
            ("prohibited_used", "Prohibited keywords used"),
        )
        for key, label in detail_labels:
            if key in context:
                formatted_message.append(f"{label}: {context[key]}")

        if "tips" in context:
            formatted_message.append("\nHow to fix:")
            for tip in context["tips"]:
                if isinstance(tip, dict):
                    # Format JSON example
                    formatted_message.append("Example schema:")
                    formatted_message.append(json.dumps(tip, indent=2))
                else:
                    formatted_message.append(f"- {tip}")

        super().__init__(
            "\n".join(formatted_message),
            context=context,
            exit_code=ExitCode.SCHEMA_ERROR,
        )
|
@@ -430,9 +454,30 @@ class EmptyResponseError(CLIError):
|
|
430
454
|
|
431
455
|
|
432
456
|
class InvalidResponseFormatError(CLIError):
    """Raised when the response format is invalid.

    When the incoming message contains the "schema must be a JSON Schema of
    'type: "object"'" phrase, it is replaced by guidance on wrapping an array
    in an object; any other message is passed through unchanged.
    """

    def __init__(self, message: str, context: Optional[Dict[str, Any]] = None):
        """Initialise the error with exit code ``ExitCode.API_ERROR``.

        Args:
            message: The error text; rewritten when it matches the root-type
                phrase described on the class.
            context: Optional extra details forwarded to the base class.
        """
        root_type_marker = "schema must be a JSON Schema of 'type: \"object\"'"
        if root_type_marker in message:
            guidance_parts = [
                "The schema must have a root type of 'object', but got 'array'. ",
                "To fix this, wrap your array in an object. For example:\n\n",
                "{\n",
                ' "type": "object",\n',
                ' "properties": {\n',
                ' "items": {\n',
                ' "type": "array",\n',
                ' "items": { ... your array items schema ... }\n',
                " }\n",
                " },\n",
                ' "required": ["items"]\n',
                "}\n\n",
                "Then update your template to handle the wrapper object.",
            ]
            message = "".join(guidance_parts)

        super().__init__(
            message,
            exit_code=ExitCode.API_ERROR,
            context=context,
        )
|
436
481
|
|
437
482
|
|
438
483
|
class OpenAIClientError(CLIError):
|
@@ -0,0 +1,213 @@
|
|
1
|
+
from enum import IntEnum
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
|
+
|
4
|
+
from .errors import SchemaValidationError
|
5
|
+
|
6
|
+
|
7
|
+
class SchemaLimits(IntEnum):
    """Limits for OpenAI schema validation."""

    # Maximum length of the path (property names and "items" steps) to a
    # sub-schema.
    MAX_NESTING_DEPTH = 5
    # Maximum number of properties on a single object schema.
    MAX_PROPERTIES = 100
    # Maximum number of values in a single enum.
    MAX_ENUM_VALUES = 500
    # Enums with more values than this also have their total length checked.
    MAX_ENUM_VALUES_CHAR_CHECK = 250
    # Maximum combined length of str(value) over all values of a large enum.
    MAX_ENUM_TOTAL_CHARS = 7500


# Validates the schema against OpenAI's structured output requirements.
# https://platform.openai.com/docs/guides/structured-outputs

# Keywords rejected for each JSON Schema type.
# NOTE(review): "number" and "integer" list different bound keywords; confirm
# against the OpenAI documentation whether both sets should apply to both.
_TYPE_PROHIBITED_KEYWORDS: Dict[str, List[str]] = {
    "object": ["patternProperties", "minProperties"],
    "array": ["minItems", "maxItems", "uniqueItems"],
    "string": ["pattern", "format", "minLength", "maxLength"],
    "number": ["minimum", "maximum", "multipleOf"],
    "integer": ["exclusiveMinimum", "exclusiveMaximum"],
}


def _declared_types(schema: Dict[str, Any]) -> List[str]:
    """Return the type names declared by ``schema["type"]`` as a list.

    JSON Schema allows ``type`` to be a single name or a list of names (for
    example ``["string", "null"]``); both forms are normalised to a list so
    callers never use an unhashable list as a dictionary key.
    """
    declared = schema.get("type")
    if isinstance(declared, str):
        return [declared]
    if isinstance(declared, list):
        return [name for name in declared if isinstance(name, str)]
    return []


def validate_openai_schema(
    schema: Dict[str, Any], path: Optional[List[str]] = None
) -> None:
    """Validate schema against OpenAI's structured output requirements.

    The root schema must be an object with ``additionalProperties: false``,
    at least one property, and every property listed in ``required``. Every
    schema reached through ``properties`` and ``items`` is then checked for
    nesting depth, property count, enum size and prohibited keywords.

    Args:
        schema: The JSON schema to validate
        path: Current path in schema for nested validation

    Raises:
        SchemaValidationError: If schema violates any OpenAI requirements
    """
    path = path or []
    current_path = "/".join(path) or "<root>"

    # A sub-schema that is not a JSON object (e.g. a bare ``true``) cannot be
    # inspected; report it as a schema error instead of an AttributeError.
    if not isinstance(schema, dict):
        raise SchemaValidationError(
            "Schema must be a JSON object",
            context={
                "path": current_path,
                "found": type(schema).__name__,
                "tips": [
                    "Define every schema and sub-schema as a JSON object",
                ],
            },
        )

    # Root level validation
    if not path:  # Only check at root
        if schema.get("type") != "object":
            raise SchemaValidationError(
                "Root schema must be type 'object'",
                context={
                    "path": current_path,
                    "found": schema.get("type"),
                    "tips": [
                        "The root of your schema must be an object type",
                        "If you have an array, wrap it in an object property:",
                        {
                            "type": "object",
                            "properties": {
                                "items": {
                                    "type": "array",
                                    "items": "...your array schema...",
                                }
                            },
                            "required": ["items"],
                            "additionalProperties": False,
                        },
                    ],
                },
            )

        if schema.get("additionalProperties") is not False:
            raise SchemaValidationError(
                "Root schema must set additionalProperties: false",
                context={
                    "path": current_path,
                    "tips": [
                        "Add 'additionalProperties: false' to your root schema",
                        "This ensures only defined properties are allowed",
                    ],
                },
            )

        # Validate required properties
        root_properties = set(schema.get("properties", {}).keys())
        required = set(schema.get("required", []))

        if not root_properties:
            raise SchemaValidationError(
                "Root schema must define at least one property",
                context={
                    "path": current_path,
                    "tips": [
                        "Add properties to your schema",
                        "Each property should define its type and any constraints",
                    ],
                },
            )

        if required != root_properties:
            # Sorted so the reported names have a stable order.
            missing = sorted(root_properties - required)
            extra = sorted(required - root_properties)
            tips = []
            if missing:
                tips.append(
                    f"Add these properties to 'required': {missing}"
                )
            if extra:
                tips.append(
                    f"Remove these from 'required' as they aren't defined: {extra}"
                )

            raise SchemaValidationError(
                "All properties must be required in root schema",
                context={
                    "path": current_path,
                    "missing_required": missing,
                    "extra_required": extra,
                    "tips": tips,
                },
            )

    # Structural validation
    if len(path) > SchemaLimits.MAX_NESTING_DEPTH:
        raise SchemaValidationError(
            f"Schema exceeds maximum nesting depth of {SchemaLimits.MAX_NESTING_DEPTH} levels",
            context={
                "path": current_path,
                "tips": [
                    "Flatten your schema structure",
                    "Consider combining nested objects",
                    "Move complex structures to root level properties",
                ],
            },
        )

    types = _declared_types(schema)

    # Property count validation
    if "object" in types:
        obj_properties: Dict[str, Any] = schema.get("properties", {})
        if len(obj_properties) > SchemaLimits.MAX_PROPERTIES:
            raise SchemaValidationError(
                f"Schema exceeds maximum of {SchemaLimits.MAX_PROPERTIES} properties",
                context={
                    "path": current_path,
                    "count": len(obj_properties),
                    "tips": [
                        "Reduce the number of properties",
                        "Consider grouping related properties into sub-objects",
                        "Remove any unused or optional properties",
                    ],
                },
            )

        # Validate each property
        for prop_name, prop_schema in obj_properties.items():
            validate_openai_schema(prop_schema, path + [prop_name])

    # Array validation
    if "array" in types and "items" in schema:
        validate_openai_schema(schema["items"], path + ["items"])

    # Enum validation
    if "enum" in schema:
        enum_values = schema["enum"]
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES:
            raise SchemaValidationError(
                f"Enum exceeds maximum of {SchemaLimits.MAX_ENUM_VALUES} values",
                context={
                    "path": current_path,
                    "count": len(enum_values),
                    "tips": [
                        "Reduce the number of enum values",
                        "Consider using a different type or structure",
                        "Split into multiple smaller enums if possible",
                    ],
                },
            )

        # Check enum string length for large enums
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES_CHAR_CHECK:
            total_chars = sum(len(str(v)) for v in enum_values)
            if total_chars > SchemaLimits.MAX_ENUM_TOTAL_CHARS:
                raise SchemaValidationError(
                    f"Enum values exceed maximum total length of {SchemaLimits.MAX_ENUM_TOTAL_CHARS} characters",
                    context={
                        "path": current_path,
                        "total_chars": total_chars,
                        "tips": [
                            "Reduce the length of enum values",
                            "Consider using shorter identifiers",
                            "Split into multiple smaller enums",
                        ],
                    },
                )

    # Prohibited keywords by type: for a union type, the keywords of every
    # listed type are checked.
    used_prohibited = sorted(
        {
            keyword
            for type_name in types
            for keyword in _TYPE_PROHIBITED_KEYWORDS.get(type_name, ())
            if keyword in schema
        }
    )
    if used_prohibited:
        type_label = "/".join(types)
        raise SchemaValidationError(
            f"Schema uses prohibited keywords for type '{type_label}'",
            context={
                "path": current_path,
                "type": schema.get("type"),
                "prohibited_used": used_prohibited,
                "tips": [
                    f"Remove these prohibited keywords: {used_prohibited}",
                    "OpenAI structured output has limited keyword support",
                    "Use only basic type constraints",
                ],
            },
        )
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: ostruct-cli
|
3
|
-
Version: 0.5.0
|
3
|
+
Version: 0.6.0
|
4
4
|
Summary: CLI for OpenAI Structured Output
|
5
5
|
Author: Yaniv Golan
|
6
6
|
Author-email: yaniv@golan.name
|
@@ -19,7 +19,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
|
|
19
19
|
Requires-Dist: openai-structured (>=2.0.0,<3.0.0)
|
20
20
|
Requires-Dist: pydantic (>=2.6.3,<3.0.0)
|
21
21
|
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
22
|
-
Requires-Dist: tiktoken (
|
22
|
+
Requires-Dist: tiktoken (==0.9.0)
|
23
23
|
Requires-Dist: tomli (>=2.0.1,<3.0.0) ; python_version < "3.11"
|
24
24
|
Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
|
25
25
|
Requires-Dist: werkzeug (>=3.1.3,<4.0.0)
|
@@ -37,19 +37,62 @@ Command-line interface for working with OpenAI models and structured output, pow
|
|
37
37
|
|
38
38
|
## Features
|
39
39
|
|
40
|
-
- Generate structured output from natural language using OpenAI models
|
41
|
-
- Rich template system for defining
|
40
|
+
- Generate structured JSON output from natural language using OpenAI models and a JSON schema
|
41
|
+
- Rich template system for defining prompts (Jinja2-based)
|
42
42
|
- Automatic token counting and context window management
|
43
43
|
- Streaming support for real-time output
|
44
|
-
- Caching system for cost optimization
|
45
44
|
- Secure handling of sensitive data
|
46
45
|
|
47
46
|
## Installation
|
48
47
|
|
48
|
+
### For Users
|
49
|
+
|
50
|
+
To install the latest stable version from PyPI:
|
51
|
+
|
49
52
|
```bash
|
50
53
|
pip install ostruct-cli
|
51
54
|
```
|
52
55
|
|
56
|
+
### For Developers
|
57
|
+
|
58
|
+
If you plan to contribute to the project, see the [Development Setup](#development-setup) section below for instructions on setting up the development environment with Poetry.
|
59
|
+
|
60
|
+
## Shell Completion
|
61
|
+
|
62
|
+
ostruct-cli supports shell completion for Bash, Zsh, and Fish shells. To enable it:
|
63
|
+
|
64
|
+
### Bash
|
65
|
+
|
66
|
+
Add this to your `~/.bashrc`:
|
67
|
+
|
68
|
+
```bash
|
69
|
+
eval "$(_OSTRUCT_COMPLETE=bash_source ostruct)"
|
70
|
+
```
|
71
|
+
|
72
|
+
### Zsh
|
73
|
+
|
74
|
+
Add this to your `~/.zshrc`:
|
75
|
+
|
76
|
+
```bash
|
77
|
+
eval "$(_OSTRUCT_COMPLETE=zsh_source ostruct)"
|
78
|
+
```
|
79
|
+
|
80
|
+
### Fish
|
81
|
+
|
82
|
+
Add this to your `~/.config/fish/completions/ostruct.fish`:
|
83
|
+
|
84
|
+
```fish
|
85
|
+
eval (env _OSTRUCT_COMPLETE=fish_source ostruct)
|
86
|
+
```
|
87
|
+
|
88
|
+
After adding the appropriate line, restart your shell or source the configuration file.
|
89
|
+
Shell completion will help you with:
|
90
|
+
|
91
|
+
- Command options and their arguments
|
92
|
+
- File paths for template and schema files
|
93
|
+
- Directory paths for `-d` and `--base-dir` options
|
94
|
+
- And more!
|
95
|
+
|
53
96
|
## Quick Start
|
54
97
|
|
55
98
|
1. Set your OpenAI API key:
|
@@ -58,57 +101,193 @@ pip install ostruct-cli
|
|
58
101
|
export OPENAI_API_KEY=your-api-key
|
59
102
|
```
|
60
103
|
|
61
|
-
|
104
|
+
### Example 1: Using stdin (Simplest)
|
62
105
|
|
63
|
-
|
64
|
-
|
106
|
+
1. Create a template file `extract_person.j2`:
|
107
|
+
|
108
|
+
```jinja
|
109
|
+
Extract information about the person from this text: {{ stdin }}
|
65
110
|
```
|
66
111
|
|
67
|
-
|
112
|
+
2. Create a schema file `schema.json`:
|
68
113
|
|
69
114
|
```json
|
70
115
|
{
|
71
116
|
"type": "object",
|
72
117
|
"properties": {
|
73
|
-
"
|
74
|
-
"type": "
|
75
|
-
"
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
118
|
+
"person": {
|
119
|
+
"type": "object",
|
120
|
+
"properties": {
|
121
|
+
"name": {
|
122
|
+
"type": "string",
|
123
|
+
"description": "The person's full name"
|
124
|
+
},
|
125
|
+
"age": {
|
126
|
+
"type": "integer",
|
127
|
+
"description": "The person's age"
|
128
|
+
},
|
129
|
+
"occupation": {
|
130
|
+
"type": "string",
|
131
|
+
"description": "The person's job or profession"
|
132
|
+
}
|
133
|
+
},
|
134
|
+
"required": ["name", "age", "occupation"],
|
135
|
+
"additionalProperties": false
|
84
136
|
}
|
85
137
|
},
|
86
|
-
"required": ["
|
138
|
+
"required": ["person"],
|
139
|
+
"additionalProperties": false
|
140
|
+
}
|
141
|
+
```
|
142
|
+
|
143
|
+
3. Run the CLI:
|
144
|
+
|
145
|
+
```bash
|
146
|
+
# Basic usage
|
147
|
+
echo "John Smith is a 35-year-old software engineer" | ostruct run extract_person.j2 schema.json
|
148
|
+
|
149
|
+
# For longer text using heredoc
|
150
|
+
cat << EOF | ostruct run extract_person.j2 schema.json
|
151
|
+
John Smith is a 35-year-old software engineer
|
152
|
+
working at Tech Corp. He has been programming
|
153
|
+
for over 10 years.
|
154
|
+
EOF
|
155
|
+
|
156
|
+
# With advanced options
|
157
|
+
echo "John Smith is a 35-year-old software engineer" | \
|
158
|
+
ostruct run extract_person.j2 schema.json \
|
159
|
+
--model gpt-4o \
|
160
|
+
--sys-prompt "Extract precise information about the person" \
|
161
|
+
--temperature 0.7
|
162
|
+
```
|
163
|
+
|
164
|
+
The command will output:
|
165
|
+
|
166
|
+
```json
|
167
|
+
{
|
168
|
+
"person": {
|
169
|
+
"name": "John Smith",
|
170
|
+
"age": 35,
|
171
|
+
"occupation": "software engineer"
|
172
|
+
}
|
87
173
|
}
|
88
174
|
```
|
89
175
|
|
90
|
-
|
176
|
+
### Example 2: Processing a Single File
|
177
|
+
|
178
|
+
1. Create a template file `extract_from_file.j2`:
|
179
|
+
|
180
|
+
```jinja
|
181
|
+
Extract information about the person from this text: {{ text.content }}
|
182
|
+
```
|
183
|
+
|
184
|
+
2. Use the same schema file `schema.json` as above.
|
185
|
+
|
186
|
+
3. Run the CLI:
|
91
187
|
|
92
188
|
```bash
|
93
|
-
|
189
|
+
# Basic usage
|
190
|
+
ostruct run extract_from_file.j2 schema.json -f text input.txt
|
191
|
+
|
192
|
+
# With advanced options
|
193
|
+
ostruct run extract_from_file.j2 schema.json \
|
194
|
+
-f text input.txt \
|
195
|
+
--model gpt-4o \
|
196
|
+
--max-output-tokens 1000 \
|
197
|
+
--temperature 0.7
|
94
198
|
```
|
95
199
|
|
96
|
-
|
200
|
+
The command will output:
|
201
|
+
|
202
|
+
```json
|
203
|
+
{
|
204
|
+
"person": {
|
205
|
+
"name": "John Smith",
|
206
|
+
"age": 35,
|
207
|
+
"occupation": "software engineer"
|
208
|
+
}
|
209
|
+
}
|
210
|
+
```
|
211
|
+
|
212
|
+
### Example 3: Processing Multiple Files
|
213
|
+
|
214
|
+
1. Create a template file `extract_from_profiles.j2`:
|
215
|
+
|
216
|
+
```jinja
|
217
|
+
Extract information about the people from this data:
|
218
|
+
|
219
|
+
{% for profile in profiles %}
|
220
|
+
== {{ profile.name }}
|
221
|
+
|
222
|
+
{{ profile.content }}
|
223
|
+
|
224
|
+
{% endfor %}
|
225
|
+
```
|
226
|
+
|
227
|
+
2. Use the same schema file `schema.json` as above, but updated for multiple people:
|
228
|
+
|
229
|
+
```json
|
230
|
+
{
|
231
|
+
"type": "object",
|
232
|
+
"properties": {
|
233
|
+
"people": {
|
234
|
+
"type": "array",
|
235
|
+
"items": {
|
236
|
+
"type": "object",
|
237
|
+
"properties": {
|
238
|
+
"name": {
|
239
|
+
"type": "string",
|
240
|
+
"description": "The person's full name"
|
241
|
+
},
|
242
|
+
"age": {
|
243
|
+
"type": "integer",
|
244
|
+
"description": "The person's age"
|
245
|
+
},
|
246
|
+
"occupation": {
|
247
|
+
"type": "string",
|
248
|
+
"description": "The person's job or profession"
|
249
|
+
}
|
250
|
+
},
|
251
|
+
"required": ["name", "age", "occupation"],
|
252
|
+
"additionalProperties": false
|
253
|
+
}
|
254
|
+
}
|
255
|
+
},
|
256
|
+
"required": ["people"],
|
257
|
+
"additionalProperties": false
|
258
|
+
}
|
259
|
+
```
|
260
|
+
|
261
|
+
3. Run the CLI:
|
97
262
|
|
98
263
|
```bash
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
264
|
+
# Basic usage
|
265
|
+
ostruct run extract_from_profiles.j2 schema.json -p profiles "profiles/*.txt"
|
266
|
+
|
267
|
+
# With advanced options
|
268
|
+
ostruct run extract_from_profiles.j2 schema.json \
|
269
|
+
-p profiles "profiles/*.txt" \
|
270
|
+
--model gpt-4o \
|
271
|
+
--sys-prompt "Extract precise information about the person" \
|
272
|
+
--temperature 0.5
|
103
273
|
```
|
104
274
|
|
105
|
-
|
275
|
+
The command will output:
|
106
276
|
|
107
277
|
```json
|
108
278
|
{
|
109
|
-
"
|
110
|
-
|
111
|
-
|
279
|
+
"people": [
|
280
|
+
{
|
281
|
+
"name": "John Smith",
|
282
|
+
"age": 35,
|
283
|
+
"occupation": "software engineer"
|
284
|
+
},
|
285
|
+
{
|
286
|
+
"name": "Jane Doe",
|
287
|
+
"age": 28,
|
288
|
+
"occupation": "data scientist"
|
289
|
+
}
|
290
|
+
]
|
112
291
|
}
|
113
292
|
```
|
114
293
|
|
@@ -2,15 +2,16 @@ ostruct/__init__.py,sha256=X6zo6V7ZNMv731Wi388aTVQngD1410ExGwGx4J6lpyo,187
|
|
2
2
|
ostruct/cli/__init__.py,sha256=sYHKT6o1kFy1acbXejzAvVm8Cy8U91Yf1l4DlzquHKg,409
|
3
3
|
ostruct/cli/base_errors.py,sha256=S1cQxoiALbXKPxzgLo6XdSWpzPRb7RKz0QARmu9Zt4g,5987
|
4
4
|
ostruct/cli/cache_manager.py,sha256=ej3KrRfkKKZ_lEp2JswjbJ5bW2ncsvna9NeJu81cqqs,5192
|
5
|
-
ostruct/cli/cli.py,sha256=
|
5
|
+
ostruct/cli/cli.py,sha256=wfO5Z8PPoP8eUn5CfhxrjrdMzfbvr4ryo_tsRST0LlU,74588
|
6
6
|
ostruct/cli/click_options.py,sha256=WbRJdB9sO63ChN3fnCP7XWs73DHKl0C1ervfwL11am0,11371
|
7
|
-
ostruct/cli/errors.py,sha256=
|
7
|
+
ostruct/cli/errors.py,sha256=zJdJ-AyzjCE8glVKbJGAcB-Mz1J1SlzTDJDmhqAVFYc,14930
|
8
8
|
ostruct/cli/exit_codes.py,sha256=uNjvQeUGwU1mlUJYIDrExAn7YlwOXZo603yLAwpqIwk,338
|
9
9
|
ostruct/cli/file_info.py,sha256=ilpT8IuckfhadLF1QQAPLXJp7p8kVpffDEEJ2erHPZU,14485
|
10
10
|
ostruct/cli/file_list.py,sha256=jLuCd1ardoAXX8FNwPgIqEM-ixzr1xP5ZSqXo2lmrj0,11270
|
11
11
|
ostruct/cli/file_utils.py,sha256=J3-6fbEGQ7KD_bU81pAxueHLv9XV0X7f8FSMt_0AJGQ,22537
|
12
12
|
ostruct/cli/path_utils.py,sha256=j44q1OoLkqMErgK-qEuhuIZ1VyzqRIvNgxR1et9PoXA,4813
|
13
13
|
ostruct/cli/progress.py,sha256=rj9nVEco5UeZORMbzd7mFJpFGJjbH9KbBFh5oTE5Anw,3415
|
14
|
+
ostruct/cli/schema_validation.py,sha256=ohEuxJ0KF93qphj0JSZDnrxDn0C2ZU37g-U2JY03onM,8154
|
14
15
|
ostruct/cli/security/__init__.py,sha256=CQpkCgTFYlA1p6atpQeNgIKtE4LZGUKt4EbytbGKpCs,846
|
15
16
|
ostruct/cli/security/allowed_checker.py,sha256=N5UXlpjdj5zAbKk-lRDlHiHV3KtQHtJNhtZI_qGB4zw,1638
|
16
17
|
ostruct/cli/security/base.py,sha256=q9YUdHEj2eg5w8GEw5403E9OQKIjZbEiaWsvYFnCGLw,1359
|
@@ -35,8 +36,8 @@ ostruct/cli/token_utils.py,sha256=r4KPEO3Sec18Q6mU0aClK6XGShvusgUggXEQgEPPlaA,13
|
|
35
36
|
ostruct/cli/utils.py,sha256=1UCl4rHjBWKR5EKugvlVGHiHjO3XXmqvkgeAUSyIPDU,831
|
36
37
|
ostruct/cli/validators.py,sha256=BYFZeebCPZObTUjO1TaAMpsD6h7ROkYAFn9C7uf1Q68,2992
|
37
38
|
ostruct/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
-
ostruct_cli-0.
|
39
|
-
ostruct_cli-0.
|
40
|
-
ostruct_cli-0.
|
41
|
-
ostruct_cli-0.
|
42
|
-
ostruct_cli-0.
|
39
|
+
ostruct_cli-0.6.0.dist-info/LICENSE,sha256=QUOY6QCYVxAiH8vdrUTDqe3i9hQ5bcNczppDSVpLTjk,1068
|
40
|
+
ostruct_cli-0.6.0.dist-info/METADATA,sha256=Zrq8a-EvLhnZdOQBBlYvONWKo61XmdAR8934_OtHUa4,10426
|
41
|
+
ostruct_cli-0.6.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
42
|
+
ostruct_cli-0.6.0.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
|
43
|
+
ostruct_cli-0.6.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|