ostruct-cli 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,213 @@
1
+ from enum import IntEnum
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from .errors import SchemaValidationError
5
+
6
+
7
class SchemaLimits(IntEnum):
    """Numeric ceilings applied while validating a schema for OpenAI.

    Members are plain integers (``IntEnum``), so they can be compared
    directly against ``len(...)`` results and formatted into messages.
    """

    # Maximum number of path segments below the root before the schema is
    # rejected as too deeply nested.
    MAX_NESTING_DEPTH = 5

    # Maximum number of entries in a single object's "properties" mapping.
    MAX_PROPERTIES = 100

    # Maximum number of values in a single "enum" list.
    MAX_ENUM_VALUES = 500

    # Enum size above which the combined character length of the values
    # is also checked.
    MAX_ENUM_VALUES_CHAR_CHECK = 250

    # Maximum combined length of the stringified values of a large enum.
    MAX_ENUM_TOTAL_CHARS = 7500
15
+
16
+
17
+ # Validates the schema against OpenAI's structured output requirements.
18
+ # https://platform.openai.com/docs/guides/structured-outputs
19
+
20
+
21
def _declared_types(schema: Dict[str, Any]) -> List[str]:
    """Return the schema's ``type`` keyword normalized to a list of names.

    JSON Schema allows ``type`` to be either a single string or a list of
    strings (e.g. ``["string", "null"]`` for a nullable field). Any other
    value, including a missing ``type``, yields an empty list.
    """
    declared = schema.get("type")
    if isinstance(declared, str):
        return [declared]
    if isinstance(declared, list):
        return [name for name in declared if isinstance(name, str)]
    return []


def validate_openai_schema(
    schema: Dict[str, Any], path: Optional[List[str]] = None
) -> None:
    """Validate schema against OpenAI's structured output requirements.

    The function recurses into object properties and array ``items``,
    extending ``path`` with the property name (or ``"items"``) at each step.
    Checks performed, in order:

    * the (sub-)schema itself is a mapping;
    * at the root only: ``type`` is ``"object"``, ``additionalProperties`` is
      ``False``, at least one property is defined, and ``required`` lists
      exactly the defined properties;
    * the current path is not deeper than ``SchemaLimits.MAX_NESTING_DEPTH``;
    * objects do not exceed ``SchemaLimits.MAX_PROPERTIES`` properties;
    * enums respect the value-count and total-character limits;
    * no keyword prohibited for any of the declared types is present.

    ``type`` may be a single string or a list of strings (a union such as
    ``["string", "null"]``); every declared type is checked.

    Args:
        schema: The JSON schema to validate
        path: Current path in schema for nested validation

    Raises:
        SchemaValidationError: If schema violates any OpenAI requirements
    """
    path = path or []
    current_path = "/".join(path) or "<root>"

    # A sub-schema that is not a mapping (placeholder string, boolean schema,
    # tuple-style "items" list, ...) cannot be inspected; report it as a
    # validation error instead of failing later with AttributeError.
    if not isinstance(schema, dict):
        raise SchemaValidationError(
            "Schema must be a JSON object",
            context={
                "path": current_path,
                "found": type(schema).__name__,
                "tips": [
                    "Each schema and sub-schema must be a JSON object",
                    "Replace placeholder or boolean schemas with a full definition",
                ],
            },
        )

    # Root level validation
    if not path:  # Only check at root
        if schema.get("type") != "object":
            raise SchemaValidationError(
                "Root schema must be type 'object'",
                context={
                    "path": current_path,
                    "found": schema.get("type"),
                    "tips": [
                        "The root of your schema must be an object type",
                        "If you have an array, wrap it in an object property:",
                        {
                            "type": "object",
                            "properties": {
                                "items": {
                                    "type": "array",
                                    "items": "...your array schema...",
                                }
                            },
                            "required": ["items"],
                            "additionalProperties": False,
                        },
                    ],
                },
            )

        if schema.get("additionalProperties") is not False:
            raise SchemaValidationError(
                "Root schema must set additionalProperties: false",
                context={
                    "path": current_path,
                    "tips": [
                        "Add 'additionalProperties: false' to your root schema",
                        "This ensures only defined properties are allowed",
                    ],
                },
            )

        # Validate required properties ("or" guards against explicit nulls)
        root_properties = set(schema.get("properties") or {})
        required = set(schema.get("required") or [])

        if not root_properties:
            raise SchemaValidationError(
                "Root schema must define at least one property",
                context={
                    "path": current_path,
                    "tips": [
                        "Add properties to your schema",
                        "Each property should define its type and any constraints",
                    ],
                },
            )

        if required != root_properties:
            # Sorted so messages and context are stable across runs.
            missing = sorted(root_properties - required, key=str)
            extra = sorted(required - root_properties, key=str)
            tips = []
            if missing:
                tips.append(
                    f"Add these properties to 'required': {missing}"
                )
            if extra:
                tips.append(
                    f"Remove these from 'required' as they aren't defined: {extra}"
                )

            raise SchemaValidationError(
                "All properties must be required in root schema",
                context={
                    "path": current_path,
                    "missing_required": missing,
                    "extra_required": extra,
                    "tips": tips,
                },
            )

    # Structural validation
    if len(path) > SchemaLimits.MAX_NESTING_DEPTH:
        raise SchemaValidationError(
            f"Schema exceeds maximum nesting depth of {SchemaLimits.MAX_NESTING_DEPTH} levels",
            context={
                "path": current_path,
                "tips": [
                    "Flatten your schema structure",
                    "Consider combining nested objects",
                    "Move complex structures to root level properties",
                ],
            },
        )

    # "type" may be a union list; examine every declared type.
    schema_types = _declared_types(schema)

    # Property count validation
    if "object" in schema_types:
        obj_properties: Dict[str, Any] = schema.get("properties") or {}
        if len(obj_properties) > SchemaLimits.MAX_PROPERTIES:
            raise SchemaValidationError(
                f"Schema exceeds maximum of {SchemaLimits.MAX_PROPERTIES} properties",
                context={
                    "path": current_path,
                    "count": len(obj_properties),
                    "tips": [
                        "Reduce the number of properties",
                        "Consider grouping related properties into sub-objects",
                        "Remove any unused or optional properties",
                    ],
                },
            )

        # Validate each property
        for prop_name, prop_schema in obj_properties.items():
            validate_openai_schema(prop_schema, path + [prop_name])

    # Array validation
    if "array" in schema_types and "items" in schema:
        validate_openai_schema(schema["items"], path + ["items"])

    # Enum validation
    if "enum" in schema:
        enum_values = schema["enum"]
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES:
            raise SchemaValidationError(
                f"Enum exceeds maximum of {SchemaLimits.MAX_ENUM_VALUES} values",
                context={
                    "path": current_path,
                    "count": len(enum_values),
                    "tips": [
                        "Reduce the number of enum values",
                        "Consider using a different type or structure",
                        "Split into multiple smaller enums if possible",
                    ],
                },
            )

        # Check enum string length for large enums
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES_CHAR_CHECK:
            total_chars = sum(len(str(v)) for v in enum_values)
            if total_chars > SchemaLimits.MAX_ENUM_TOTAL_CHARS:
                raise SchemaValidationError(
                    f"Enum values exceed maximum total length of {SchemaLimits.MAX_ENUM_TOTAL_CHARS} characters",
                    context={
                        "path": current_path,
                        "total_chars": total_chars,
                        "tips": [
                            "Reduce the length of enum values",
                            "Consider using shorter identifiers",
                            "Split into multiple smaller enums",
                        ],
                    },
                )

    # Prohibited keywords by type
    type_prohibited = {
        "object": ["patternProperties", "minProperties"],
        "array": ["minItems", "maxItems", "uniqueItems"],
        "string": ["pattern", "format", "minLength", "maxLength"],
        "number": ["minimum", "maximum", "multipleOf"],
        "integer": ["exclusiveMinimum", "exclusiveMaximum"],
    }

    # Report the first declared type (in declaration order) that uses a
    # keyword prohibited for it.
    for schema_type in schema_types:
        used_prohibited = sorted(
            set(type_prohibited.get(schema_type, ())).intersection(schema)
        )
        if used_prohibited:
            raise SchemaValidationError(
                f"Schema uses prohibited keywords for type '{schema_type}'",
                context={
                    "path": current_path,
                    "type": schema_type,
                    "prohibited_used": used_prohibited,
                    "tips": [
                        f"Remove these prohibited keywords: {used_prohibited}",
                        "OpenAI structured output has limited keyword support",
                        "Use only basic type constraints",
                    ],
                },
            )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ostruct-cli
3
- Version: 0.5.0
3
+ Version: 0.6.1
4
4
  Summary: CLI for OpenAI Structured Output
5
5
  Author: Yaniv Golan
6
6
  Author-email: yaniv@golan.name
@@ -19,7 +19,7 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
19
19
  Requires-Dist: openai-structured (>=2.0.0,<3.0.0)
20
20
  Requires-Dist: pydantic (>=2.6.3,<3.0.0)
21
21
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
22
- Requires-Dist: tiktoken (>=0.9.0,<0.10.0)
22
+ Requires-Dist: tiktoken (==0.9.0)
23
23
  Requires-Dist: tomli (>=2.0.1,<3.0.0) ; python_version < "3.11"
24
24
  Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
25
25
  Requires-Dist: werkzeug (>=3.1.3,<4.0.0)
@@ -37,19 +37,62 @@ Command-line interface for working with OpenAI models and structured output, pow
37
37
 
38
38
  ## Features
39
39
 
40
- - Generate structured output from natural language using OpenAI models
41
- - Rich template system for defining output schemas
40
+ - Generate structured JSON output from natural language using OpenAI models and a JSON schema
41
+ - Rich template system for defining prompts (Jinja2-based)
42
42
  - Automatic token counting and context window management
43
43
  - Streaming support for real-time output
44
- - Caching system for cost optimization
45
44
  - Secure handling of sensitive data
46
45
 
47
46
  ## Installation
48
47
 
48
+ ### For Users
49
+
50
+ To install the latest stable version from PyPI:
51
+
49
52
  ```bash
50
53
  pip install ostruct-cli
51
54
  ```
52
55
 
56
+ ### For Developers
57
+
58
+ If you plan to contribute to the project, see the [Development Setup](#development-setup) section below for instructions on setting up the development environment with Poetry.
59
+
60
+ ## Shell Completion
61
+
62
+ ostruct-cli supports shell completion for Bash, Zsh, and Fish shells. To enable it:
63
+
64
+ ### Bash
65
+
66
+ Add this to your `~/.bashrc`:
67
+
68
+ ```bash
69
+ eval "$(_OSTRUCT_COMPLETE=bash_source ostruct)"
70
+ ```
71
+
72
+ ### Zsh
73
+
74
+ Add this to your `~/.zshrc`:
75
+
76
+ ```bash
77
+ eval "$(_OSTRUCT_COMPLETE=zsh_source ostruct)"
78
+ ```
79
+
80
+ ### Fish
81
+
82
+ Add this to your `~/.config/fish/completions/ostruct.fish`:
83
+
84
+ ```fish
85
+ eval (env _OSTRUCT_COMPLETE=fish_source ostruct)
86
+ ```
87
+
88
+ After adding the appropriate line, restart your shell or source the configuration file.
89
+ Shell completion will help you with:
90
+
91
+ - Command options and their arguments
92
+ - File paths for template and schema files
93
+ - Directory paths for `-d` and `--base-dir` options
94
+ - And more!
95
+
53
96
  ## Quick Start
54
97
 
55
98
  1. Set your OpenAI API key:
@@ -58,57 +101,193 @@ pip install ostruct-cli
58
101
  export OPENAI_API_KEY=your-api-key
59
102
  ```
60
103
 
61
- 2. Create a task template file `task.j2`:
104
+ ### Example 1: Using stdin (Simplest)
62
105
 
63
- ```
64
- Extract information about the person: {{ stdin }}
106
+ 1. Create a template file `extract_person.j2`:
107
+
108
+ ```jinja
109
+ Extract information about the person from this text: {{ stdin }}
65
110
  ```
66
111
 
67
- 3. Create a schema file `schema.json`:
112
+ 2. Create a schema file `schema.json`:
68
113
 
69
114
  ```json
70
115
  {
71
116
  "type": "object",
72
117
  "properties": {
73
- "name": {
74
- "type": "string",
75
- "description": "The person's full name"
76
- },
77
- "age": {
78
- "type": "integer",
79
- "description": "The person's age"
80
- },
81
- "occupation": {
82
- "type": "string",
83
- "description": "The person's job or profession"
118
+ "person": {
119
+ "type": "object",
120
+ "properties": {
121
+ "name": {
122
+ "type": "string",
123
+ "description": "The person's full name"
124
+ },
125
+ "age": {
126
+ "type": "integer",
127
+ "description": "The person's age"
128
+ },
129
+ "occupation": {
130
+ "type": "string",
131
+ "description": "The person's job or profession"
132
+ }
133
+ },
134
+ "required": ["name", "age", "occupation"],
135
+ "additionalProperties": false
84
136
  }
85
137
  },
86
- "required": ["name", "age", "occupation"]
138
+ "required": ["person"],
139
+ "additionalProperties": false
140
+ }
141
+ ```
142
+
143
+ 3. Run the CLI:
144
+
145
+ ```bash
146
+ # Basic usage
147
+ echo "John Smith is a 35-year-old software engineer" | ostruct run extract_person.j2 schema.json
148
+
149
+ # For longer text using heredoc
150
+ cat << EOF | ostruct run extract_person.j2 schema.json
151
+ John Smith is a 35-year-old software engineer
152
+ working at Tech Corp. He has been programming
153
+ for over 10 years.
154
+ EOF
155
+
156
+ # With advanced options
157
+ echo "John Smith is a 35-year-old software engineer" | \
158
+ ostruct run extract_person.j2 schema.json \
159
+ --model gpt-4o \
160
+ --sys-prompt "Extract precise information about the person" \
161
+ --temperature 0.7
162
+ ```
163
+
164
+ The command will output:
165
+
166
+ ```json
167
+ {
168
+ "person": {
169
+ "name": "John Smith",
170
+ "age": 35,
171
+ "occupation": "software engineer"
172
+ }
87
173
  }
88
174
  ```
89
175
 
90
- 4. Run the CLI:
176
+ ### Example 2: Processing a Single File
177
+
178
+ 1. Create a template file `extract_from_file.j2`:
179
+
180
+ ```jinja
181
+ Extract information about the person from this text: {{ text.content }}
182
+ ```
183
+
184
+ 2. Use the same schema file `schema.json` as above.
185
+
186
+ 3. Run the CLI:
91
187
 
92
188
  ```bash
93
- ostruct run task.j2 schema.json
189
+ # Basic usage
190
+ ostruct run extract_from_file.j2 schema.json -f text input.txt
191
+
192
+ # With advanced options
193
+ ostruct run extract_from_file.j2 schema.json \
194
+ -f text input.txt \
195
+ --model gpt-4o \
196
+ --max-output-tokens 1000 \
197
+ --temperature 0.7
94
198
  ```
95
199
 
96
- Or with more options:
200
+ The command will output:
201
+
202
+ ```json
203
+ {
204
+ "person": {
205
+ "name": "John Smith",
206
+ "age": 35,
207
+ "occupation": "software engineer"
208
+ }
209
+ }
210
+ ```
211
+
212
+ ### Example 3: Processing Multiple Files
213
+
214
+ 1. Create a template file `extract_from_profiles.j2`:
215
+
216
+ ```jinja
217
+ Extract information about the people from this data:
218
+
219
+ {% for profile in profiles %}
220
+ == {{ profile.name }}
221
+
222
+ {{ profile.content }}
223
+
224
+ {% endfor %}
225
+ ```
226
+
227
+ 2. Create a schema file `schema.json` that describes a list of people (the person schema from above, wrapped in a `people` array):
228
+
229
+ ```json
230
+ {
231
+ "type": "object",
232
+ "properties": {
233
+ "people": {
234
+ "type": "array",
235
+ "items": {
236
+ "type": "object",
237
+ "properties": {
238
+ "name": {
239
+ "type": "string",
240
+ "description": "The person's full name"
241
+ },
242
+ "age": {
243
+ "type": "integer",
244
+ "description": "The person's age"
245
+ },
246
+ "occupation": {
247
+ "type": "string",
248
+ "description": "The person's job or profession"
249
+ }
250
+ },
251
+ "required": ["name", "age", "occupation"],
252
+ "additionalProperties": false
253
+ }
254
+ }
255
+ },
256
+ "required": ["people"],
257
+ "additionalProperties": false
258
+ }
259
+ ```
260
+
261
+ 3. Run the CLI:
97
262
 
98
263
  ```bash
99
- ostruct run task.j2 schema.json \
100
- -f content input.txt \
101
- -m gpt-4o \
102
- --sys-prompt "You are an expert content analyzer"
264
+ # Basic usage
265
+ ostruct run extract_from_profiles.j2 schema.json -p profiles "profiles/*.txt"
266
+
267
+ # With advanced options
268
+ ostruct run extract_from_profiles.j2 schema.json \
269
+ -p profiles "profiles/*.txt" \
270
+ --model gpt-4o \
271
+ --sys-prompt "Extract precise information about the person" \
272
+ --temperature 0.5
103
273
  ```
104
274
 
105
- Output:
275
+ The command will output:
106
276
 
107
277
  ```json
108
278
  {
109
- "name": "John Smith",
110
- "age": 35,
111
- "occupation": "software engineer"
279
+ "people": [
280
+ {
281
+ "name": "John Smith",
282
+ "age": 35,
283
+ "occupation": "software engineer"
284
+ },
285
+ {
286
+ "name": "Jane Doe",
287
+ "age": 28,
288
+ "occupation": "data scientist"
289
+ }
290
+ ]
112
291
  }
113
292
  ```
114
293
 
@@ -2,15 +2,17 @@ ostruct/__init__.py,sha256=X6zo6V7ZNMv731Wi388aTVQngD1410ExGwGx4J6lpyo,187
2
2
  ostruct/cli/__init__.py,sha256=sYHKT6o1kFy1acbXejzAvVm8Cy8U91Yf1l4DlzquHKg,409
3
3
  ostruct/cli/base_errors.py,sha256=S1cQxoiALbXKPxzgLo6XdSWpzPRb7RKz0QARmu9Zt4g,5987
4
4
  ostruct/cli/cache_manager.py,sha256=ej3KrRfkKKZ_lEp2JswjbJ5bW2ncsvna9NeJu81cqqs,5192
5
- ostruct/cli/cli.py,sha256=R9k4eHpREmvQJb-JLY1VRiWZJO8fJcer1QgnaDX0RrY,74011
5
+ ostruct/cli/cli.py,sha256=lagB4j8G1hg2NmAYvWEarA24qYuY2w-cuRWiqUzoWik,65105
6
6
  ostruct/cli/click_options.py,sha256=WbRJdB9sO63ChN3fnCP7XWs73DHKl0C1ervfwL11am0,11371
7
- ostruct/cli/errors.py,sha256=Muc4PygxON7M4bdZJ7-apztK9MrF252PXLPVNEogUv0,13322
7
+ ostruct/cli/errors.py,sha256=zJdJ-AyzjCE8glVKbJGAcB-Mz1J1SlzTDJDmhqAVFYc,14930
8
8
  ostruct/cli/exit_codes.py,sha256=uNjvQeUGwU1mlUJYIDrExAn7YlwOXZo603yLAwpqIwk,338
9
9
  ostruct/cli/file_info.py,sha256=ilpT8IuckfhadLF1QQAPLXJp7p8kVpffDEEJ2erHPZU,14485
10
10
  ostruct/cli/file_list.py,sha256=jLuCd1ardoAXX8FNwPgIqEM-ixzr1xP5ZSqXo2lmrj0,11270
11
11
  ostruct/cli/file_utils.py,sha256=J3-6fbEGQ7KD_bU81pAxueHLv9XV0X7f8FSMt_0AJGQ,22537
12
+ ostruct/cli/model_creation.py,sha256=TmqJVdnZOYtTctNihOlxWIbyAfX-zfxehP9rp2t6P2c,17586
12
13
  ostruct/cli/path_utils.py,sha256=j44q1OoLkqMErgK-qEuhuIZ1VyzqRIvNgxR1et9PoXA,4813
13
14
  ostruct/cli/progress.py,sha256=rj9nVEco5UeZORMbzd7mFJpFGJjbH9KbBFh5oTE5Anw,3415
15
+ ostruct/cli/schema_validation.py,sha256=ohEuxJ0KF93qphj0JSZDnrxDn0C2ZU37g-U2JY03onM,8154
14
16
  ostruct/cli/security/__init__.py,sha256=CQpkCgTFYlA1p6atpQeNgIKtE4LZGUKt4EbytbGKpCs,846
15
17
  ostruct/cli/security/allowed_checker.py,sha256=N5UXlpjdj5zAbKk-lRDlHiHV3KtQHtJNhtZI_qGB4zw,1638
16
18
  ostruct/cli/security/base.py,sha256=q9YUdHEj2eg5w8GEw5403E9OQKIjZbEiaWsvYFnCGLw,1359
@@ -35,8 +37,8 @@ ostruct/cli/token_utils.py,sha256=r4KPEO3Sec18Q6mU0aClK6XGShvusgUggXEQgEPPlaA,13
35
37
  ostruct/cli/utils.py,sha256=1UCl4rHjBWKR5EKugvlVGHiHjO3XXmqvkgeAUSyIPDU,831
36
38
  ostruct/cli/validators.py,sha256=BYFZeebCPZObTUjO1TaAMpsD6h7ROkYAFn9C7uf1Q68,2992
37
39
  ostruct/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- ostruct_cli-0.5.0.dist-info/LICENSE,sha256=QUOY6QCYVxAiH8vdrUTDqe3i9hQ5bcNczppDSVpLTjk,1068
39
- ostruct_cli-0.5.0.dist-info/METADATA,sha256=1SaOOVJvTKEqaheLw2MZzdwtinTgw2x_ms3xerwuCKA,6533
40
- ostruct_cli-0.5.0.dist-info/WHEEL,sha256=7dDg4QLnNKTvwIDR9Ac8jJaAmBC_owJrckbC0jjThyA,88
41
- ostruct_cli-0.5.0.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
42
- ostruct_cli-0.5.0.dist-info/RECORD,,
40
+ ostruct_cli-0.6.1.dist-info/LICENSE,sha256=QUOY6QCYVxAiH8vdrUTDqe3i9hQ5bcNczppDSVpLTjk,1068
41
+ ostruct_cli-0.6.1.dist-info/METADATA,sha256=2D0_QCNb3xN2Y_K1pMB5WmZBcU8KkN2rqS9qwZMa-pc,10426
42
+ ostruct_cli-0.6.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
43
+ ostruct_cli-0.6.1.dist-info/entry_points.txt,sha256=NFq9IuqHVTem0j9zKjV8C1si_zGcP1RL6Wbvt9fUDXw,48
44
+ ostruct_cli-0.6.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.0
2
+ Generator: poetry-core 2.1.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any