ostruct-cli 0.6.1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/cli.py +116 -70
- ostruct/cli/errors.py +61 -54
- ostruct/cli/model_creation.py +67 -94
- ostruct/cli/security/errors.py +1 -1
- ostruct/cli/security/normalization.py +1 -1
- ostruct/cli/security/security_manager.py +48 -7
- ostruct/cli/template_extensions.py +32 -1
- ostruct/cli/template_utils.py +175 -16
- ostruct/cli/utils.py +3 -1
- ostruct/cli/validators.py +6 -2
- {ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/METADATA +39 -177
- {ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/RECORD +15 -15
- {ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.6.1.dist-info → ostruct_cli-0.6.2.dist-info}/entry_points.txt +0 -0
@@ -39,10 +39,16 @@ class SecurityManager:
|
|
39
39
|
|
40
40
|
The security model is based on:
|
41
41
|
1. A base directory that serves as the root for all file operations
|
42
|
+
(typically set to the current working directory by higher-level functions)
|
42
43
|
2. A set of explicitly allowed directories that can be accessed outside the base directory
|
43
44
|
3. Special handling for temporary directories that are always allowed
|
44
45
|
4. Case-sensitive or case-insensitive path handling based on platform
|
45
46
|
|
47
|
+
Note:
|
48
|
+
While the SecurityManager class itself requires base_dir to be explicitly provided,
|
49
|
+
higher-level functions in the CLI layer (like validate_security_manager and file_utils)
|
50
|
+
will automatically use the current working directory as the base_dir if none is specified.
|
51
|
+
|
46
52
|
Example:
|
47
53
|
>>> sm = SecurityManager("/base/dir")
|
48
54
|
>>> sm.add_allowed_directory("/tmp")
|
@@ -62,7 +68,9 @@ class SecurityManager:
|
|
62
68
|
"""Initialize the SecurityManager.
|
63
69
|
|
64
70
|
Args:
|
65
|
-
base_dir: The root directory for file operations.
|
71
|
+
base_dir: The root directory for file operations. While this parameter is required here,
|
72
|
+
note that higher-level functions in the CLI layer will automatically use the
|
73
|
+
current working directory if no base_dir is specified.
|
66
74
|
allowed_dirs: Additional directories allowed for access.
|
67
75
|
allow_temp_paths: Whether to allow temporary directory paths.
|
68
76
|
max_symlink_depth: Maximum depth for symlink resolution.
|
@@ -234,20 +242,53 @@ class SecurityManager:
|
|
234
242
|
context={"reason": SecurityErrorReasons.SYMLINK_ERROR},
|
235
243
|
) from e
|
236
244
|
|
237
|
-
#
|
245
|
+
# Check for directory traversal attempts
|
246
|
+
if ".." in str(norm_path):
|
247
|
+
logger.error("Directory traversal attempt detected: %s", path)
|
248
|
+
raise PathSecurityError(
|
249
|
+
"Directory traversal attempt blocked",
|
250
|
+
path=str(path),
|
251
|
+
context={
|
252
|
+
"reason": SecurityErrorReasons.PATH_TRAVERSAL,
|
253
|
+
"base_dir": str(self._base_dir),
|
254
|
+
"allowed_dirs": [str(d) for d in self._allowed_dirs],
|
255
|
+
},
|
256
|
+
)
|
257
|
+
|
258
|
+
# Check for suspicious Unicode characters
|
259
|
+
if any(
|
260
|
+
c in str(norm_path)
|
261
|
+
for c in [
|
262
|
+
"\u2024",
|
263
|
+
"\u2025",
|
264
|
+
"\u2026",
|
265
|
+
"\u0085",
|
266
|
+
"\u2028",
|
267
|
+
"\u2029",
|
268
|
+
]
|
269
|
+
):
|
270
|
+
logger.error("Suspicious Unicode characters detected: %s", path)
|
271
|
+
raise PathSecurityError(
|
272
|
+
"Suspicious characters detected in path",
|
273
|
+
path=str(path),
|
274
|
+
context={
|
275
|
+
"reason": SecurityErrorReasons.UNSAFE_UNICODE,
|
276
|
+
"base_dir": str(self._base_dir),
|
277
|
+
"allowed_dirs": [str(d) for d in self._allowed_dirs],
|
278
|
+
},
|
279
|
+
)
|
280
|
+
|
281
|
+
# For non-symlinks, check if the normalized path is allowed
|
238
282
|
logger.debug("Checking if path is allowed: %s", norm_path)
|
239
283
|
if not self.is_path_allowed(norm_path):
|
240
284
|
logger.error(
|
241
|
-
"
|
285
|
+
"Path outside allowed directories: %s (base_dir=%s, allowed_dirs=%s)",
|
242
286
|
path,
|
243
287
|
self._base_dir,
|
244
288
|
self._allowed_dirs,
|
245
289
|
)
|
246
290
|
raise PathSecurityError(
|
247
|
-
|
248
|
-
f"Access denied: {os.path.basename(str(path))} is outside "
|
249
|
-
"base directory and not in allowed directories"
|
250
|
-
),
|
291
|
+
"Path outside allowed directories",
|
251
292
|
path=str(path),
|
252
293
|
context={
|
253
294
|
"reason": SecurityErrorReasons.PATH_OUTSIDE_ALLOWED,
|
@@ -16,6 +16,26 @@ class CommentExtension(Extension):
|
|
16
16
|
1. Contents of comment blocks are completely ignored during parsing
|
17
17
|
2. Variables inside comments are not validated or processed
|
18
18
|
3. Comments are stripped from the output
|
19
|
+
4. Nested comments are not allowed (will raise a syntax error)
|
20
|
+
|
21
|
+
Example:
|
22
|
+
Valid usage:
|
23
|
+
```jinja
|
24
|
+
{% comment %}
|
25
|
+
This is a comment
|
26
|
+
{{ some_var }} # This variable will be ignored
|
27
|
+
{% endcomment %}
|
28
|
+
```
|
29
|
+
|
30
|
+
Invalid usage (will raise error):
|
31
|
+
```jinja
|
32
|
+
{% comment %}
|
33
|
+
Outer comment
|
34
|
+
{% comment %} # Error: Nested comments are not allowed
|
35
|
+
Inner comment
|
36
|
+
{% endcomment %}
|
37
|
+
{% endcomment %}
|
38
|
+
```
|
19
39
|
"""
|
20
40
|
|
21
41
|
tags = {"comment"}
|
@@ -23,6 +43,9 @@ class CommentExtension(Extension):
|
|
23
43
|
def parse(self, parser: Parser) -> nodes.Node:
|
24
44
|
"""Parse a comment block, ignoring its contents.
|
25
45
|
|
46
|
+
Nested comments are not allowed and will raise a syntax error.
|
47
|
+
This keeps the template syntax simpler and more predictable.
|
48
|
+
|
26
49
|
Args:
|
27
50
|
parser: The Jinja2 parser instance
|
28
51
|
|
@@ -31,6 +54,7 @@ class CommentExtension(Extension):
|
|
31
54
|
|
32
55
|
Raises:
|
33
56
|
TemplateSyntaxError: If the comment block is not properly closed
|
57
|
+
or if a nested comment is found
|
34
58
|
"""
|
35
59
|
# Get the line number for error reporting
|
36
60
|
lineno = parser.stream.current.lineno
|
@@ -38,10 +62,17 @@ class CommentExtension(Extension):
|
|
38
62
|
# Skip the opening comment tag
|
39
63
|
next(parser.stream)
|
40
64
|
|
41
|
-
# Skip until we find {% endcomment %}
|
65
|
+
# Skip until we find {% endcomment %}, rejecting nested comments
|
42
66
|
while not parser.stream.current.test("name:endcomment"):
|
43
67
|
if parser.stream.current.type == "eof":
|
44
68
|
raise parser.fail("Unclosed comment block", lineno)
|
69
|
+
|
70
|
+
# Explicitly reject nested comments
|
71
|
+
if parser.stream.current.test("name:comment"):
|
72
|
+
raise parser.fail(
|
73
|
+
"Nested comments are not allowed. Use separate comment blocks instead.",
|
74
|
+
parser.stream.current.lineno,
|
75
|
+
)
|
45
76
|
next(parser.stream)
|
46
77
|
|
47
78
|
# Skip the endcomment tag
|
ostruct/cli/template_utils.py
CHANGED
@@ -47,30 +47,189 @@ class TemplateMetadataError(TaskTemplateError):
|
|
47
47
|
|
48
48
|
|
49
49
|
def validate_json_schema(schema: Dict[str, Any]) -> None:
|
50
|
-
"""Validate
|
51
|
-
|
52
|
-
This function checks that the provided dictionary is a valid JSON Schema,
|
53
|
-
following the JSON Schema specification.
|
50
|
+
"""Validate a JSON schema.
|
54
51
|
|
55
52
|
Args:
|
56
|
-
schema:
|
53
|
+
schema: The schema to validate
|
57
54
|
|
58
55
|
Raises:
|
59
56
|
SchemaValidationError: If the schema is invalid
|
60
57
|
"""
|
61
58
|
try:
|
62
|
-
#
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
59
|
+
# 1. Quick structural validation
|
60
|
+
if not isinstance(schema, dict):
|
61
|
+
raise SchemaValidationError(
|
62
|
+
"Invalid JSON Schema: Schema must be a JSON object",
|
63
|
+
context={
|
64
|
+
"validation_type": "schema",
|
65
|
+
"found": type(schema).__name__,
|
66
|
+
"tips": ["Ensure your schema is a valid JSON object"],
|
67
|
+
},
|
68
|
+
)
|
69
|
+
|
70
|
+
# 2. Extract and validate schema wrapper
|
71
|
+
schema_to_validate = schema.get("schema", schema)
|
72
|
+
if not isinstance(schema_to_validate, dict):
|
73
|
+
raise SchemaValidationError(
|
74
|
+
"Invalid JSON Schema: Inner schema must be a JSON object",
|
75
|
+
context={
|
76
|
+
"validation_type": "schema",
|
77
|
+
"found": type(schema_to_validate).__name__,
|
78
|
+
"tips": [
|
79
|
+
"If using a schema wrapper, ensure the inner schema is a valid JSON object"
|
80
|
+
],
|
81
|
+
},
|
82
|
+
)
|
83
|
+
|
84
|
+
# 3. Check for circular references with enhanced detection
|
85
|
+
def resolve_ref(ref: str, root: Dict[str, Any]) -> Dict[str, Any]:
|
86
|
+
"""Resolve a JSON reference to its target object."""
|
87
|
+
if not ref.startswith("#/"):
|
88
|
+
raise SchemaValidationError(
|
89
|
+
"Invalid JSON Schema: Only local references are supported",
|
90
|
+
context={
|
91
|
+
"validation_type": "schema",
|
92
|
+
"ref": ref,
|
93
|
+
"tips": [
|
94
|
+
"Use only local references (starting with #/)"
|
95
|
+
],
|
96
|
+
},
|
97
|
+
)
|
98
|
+
|
99
|
+
parts = ref[2:].split("/")
|
100
|
+
current = root
|
101
|
+
for part in parts:
|
102
|
+
if part not in current:
|
103
|
+
raise SchemaValidationError(
|
104
|
+
f"Invalid JSON Schema: Reference {ref} not found",
|
105
|
+
context={
|
106
|
+
"validation_type": "schema",
|
107
|
+
"ref": ref,
|
108
|
+
"tips": [
|
109
|
+
"Check that all references point to existing definitions"
|
110
|
+
],
|
111
|
+
},
|
112
|
+
)
|
113
|
+
current = current[part]
|
114
|
+
return current
|
115
|
+
|
116
|
+
def check_refs(
|
117
|
+
obj: Any,
|
118
|
+
path: List[str],
|
119
|
+
seen_refs: List[str],
|
120
|
+
root: Dict[str, Any],
|
121
|
+
) -> None:
|
122
|
+
"""Check for circular references in the schema."""
|
123
|
+
if isinstance(obj, dict):
|
124
|
+
if "$ref" in obj:
|
125
|
+
ref = obj["$ref"]
|
126
|
+
if ref in seen_refs:
|
127
|
+
raise SchemaValidationError(
|
128
|
+
"Invalid JSON Schema: Circular reference found",
|
129
|
+
context={
|
130
|
+
"validation_type": "schema",
|
131
|
+
"path": "/".join(path),
|
132
|
+
"ref": ref,
|
133
|
+
"found": "circular reference",
|
134
|
+
"tips": [
|
135
|
+
"Remove circular references in your schema",
|
136
|
+
"Use unique identifiers instead of nested references",
|
137
|
+
"Consider flattening your schema structure",
|
138
|
+
],
|
139
|
+
},
|
140
|
+
)
|
141
|
+
|
142
|
+
# Resolve the reference and check its contents
|
143
|
+
seen_refs.append(ref)
|
144
|
+
try:
|
145
|
+
resolved = resolve_ref(ref, root)
|
146
|
+
check_refs(resolved, path, seen_refs.copy(), root)
|
147
|
+
except SchemaValidationError:
|
148
|
+
raise
|
149
|
+
except Exception as e:
|
150
|
+
raise SchemaValidationError(
|
151
|
+
f"Invalid JSON Schema: Failed to resolve reference {ref}",
|
152
|
+
context={
|
153
|
+
"validation_type": "schema",
|
154
|
+
"path": "/".join(path),
|
155
|
+
"ref": ref,
|
156
|
+
"error": str(e),
|
157
|
+
"tips": [
|
158
|
+
"Check that all references are properly formatted"
|
159
|
+
],
|
160
|
+
},
|
161
|
+
)
|
162
|
+
|
163
|
+
for key, value in obj.items():
|
164
|
+
if key != "$ref": # Skip checking the reference itself
|
165
|
+
check_refs(value, path + [key], seen_refs.copy(), root)
|
166
|
+
elif isinstance(obj, list):
|
167
|
+
for i, value in enumerate(obj):
|
168
|
+
check_refs(value, path + [str(i)], seen_refs.copy(), root)
|
169
|
+
|
170
|
+
check_refs(schema_to_validate, [], [], schema_to_validate)
|
171
|
+
|
172
|
+
# 4. Check required root properties
|
173
|
+
if "type" not in schema_to_validate:
|
174
|
+
raise SchemaValidationError(
|
175
|
+
"Invalid JSON Schema: Missing required 'type' property",
|
176
|
+
context={
|
177
|
+
"validation_type": "schema",
|
178
|
+
"tips": ["Add a 'type' property to your schema root"],
|
179
|
+
},
|
180
|
+
)
|
181
|
+
|
182
|
+
# 5. Check for required fields not defined in properties
|
183
|
+
if schema_to_validate.get("type") == "object":
|
184
|
+
required_fields = schema_to_validate.get("required", [])
|
185
|
+
properties = schema_to_validate.get("properties", {})
|
186
|
+
missing_fields = [
|
187
|
+
field for field in required_fields if field not in properties
|
188
|
+
]
|
189
|
+
if missing_fields:
|
190
|
+
raise SchemaValidationError(
|
191
|
+
"Invalid JSON Schema: Required fields must be defined in properties",
|
192
|
+
context={
|
193
|
+
"validation_type": "schema",
|
194
|
+
"missing_fields": missing_fields,
|
195
|
+
"tips": [
|
196
|
+
"Add the following fields to 'properties':",
|
197
|
+
*[f" - {field}" for field in missing_fields],
|
198
|
+
"Or remove them from 'required' if they are not needed",
|
199
|
+
],
|
200
|
+
},
|
201
|
+
)
|
202
|
+
|
203
|
+
# 6. Validate against JSON Schema meta-schema
|
204
|
+
try:
|
205
|
+
validator = jsonschema.validators.validator_for(schema_to_validate)
|
206
|
+
validator.check_schema(schema_to_validate)
|
207
|
+
except jsonschema.exceptions.SchemaError as e:
|
208
|
+
raise SchemaValidationError(
|
209
|
+
f"Invalid JSON Schema: {str(e)}",
|
210
|
+
context={
|
211
|
+
"validation_type": "schema",
|
212
|
+
"path": "/".join(str(p) for p in e.path),
|
213
|
+
"details": e.message,
|
214
|
+
"tips": [
|
215
|
+
"Ensure your schema follows JSON Schema specification",
|
216
|
+
"Check property types and formats",
|
217
|
+
"Validate schema structure",
|
218
|
+
],
|
219
|
+
},
|
220
|
+
)
|
221
|
+
|
222
|
+
except SchemaValidationError:
|
223
|
+
raise # Re-raise SchemaValidationError without wrapping
|
72
224
|
except Exception as e:
|
73
|
-
raise SchemaValidationError(
|
225
|
+
raise SchemaValidationError(
|
226
|
+
f"Invalid JSON Schema: {str(e)}",
|
227
|
+
context={
|
228
|
+
"validation_type": "schema",
|
229
|
+
"error": str(e),
|
230
|
+
"tips": ["Check schema syntax", "Validate JSON structure"],
|
231
|
+
},
|
232
|
+
)
|
74
233
|
|
75
234
|
|
76
235
|
def validate_response(
|
ostruct/cli/utils.py
CHANGED
@@ -12,7 +12,7 @@ def parse_mapping(mapping: str) -> Tuple[str, str]:
|
|
12
12
|
mapping: Mapping string in format 'name=value'
|
13
13
|
|
14
14
|
Returns:
|
15
|
-
Tuple of (name, value)
|
15
|
+
Tuple of (name, value) with whitespace stripped from both parts
|
16
16
|
|
17
17
|
Raises:
|
18
18
|
ValueError: If mapping format is invalid
|
@@ -23,6 +23,8 @@ def parse_mapping(mapping: str) -> Tuple[str, str]:
|
|
23
23
|
raise ValueError("Invalid mapping format")
|
24
24
|
|
25
25
|
name, value = mapping.split("=", 1)
|
26
|
+
name = name.strip()
|
27
|
+
value = value.strip()
|
26
28
|
if not name:
|
27
29
|
raise VariableNameError("Empty name in mapping")
|
28
30
|
if not value:
|
ostruct/cli/validators.py
CHANGED
@@ -49,7 +49,7 @@ def validate_variable(
|
|
49
49
|
value: List of "name=value" strings
|
50
50
|
|
51
51
|
Returns:
|
52
|
-
List of validated (name, value) tuples
|
52
|
+
List of validated (name, value) tuples with whitespace stripped from both parts
|
53
53
|
|
54
54
|
Raises:
|
55
55
|
click.BadParameter: If validation fails
|
@@ -64,6 +64,8 @@ def validate_variable(
|
|
64
64
|
f"Variable must be in format name=value: {var}"
|
65
65
|
)
|
66
66
|
name, val = var.split("=", 1)
|
67
|
+
name = name.strip()
|
68
|
+
val = val.strip()
|
67
69
|
if not name.isidentifier():
|
68
70
|
raise click.BadParameter(f"Invalid variable name: {name}")
|
69
71
|
result.append((name, val))
|
@@ -81,7 +83,7 @@ def validate_json_variable(
|
|
81
83
|
value: List of "name=json_string" values
|
82
84
|
|
83
85
|
Returns:
|
84
|
-
List of validated (name, parsed_json) tuples
|
86
|
+
List of validated (name, parsed_json) tuples with whitespace stripped from name
|
85
87
|
|
86
88
|
Raises:
|
87
89
|
click.BadParameter: If validation fails
|
@@ -96,6 +98,8 @@ def validate_json_variable(
|
|
96
98
|
f'JSON variable must be in format name=\'{"json":"value"}\': {var}'
|
97
99
|
)
|
98
100
|
name, json_str = var.split("=", 1)
|
101
|
+
name = name.strip()
|
102
|
+
json_str = json_str.strip()
|
99
103
|
if not name.isidentifier():
|
100
104
|
raise VariableNameError(f"Invalid variable name: {name}")
|
101
105
|
try:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: ostruct-cli
|
3
|
-
Version: 0.6.
|
3
|
+
Version: 0.6.2
|
4
4
|
Summary: CLI for OpenAI Structured Output
|
5
5
|
Author: Yaniv Golan
|
6
6
|
Author-email: yaniv@golan.name
|
@@ -33,7 +33,9 @@ Description-Content-Type: text/markdown
|
|
33
33
|
[](https://github.com/yaniv-golan/ostruct/actions/workflows/ci.yml)
|
34
34
|
[](https://opensource.org/licenses/MIT)
|
35
35
|
|
36
|
-
|
36
|
+
ostruct transforms unstructured inputs into structured, usable JSON output using OpenAI APIs.
|
37
|
+
|
38
|
+
ostruct will process a set of plain text files (data, source code, CSV, etc.), input variables, a dynamic prompt template, and a JSON schema specifying the desired output format, and will produce the result in JSON format.
|
37
39
|
|
38
40
|
## Features
|
39
41
|
|
@@ -43,6 +45,10 @@ Command-line interface for working with OpenAI models and structured output, pow
|
|
43
45
|
- Streaming support for real-time output
|
44
46
|
- Secure handling of sensitive data
|
45
47
|
|
48
|
+
## Requirements
|
49
|
+
|
50
|
+
- Python 3.10 or higher
|
51
|
+
|
46
52
|
## Installation
|
47
53
|
|
48
54
|
### For Users
|
@@ -209,196 +215,52 @@ The command will output:
|
|
209
215
|
}
|
210
216
|
```
|
211
217
|
|
212
|
-
|
218
|
+
## System Prompt Handling
|
213
219
|
|
214
|
-
|
220
|
+
ostruct-cli provides three ways to specify a system prompt, with a clear precedence order:
|
215
221
|
|
216
|
-
|
217
|
-
Extract information about the people from this data:
|
222
|
+
1. Command-line option (`--sys-prompt` or `--sys-file`):
|
218
223
|
|
219
|
-
|
220
|
-
|
224
|
+
```bash
|
225
|
+
# Direct string
|
226
|
+
ostruct run template.j2 schema.json --sys-prompt "You are an expert analyst"
|
221
227
|
|
222
|
-
|
228
|
+
# From file
|
229
|
+
ostruct run template.j2 schema.json --sys-file system_prompt.txt
|
230
|
+
```
|
223
231
|
|
224
|
-
|
225
|
-
```
|
232
|
+
2. Template frontmatter:
|
226
233
|
|
227
|
-
|
234
|
+
```jinja
|
235
|
+
---
|
236
|
+
system_prompt: You are an expert analyst
|
237
|
+
---
|
238
|
+
Extract information from: {{ text }}
|
239
|
+
```
|
228
240
|
|
229
|
-
|
230
|
-
{
|
231
|
-
"type": "object",
|
232
|
-
"properties": {
|
233
|
-
"people": {
|
234
|
-
"type": "array",
|
235
|
-
"items": {
|
236
|
-
"type": "object",
|
237
|
-
"properties": {
|
238
|
-
"name": {
|
239
|
-
"type": "string",
|
240
|
-
"description": "The person's full name"
|
241
|
-
},
|
242
|
-
"age": {
|
243
|
-
"type": "integer",
|
244
|
-
"description": "The person's age"
|
245
|
-
},
|
246
|
-
"occupation": {
|
247
|
-
"type": "string",
|
248
|
-
"description": "The person's job or profession"
|
249
|
-
}
|
250
|
-
},
|
251
|
-
"required": ["name", "age", "occupation"],
|
252
|
-
"additionalProperties": false
|
253
|
-
}
|
254
|
-
}
|
255
|
-
},
|
256
|
-
"required": ["people"],
|
257
|
-
"additionalProperties": false
|
258
|
-
}
|
259
|
-
```
|
241
|
+
3. Default system prompt (built into the CLI)
|
260
242
|
|
261
|
-
|
243
|
+
### Precedence Rules
|
262
244
|
|
263
|
-
|
264
|
-
# Basic usage
|
265
|
-
ostruct run extract_from_profiles.j2 schema.json -p profiles "profiles/*.txt"
|
245
|
+
When multiple system prompts are provided, they are resolved in this order:
|
266
246
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
--model gpt-4o \
|
271
|
-
--sys-prompt "Extract precise information about the person" \
|
272
|
-
--temperature 0.5
|
273
|
-
```
|
247
|
+
1. Command-line options take highest precedence:
|
248
|
+
- If both `--sys-prompt` and `--sys-file` are provided, `--sys-prompt` wins
|
249
|
+
- Use `--ignore-task-sysprompt` to ignore template frontmatter
|
274
250
|
|
275
|
-
|
251
|
+
2. Template frontmatter is used if:
|
252
|
+
- No command-line options are provided
|
253
|
+
- `--ignore-task-sysprompt` is not set
|
276
254
|
|
277
|
-
|
278
|
-
{
|
279
|
-
"people": [
|
280
|
-
{
|
281
|
-
"name": "John Smith",
|
282
|
-
"age": 35,
|
283
|
-
"occupation": "software engineer"
|
284
|
-
},
|
285
|
-
{
|
286
|
-
"name": "Jane Doe",
|
287
|
-
"age": 28,
|
288
|
-
"occupation": "data scientist"
|
289
|
-
}
|
290
|
-
]
|
291
|
-
}
|
292
|
-
```
|
293
|
-
|
294
|
-
### About Template Files
|
295
|
-
|
296
|
-
Template files use the `.j2` extension to indicate they contain Jinja2 template syntax. This convention:
|
297
|
-
|
298
|
-
- Enables proper syntax highlighting in most editors
|
299
|
-
- Makes it clear the file contains template logic
|
300
|
-
- Follows industry standards for Jinja2 templates
|
255
|
+
3. Default system prompt is used only if no other prompts are provided
|
301
256
|
|
302
|
-
|
303
|
-
|
304
|
-
The CLI revolves around a single subcommand called `run`. Basic usage:
|
257
|
+
Example combining multiple sources:
|
305
258
|
|
306
259
|
```bash
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
Common options include:
|
311
|
-
|
312
|
-
- File & Directory Inputs:
|
313
|
-
- `-f <NAME> <PATH>`: Map a single file to a variable name
|
314
|
-
- `-d <NAME> <DIR>`: Map a directory to a variable name
|
315
|
-
- `-p <NAME> <PATTERN>`: Map files matching a glob pattern to a variable name
|
316
|
-
- `-R, --recursive`: Enable recursive directory/pattern scanning
|
260
|
+
# Command-line prompt will override template frontmatter
|
261
|
+
ostruct run template.j2 schema.json --sys-prompt "Override prompt"
|
317
262
|
|
318
|
-
|
319
|
-
|
320
|
-
- `-J name='{"key":"value"}'`: Define a JSON variable
|
321
|
-
|
322
|
-
- Model Parameters:
|
323
|
-
- `-m, --model MODEL`: Select the OpenAI model (supported: gpt-4o, o1, o3-mini)
|
324
|
-
- `--temperature FLOAT`: Set sampling temperature (0.0-2.0)
|
325
|
-
- `--max-output-tokens INT`: Set maximum output tokens
|
326
|
-
- `--top-p FLOAT`: Set top-p sampling parameter (0.0-1.0)
|
327
|
-
- `--frequency-penalty FLOAT`: Adjust frequency penalty (-2.0-2.0)
|
328
|
-
- `--presence-penalty FLOAT`: Adjust presence penalty (-2.0-2.0)
|
329
|
-
- `--reasoning-effort [low|medium|high]`: Control model reasoning effort
|
330
|
-
|
331
|
-
- System Prompt:
|
332
|
-
- `--sys-prompt TEXT`: Provide system prompt directly
|
333
|
-
- `--sys-file FILE`: Load system prompt from file
|
334
|
-
- `--ignore-task-sysprompt`: Ignore system prompt in template frontmatter
|
335
|
-
|
336
|
-
- API Configuration:
|
337
|
-
- `--api-key KEY`: OpenAI API key (defaults to OPENAI_API_KEY env var)
|
338
|
-
- `--timeout FLOAT`: API timeout in seconds (default: 60.0)
|
339
|
-
|
340
|
-
## Debug Options
|
341
|
-
|
342
|
-
- `--debug-validation`: Show detailed schema validation debugging
|
343
|
-
- `--debug-openai-stream`: Enable low-level debug output for OpenAI streaming
|
344
|
-
- `--progress-level {none,basic,detailed}`: Set progress reporting level
|
345
|
-
- `none`: No progress indicators
|
346
|
-
- `basic`: Show key operation steps (default)
|
347
|
-
- `detailed`: Show all steps with additional info
|
348
|
-
- `--show-model-schema`: Display the generated Pydantic model schema
|
349
|
-
- `--verbose`: Enable verbose logging
|
350
|
-
- `--dry-run`: Validate and render template without making API calls
|
351
|
-
- `--no-progress`: Disable all progress indicators
|
352
|
-
|
353
|
-
All debug and error logs are written to:
|
354
|
-
|
355
|
-
- `~/.ostruct/logs/ostruct.log`: General application logs
|
356
|
-
- `~/.ostruct/logs/openai_stream.log`: OpenAI streaming operations logs
|
357
|
-
|
358
|
-
For more detailed documentation and examples, visit our [documentation](https://ostruct.readthedocs.io/).
|
359
|
-
|
360
|
-
## Development
|
361
|
-
|
362
|
-
To contribute or report issues, please visit our [GitHub repository](https://github.com/yaniv-golan/ostruct).
|
363
|
-
|
364
|
-
## Development Setup
|
365
|
-
|
366
|
-
1. Clone the repository:
|
367
|
-
|
368
|
-
```bash
|
369
|
-
git clone https://github.com/yanivgolan/ostruct.git
|
370
|
-
cd ostruct
|
371
|
-
```
|
372
|
-
|
373
|
-
2. Install Poetry if you haven't already:
|
374
|
-
|
375
|
-
```bash
|
376
|
-
curl -sSL https://install.python-poetry.org | python3 -
|
377
|
-
```
|
378
|
-
|
379
|
-
3. Install dependencies:
|
380
|
-
|
381
|
-
```bash
|
382
|
-
poetry install
|
383
|
-
```
|
384
|
-
|
385
|
-
4. Install openai-structured in editable mode:
|
386
|
-
|
387
|
-
```bash
|
388
|
-
poetry add --editable ../openai-structured # Adjust path as needed
|
263
|
+
# Ignore template frontmatter and use default
|
264
|
+
ostruct run template.j2 schema.json --ignore-task-sysprompt
|
389
265
|
```
|
390
266
|
|
391
|
-
5. Run tests:
|
392
|
-
|
393
|
-
```bash
|
394
|
-
poetry run pytest
|
395
|
-
```
|
396
|
-
|
397
|
-
## Contributing
|
398
|
-
|
399
|
-
Contributions are welcome! Please feel free to submit a Pull Request.
|
400
|
-
|
401
|
-
## License
|
402
|
-
|
403
|
-
This project is licensed under the MIT License - see the LICENSE file for details.
|
404
|
-
|