ostruct-cli 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/click_options.py +111 -8
- ostruct/cli/code_interpreter.py +210 -17
- ostruct/cli/commands/run.py +56 -0
- ostruct/cli/config.py +20 -1
- ostruct/cli/errors.py +2 -30
- ostruct/cli/file_info.py +55 -20
- ostruct/cli/file_utils.py +19 -3
- ostruct/cli/json_extract.py +75 -0
- ostruct/cli/model_creation.py +1 -1
- ostruct/cli/runner.py +461 -180
- ostruct/cli/sentinel.py +29 -0
- ostruct/cli/template_optimizer.py +11 -7
- ostruct/cli/template_processor.py +243 -115
- ostruct/cli/template_rendering.py +41 -1
- ostruct/cli/template_validation.py +41 -3
- ostruct/cli/types.py +14 -1
- {ostruct_cli-0.8.1.dist-info → ostruct_cli-0.8.3.dist-info}/METADATA +88 -2
- {ostruct_cli-0.8.1.dist-info → ostruct_cli-0.8.3.dist-info}/RECORD +21 -19
- {ostruct_cli-0.8.1.dist-info → ostruct_cli-0.8.3.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.8.1.dist-info → ostruct_cli-0.8.3.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.8.1.dist-info → ostruct_cli-0.8.3.dist-info}/entry_points.txt +0 -0
ostruct/cli/file_info.py
CHANGED
@@ -3,12 +3,26 @@
|
|
3
3
|
import hashlib
|
4
4
|
import logging
|
5
5
|
import os
|
6
|
+
from enum import Enum
|
6
7
|
from pathlib import Path
|
7
8
|
from typing import Any, Optional
|
8
9
|
|
9
10
|
from .errors import FileReadError, OstructFileNotFoundError, PathSecurityError
|
10
11
|
from .security import SecurityManager
|
11
12
|
|
13
|
+
|
14
|
+
class FileRoutingIntent(Enum):
    """Intended use of a file within the ostruct pipeline.

    Callers attach one of these members to a file so that later stages can
    tell the routing intentions apart and issue appropriate warnings and
    optimizations.
    """

    # CLI flags: -ft, --fta, legacy -f, -d
    TEMPLATE_ONLY = "template_only"
    # CLI flags: -fc, --fca
    CODE_INTERPRETER = "code_interpreter"
    # CLI flags: -fs, --fsa
    FILE_SEARCH = "file_search"
|
24
|
+
|
25
|
+
|
12
26
|
logger = logging.getLogger(__name__)
|
13
27
|
|
14
28
|
|
@@ -35,6 +49,7 @@ class FileInfo:
|
|
35
49
|
encoding: Optional[str] = None,
|
36
50
|
hash_value: Optional[str] = None,
|
37
51
|
routing_type: Optional[str] = None,
|
52
|
+
routing_intent: Optional[FileRoutingIntent] = None,
|
38
53
|
) -> None:
|
39
54
|
"""Initialize FileInfo instance.
|
40
55
|
|
@@ -45,6 +60,7 @@ class FileInfo:
|
|
45
60
|
encoding: Optional cached encoding
|
46
61
|
hash_value: Optional cached hash value
|
47
62
|
routing_type: How the file was routed (e.g., 'template', 'code-interpreter')
|
63
|
+
routing_intent: The intended use of the file in the pipeline
|
48
64
|
|
49
65
|
Raises:
|
50
66
|
FileNotFoundError: If the file does not exist
|
@@ -59,6 +75,7 @@ class FileInfo:
|
|
59
75
|
self.__size: Optional[int] = None
|
60
76
|
self.__mtime: Optional[float] = None
|
61
77
|
self.routing_type = routing_type
|
78
|
+
self.routing_intent = routing_intent
|
62
79
|
|
63
80
|
logger.debug(
|
64
81
|
"Creating FileInfo for path: %s, routing_type: %s",
|
@@ -132,23 +149,6 @@ class FileInfo:
|
|
132
149
|
f"Permission denied: {os.path.basename(str(path))}"
|
133
150
|
) from e
|
134
151
|
|
135
|
-
# Add warning for large template-only files accessed via .content
|
136
|
-
# Check if routing_type is 'template' or if it's part of a legacy -f/-d mapping
|
137
|
-
# For simplicity now, let's assume if routing_type is None it could be legacy template
|
138
|
-
is_template_routed = (
|
139
|
-
self.routing_type == "template" or self.routing_type is None
|
140
|
-
)
|
141
|
-
if (
|
142
|
-
is_template_routed and self.size and self.size > 100 * 1024
|
143
|
-
): # 100KB threshold
|
144
|
-
logger.warning(
|
145
|
-
f"File '{self.path}' ({self.size / 1024:.1f}KB) was routed for template-only access "
|
146
|
-
f"but its .content is being accessed. This will include the entire file content "
|
147
|
-
f"in the prompt sent to the AI. For large files intended for analysis or search, "
|
148
|
-
f"consider using -fc (Code Interpreter) or -fs (File Search) to optimize token usage, "
|
149
|
-
f"cost, and avoid exceeding model context limits."
|
150
|
-
)
|
151
|
-
|
152
152
|
@property
|
153
153
|
def path(self) -> str:
|
154
154
|
"""Get the path relative to security manager's base directory.
|
@@ -258,6 +258,32 @@ class FileInfo:
|
|
258
258
|
FileReadError: If the file cannot be read, wrapping the underlying cause
|
259
259
|
(FileNotFoundError, UnicodeDecodeError, etc)
|
260
260
|
"""
|
261
|
+
# Add warning for large template-only files accessed via .content
|
262
|
+
# Use intent-based logic with fallback to routing_type for backward compatibility
|
263
|
+
template_only_intents = {FileRoutingIntent.TEMPLATE_ONLY}
|
264
|
+
|
265
|
+
# Determine if this is template-only routing
|
266
|
+
is_template_only = False
|
267
|
+
if self.routing_intent is not None:
|
268
|
+
# Use intent if available (new logic)
|
269
|
+
is_template_only = self.routing_intent in template_only_intents
|
270
|
+
else:
|
271
|
+
# Fallback to old logic for backward compatibility
|
272
|
+
is_template_only = (
|
273
|
+
self.routing_type == "template" or self.routing_type is None
|
274
|
+
)
|
275
|
+
|
276
|
+
if (
|
277
|
+
is_template_only and self.size and self.size > 100 * 1024
|
278
|
+
): # 100KB threshold
|
279
|
+
logger.warning(
|
280
|
+
f"File '{self.path}' ({self.size / 1024:.1f}KB) was routed for template-only access "
|
281
|
+
f"but its .content is being accessed. This will include the entire file content "
|
282
|
+
f"in the prompt sent to the AI. For large files intended for analysis or search, "
|
283
|
+
f"consider using -fc (Code Interpreter) or -fs (File Search) to optimize token usage, "
|
284
|
+
f"cost, and avoid exceeding model context limits."
|
285
|
+
)
|
286
|
+
|
261
287
|
if self.__content is None:
|
262
288
|
try:
|
263
289
|
self._read_file()
|
@@ -384,6 +410,7 @@ class FileInfo:
|
|
384
410
|
path: str,
|
385
411
|
security_manager: SecurityManager,
|
386
412
|
routing_type: Optional[str] = None,
|
413
|
+
routing_intent: Optional[FileRoutingIntent] = None,
|
387
414
|
) -> "FileInfo":
|
388
415
|
"""Create FileInfo instance from path.
|
389
416
|
|
@@ -391,6 +418,7 @@ class FileInfo:
|
|
391
418
|
path: Path to file
|
392
419
|
security_manager: Security manager for path validation
|
393
420
|
routing_type: How the file was routed (e.g., 'template', 'code-interpreter')
|
421
|
+
routing_intent: The intended use of the file in the pipeline
|
394
422
|
|
395
423
|
Returns:
|
396
424
|
FileInfo instance
|
@@ -399,7 +427,12 @@ class FileInfo:
|
|
399
427
|
FileNotFoundError: If file does not exist
|
400
428
|
PathSecurityError: If path is not allowed
|
401
429
|
"""
|
402
|
-
return cls(
|
430
|
+
return cls(
|
431
|
+
path,
|
432
|
+
security_manager,
|
433
|
+
routing_type=routing_type,
|
434
|
+
routing_intent=routing_intent,
|
435
|
+
)
|
403
436
|
|
404
437
|
def __str__(self) -> str:
|
405
438
|
"""String representation showing path."""
|
@@ -419,8 +452,10 @@ class FileInfo:
|
|
419
452
|
|
420
453
|
Internal methods can modify private attributes, but external access is prevented.
|
421
454
|
"""
|
422
|
-
# Allow setting routing_type if
|
423
|
-
if name
|
455
|
+
# Allow setting routing_type and routing_intent if they're not already set (i.e., during __init__)
|
456
|
+
if name in ("routing_type", "routing_intent") and not hasattr(
|
457
|
+
self, name
|
458
|
+
):
|
424
459
|
object.__setattr__(self, name, value)
|
425
460
|
return
|
426
461
|
|
ostruct/cli/file_utils.py
CHANGED
@@ -58,7 +58,7 @@ from ostruct.cli.errors import (
|
|
58
58
|
PathSecurityError,
|
59
59
|
)
|
60
60
|
|
61
|
-
from .file_info import FileInfo
|
61
|
+
from .file_info import FileInfo, FileRoutingIntent
|
62
62
|
from .file_list import FileInfoList
|
63
63
|
from .security import SecurityManager
|
64
64
|
from .security.types import SecurityManagerProtocol
|
@@ -116,6 +116,7 @@ def collect_files_from_pattern(
|
|
116
116
|
pattern: str,
|
117
117
|
security_manager: SecurityManager,
|
118
118
|
routing_type: Optional[str] = None,
|
119
|
+
routing_intent: Optional["FileRoutingIntent"] = None,
|
119
120
|
) -> List[FileInfo]:
|
120
121
|
"""Collect files matching a glob pattern or exact file path.
|
121
122
|
|
@@ -123,6 +124,7 @@ def collect_files_from_pattern(
|
|
123
124
|
pattern: Glob pattern or file path to match
|
124
125
|
security_manager: Security manager for path validation
|
125
126
|
routing_type: How the file was routed
|
127
|
+
routing_intent: The intended use of the file in the pipeline
|
126
128
|
|
127
129
|
Returns:
|
128
130
|
List of FileInfo objects for matched files
|
@@ -134,7 +136,10 @@ def collect_files_from_pattern(
|
|
134
136
|
if os.path.isfile(pattern):
|
135
137
|
try:
|
136
138
|
file_info = FileInfo.from_path(
|
137
|
-
pattern,
|
139
|
+
pattern,
|
140
|
+
security_manager,
|
141
|
+
routing_type=routing_type,
|
142
|
+
routing_intent=routing_intent,
|
138
143
|
)
|
139
144
|
return [file_info]
|
140
145
|
except PathSecurityError:
|
@@ -154,7 +159,10 @@ def collect_files_from_pattern(
|
|
154
159
|
for path in matched_paths:
|
155
160
|
try:
|
156
161
|
file_info = FileInfo.from_path(
|
157
|
-
path,
|
162
|
+
path,
|
163
|
+
security_manager,
|
164
|
+
routing_type=routing_type,
|
165
|
+
routing_intent=routing_intent,
|
158
166
|
)
|
159
167
|
files.append(file_info)
|
160
168
|
except PathSecurityError:
|
@@ -172,6 +180,7 @@ def collect_files_from_directory(
|
|
172
180
|
recursive: bool = False,
|
173
181
|
allowed_extensions: Optional[List[str]] = None,
|
174
182
|
routing_type: Optional[str] = None,
|
183
|
+
routing_intent: Optional[FileRoutingIntent] = None,
|
175
184
|
**kwargs: Any,
|
176
185
|
) -> List[FileInfo]:
|
177
186
|
"""Collect files from a directory.
|
@@ -182,6 +191,7 @@ def collect_files_from_directory(
|
|
182
191
|
recursive: Whether to process subdirectories
|
183
192
|
allowed_extensions: List of allowed file extensions (without dot)
|
184
193
|
routing_type: How the file was routed
|
194
|
+
routing_intent: The intended use of the file in the pipeline
|
185
195
|
**kwargs: Additional arguments passed to FileInfo.from_path
|
186
196
|
|
187
197
|
Returns:
|
@@ -283,6 +293,7 @@ def collect_files_from_directory(
|
|
283
293
|
abs_path,
|
284
294
|
security_manager=security_manager,
|
285
295
|
routing_type=routing_type,
|
296
|
+
routing_intent=routing_intent,
|
286
297
|
**kwargs,
|
287
298
|
)
|
288
299
|
files.append(file_info)
|
@@ -347,6 +358,7 @@ def collect_files(
|
|
347
358
|
dir_extensions: Optional[List[str]] = None,
|
348
359
|
security_manager: Optional[SecurityManager] = None,
|
349
360
|
routing_type: Optional[str] = None,
|
361
|
+
routing_intent: Optional[FileRoutingIntent] = None,
|
350
362
|
**kwargs: Any,
|
351
363
|
) -> Dict[str, FileInfoList]:
|
352
364
|
"""Collect files from multiple sources.
|
@@ -359,6 +371,7 @@ def collect_files(
|
|
359
371
|
dir_extensions: List of file extensions to include in directory processing
|
360
372
|
security_manager: Security manager instance
|
361
373
|
routing_type: How the files were routed (passed to FileInfo)
|
374
|
+
routing_intent: The intended use of the file in the pipeline
|
362
375
|
**kwargs: Additional arguments passed to FileInfo.from_path
|
363
376
|
|
364
377
|
Returns:
|
@@ -411,6 +424,7 @@ def collect_files(
|
|
411
424
|
str(path),
|
412
425
|
security_manager=security_manager,
|
413
426
|
routing_type=routing_type,
|
427
|
+
routing_intent=routing_intent,
|
414
428
|
**kwargs,
|
415
429
|
)
|
416
430
|
files[name] = FileInfoList(
|
@@ -433,6 +447,7 @@ def collect_files(
|
|
433
447
|
str(pattern),
|
434
448
|
security_manager=security_manager,
|
435
449
|
routing_type=routing_type,
|
450
|
+
routing_intent=routing_intent,
|
436
451
|
**kwargs,
|
437
452
|
)
|
438
453
|
except PathSecurityError as e:
|
@@ -476,6 +491,7 @@ def collect_files(
|
|
476
491
|
recursive=dir_recursive,
|
477
492
|
allowed_extensions=dir_extensions,
|
478
493
|
routing_type=routing_type,
|
494
|
+
routing_intent=routing_intent,
|
479
495
|
**kwargs,
|
480
496
|
)
|
481
497
|
except PathSecurityError as e:
|
ostruct/cli/json_extract.py
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
import json
|
2
|
+
import re
|
3
|
+
from typing import Tuple
|
4
|
+
|
5
|
+
# Regex matching a ```json fenced block. Group 1 captures the text between the
# opening and closing fences; optional whitespace/newlines next to either fence
# are left out of the capture.
# NOTE: the capture is non-greedy, so it stops at the first closing ``` it can
# reach -- including one that happens to sit inside a JSON string value.
# The re.DOTALL / re.MULTILINE flags have no effect on this pattern, since it
# contains no `.`, `^` or `$`.
JSON_RE = re.compile(
    r"```json\s*\n?([\s\S]*?)\n?\s*```", re.DOTALL | re.MULTILINE
)
|
10
|
+
|
11
|
+
|
12
|
+
def split_json_and_text(raw: str) -> Tuple[dict, str]:
    """
    Extract JSON from fenced code blocks and return markdown-only text.

    The opening fence is the first occurrence of ```json in ``raw``. Every
    later ``` is then tried, in order, as the closing fence: the text between
    the two fences is stripped and handed to ``json.loads``. The first
    candidate that parses wins, which lets a ``` that appears inside a JSON
    string value be skipped.

    Args:
        raw: Raw response text containing fenced JSON and markdown

    Returns:
        Tuple of (parsed_json, markdown_text_after_json). The parsed value is
        whatever ``json.loads`` returns for the block; the markdown text is
        everything after the accepted closing fence with leading whitespace
        removed.

    Raises:
        ValueError: If no JSON block found or JSON is invalid

    Note:
        Returns first JSON block if multiple exist. Markdown text is everything
        after the JSON block for downstream annotation processing.

    Example:
        >>> content = '''```json
        ... {"result": "success"}
        ... ```
        ...
        ... [Download file.txt](sandbox:/mnt/data/file.txt)'''
        >>> data, markdown = split_json_and_text(content)
        >>> data
        {'result': 'success'}
        >>> markdown.strip()
        '[Download file.txt](sandbox:/mnt/data/file.txt)'
    """
    # Locate the opening fence; trailing whitespace belongs to the fence.
    opening = re.search(r"```json\s*", raw)
    if opening is None:
        raise ValueError("No ```json ... ``` block found")

    content_start = opening.end()
    fence = "```"

    # Walk the candidate closing fences in a single pass, remembering whether
    # we saw any at all so the right error can be raised without re-scanning.
    saw_closing_fence = False
    fence_at = raw.find(fence, content_start)
    while fence_at != -1:
        saw_closing_fence = True
        candidate = raw[content_start:fence_at].strip()
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            # This ``` may sit inside a JSON string; try the next one.
            fence_at = raw.find(fence, fence_at + len(fence))
            continue
        # Parsed cleanly: this is the real closing fence.
        return data, raw[fence_at + len(fence):].lstrip()

    if not saw_closing_fence:
        # The block was opened but never closed.
        raise ValueError("No ```json ... ``` block found")
    # At least one closing fence exists, but no candidate was valid JSON.
    raise ValueError("Invalid JSON in fenced block")
|
ostruct/cli/model_creation.py
CHANGED
@@ -197,7 +197,7 @@ def _get_type_with_constraints(
|
|
197
197
|
field_type_cls = Union[tuple(union_types)]
|
198
198
|
|
199
199
|
# Apply constraints based on the base type (non-null type)
|
200
|
-
if base_type
|
200
|
+
if base_type is str:
|
201
201
|
# Add string-specific constraints to field_kwargs
|
202
202
|
if "pattern" in field_schema:
|
203
203
|
field_kwargs["pattern"] = field_schema["pattern"]
|