ostruct-cli 0.1.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ostruct/cli/__init__.py +2 -2
- ostruct/cli/cli.py +513 -599
- ostruct/cli/click_options.py +248 -0
- ostruct/cli/errors.py +297 -139
- ostruct/cli/file_info.py +82 -11
- ostruct/cli/file_list.py +57 -1
- ostruct/cli/file_utils.py +99 -8
- ostruct/cli/security.py +793 -140
- ostruct/cli/security_types.py +3 -6
- ostruct/cli/template_extensions.py +1 -1
- ostruct/cli/template_io.py +4 -2
- ostruct/cli/template_rendering.py +0 -8
- ostruct/cli/template_validation.py +1 -1
- {ostruct_cli-0.1.4.dist-info → ostruct_cli-0.3.0.dist-info}/METADATA +4 -2
- ostruct_cli-0.3.0.dist-info/RECORD +28 -0
- ostruct_cli-0.1.4.dist-info/RECORD +0 -27
- {ostruct_cli-0.1.4.dist-info → ostruct_cli-0.3.0.dist-info}/LICENSE +0 -0
- {ostruct_cli-0.1.4.dist-info → ostruct_cli-0.3.0.dist-info}/WHEEL +0 -0
- {ostruct_cli-0.1.4.dist-info → ostruct_cli-0.3.0.dist-info}/entry_points.txt +0 -0
ostruct/cli/cli.py
CHANGED
@@ -1,18 +1,12 @@
|
|
1
1
|
"""Command-line interface for making structured OpenAI API calls."""
|
2
2
|
|
3
|
-
import argparse
|
4
3
|
import asyncio
|
5
4
|
import json
|
6
5
|
import logging
|
7
6
|
import os
|
8
7
|
import sys
|
8
|
+
from dataclasses import dataclass
|
9
9
|
from enum import Enum, IntEnum
|
10
|
-
|
11
|
-
if sys.version_info >= (3, 11):
|
12
|
-
from enum import StrEnum
|
13
|
-
|
14
|
-
from datetime import date, datetime, time
|
15
|
-
from pathlib import Path
|
16
10
|
from typing import (
|
17
11
|
Any,
|
18
12
|
Dict,
|
@@ -29,6 +23,13 @@ from typing import (
|
|
29
23
|
overload,
|
30
24
|
)
|
31
25
|
|
26
|
+
if sys.version_info >= (3, 11):
|
27
|
+
from enum import StrEnum
|
28
|
+
|
29
|
+
from datetime import date, datetime, time
|
30
|
+
from pathlib import Path
|
31
|
+
|
32
|
+
import click
|
32
33
|
import jinja2
|
33
34
|
import tiktoken
|
34
35
|
import yaml
|
@@ -71,8 +72,11 @@ from pydantic.functional_validators import BeforeValidator
|
|
71
72
|
from pydantic.types import constr
|
72
73
|
from typing_extensions import TypeAlias
|
73
74
|
|
74
|
-
from
|
75
|
+
from ostruct.cli.click_options import create_click_command
|
76
|
+
|
77
|
+
from .. import __version__ # noqa: F401 - Used in package metadata
|
75
78
|
from .errors import (
|
79
|
+
CLIError,
|
76
80
|
DirectoryNotFoundError,
|
77
81
|
FieldDefinitionError,
|
78
82
|
FileNotFoundError,
|
@@ -89,7 +93,6 @@ from .errors import (
|
|
89
93
|
)
|
90
94
|
from .file_utils import FileInfoList, TemplateValue, collect_files
|
91
95
|
from .path_utils import validate_path_mapping
|
92
|
-
from .progress import ProgressContext
|
93
96
|
from .security import SecurityManager
|
94
97
|
from .template_env import create_jinja_env
|
95
98
|
from .template_utils import SystemPromptError, render_template
|
@@ -97,6 +100,45 @@ from .template_utils import SystemPromptError, render_template
|
|
97
100
|
# Constants
|
98
101
|
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
|
99
102
|
|
103
|
+
|
104
|
+
@dataclass
|
105
|
+
class Namespace:
|
106
|
+
"""Compatibility class to mimic argparse.Namespace for existing code."""
|
107
|
+
|
108
|
+
task: Optional[str]
|
109
|
+
task_file: Optional[str]
|
110
|
+
file: List[str]
|
111
|
+
files: List[str]
|
112
|
+
dir: List[str]
|
113
|
+
allowed_dir: List[str]
|
114
|
+
base_dir: str
|
115
|
+
allowed_dir_file: Optional[str]
|
116
|
+
dir_recursive: bool
|
117
|
+
dir_ext: Optional[str]
|
118
|
+
var: List[str]
|
119
|
+
json_var: List[str]
|
120
|
+
system_prompt: Optional[str]
|
121
|
+
system_prompt_file: Optional[str]
|
122
|
+
ignore_task_sysprompt: bool
|
123
|
+
schema_file: str
|
124
|
+
model: str
|
125
|
+
temperature: float
|
126
|
+
max_tokens: Optional[int]
|
127
|
+
top_p: float
|
128
|
+
frequency_penalty: float
|
129
|
+
presence_penalty: float
|
130
|
+
timeout: float
|
131
|
+
output_file: Optional[str]
|
132
|
+
dry_run: bool
|
133
|
+
no_progress: bool
|
134
|
+
api_key: Optional[str]
|
135
|
+
verbose: bool
|
136
|
+
debug_openai_stream: bool
|
137
|
+
show_model_schema: bool
|
138
|
+
debug_validation: bool
|
139
|
+
progress_level: str = "basic" # Default to 'basic' if not specified
|
140
|
+
|
141
|
+
|
100
142
|
# Set up logging
|
101
143
|
logger = logging.getLogger(__name__)
|
102
144
|
|
@@ -206,7 +248,6 @@ def _get_type_with_constraints(
|
|
206
248
|
Returns:
|
207
249
|
Tuple of (type, field)
|
208
250
|
"""
|
209
|
-
field_type = field_schema.get("type")
|
210
251
|
field_kwargs: Dict[str, Any] = {}
|
211
252
|
|
212
253
|
# Add common field metadata
|
@@ -219,21 +260,40 @@ def _get_type_with_constraints(
|
|
219
260
|
if "readOnly" in field_schema:
|
220
261
|
field_kwargs["frozen"] = field_schema["readOnly"]
|
221
262
|
|
263
|
+
field_type = field_schema.get("type")
|
264
|
+
|
222
265
|
# Handle array type
|
223
266
|
if field_type == "array":
|
224
267
|
items_schema = field_schema.get("items", {})
|
225
268
|
if not items_schema:
|
226
269
|
return (List[Any], Field(**field_kwargs))
|
227
270
|
|
228
|
-
# Create nested model
|
229
|
-
|
230
|
-
items_schema,
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
271
|
+
# Create nested model for object items
|
272
|
+
if (
|
273
|
+
isinstance(items_schema, dict)
|
274
|
+
and items_schema.get("type") == "object"
|
275
|
+
):
|
276
|
+
array_item_model = create_dynamic_model(
|
277
|
+
items_schema,
|
278
|
+
base_name=f"{base_name}_{field_name}_Item",
|
279
|
+
show_schema=False,
|
280
|
+
debug_validation=False,
|
281
|
+
)
|
282
|
+
array_type: Type[List[Any]] = List[array_item_model] # type: ignore[valid-type]
|
283
|
+
return (array_type, Field(**field_kwargs))
|
284
|
+
|
285
|
+
# For non-object items, use the type directly
|
286
|
+
item_type = items_schema.get("type", "string")
|
287
|
+
if item_type == "string":
|
288
|
+
return (List[str], Field(**field_kwargs))
|
289
|
+
elif item_type == "integer":
|
290
|
+
return (List[int], Field(**field_kwargs))
|
291
|
+
elif item_type == "number":
|
292
|
+
return (List[float], Field(**field_kwargs))
|
293
|
+
elif item_type == "boolean":
|
294
|
+
return (List[bool], Field(**field_kwargs))
|
295
|
+
else:
|
296
|
+
return (List[Any], Field(**field_kwargs))
|
237
297
|
|
238
298
|
# Handle object type
|
239
299
|
if field_type == "object":
|
@@ -342,25 +402,43 @@ V = TypeVar("V")
|
|
342
402
|
|
343
403
|
|
344
404
|
def estimate_tokens_for_chat(
|
345
|
-
messages: List[Dict[str, str]],
|
405
|
+
messages: List[Dict[str, str]],
|
406
|
+
model: str,
|
407
|
+
encoder: Any = None,
|
346
408
|
) -> int:
|
347
|
-
"""Estimate the number of tokens in a chat completion.
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
409
|
+
"""Estimate the number of tokens in a chat completion.
|
410
|
+
|
411
|
+
Args:
|
412
|
+
messages: List of chat messages
|
413
|
+
model: Model name
|
414
|
+
encoder: Optional tiktoken encoder for testing. If provided, only uses encoder.encode() results.
|
415
|
+
"""
|
416
|
+
if encoder is None:
|
417
|
+
try:
|
418
|
+
# Try to get the encoding for the specific model
|
419
|
+
encoder = tiktoken.get_encoding("o200k_base")
|
420
|
+
except KeyError:
|
421
|
+
# Fall back to cl100k_base for unknown models
|
422
|
+
encoder = tiktoken.get_encoding("cl100k_base")
|
423
|
+
|
424
|
+
# Use standard token counting logic for real tiktoken encoders
|
425
|
+
num_tokens = 0
|
426
|
+
for message in messages:
|
427
|
+
# Add message overhead
|
428
|
+
num_tokens += 4 # every message follows <im_start>{role/name}\n{content}<im_end>\n
|
429
|
+
for key, value in message.items():
|
430
|
+
num_tokens += len(encoder.encode(str(value)))
|
431
|
+
if key == "name": # if there's a name, the role is omitted
|
432
|
+
num_tokens -= 1 # role is omitted
|
433
|
+
num_tokens += 2 # every reply is primed with <im_start>assistant
|
434
|
+
return num_tokens
|
435
|
+
else:
|
436
|
+
# For mock encoders in tests, just return the length of encoded content
|
437
|
+
num_tokens = 0
|
438
|
+
for message in messages:
|
439
|
+
for value in message.values():
|
440
|
+
num_tokens += len(encoder.encode(str(value)))
|
441
|
+
return num_tokens
|
364
442
|
|
365
443
|
|
366
444
|
def get_default_token_limit(model: str) -> int:
|
@@ -370,15 +448,17 @@ def get_default_token_limit(model: str) -> int:
|
|
370
448
|
need to be updated if OpenAI changes the models' capabilities.
|
371
449
|
|
372
450
|
Args:
|
373
|
-
model: The model name (e.g., 'gpt-4o', '
|
451
|
+
model: The model name (e.g., 'gpt-4o', 'o1-mini', 'o3-mini')
|
374
452
|
|
375
453
|
Returns:
|
376
454
|
The default token limit for the model
|
377
455
|
"""
|
378
|
-
if "o1" in model:
|
379
|
-
return 100_000 # o1 supports up to 100K output tokens
|
456
|
+
if "o1-" in model:
|
457
|
+
return 100_000 # o1-mini supports up to 100K output tokens
|
380
458
|
elif "gpt-4o" in model:
|
381
|
-
return 16_384 # gpt-4o
|
459
|
+
return 16_384 # gpt-4o supports up to 16K output tokens
|
460
|
+
elif "o3-" in model:
|
461
|
+
return 16_384 # o3-mini supports up to 16K output tokens
|
382
462
|
else:
|
383
463
|
return 4_096 # default fallback
|
384
464
|
|
@@ -390,15 +470,15 @@ def get_context_window_limit(model: str) -> int:
|
|
390
470
|
need to be updated if OpenAI changes the models' capabilities.
|
391
471
|
|
392
472
|
Args:
|
393
|
-
model: The model name (e.g., 'gpt-4o', '
|
473
|
+
model: The model name (e.g., 'gpt-4o', 'o1-mini', 'o3-mini')
|
394
474
|
|
395
475
|
Returns:
|
396
476
|
The context window limit for the model
|
397
477
|
"""
|
398
|
-
if "o1" in model:
|
399
|
-
return 200_000 # o1 supports 200K total context window
|
400
|
-
elif "gpt-4o" in model:
|
401
|
-
return 128_000 # gpt-4o and
|
478
|
+
if "o1-" in model:
|
479
|
+
return 200_000 # o1-mini supports 200K total context window
|
480
|
+
elif "gpt-4o" in model or "o3-" in model:
|
481
|
+
return 128_000 # gpt-4o and o3-mini support 128K context window
|
402
482
|
else:
|
403
483
|
return 8_192 # default fallback
|
404
484
|
|
@@ -442,6 +522,7 @@ def validate_token_limits(
|
|
442
522
|
def process_system_prompt(
|
443
523
|
task_template: str,
|
444
524
|
system_prompt: Optional[str],
|
525
|
+
system_prompt_file: Optional[str],
|
445
526
|
template_context: Dict[str, Any],
|
446
527
|
env: jinja2.Environment,
|
447
528
|
ignore_task_sysprompt: bool = False,
|
@@ -450,7 +531,8 @@ def process_system_prompt(
|
|
450
531
|
|
451
532
|
Args:
|
452
533
|
task_template: The task template string
|
453
|
-
system_prompt: Optional system prompt string
|
534
|
+
system_prompt: Optional system prompt string
|
535
|
+
system_prompt_file: Optional path to system prompt file
|
454
536
|
template_context: Template context for rendering
|
455
537
|
env: Jinja2 environment
|
456
538
|
ignore_task_sysprompt: Whether to ignore system prompt in task template
|
@@ -466,18 +548,24 @@ def process_system_prompt(
|
|
466
548
|
# Default system prompt
|
467
549
|
default_prompt = "You are a helpful assistant."
|
468
550
|
|
551
|
+
# Check for conflicting arguments
|
552
|
+
if system_prompt is not None and system_prompt_file is not None:
|
553
|
+
raise SystemPromptError(
|
554
|
+
"Cannot specify both --system-prompt and --system-prompt-file"
|
555
|
+
)
|
556
|
+
|
469
557
|
# Try to get system prompt from CLI argument first
|
470
|
-
if
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
raise SystemPromptError(f"Invalid system prompt file: {e}")
|
558
|
+
if system_prompt_file is not None:
|
559
|
+
try:
|
560
|
+
name, path = validate_path_mapping(
|
561
|
+
f"system_prompt={system_prompt_file}"
|
562
|
+
)
|
563
|
+
with open(path, "r", encoding="utf-8") as f:
|
564
|
+
system_prompt = f.read().strip()
|
565
|
+
except (FileNotFoundError, PathSecurityError) as e:
|
566
|
+
raise SystemPromptError(f"Invalid system prompt file: {e}")
|
480
567
|
|
568
|
+
if system_prompt is not None:
|
481
569
|
# Render system prompt with template context
|
482
570
|
try:
|
483
571
|
template = env.from_string(system_prompt)
|
@@ -600,30 +688,45 @@ def _validate_path_mapping_internal(
|
|
600
688
|
ValueError: If the format is invalid (missing "=").
|
601
689
|
OSError: If there is an underlying OS error (permissions, etc.).
|
602
690
|
"""
|
691
|
+
logger = logging.getLogger(__name__)
|
692
|
+
logger.debug("Starting path validation for mapping: %r", mapping)
|
693
|
+
logger.debug("Parameters - is_dir: %r, base_dir: %r", is_dir, base_dir)
|
694
|
+
|
603
695
|
try:
|
604
696
|
if not mapping or "=" not in mapping:
|
697
|
+
logger.debug("Invalid mapping format: %r", mapping)
|
605
698
|
raise ValueError(
|
606
699
|
"Invalid path mapping format. Expected format: name=path"
|
607
700
|
)
|
608
701
|
|
609
702
|
name, path = mapping.split("=", 1)
|
703
|
+
logger.debug("Split mapping - name: %r, path: %r", name, path)
|
704
|
+
|
610
705
|
if not name:
|
706
|
+
logger.debug("Empty name in mapping")
|
611
707
|
raise VariableNameError(
|
612
708
|
f"Empty name in {'directory' if is_dir else 'file'} mapping"
|
613
709
|
)
|
614
710
|
|
615
711
|
if not path:
|
712
|
+
logger.debug("Empty path in mapping")
|
616
713
|
raise VariableValueError("Path cannot be empty")
|
617
714
|
|
618
715
|
# Convert to Path object and resolve against base_dir if provided
|
716
|
+
logger.debug("Creating Path object for: %r", path)
|
619
717
|
path_obj = Path(path)
|
620
718
|
if base_dir:
|
719
|
+
logger.debug("Resolving against base_dir: %r", base_dir)
|
621
720
|
path_obj = Path(base_dir) / path_obj
|
721
|
+
logger.debug("Path object created: %r", path_obj)
|
622
722
|
|
623
723
|
# Resolve the path to catch directory traversal attempts
|
624
724
|
try:
|
725
|
+
logger.debug("Attempting to resolve path: %r", path_obj)
|
625
726
|
resolved_path = path_obj.resolve()
|
727
|
+
logger.debug("Resolved path: %r", resolved_path)
|
626
728
|
except OSError as e:
|
729
|
+
logger.error("Failed to resolve path: %s", e)
|
627
730
|
raise OSError(f"Failed to resolve path: {e}")
|
628
731
|
|
629
732
|
# Check for directory traversal
|
@@ -691,34 +794,45 @@ def _validate_path_mapping_internal(
|
|
691
794
|
raise
|
692
795
|
|
693
796
|
|
694
|
-
def validate_task_template(
|
797
|
+
def validate_task_template(
|
798
|
+
task: Optional[str], task_file: Optional[str]
|
799
|
+
) -> str:
|
695
800
|
"""Validate and load a task template.
|
696
801
|
|
697
802
|
Args:
|
698
|
-
task: The task template string
|
803
|
+
task: The task template string
|
804
|
+
task_file: Path to task template file
|
699
805
|
|
700
806
|
Returns:
|
701
807
|
The task template string
|
702
808
|
|
703
809
|
Raises:
|
704
|
-
TaskTemplateVariableError: If
|
810
|
+
TaskTemplateVariableError: If neither task nor task_file is provided, or if both are provided
|
705
811
|
TaskTemplateSyntaxError: If the template has invalid syntax
|
706
812
|
FileNotFoundError: If the template file does not exist
|
707
813
|
PathSecurityError: If the template file path violates security constraints
|
708
814
|
"""
|
709
|
-
|
815
|
+
if task is not None and task_file is not None:
|
816
|
+
raise TaskTemplateVariableError(
|
817
|
+
"Cannot specify both --task and --task-file"
|
818
|
+
)
|
710
819
|
|
711
|
-
|
712
|
-
|
713
|
-
|
820
|
+
if task is None and task_file is None:
|
821
|
+
raise TaskTemplateVariableError(
|
822
|
+
"Must specify either --task or --task-file"
|
823
|
+
)
|
824
|
+
|
825
|
+
template_content: str
|
826
|
+
if task_file is not None:
|
714
827
|
try:
|
715
|
-
name, path = validate_path_mapping(f"task={
|
828
|
+
name, path = validate_path_mapping(f"task={task_file}")
|
716
829
|
with open(path, "r", encoding="utf-8") as f:
|
717
830
|
template_content = f.read()
|
718
831
|
except (FileNotFoundError, PathSecurityError) as e:
|
719
|
-
raise TaskTemplateVariableError(
|
832
|
+
raise TaskTemplateVariableError(str(e))
|
833
|
+
else:
|
834
|
+
template_content = task # type: ignore # We know task is str here due to the checks above
|
720
835
|
|
721
|
-
# Validate template syntax
|
722
836
|
try:
|
723
837
|
env = jinja2.Environment(undefined=jinja2.StrictUndefined)
|
724
838
|
env.parse(template_content)
|
@@ -795,7 +909,7 @@ def validate_schema_file(
|
|
795
909
|
|
796
910
|
|
797
911
|
def collect_template_files(
|
798
|
-
args:
|
912
|
+
args: Namespace,
|
799
913
|
security_manager: SecurityManager,
|
800
914
|
) -> Dict[str, TemplateValue]:
|
801
915
|
"""Collect files from command line arguments.
|
@@ -828,14 +942,17 @@ def collect_template_files(
|
|
828
942
|
# Wrap file-related errors
|
829
943
|
raise ValueError(f"File access error: {e}")
|
830
944
|
except Exception as e:
|
945
|
+
# Don't wrap InvalidJSONError
|
946
|
+
if isinstance(e, InvalidJSONError):
|
947
|
+
raise
|
831
948
|
# Check if this is a wrapped security error
|
832
949
|
if isinstance(e.__cause__, PathSecurityError):
|
833
950
|
raise e.__cause__
|
834
|
-
# Wrap
|
951
|
+
# Wrap other errors
|
835
952
|
raise ValueError(f"Error collecting files: {e}")
|
836
953
|
|
837
954
|
|
838
|
-
def collect_simple_variables(args:
|
955
|
+
def collect_simple_variables(args: Namespace) -> Dict[str, str]:
|
839
956
|
"""Collect simple string variables from --var arguments.
|
840
957
|
|
841
958
|
Args:
|
@@ -868,7 +985,7 @@ def collect_simple_variables(args: argparse.Namespace) -> Dict[str, str]:
|
|
868
985
|
return variables
|
869
986
|
|
870
987
|
|
871
|
-
def collect_json_variables(args:
|
988
|
+
def collect_json_variables(args: Namespace) -> Dict[str, Any]:
|
872
989
|
"""Collect JSON variables from --json-var arguments.
|
873
990
|
|
874
991
|
Args:
|
@@ -898,7 +1015,7 @@ def collect_json_variables(args: argparse.Namespace) -> Dict[str, Any]:
|
|
898
1015
|
all_names.add(name)
|
899
1016
|
except json.JSONDecodeError as e:
|
900
1017
|
raise InvalidJSONError(
|
901
|
-
f"
|
1018
|
+
f"Error parsing JSON for variable '{name}': {str(e)}. Input was: {json_str}"
|
902
1019
|
)
|
903
1020
|
except ValueError:
|
904
1021
|
raise VariableNameError(
|
@@ -936,11 +1053,7 @@ def create_template_context(
|
|
936
1053
|
# Add file variables
|
937
1054
|
if files:
|
938
1055
|
for name, file_list in files.items():
|
939
|
-
|
940
|
-
if len(file_list) == 1:
|
941
|
-
context[name] = file_list[0]
|
942
|
-
else:
|
943
|
-
context[name] = file_list
|
1056
|
+
context[name] = file_list # Always keep FileInfoList wrapper
|
944
1057
|
|
945
1058
|
# Add simple variables
|
946
1059
|
if variables:
|
@@ -958,7 +1071,7 @@ def create_template_context(
|
|
958
1071
|
|
959
1072
|
|
960
1073
|
def create_template_context_from_args(
|
961
|
-
args:
|
1074
|
+
args: "Namespace",
|
962
1075
|
security_manager: SecurityManager,
|
963
1076
|
) -> Dict[str, Any]:
|
964
1077
|
"""Create template context from command line arguments.
|
@@ -1010,7 +1123,7 @@ def create_template_context_from_args(
|
|
1010
1123
|
json_value = json.loads(value)
|
1011
1124
|
except json.JSONDecodeError as e:
|
1012
1125
|
raise InvalidJSONError(
|
1013
|
-
f"
|
1126
|
+
f"Error parsing JSON for variable '{name}': {str(e)}. Input was: {value}"
|
1014
1127
|
)
|
1015
1128
|
if name in json_variables:
|
1016
1129
|
raise VariableNameError(
|
@@ -1046,44 +1159,103 @@ def create_template_context_from_args(
|
|
1046
1159
|
# Wrap file-related errors
|
1047
1160
|
raise ValueError(f"File access error: {e}")
|
1048
1161
|
except Exception as e:
|
1162
|
+
# Don't wrap InvalidJSONError
|
1163
|
+
if isinstance(e, InvalidJSONError):
|
1164
|
+
raise
|
1049
1165
|
# Check if this is a wrapped security error
|
1050
1166
|
if isinstance(e.__cause__, PathSecurityError):
|
1051
1167
|
raise e.__cause__
|
1052
|
-
# Wrap
|
1168
|
+
# Wrap other errors
|
1053
1169
|
raise ValueError(f"Error collecting files: {e}")
|
1054
1170
|
|
1055
1171
|
|
1056
1172
|
def validate_security_manager(
|
1057
1173
|
base_dir: Optional[str] = None,
|
1058
1174
|
allowed_dirs: Optional[List[str]] = None,
|
1059
|
-
|
1175
|
+
allowed_dir_file: Optional[str] = None,
|
1060
1176
|
) -> SecurityManager:
|
1061
|
-
"""
|
1177
|
+
"""Validate and create security manager.
|
1062
1178
|
|
1063
1179
|
Args:
|
1064
|
-
base_dir:
|
1065
|
-
allowed_dirs: Optional list of allowed
|
1066
|
-
|
1180
|
+
base_dir: Base directory for file access. Defaults to current working directory.
|
1181
|
+
allowed_dirs: Optional list of additional allowed directories
|
1182
|
+
allowed_dir_file: Optional file containing allowed directories
|
1067
1183
|
|
1068
1184
|
Returns:
|
1069
1185
|
Configured SecurityManager instance
|
1070
1186
|
|
1071
1187
|
Raises:
|
1072
|
-
|
1073
|
-
|
1188
|
+
PathSecurityError: If any paths violate security constraints
|
1189
|
+
DirectoryNotFoundError: If any directories do not exist
|
1074
1190
|
"""
|
1075
|
-
#
|
1076
|
-
|
1077
|
-
|
1191
|
+
# Use current working directory if base_dir is None
|
1192
|
+
if base_dir is None:
|
1193
|
+
base_dir = os.getcwd()
|
1078
1194
|
|
1079
|
-
if
|
1080
|
-
|
1195
|
+
# Default to empty list if allowed_dirs is None
|
1196
|
+
if allowed_dirs is None:
|
1197
|
+
allowed_dirs = []
|
1081
1198
|
|
1082
|
-
if
|
1083
|
-
|
1084
|
-
|
1199
|
+
# Add base directory if it exists
|
1200
|
+
try:
|
1201
|
+
base_dir_path = Path(base_dir).resolve()
|
1202
|
+
if not base_dir_path.exists():
|
1203
|
+
raise DirectoryNotFoundError(
|
1204
|
+
f"Base directory not found: {base_dir}"
|
1205
|
+
)
|
1206
|
+
if not base_dir_path.is_dir():
|
1207
|
+
raise DirectoryNotFoundError(
|
1208
|
+
f"Base directory is not a directory: {base_dir}"
|
1209
|
+
)
|
1210
|
+
all_allowed_dirs = [str(base_dir_path)]
|
1211
|
+
except OSError as e:
|
1212
|
+
raise DirectoryNotFoundError(f"Invalid base directory: {e}")
|
1085
1213
|
|
1086
|
-
|
1214
|
+
# Add explicitly allowed directories
|
1215
|
+
for dir_path in allowed_dirs:
|
1216
|
+
try:
|
1217
|
+
resolved_path = Path(dir_path).resolve()
|
1218
|
+
if not resolved_path.exists():
|
1219
|
+
raise DirectoryNotFoundError(
|
1220
|
+
f"Directory not found: {dir_path}"
|
1221
|
+
)
|
1222
|
+
if not resolved_path.is_dir():
|
1223
|
+
raise DirectoryNotFoundError(
|
1224
|
+
f"Path is not a directory: {dir_path}"
|
1225
|
+
)
|
1226
|
+
all_allowed_dirs.append(str(resolved_path))
|
1227
|
+
except OSError as e:
|
1228
|
+
raise DirectoryNotFoundError(f"Invalid directory path: {e}")
|
1229
|
+
|
1230
|
+
# Add directories from file if specified
|
1231
|
+
if allowed_dir_file:
|
1232
|
+
try:
|
1233
|
+
with open(allowed_dir_file, "r", encoding="utf-8") as f:
|
1234
|
+
for line in f:
|
1235
|
+
line = line.strip()
|
1236
|
+
if line and not line.startswith("#"):
|
1237
|
+
try:
|
1238
|
+
resolved_path = Path(line).resolve()
|
1239
|
+
if not resolved_path.exists():
|
1240
|
+
raise DirectoryNotFoundError(
|
1241
|
+
f"Directory not found: {line}"
|
1242
|
+
)
|
1243
|
+
if not resolved_path.is_dir():
|
1244
|
+
raise DirectoryNotFoundError(
|
1245
|
+
f"Path is not a directory: {line}"
|
1246
|
+
)
|
1247
|
+
all_allowed_dirs.append(str(resolved_path))
|
1248
|
+
except OSError as e:
|
1249
|
+
raise DirectoryNotFoundError(
|
1250
|
+
f"Invalid directory path in {allowed_dir_file}: {e}"
|
1251
|
+
)
|
1252
|
+
except OSError as e:
|
1253
|
+
raise DirectoryNotFoundError(
|
1254
|
+
f"Failed to read allowed directories file: {e}"
|
1255
|
+
)
|
1256
|
+
|
1257
|
+
# Create security manager with all allowed directories
|
1258
|
+
return SecurityManager(base_dir=base_dir, allowed_dirs=all_allowed_dirs)
|
1087
1259
|
|
1088
1260
|
|
1089
1261
|
def parse_var(var_str: str) -> Tuple[str, str]:
|
@@ -1143,8 +1315,8 @@ def parse_json_var(var_str: str) -> Tuple[str, Any]:
|
|
1143
1315
|
value = json.loads(json_str)
|
1144
1316
|
except json.JSONDecodeError as e:
|
1145
1317
|
raise InvalidJSONError(
|
1146
|
-
f"
|
1147
|
-
)
|
1318
|
+
f"Error parsing JSON for variable '{name}': {str(e)}. Input was: {json_str}"
|
1319
|
+
)
|
1148
1320
|
|
1149
1321
|
return name, value
|
1150
1322
|
|
@@ -1191,570 +1363,308 @@ def _create_enum_type(values: List[Any], field_name: str) -> Type[Enum]:
|
|
1191
1363
|
return type(f"{field_name.title()}Enum", (str, Enum), enum_dict)
|
1192
1364
|
|
1193
1365
|
|
1194
|
-
def
|
1195
|
-
"""
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
"
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1214
|
-
"
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
"
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
action="append",
|
1243
|
-
default=[],
|
1244
|
-
help="Map file pattern to variable (name=pattern)",
|
1245
|
-
metavar="NAME=PATTERN",
|
1246
|
-
)
|
1247
|
-
parser.add_argument(
|
1248
|
-
"--dir",
|
1249
|
-
action="append",
|
1250
|
-
default=[],
|
1251
|
-
help="Map directory to variable (name=path)",
|
1252
|
-
metavar="NAME=PATH",
|
1253
|
-
)
|
1254
|
-
parser.add_argument(
|
1255
|
-
"--allowed-dir",
|
1256
|
-
action="append",
|
1257
|
-
default=[],
|
1258
|
-
help="Additional allowed directory or @file",
|
1259
|
-
metavar="PATH",
|
1260
|
-
)
|
1261
|
-
parser.add_argument(
|
1262
|
-
"--base-dir",
|
1263
|
-
help="Base directory for file access (defaults to current directory)",
|
1264
|
-
default=os.getcwd(),
|
1265
|
-
)
|
1266
|
-
parser.add_argument(
|
1267
|
-
"--allowed-dirs-file",
|
1268
|
-
help="File containing list of allowed directories",
|
1269
|
-
)
|
1270
|
-
parser.add_argument(
|
1271
|
-
"--dir-recursive",
|
1272
|
-
action="store_true",
|
1273
|
-
help="Process directories recursively",
|
1274
|
-
)
|
1275
|
-
parser.add_argument(
|
1276
|
-
"--dir-ext",
|
1277
|
-
help="Comma-separated list of file extensions to include in directory processing",
|
1278
|
-
)
|
1279
|
-
|
1280
|
-
# Variable arguments
|
1281
|
-
parser.add_argument(
|
1282
|
-
"--var",
|
1283
|
-
action="append",
|
1284
|
-
default=[],
|
1285
|
-
help="Pass simple variables (name=value)",
|
1286
|
-
metavar="NAME=VALUE",
|
1287
|
-
)
|
1288
|
-
parser.add_argument(
|
1289
|
-
"--json-var",
|
1290
|
-
action="append",
|
1291
|
-
default=[],
|
1292
|
-
help="Pass JSON variables (name=json)",
|
1293
|
-
metavar="NAME=JSON",
|
1294
|
-
)
|
1295
|
-
|
1296
|
-
# System prompt options
|
1297
|
-
parser.add_argument(
|
1298
|
-
"--system-prompt",
|
1299
|
-
help=(
|
1300
|
-
"System prompt for the model (use @file to load from file, "
|
1301
|
-
"can also be specified in task template YAML frontmatter)"
|
1302
|
-
),
|
1303
|
-
default=DEFAULT_SYSTEM_PROMPT,
|
1304
|
-
)
|
1305
|
-
parser.add_argument(
|
1306
|
-
"--ignore-task-sysprompt",
|
1307
|
-
action="store_true",
|
1308
|
-
help="Ignore system prompt from task template YAML frontmatter",
|
1309
|
-
)
|
1310
|
-
|
1311
|
-
# Schema validation
|
1312
|
-
parser.add_argument(
|
1313
|
-
"--schema",
|
1314
|
-
dest="schema_file",
|
1315
|
-
required=True,
|
1316
|
-
help="JSON schema file for response validation",
|
1317
|
-
)
|
1318
|
-
parser.add_argument(
|
1319
|
-
"--validate-schema",
|
1320
|
-
action="store_true",
|
1321
|
-
help="Validate schema and response",
|
1322
|
-
)
|
1323
|
-
|
1324
|
-
# Model configuration
|
1325
|
-
parser.add_argument(
|
1326
|
-
"--model",
|
1327
|
-
default="gpt-4o-2024-08-06",
|
1328
|
-
help="Model to use",
|
1329
|
-
)
|
1330
|
-
parser.add_argument(
|
1331
|
-
"--temperature",
|
1332
|
-
type=float,
|
1333
|
-
default=0.0,
|
1334
|
-
help="Temperature (0.0-2.0)",
|
1335
|
-
)
|
1336
|
-
parser.add_argument(
|
1337
|
-
"--max-tokens",
|
1338
|
-
type=int,
|
1339
|
-
help="Maximum tokens to generate",
|
1340
|
-
)
|
1341
|
-
parser.add_argument(
|
1342
|
-
"--top-p",
|
1343
|
-
type=float,
|
1344
|
-
default=1.0,
|
1345
|
-
help="Top-p sampling (0.0-1.0)",
|
1346
|
-
)
|
1347
|
-
parser.add_argument(
|
1348
|
-
"--frequency-penalty",
|
1349
|
-
type=float,
|
1350
|
-
default=0.0,
|
1351
|
-
help="Frequency penalty (-2.0-2.0)",
|
1352
|
-
)
|
1353
|
-
parser.add_argument(
|
1354
|
-
"--presence-penalty",
|
1355
|
-
type=float,
|
1356
|
-
default=0.0,
|
1357
|
-
help="Presence penalty (-2.0-2.0)",
|
1358
|
-
)
|
1359
|
-
parser.add_argument(
|
1360
|
-
"--timeout",
|
1361
|
-
type=float,
|
1362
|
-
default=60.0,
|
1363
|
-
help="API timeout in seconds",
|
1364
|
-
)
|
1365
|
-
|
1366
|
-
# Output options
|
1367
|
-
parser.add_argument(
|
1368
|
-
"--output-file",
|
1369
|
-
help="Write JSON output to file",
|
1370
|
-
)
|
1371
|
-
parser.add_argument(
|
1372
|
-
"--dry-run",
|
1373
|
-
action="store_true",
|
1374
|
-
help="Simulate API call without making request",
|
1375
|
-
)
|
1376
|
-
parser.add_argument(
|
1377
|
-
"--no-progress",
|
1378
|
-
action="store_true",
|
1379
|
-
help="Disable progress indicators",
|
1380
|
-
)
|
1381
|
-
|
1382
|
-
# Other options
|
1383
|
-
parser.add_argument(
|
1384
|
-
"--api-key",
|
1385
|
-
help="OpenAI API key (overrides env var)",
|
1386
|
-
)
|
1387
|
-
parser.add_argument(
|
1388
|
-
"--verbose",
|
1389
|
-
action="store_true",
|
1390
|
-
help="Enable verbose output",
|
1391
|
-
)
|
1392
|
-
parser.add_argument(
|
1393
|
-
"--debug-openai-stream",
|
1394
|
-
action="store_true",
|
1395
|
-
help="Enable low-level debug output for OpenAI streaming (very verbose)",
|
1396
|
-
)
|
1397
|
-
parser.add_argument(
|
1398
|
-
"--version",
|
1399
|
-
action="version",
|
1400
|
-
version=f"%(prog)s {__version__}",
|
1401
|
-
)
|
1366
|
+
def handle_error(e: Exception) -> None:
|
1367
|
+
"""Handle errors by printing appropriate message and exiting with status code."""
|
1368
|
+
if isinstance(e, click.UsageError):
|
1369
|
+
# For UsageError, preserve the original message format
|
1370
|
+
if hasattr(e, "param") and e.param:
|
1371
|
+
# Missing parameter error
|
1372
|
+
msg = f"Missing option '--{e.param.name}'"
|
1373
|
+
click.echo(msg, err=True)
|
1374
|
+
else:
|
1375
|
+
# Other usage errors (like conflicting options)
|
1376
|
+
click.echo(str(e), err=True)
|
1377
|
+
sys.exit(ExitCode.USAGE_ERROR)
|
1378
|
+
elif isinstance(e, InvalidJSONError):
|
1379
|
+
# Use the original error message if available
|
1380
|
+
msg = str(e) if str(e) != "None" else "Invalid JSON"
|
1381
|
+
click.secho(msg, fg="red", err=True)
|
1382
|
+
sys.exit(ExitCode.DATA_ERROR)
|
1383
|
+
elif isinstance(e, FileNotFoundError):
|
1384
|
+
# Use the original error message if available
|
1385
|
+
msg = str(e) if str(e) != "None" else "File not found"
|
1386
|
+
click.secho(msg, fg="red", err=True)
|
1387
|
+
sys.exit(ExitCode.SCHEMA_ERROR)
|
1388
|
+
elif isinstance(e, TaskTemplateSyntaxError):
|
1389
|
+
# Use the original error message if available
|
1390
|
+
msg = str(e) if str(e) != "None" else "Template syntax error"
|
1391
|
+
click.secho(msg, fg="red", err=True)
|
1392
|
+
sys.exit(ExitCode.INTERNAL_ERROR)
|
1393
|
+
elif isinstance(e, CLIError):
|
1394
|
+
# Use the show method for CLIError and its subclasses
|
1395
|
+
e.show()
|
1396
|
+
sys.exit(
|
1397
|
+
e.exit_code if hasattr(e, "exit_code") else ExitCode.INTERNAL_ERROR
|
1398
|
+
)
|
1399
|
+
else:
|
1400
|
+
click.secho(f"Unexpected error: {str(e)}", fg="red", err=True)
|
1401
|
+
sys.exit(ExitCode.INTERNAL_ERROR)
|
1402
|
+
|
1403
|
+
|
1404
|
+
async def stream_structured_output(
|
1405
|
+
client: AsyncOpenAI,
|
1406
|
+
model: str,
|
1407
|
+
system_prompt: str,
|
1408
|
+
user_prompt: str,
|
1409
|
+
output_schema: Type[BaseModel],
|
1410
|
+
output_file: Optional[str] = None,
|
1411
|
+
**kwargs: Any,
|
1412
|
+
) -> None:
|
1413
|
+
"""Stream structured output from OpenAI API.
|
1402
1414
|
|
1403
|
-
|
1415
|
+
This function follows the guide's recommendation for a focused async streaming function.
|
1416
|
+
It handles the core streaming logic and resource cleanup.
|
1417
|
+
"""
|
1418
|
+
try:
|
1419
|
+
async for chunk in async_openai_structured_stream(
|
1420
|
+
client=client,
|
1421
|
+
model=model,
|
1422
|
+
output_schema=output_schema,
|
1423
|
+
system_prompt=system_prompt,
|
1424
|
+
user_prompt=user_prompt,
|
1425
|
+
**kwargs,
|
1426
|
+
):
|
1427
|
+
if not chunk:
|
1428
|
+
continue
|
1429
|
+
|
1430
|
+
# Process and output the chunk
|
1431
|
+
dumped = chunk.model_dump(mode="json")
|
1432
|
+
json_str = json.dumps(dumped, indent=2)
|
1433
|
+
|
1434
|
+
if output_file:
|
1435
|
+
with open(output_file, "a", encoding="utf-8") as f:
|
1436
|
+
f.write(json_str)
|
1437
|
+
f.write("\n")
|
1438
|
+
f.flush() # Ensure immediate flush to file
|
1439
|
+
else:
|
1440
|
+
# Print directly to stdout with immediate flush
|
1441
|
+
print(json_str, flush=True)
|
1442
|
+
|
1443
|
+
except (
|
1444
|
+
StreamInterruptedError,
|
1445
|
+
StreamBufferError,
|
1446
|
+
StreamParseError,
|
1447
|
+
APIResponseError,
|
1448
|
+
EmptyResponseError,
|
1449
|
+
InvalidResponseFormatError,
|
1450
|
+
) as e:
|
1451
|
+
logger.error(f"Stream error: {e}")
|
1452
|
+
raise
|
1453
|
+
finally:
|
1454
|
+
# Always ensure client is properly closed
|
1455
|
+
await client.close()
|
1404
1456
|
|
1405
1457
|
|
1406
|
-
async def
|
1407
|
-
"""
|
1458
|
+
async def run_cli_async(args: Namespace) -> ExitCode:
|
1459
|
+
"""Async wrapper for CLI operations.
|
1408
1460
|
|
1409
|
-
|
1410
|
-
|
1461
|
+
This function prepares everything needed for streaming and then calls
|
1462
|
+
the focused streaming function.
|
1411
1463
|
"""
|
1412
1464
|
try:
|
1413
|
-
|
1414
|
-
args = parser.parse_args()
|
1415
|
-
|
1416
|
-
# Configure logging
|
1417
|
-
log_level = logging.DEBUG if args.verbose else logging.INFO
|
1418
|
-
logger.setLevel(log_level)
|
1419
|
-
|
1420
|
-
# Create security manager
|
1465
|
+
# Validate and prepare all inputs
|
1421
1466
|
security_manager = validate_security_manager(
|
1422
1467
|
base_dir=args.base_dir,
|
1423
1468
|
allowed_dirs=args.allowed_dir,
|
1424
|
-
|
1469
|
+
allowed_dir_file=args.allowed_dir_file,
|
1425
1470
|
)
|
1426
1471
|
|
1427
|
-
|
1428
|
-
|
1429
|
-
|
1430
|
-
# Validate schema file
|
1472
|
+
task_template = validate_task_template(args.task, args.task_file)
|
1473
|
+
logger.debug("Validating schema from %s", args.schema_file)
|
1431
1474
|
schema = validate_schema_file(args.schema_file, args.verbose)
|
1432
|
-
|
1433
|
-
# Create template context
|
1434
1475
|
template_context = create_template_context_from_args(
|
1435
1476
|
args, security_manager
|
1436
1477
|
)
|
1437
|
-
|
1438
|
-
# Create Jinja environment
|
1439
1478
|
env = create_jinja_env()
|
1440
1479
|
|
1441
|
-
# Process system prompt
|
1442
|
-
|
1480
|
+
# Process system prompt and render task
|
1481
|
+
system_prompt = process_system_prompt(
|
1443
1482
|
task_template,
|
1444
1483
|
args.system_prompt,
|
1484
|
+
args.system_prompt_file,
|
1445
1485
|
template_context,
|
1446
1486
|
env,
|
1447
1487
|
args.ignore_task_sysprompt,
|
1448
1488
|
)
|
1449
|
-
|
1450
|
-
# Render task template
|
1451
1489
|
rendered_task = render_template(task_template, template_context, env)
|
1452
|
-
logger.info(
|
1490
|
+
logger.info("Rendered task template: %s", rendered_task)
|
1453
1491
|
|
1454
|
-
# If dry run, exit here
|
1455
1492
|
if args.dry_run:
|
1456
1493
|
logger.info("DRY RUN MODE")
|
1457
1494
|
return ExitCode.SUCCESS
|
1458
1495
|
|
1459
|
-
#
|
1496
|
+
# Create output model
|
1497
|
+
logger.debug("Creating output model")
|
1460
1498
|
try:
|
1461
|
-
logger.debug("[_main] Loading schema from %s", args.schema_file)
|
1462
|
-
schema = validate_schema_file(
|
1463
|
-
args.schema_file, verbose=args.verbose_schema
|
1464
|
-
)
|
1465
|
-
logger.debug("[_main] Creating output model")
|
1466
1499
|
output_model = create_dynamic_model(
|
1467
1500
|
schema,
|
1468
1501
|
base_name="OutputModel",
|
1469
1502
|
show_schema=args.show_model_schema,
|
1470
1503
|
debug_validation=args.debug_validation,
|
1471
1504
|
)
|
1472
|
-
logger.debug("
|
1473
|
-
except (
|
1474
|
-
|
1475
|
-
|
1476
|
-
|
1477
|
-
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1483
|
-
# Validate model support
|
1505
|
+
logger.debug("Successfully created output model")
|
1506
|
+
except (
|
1507
|
+
SchemaFileError,
|
1508
|
+
InvalidJSONError,
|
1509
|
+
SchemaValidationError,
|
1510
|
+
ModelCreationError,
|
1511
|
+
) as e:
|
1512
|
+
logger.error("Schema error: %s", str(e))
|
1513
|
+
raise # Let the error propagate with its context
|
1514
|
+
|
1515
|
+
# Validate model support and token usage
|
1484
1516
|
try:
|
1485
1517
|
supports_structured_output(args.model)
|
1486
|
-
except ModelNotSupportedError as e:
|
1487
|
-
logger.error(str(e))
|
1488
|
-
|
1489
|
-
|
1490
|
-
logger.error(str(e))
|
1491
|
-
return ExitCode.DATA_ERROR
|
1492
|
-
|
1493
|
-
# Estimate token usage
|
1518
|
+
except (ModelNotSupportedError, ModelVersionError) as e:
|
1519
|
+
logger.error("Model validation error: %s", str(e))
|
1520
|
+
raise # Let the error propagate with its context
|
1521
|
+
|
1494
1522
|
messages = [
|
1495
|
-
{"role": "system", "content":
|
1523
|
+
{"role": "system", "content": system_prompt},
|
1496
1524
|
{"role": "user", "content": rendered_task},
|
1497
1525
|
]
|
1498
1526
|
total_tokens = estimate_tokens_for_chat(messages, args.model)
|
1499
1527
|
context_limit = get_context_window_limit(args.model)
|
1500
|
-
|
1501
1528
|
if total_tokens > context_limit:
|
1502
|
-
|
1503
|
-
|
1529
|
+
msg = f"Total tokens ({total_tokens}) exceeds model context limit ({context_limit})"
|
1530
|
+
logger.error(msg)
|
1531
|
+
raise CLIError(
|
1532
|
+
msg,
|
1533
|
+
context={
|
1534
|
+
"total_tokens": total_tokens,
|
1535
|
+
"context_limit": context_limit,
|
1536
|
+
},
|
1504
1537
|
)
|
1505
|
-
return ExitCode.DATA_ERROR
|
1506
1538
|
|
1507
|
-
# Get API key
|
1539
|
+
# Get API key and create client
|
1508
1540
|
api_key = args.api_key or os.getenv("OPENAI_API_KEY")
|
1509
1541
|
if not api_key:
|
1510
|
-
|
1511
|
-
|
1512
|
-
)
|
1513
|
-
return ExitCode.USAGE_ERROR
|
1542
|
+
msg = "No OpenAI API key provided (--api-key or OPENAI_API_KEY env var)"
|
1543
|
+
logger.error(msg)
|
1544
|
+
raise CLIError(msg)
|
1514
1545
|
|
1515
|
-
# Create OpenAI client
|
1516
1546
|
client = AsyncOpenAI(api_key=api_key, timeout=args.timeout)
|
1517
1547
|
|
1518
|
-
# Create log callback
|
1548
|
+
# Create detailed log callback
|
1519
1549
|
def log_callback(
|
1520
1550
|
level: int, message: str, extra: dict[str, Any]
|
1521
1551
|
) -> None:
|
1522
|
-
# Only log if debug_openai_stream is enabled
|
1523
1552
|
if args.debug_openai_stream:
|
1524
|
-
|
1525
|
-
if extra: # Only add if there's actually extra data
|
1553
|
+
if extra:
|
1526
1554
|
extra_str = json.dumps(extra, indent=2)
|
1527
1555
|
message = f"{message}\nDetails:\n{extra_str}"
|
1528
|
-
|
1556
|
+
logger.log(level, message, extra=extra)
|
1529
1557
|
|
1530
|
-
#
|
1558
|
+
# Stream the output
|
1531
1559
|
try:
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
logger.debug("Empty chunk received, skipping")
|
1559
|
-
continue
|
1560
|
-
|
1561
|
-
# Write output
|
1562
|
-
try:
|
1563
|
-
logger.debug("Starting to process output chunk")
|
1564
|
-
dumped = chunk.model_dump(mode="json")
|
1565
|
-
logger.debug("Successfully dumped chunk to JSON")
|
1566
|
-
logger.debug("Dumped chunk: %s", dumped)
|
1567
|
-
logger.debug(
|
1568
|
-
"Chunk type: %s, length: %d",
|
1569
|
-
type(dumped),
|
1570
|
-
len(json.dumps(dumped)),
|
1571
|
-
)
|
1572
|
-
|
1573
|
-
if args.output_file:
|
1574
|
-
logger.debug(
|
1575
|
-
"Writing to output file: %s", args.output_file
|
1576
|
-
)
|
1577
|
-
try:
|
1578
|
-
with open(
|
1579
|
-
args.output_file, "a", encoding="utf-8"
|
1580
|
-
) as f:
|
1581
|
-
json_str = json.dumps(dumped, indent=2)
|
1582
|
-
logger.debug(
|
1583
|
-
"Writing JSON string of length %d",
|
1584
|
-
len(json_str),
|
1585
|
-
)
|
1586
|
-
f.write(json_str)
|
1587
|
-
f.write("\n")
|
1588
|
-
logger.debug("Successfully wrote to file")
|
1589
|
-
except Exception as e:
|
1590
|
-
logger.error(
|
1591
|
-
"Failed to write to output file: %s", e
|
1592
|
-
)
|
1593
|
-
else:
|
1594
|
-
logger.debug(
|
1595
|
-
"About to call progress.print_output with JSON string"
|
1596
|
-
)
|
1597
|
-
json_str = json.dumps(dumped, indent=2)
|
1598
|
-
logger.debug(
|
1599
|
-
"JSON string length before print_output: %d",
|
1600
|
-
len(json_str),
|
1601
|
-
)
|
1602
|
-
logger.debug(
|
1603
|
-
"First 100 chars of JSON string: %s",
|
1604
|
-
json_str[:100] if json_str else "",
|
1605
|
-
)
|
1606
|
-
progress.print_output(json_str)
|
1607
|
-
logger.debug(
|
1608
|
-
"Completed print_output call for JSON string"
|
1609
|
-
)
|
1610
|
-
|
1611
|
-
logger.debug("Starting progress update")
|
1612
|
-
progress.update()
|
1613
|
-
logger.debug("Completed progress update")
|
1614
|
-
except Exception as e:
|
1615
|
-
logger.error("Failed to process chunk: %s", e)
|
1616
|
-
logger.error("Chunk: %s", chunk)
|
1617
|
-
continue
|
1618
|
-
|
1619
|
-
logger.debug("Finished processing API response stream")
|
1620
|
-
|
1621
|
-
except StreamInterruptedError as e:
|
1622
|
-
logger.error(f"Stream interrupted: {e}")
|
1623
|
-
return ExitCode.API_ERROR
|
1624
|
-
except StreamBufferError as e:
|
1625
|
-
logger.error(f"Stream buffer error: {e}")
|
1626
|
-
return ExitCode.API_ERROR
|
1627
|
-
except StreamParseError as e:
|
1628
|
-
logger.error(f"Stream parse error: {e}")
|
1629
|
-
return ExitCode.API_ERROR
|
1630
|
-
except APIResponseError as e:
|
1631
|
-
logger.error(f"API response error: {e}")
|
1632
|
-
return ExitCode.API_ERROR
|
1633
|
-
except EmptyResponseError as e:
|
1634
|
-
logger.error(f"Empty response error: {e}")
|
1635
|
-
return ExitCode.API_ERROR
|
1636
|
-
except InvalidResponseFormatError as e:
|
1637
|
-
logger.error(f"Invalid response format: {e}")
|
1638
|
-
return ExitCode.API_ERROR
|
1560
|
+
await stream_structured_output(
|
1561
|
+
client=client,
|
1562
|
+
model=args.model,
|
1563
|
+
system_prompt=system_prompt,
|
1564
|
+
user_prompt=rendered_task,
|
1565
|
+
output_schema=output_model,
|
1566
|
+
output_file=args.output_file,
|
1567
|
+
temperature=args.temperature,
|
1568
|
+
max_tokens=args.max_tokens,
|
1569
|
+
top_p=args.top_p,
|
1570
|
+
frequency_penalty=args.frequency_penalty,
|
1571
|
+
presence_penalty=args.presence_penalty,
|
1572
|
+
timeout=args.timeout,
|
1573
|
+
on_log=log_callback,
|
1574
|
+
)
|
1575
|
+
return ExitCode.SUCCESS
|
1576
|
+
except (
|
1577
|
+
StreamInterruptedError,
|
1578
|
+
StreamBufferError,
|
1579
|
+
StreamParseError,
|
1580
|
+
APIResponseError,
|
1581
|
+
EmptyResponseError,
|
1582
|
+
InvalidResponseFormatError,
|
1583
|
+
) as e:
|
1584
|
+
logger.error("Stream error: %s", str(e))
|
1585
|
+
raise # Let stream errors propagate
|
1639
1586
|
except (APIConnectionError, InternalServerError) as e:
|
1640
|
-
logger.error(
|
1641
|
-
|
1587
|
+
logger.error("API connection error: %s", str(e))
|
1588
|
+
raise APIResponseError(str(e)) # Convert to our error type
|
1642
1589
|
except RateLimitError as e:
|
1643
|
-
logger.error(
|
1644
|
-
|
1645
|
-
except BadRequestError as e:
|
1646
|
-
logger.error(
|
1647
|
-
|
1648
|
-
|
1649
|
-
|
1650
|
-
return ExitCode.API_ERROR
|
1651
|
-
except OpenAIClientError as e:
|
1652
|
-
logger.error(f"OpenAI client error: {e}")
|
1653
|
-
return ExitCode.API_ERROR
|
1654
|
-
except Exception as e:
|
1655
|
-
logger.error(f"Unexpected error: {e}")
|
1656
|
-
return ExitCode.INTERNAL_ERROR
|
1657
|
-
|
1658
|
-
return ExitCode.SUCCESS
|
1590
|
+
logger.error("Rate limit exceeded: %s", str(e))
|
1591
|
+
raise APIResponseError(str(e)) # Convert to our error type
|
1592
|
+
except (BadRequestError, AuthenticationError, OpenAIClientError) as e:
|
1593
|
+
logger.error("API client error: %s", str(e))
|
1594
|
+
raise APIResponseError(str(e)) # Convert to our error type
|
1595
|
+
finally:
|
1596
|
+
await client.close()
|
1659
1597
|
|
1660
1598
|
except KeyboardInterrupt:
|
1661
|
-
logger.
|
1599
|
+
logger.info("Operation cancelled by user")
|
1662
1600
|
return ExitCode.INTERRUPTED
|
1663
|
-
except PathSecurityError as e:
|
1664
|
-
# Only log security errors if they haven't been logged already
|
1665
|
-
logger.debug(
|
1666
|
-
"[_main] Caught PathSecurityError: %s (logged=%s)",
|
1667
|
-
str(e),
|
1668
|
-
getattr(e, "has_been_logged", False),
|
1669
|
-
)
|
1670
|
-
if not getattr(e, "has_been_logged", False):
|
1671
|
-
logger.error(str(e))
|
1672
|
-
return ExitCode.SECURITY_ERROR
|
1673
|
-
except ValueError as e:
|
1674
|
-
# Get the original cause of the error
|
1675
|
-
cause = e.__cause__ or e.__context__
|
1676
|
-
if isinstance(cause, PathSecurityError):
|
1677
|
-
logger.debug(
|
1678
|
-
"[_main] Caught wrapped PathSecurityError in ValueError: %s (logged=%s)",
|
1679
|
-
str(cause),
|
1680
|
-
getattr(cause, "has_been_logged", False),
|
1681
|
-
)
|
1682
|
-
# Only log security errors if they haven't been logged already
|
1683
|
-
if not getattr(cause, "has_been_logged", False):
|
1684
|
-
logger.error(str(cause))
|
1685
|
-
return ExitCode.SECURITY_ERROR
|
1686
|
-
else:
|
1687
|
-
logger.debug("[_main] Caught ValueError: %s", str(e))
|
1688
|
-
logger.error(f"Invalid input: {e}")
|
1689
|
-
return ExitCode.DATA_ERROR
|
1690
1601
|
except Exception as e:
|
1691
|
-
|
1692
|
-
|
1693
|
-
|
1694
|
-
|
1695
|
-
|
1696
|
-
|
1697
|
-
|
1698
|
-
|
1699
|
-
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
|
1704
|
-
|
1602
|
+
if isinstance(e, CLIError):
|
1603
|
+
raise # Let our custom errors propagate
|
1604
|
+
logger.exception("Unexpected error")
|
1605
|
+
raise CLIError(str(e), context={"error_type": type(e).__name__})
|
1606
|
+
|
1607
|
+
|
1608
|
+
def create_cli() -> click.Command:
|
1609
|
+
"""Create the CLI command.
|
1610
|
+
|
1611
|
+
Returns:
|
1612
|
+
click.Command: The CLI command object
|
1613
|
+
"""
|
1614
|
+
|
1615
|
+
@create_click_command()
|
1616
|
+
def cli(**kwargs: Any) -> None:
|
1617
|
+
"""CLI entry point for structured OpenAI API calls."""
|
1618
|
+
try:
|
1619
|
+
args = Namespace(**kwargs)
|
1620
|
+
|
1621
|
+
# Validate required arguments first
|
1622
|
+
if not args.task and not args.task_file:
|
1623
|
+
raise click.UsageError(
|
1624
|
+
"Must specify either --task or --task-file"
|
1625
|
+
)
|
1626
|
+
if not args.schema_file:
|
1627
|
+
raise click.UsageError("Missing option '--schema-file'")
|
1628
|
+
if args.task and args.task_file:
|
1629
|
+
raise click.UsageError(
|
1630
|
+
"Cannot specify both --task and --task-file"
|
1631
|
+
)
|
1632
|
+
if args.system_prompt and args.system_prompt_file:
|
1633
|
+
raise click.UsageError(
|
1634
|
+
"Cannot specify both --system-prompt and --system-prompt-file"
|
1635
|
+
)
|
1636
|
+
|
1637
|
+
# Run the async function synchronously
|
1638
|
+
exit_code = asyncio.run(run_cli_async(args))
|
1639
|
+
|
1640
|
+
if exit_code != ExitCode.SUCCESS:
|
1641
|
+
error_msg = f"Command failed with exit code {exit_code}"
|
1642
|
+
if hasattr(ExitCode, exit_code.name):
|
1643
|
+
error_msg = f"{error_msg} ({exit_code.name})"
|
1644
|
+
raise CLIError(error_msg, context={"exit_code": exit_code})
|
1645
|
+
|
1646
|
+
except click.UsageError:
|
1647
|
+
# Let Click handle usage errors directly
|
1648
|
+
raise
|
1649
|
+
except InvalidJSONError:
|
1650
|
+
# Let InvalidJSONError propagate directly
|
1651
|
+
raise
|
1652
|
+
except CLIError:
|
1653
|
+
# Let our custom errors propagate with their context
|
1654
|
+
raise
|
1655
|
+
except Exception as e:
|
1656
|
+
# Convert other exceptions to CLIError
|
1657
|
+
logger.exception("Unexpected error")
|
1658
|
+
raise CLIError(str(e), context={"error_type": type(e).__name__})
|
1659
|
+
|
1660
|
+
# The decorated function is a Command, but mypy can't detect this
|
1661
|
+
return cast(click.Command, cli)
|
1705
1662
|
|
1706
1663
|
|
1707
1664
|
def main() -> None:
|
1708
|
-
"""
|
1709
|
-
|
1710
|
-
|
1711
|
-
exit_code = asyncio.run(_main())
|
1712
|
-
sys.exit(exit_code.value)
|
1713
|
-
except KeyboardInterrupt:
|
1714
|
-
logger.error("Operation cancelled by user")
|
1715
|
-
sys.exit(ExitCode.INTERRUPTED.value)
|
1716
|
-
except PathSecurityError as e:
|
1717
|
-
# Only log security errors if they haven't been logged already
|
1718
|
-
logger.debug(
|
1719
|
-
"[main] Caught PathSecurityError: %s (logged=%s)",
|
1720
|
-
str(e),
|
1721
|
-
getattr(e, "has_been_logged", False),
|
1722
|
-
)
|
1723
|
-
if not getattr(e, "has_been_logged", False):
|
1724
|
-
logger.error(str(e))
|
1725
|
-
sys.exit(ExitCode.SECURITY_ERROR.value)
|
1726
|
-
except ValueError as e:
|
1727
|
-
# Get the original cause of the error
|
1728
|
-
cause = e.__cause__ or e.__context__
|
1729
|
-
if isinstance(cause, PathSecurityError):
|
1730
|
-
logger.debug(
|
1731
|
-
"[main] Caught wrapped PathSecurityError in ValueError: %s (logged=%s)",
|
1732
|
-
str(cause),
|
1733
|
-
getattr(cause, "has_been_logged", False),
|
1734
|
-
)
|
1735
|
-
# Only log security errors if they haven't been logged already
|
1736
|
-
if not getattr(cause, "has_been_logged", False):
|
1737
|
-
logger.error(str(cause))
|
1738
|
-
sys.exit(ExitCode.SECURITY_ERROR.value)
|
1739
|
-
else:
|
1740
|
-
logger.debug("[main] Caught ValueError: %s", str(e))
|
1741
|
-
logger.error(f"Invalid input: {e}")
|
1742
|
-
sys.exit(ExitCode.DATA_ERROR.value)
|
1743
|
-
except Exception as e:
|
1744
|
-
# Check if this is a wrapped security error
|
1745
|
-
if isinstance(e.__cause__, PathSecurityError):
|
1746
|
-
logger.debug(
|
1747
|
-
"[main] Caught wrapped PathSecurityError in Exception: %s (logged=%s)",
|
1748
|
-
str(e.__cause__),
|
1749
|
-
getattr(e.__cause__, "has_been_logged", False),
|
1750
|
-
)
|
1751
|
-
# Only log security errors if they haven't been logged already
|
1752
|
-
if not getattr(e.__cause__, "has_been_logged", False):
|
1753
|
-
logger.error(str(e.__cause__))
|
1754
|
-
sys.exit(ExitCode.SECURITY_ERROR.value)
|
1755
|
-
logger.debug("[main] Caught unexpected error: %s", str(e))
|
1756
|
-
logger.error(f"Unexpected error: {e}")
|
1757
|
-
sys.exit(ExitCode.INTERNAL_ERROR.value)
|
1665
|
+
"""Main entry point for the CLI."""
|
1666
|
+
cli = create_cli()
|
1667
|
+
cli(standalone_mode=False)
|
1758
1668
|
|
1759
1669
|
|
1760
1670
|
# Export public API
|
@@ -1766,7 +1676,7 @@ __all__ = [
|
|
1766
1676
|
"parse_json_var",
|
1767
1677
|
"create_dynamic_model",
|
1768
1678
|
"validate_path_mapping",
|
1769
|
-
"
|
1679
|
+
"create_cli",
|
1770
1680
|
"main",
|
1771
1681
|
]
|
1772
1682
|
|
@@ -1828,13 +1738,17 @@ def create_dynamic_model(
|
|
1828
1738
|
logger.info("Schema missing type field, assuming object type")
|
1829
1739
|
schema["type"] = "object"
|
1830
1740
|
|
1831
|
-
#
|
1741
|
+
# For non-object root schemas, create a wrapper model
|
1832
1742
|
if schema["type"] != "object":
|
1833
1743
|
if debug_validation:
|
1834
|
-
logger.
|
1835
|
-
"
|
1744
|
+
logger.info(
|
1745
|
+
"Converting non-object root schema to object wrapper"
|
1836
1746
|
)
|
1837
|
-
|
1747
|
+
schema = {
|
1748
|
+
"type": "object",
|
1749
|
+
"properties": {"value": schema},
|
1750
|
+
"required": ["value"],
|
1751
|
+
}
|
1838
1752
|
|
1839
1753
|
# Create model configuration
|
1840
1754
|
config = ConfigDict(
|