ostruct-cli 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ostruct/cli/file_utils.py CHANGED
@@ -46,13 +46,14 @@ import codecs
46
46
  import glob
47
47
  import logging
48
48
  import os
49
- from typing import Any, Dict, List, Optional, Type, Union
49
+ from pathlib import Path
50
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
50
51
 
51
52
  import chardet
52
53
 
53
54
  from .errors import (
54
55
  DirectoryNotFoundError,
55
- FileNotFoundError,
56
+ OstructFileNotFoundError,
56
57
  PathSecurityError,
57
58
  )
58
59
  from .file_info import FileInfo
@@ -113,10 +114,10 @@ def collect_files_from_pattern(
113
114
  pattern: str,
114
115
  security_manager: SecurityManager,
115
116
  ) -> List[FileInfo]:
116
- """Collect files matching a glob pattern.
117
+ """Collect files matching a glob pattern or exact file path.
117
118
 
118
119
  Args:
119
- pattern: Glob pattern to match files
120
+ pattern: Glob pattern or file path to match
120
121
  security_manager: Security manager for path validation
121
122
 
122
123
  Returns:
@@ -125,7 +126,18 @@ def collect_files_from_pattern(
125
126
  Raises:
126
127
  PathSecurityError: If any matched file is outside base directory
127
128
  """
128
- # Expand pattern
129
+ # First check if it's an exact file path
130
+ if os.path.isfile(pattern):
131
+ try:
132
+ file_info = FileInfo.from_path(pattern, security_manager)
133
+ return [file_info]
134
+ except PathSecurityError:
135
+ raise
136
+ except Exception as e:
137
+ logger.warning("Could not process file %s: %s", pattern, str(e))
138
+ return []
139
+
140
+ # If not an exact file, try glob pattern
129
141
  matched_paths = glob.glob(pattern, recursive=True)
130
142
  if not matched_paths:
131
143
  logger.debug("No files matched pattern: %s", pattern)
@@ -140,8 +152,8 @@ def collect_files_from_pattern(
140
152
  except PathSecurityError:
141
153
  # Let security errors propagate
142
154
  raise
143
- except Exception:
144
- logger.warning("Could not process file %s", path)
155
+ except Exception as e:
156
+ logger.warning("Could not process file %s: %s", path, str(e))
145
157
 
146
158
  return files
147
159
 
@@ -256,20 +268,21 @@ def collect_files_from_directory(
256
268
  raise
257
269
 
258
270
  try:
271
+ # Use absolute path when creating FileInfo
259
272
  file_info = FileInfo.from_path(
260
- rel_path, security_manager=security_manager, **kwargs
273
+ abs_path, security_manager=security_manager, **kwargs
261
274
  )
262
275
  files.append(file_info)
263
- logger.debug("Added file to list: %s", rel_path)
276
+ logger.debug("Added file to list: %s", abs_path)
264
277
  except PathSecurityError as e:
265
278
  # Log and re-raise security errors immediately
266
279
  logger.error(
267
280
  "Security violation processing file: %s (%s)",
268
- rel_path,
281
+ abs_path,
269
282
  str(e),
270
283
  )
271
284
  raise
272
- except (FileNotFoundError, PermissionError) as e:
285
+ except (OstructFileNotFoundError, PermissionError) as e:
273
286
  # Skip legitimate file access errors
274
287
  logger.warning(
275
288
  "Skipping inaccessible file: %s (error: %s)",
@@ -289,39 +302,34 @@ def collect_files_from_directory(
289
302
 
290
303
 
291
304
  def _validate_and_split_mapping(
292
- mapping: str, mapping_type: str
305
+ mapping: tuple[str, Union[str, Path]], mapping_type: str
293
306
  ) -> tuple[str, str]:
294
- """Validate and split a name=value mapping.
307
+ """Validate a name/path tuple mapping.
295
308
 
296
309
  Args:
297
- mapping: The mapping string to validate (e.g. "name=value")
310
+ mapping: The mapping tuple (name, path)
298
311
  mapping_type: Type of mapping for error messages ("file", "pattern", or "directory")
299
312
 
300
313
  Returns:
301
- Tuple of (name, value)
314
+ The same tuple of (name, path)
302
315
 
303
316
  Raises:
304
317
  ValueError: If mapping format is invalid
305
318
  """
306
- try:
307
- name, value = mapping.split("=", 1)
308
- except ValueError:
309
- raise ValueError(
310
- f"Invalid {mapping_type} mapping format: {mapping!r} (missing '=' separator)"
311
- )
319
+ name, value = mapping
312
320
 
313
321
  if not name:
314
- raise ValueError(f"Empty name in {mapping_type} mapping: {mapping!r}")
322
+ raise ValueError(f"Empty name in {mapping_type} mapping")
315
323
  if not value:
316
- raise ValueError(f"Empty value in {mapping_type} mapping: {mapping!r}")
324
+ raise ValueError(f"Empty value in {mapping_type} mapping")
317
325
 
318
- return name, value
326
+ return name, str(value) # Convert Path to str if needed
319
327
 
320
328
 
321
329
  def collect_files(
322
- file_mappings: Optional[List[str]] = None,
323
- pattern_mappings: Optional[List[str]] = None,
324
- dir_mappings: Optional[List[str]] = None,
330
+ file_mappings: Optional[List[Tuple[str, Union[str, Path]]]] = None,
331
+ pattern_mappings: Optional[List[Tuple[str, Union[str, Path]]]] = None,
332
+ dir_mappings: Optional[List[Tuple[str, Union[str, Path]]]] = None,
325
333
  dir_recursive: bool = False,
326
334
  dir_extensions: Optional[List[str]] = None,
327
335
  security_manager: Optional[SecurityManager] = None,
@@ -330,9 +338,9 @@ def collect_files(
330
338
  """Collect files from multiple sources.
331
339
 
332
340
  Args:
333
- file_mappings: List of file mappings in the format "name=path"
334
- pattern_mappings: List of pattern mappings in the format "name=pattern"
335
- dir_mappings: List of directory mappings in the format "name=directory"
341
+ file_mappings: List of file mappings as (name, path) tuples
342
+ pattern_mappings: List of pattern mappings as (name, pattern) tuples
343
+ dir_mappings: List of directory mappings as (name, directory) tuples
336
344
  dir_recursive: Whether to process directories recursively
337
345
  dir_extensions: List of file extensions to include in directory processing
338
346
  security_manager: Security manager instance
@@ -383,7 +391,7 @@ def collect_files(
383
391
  raise ValueError(f"Duplicate file mapping: {name}")
384
392
 
385
393
  file_info = FileInfo.from_path(
386
- path, security_manager=security_manager, **kwargs
394
+ str(path), security_manager=security_manager, **kwargs
387
395
  )
388
396
  files[name] = FileInfoList([file_info], from_dir=False)
389
397
  logger.debug("Added single file mapping: %s -> %s", name, path)
@@ -398,7 +406,7 @@ def collect_files(
398
406
 
399
407
  try:
400
408
  matched_files = collect_files_from_pattern(
401
- pattern, security_manager=security_manager, **kwargs
409
+ str(pattern), security_manager=security_manager, **kwargs
402
410
  )
403
411
  except PathSecurityError as e:
404
412
  logger.debug("Security error in pattern mapping: %s", str(e))
@@ -465,7 +473,7 @@ def collect_files(
465
473
 
466
474
  if not files:
467
475
  logger.debug("No files found in any mappings")
468
- raise ValueError("No files found")
476
+ return files
469
477
 
470
478
  logger.debug("Collected files total mappings: %d", len(files))
471
479
  return files
@@ -609,14 +617,14 @@ def read_allowed_dirs_from_file(filepath: str) -> List[str]:
609
617
  A list of allowed directories as absolute paths.
610
618
 
611
619
  Raises:
612
- FileNotFoundError: If the file does not exist.
620
+ OstructFileNotFoundError: If the file does not exist.
613
621
  ValueError: If the file contains invalid data.
614
622
  """
615
623
  try:
616
624
  with open(filepath, "r") as f:
617
625
  lines = f.readlines()
618
626
  except OSError as e:
619
- raise FileNotFoundError(
627
+ raise OstructFileNotFoundError(
620
628
  f"Error reading allowed directories from file: {filepath}: {e}"
621
629
  )
622
630
 
ostruct/cli/path_utils.py CHANGED
@@ -1,17 +1,21 @@
1
1
  """Path validation utilities for the CLI."""
2
2
 
3
+ import logging
3
4
  from pathlib import Path
4
5
  from typing import Optional, Tuple
5
6
 
6
7
  from ostruct.cli.errors import (
7
8
  DirectoryNotFoundError,
8
- FileNotFoundError,
9
+ OstructFileNotFoundError,
10
+ PathSecurityError,
9
11
  VariableNameError,
10
12
  VariableValueError,
11
13
  )
12
- from ostruct.cli.security.errors import PathSecurityError, SecurityErrorReasons
14
+ from ostruct.cli.security.errors import SecurityErrorReasons
13
15
  from ostruct.cli.security.security_manager import SecurityManager
14
16
 
17
+ logger = logging.getLogger(__name__)
18
+
15
19
 
16
20
  def validate_path_mapping(
17
21
  mapping: str,
@@ -44,34 +48,52 @@ def validate_path_mapping(
44
48
  >>> validate_path_mapping("data=config/", is_dir=True) # Validates directory
45
49
  ('data', 'config/')
46
50
  """
51
+ logger.debug(
52
+ "Validating path mapping: %s (is_dir=%s, base_dir=%s)",
53
+ mapping,
54
+ is_dir,
55
+ base_dir,
56
+ )
57
+
47
58
  # Split into name and path parts
48
59
  try:
49
60
  name, path_str = mapping.split("=", 1)
50
61
  except ValueError:
62
+ logger.error("Invalid mapping format (missing '='): %s", mapping)
51
63
  raise ValueError(f"Invalid mapping format (missing '='): {mapping}")
52
64
 
53
65
  # Validate name
54
66
  name = name.strip()
55
67
  if not name:
68
+ logger.error("Variable name cannot be empty: %s", mapping)
56
69
  raise VariableNameError("Variable name cannot be empty")
57
70
  if not name.isidentifier():
71
+ logger.error("Invalid variable name: %s", name)
58
72
  raise VariableNameError(f"Invalid variable name: {name}")
59
73
 
60
74
  # Normalize path
61
75
  path_str = path_str.strip()
62
76
  if not path_str:
77
+ logger.error("Path cannot be empty: %s", mapping)
63
78
  raise VariableValueError("Path cannot be empty")
64
79
 
80
+ logger.debug("Creating Path object for: %s", path_str)
65
81
  # Create a Path object
66
82
  path = Path(path_str)
67
83
  if not path.is_absolute() and base_dir:
84
+ logger.debug(
85
+ "Converting relative path to absolute using base_dir: %s", base_dir
86
+ )
68
87
  path = Path(base_dir) / path
69
88
 
70
89
  # Validate path with security manager if provided
71
90
  if security_manager:
91
+ logger.debug("Validating path with security manager: %s", path)
72
92
  try:
73
93
  path = security_manager.validate_path(path)
94
+ logger.debug("Security validation passed: %s", path)
74
95
  except PathSecurityError as e:
96
+ logger.error("Security validation failed: %s - %s", path, e)
75
97
  if (
76
98
  e.context.get("reason")
77
99
  == SecurityErrorReasons.PATH_OUTSIDE_ALLOWED
@@ -89,16 +111,22 @@ def validate_path_mapping(
89
111
 
90
112
  # Check path existence and type
91
113
  if not path.exists():
114
+ logger.error("Path does not exist: %s", path)
92
115
  if is_dir:
93
116
  raise DirectoryNotFoundError(f"Directory not found: {path}")
94
- raise FileNotFoundError(f"File not found: {path}")
117
+ raise OstructFileNotFoundError(f"File not found: {path}")
95
118
 
96
119
  # Check path type
97
120
  if is_dir and not path.is_dir():
121
+ logger.error("Path exists but is not a directory: %s", path)
98
122
  raise DirectoryNotFoundError(
99
123
  f"Path exists but is not a directory: {path}"
100
124
  )
101
125
  elif not is_dir and not path.is_file():
102
- raise FileNotFoundError(f"Path exists but is not a file: {path}")
126
+ logger.error("Path exists but is not a file: %s", path)
127
+ raise OstructFileNotFoundError(
128
+ f"Path exists but is not a file: {path}"
129
+ )
103
130
 
131
+ logger.debug("Path validation successful: %s -> %s", name, path)
104
132
  return name, str(path)
@@ -0,0 +1,213 @@
1
+ from enum import IntEnum
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from .errors import SchemaValidationError
5
+
6
+
7
class SchemaLimits(IntEnum):
    """Numeric limits applied by ``validate_openai_schema``."""

    # Maximum number of path segments (property names / "items") below the root.
    MAX_NESTING_DEPTH = 5
    # Maximum number of properties a single object schema may declare.
    MAX_PROPERTIES = 100
    # Maximum number of values a single enum may list.
    MAX_ENUM_VALUES = 500
    # Enums with more values than this also get their total length checked.
    MAX_ENUM_VALUES_CHAR_CHECK = 250
    # Maximum combined ``len(str(value))`` over the values of such an enum.
    MAX_ENUM_TOTAL_CHARS = 7500


# Validates the schema against OpenAI's structured output requirements.
# https://platform.openai.com/docs/guides/structured-outputs

# Keywords rejected by the validator, keyed by the JSON type they apply to.
_TYPE_PROHIBITED: Dict[str, List[str]] = {
    "object": ["patternProperties", "minProperties"],
    "array": ["minItems", "maxItems", "uniqueItems"],
    "string": ["pattern", "format", "minLength", "maxLength"],
    "number": ["minimum", "maximum", "multipleOf"],
    "integer": ["exclusiveMinimum", "exclusiveMaximum"],
}


def _declared_types(schema: Dict[str, Any]) -> List[str]:
    """Return the type names declared by ``schema["type"]`` as a list.

    JSON Schema allows ``type`` to be either a single string or a list of
    strings (for example ``["string", "null"]`` for a nullable field).
    Anything else, including a missing ``type``, yields an empty list.
    """
    declared = schema.get("type")
    if isinstance(declared, str):
        return [declared]
    if isinstance(declared, (list, tuple)):
        return [name for name in declared if isinstance(name, str)]
    return []


def _validate_root_schema(schema: Dict[str, Any], current_path: str) -> None:
    """Apply the checks that only concern the top-level schema.

    The root must be of type ``object``, must set ``additionalProperties``
    to ``False``, must define at least one property, and must list exactly
    its defined properties under ``required``.

    Raises:
        SchemaValidationError: If any of those conditions is not met.
    """
    if schema.get("type") != "object":
        raise SchemaValidationError(
            "Root schema must be type 'object'",
            context={
                "path": current_path,
                "found": schema.get("type"),
                "tips": [
                    "The root of your schema must be an object type",
                    "If you have an array, wrap it in an object property:",
                    {
                        "type": "object",
                        "properties": {
                            "items": {
                                "type": "array",
                                "items": "...your array schema...",
                            }
                        },
                        "required": ["items"],
                        "additionalProperties": False,
                    },
                ],
            },
        )

    # Identity check on purpose: a missing key or any other value is rejected.
    if schema.get("additionalProperties") is not False:
        raise SchemaValidationError(
            "Root schema must set additionalProperties: false",
            context={
                "path": current_path,
                "tips": [
                    "Add 'additionalProperties: false' to your root schema",
                    "This ensures only defined properties are allowed",
                ],
            },
        )

    # Validate required properties
    root_properties = set(schema.get("properties", {}).keys())
    required = set(schema.get("required", []))

    if not root_properties:
        raise SchemaValidationError(
            "Root schema must define at least one property",
            context={
                "path": current_path,
                "tips": [
                    "Add properties to your schema",
                    "Each property should define its type and any constraints",
                ],
            },
        )

    if required != root_properties:
        missing = root_properties - required
        extra = required - root_properties
        tips = []
        if missing:
            tips.append(
                f"Add these properties to 'required': {list(missing)}"
            )
        if extra:
            tips.append(
                f"Remove these from 'required' as they aren't defined: {list(extra)}"
            )

        raise SchemaValidationError(
            "All properties must be required in root schema",
            context={
                "path": current_path,
                "missing_required": list(missing),
                "extra_required": list(extra),
                "tips": tips,
            },
        )


def validate_openai_schema(
    schema: Dict[str, Any], path: Optional[List[str]] = None
) -> None:
    """Validate schema against OpenAI's structured output requirements.

    The function recurses into object properties and array items. A ``type``
    given as a list (e.g. ``["string", "null"]``) is handled by checking each
    listed type name in turn.

    Args:
        schema: The JSON schema to validate
        path: Current path in schema for nested validation; leave unset (or
            empty) for the root call

    Raises:
        SchemaValidationError: If schema violates any OpenAI requirements
    """
    path = path or []
    current_path = "/".join(path) or "<root>"

    # Root level validation
    if not path:  # Only check at root
        _validate_root_schema(schema, current_path)

    # Structural validation
    if len(path) > SchemaLimits.MAX_NESTING_DEPTH:
        raise SchemaValidationError(
            f"Schema exceeds maximum nesting depth of {SchemaLimits.MAX_NESTING_DEPTH} levels",
            context={
                "path": current_path,
                "tips": [
                    "Flatten your schema structure",
                    "Consider combining nested objects",
                    "Move complex structures to root level properties",
                ],
            },
        )

    # Normalised once so that union types take the same branches as plain
    # string types instead of being skipped (or crashing on a dict lookup).
    schema_types = _declared_types(schema)

    # Property count validation
    if "object" in schema_types:
        obj_properties: Dict[str, Any] = schema.get("properties", {})
        if len(obj_properties) > SchemaLimits.MAX_PROPERTIES:
            raise SchemaValidationError(
                f"Schema exceeds maximum of {SchemaLimits.MAX_PROPERTIES} properties",
                context={
                    "path": current_path,
                    "count": len(obj_properties),
                    "tips": [
                        "Reduce the number of properties",
                        "Consider grouping related properties into sub-objects",
                        "Remove any unused or optional properties",
                    ],
                },
            )

        # Validate each property
        for prop_name, prop_schema in obj_properties.items():
            validate_openai_schema(prop_schema, path + [prop_name])

    # Array validation
    if "array" in schema_types and "items" in schema:
        validate_openai_schema(schema["items"], path + ["items"])

    # Enum validation
    if "enum" in schema:
        enum_values = schema["enum"]
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES:
            raise SchemaValidationError(
                f"Enum exceeds maximum of {SchemaLimits.MAX_ENUM_VALUES} values",
                context={
                    "path": current_path,
                    "count": len(enum_values),
                    "tips": [
                        "Reduce the number of enum values",
                        "Consider using a different type or structure",
                        "Split into multiple smaller enums if possible",
                    ],
                },
            )

        # Check enum string length for large enums
        if len(enum_values) > SchemaLimits.MAX_ENUM_VALUES_CHAR_CHECK:
            total_chars = sum(len(str(v)) for v in enum_values)
            if total_chars > SchemaLimits.MAX_ENUM_TOTAL_CHARS:
                raise SchemaValidationError(
                    f"Enum values exceed maximum total length of {SchemaLimits.MAX_ENUM_TOTAL_CHARS} characters",
                    context={
                        "path": current_path,
                        "total_chars": total_chars,
                        "tips": [
                            "Reduce the length of enum values",
                            "Consider using shorter identifiers",
                            "Split into multiple smaller enums",
                        ],
                    },
                )

    # Prohibited keywords by type; the first offending type name is reported.
    for schema_type in schema_types:
        if schema_type not in _TYPE_PROHIBITED:
            continue
        used_prohibited = list(
            set(_TYPE_PROHIBITED[schema_type]).intersection(schema.keys())
        )
        if used_prohibited:
            raise SchemaValidationError(
                f"Schema uses prohibited keywords for type '{schema_type}'",
                context={
                    "path": current_path,
                    "type": schema_type,
                    "prohibited_used": used_prohibited,
                    "tips": [
                        f"Remove these prohibited keywords: {used_prohibited}",
                        "OpenAI structured output has limited keyword support",
                        "Use only basic type constraints",
                    ],
                },
            )
@@ -25,6 +25,9 @@ def is_path_in_allowed_dirs(
25
25
  Returns:
26
26
  True if path is within one of the allowed directories; False otherwise.
27
27
 
28
+ Raises:
29
+ TypeError: If path is None or not a string/Path object.
30
+
28
31
  Example:
29
32
  >>> allowed = [Path("/base"), Path("/tmp")]
30
33
  >>> is_path_in_allowed_dirs("/base/file.txt", allowed)
@@ -32,6 +35,11 @@ def is_path_in_allowed_dirs(
32
35
  >>> is_path_in_allowed_dirs("/etc/passwd", allowed)
33
36
  False
34
37
  """
38
+ if path is None:
39
+ raise TypeError("path must be a string or Path object")
40
+ if not isinstance(path, (str, Path)):
41
+ raise TypeError("path must be a string or Path object")
42
+
35
43
  norm_path = normalize_path(path)
36
44
  norm_allowed = [normalize_path(d) for d in allowed_dirs]
37
45
 
@@ -0,0 +1,46 @@
1
+ """Base class for security-related errors."""
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ from ostruct.cli.base_errors import CLIError
6
+ from ostruct.cli.exit_codes import ExitCode
7
+
8
+
9
class SecurityErrorBase(CLIError):
    """Base class for security-related errors.

    Each instance adds ``"category": "security"`` to its context and passes
    ``ExitCode.SECURITY_ERROR`` to ``CLIError`` as the exit code.
    """

    def __init__(
        self,
        message: str,
        context: Optional[Dict[str, Any]] = None,
        details: Optional[str] = None,
        has_been_logged: bool = False,
    ) -> None:
        """Initialize security error.

        Args:
            message: The error message.
            context: Additional context for the error. The mapping is copied,
                so the caller's dict is left untouched.
            details: Detailed explanation of the error.
            has_been_logged: Whether the error has been logged.
        """
        # Tag a copy rather than the argument itself: writing the category
        # into the caller's dict would leak into any later reuse of it.
        tagged_context: Dict[str, Any] = (
            dict(context) if context is not None else {}
        )
        tagged_context["category"] = "security"
        super().__init__(
            message,
            context=tagged_context,
            exit_code=ExitCode.SECURITY_ERROR,
            details=details,
        )
        self._has_been_logged = has_been_logged

    @property
    def has_been_logged(self) -> bool:
        """Whether this error has been logged."""
        return self._has_been_logged

    @has_been_logged.setter
    def has_been_logged(self, value: bool) -> None:
        """Set whether this error has been logged."""
        self._has_been_logged = value