ostruct-cli 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ostruct/cli/file_info.py CHANGED
@@ -3,12 +3,26 @@
3
3
  import hashlib
4
4
  import logging
5
5
  import os
6
+ from enum import Enum
6
7
  from pathlib import Path
7
8
  from typing import Any, Optional
8
9
 
9
10
  from .errors import FileReadError, OstructFileNotFoundError, PathSecurityError
10
11
  from .security import SecurityManager
11
12
 
13
+
14
class FileRoutingIntent(Enum):
    """Intended use of a file within the ostruct pipeline.

    Tagging a file with its routing intent lets the rest of the pipeline tell
    the different routing choices apart, so that appropriate warnings and
    optimizations can be applied.
    """

    # Template-only access: -ft, --fta, legacy -f, -d
    TEMPLATE_ONLY = "template_only"

    # Code Interpreter: -fc, --fca
    CODE_INTERPRETER = "code_interpreter"

    # File Search: -fs, --fsa
    FILE_SEARCH = "file_search"
24
+
25
+
12
26
  logger = logging.getLogger(__name__)
13
27
 
14
28
 
@@ -35,6 +49,7 @@ class FileInfo:
35
49
  encoding: Optional[str] = None,
36
50
  hash_value: Optional[str] = None,
37
51
  routing_type: Optional[str] = None,
52
+ routing_intent: Optional[FileRoutingIntent] = None,
38
53
  ) -> None:
39
54
  """Initialize FileInfo instance.
40
55
 
@@ -45,6 +60,7 @@ class FileInfo:
45
60
  encoding: Optional cached encoding
46
61
  hash_value: Optional cached hash value
47
62
  routing_type: How the file was routed (e.g., 'template', 'code-interpreter')
63
+ routing_intent: The intended use of the file in the pipeline
48
64
 
49
65
  Raises:
50
66
  FileNotFoundError: If the file does not exist
@@ -59,6 +75,7 @@ class FileInfo:
59
75
  self.__size: Optional[int] = None
60
76
  self.__mtime: Optional[float] = None
61
77
  self.routing_type = routing_type
78
+ self.routing_intent = routing_intent
62
79
 
63
80
  logger.debug(
64
81
  "Creating FileInfo for path: %s, routing_type: %s",
@@ -132,23 +149,6 @@ class FileInfo:
132
149
  f"Permission denied: {os.path.basename(str(path))}"
133
150
  ) from e
134
151
 
135
- # Add warning for large template-only files accessed via .content
136
- # Check if routing_type is 'template' or if it's part of a legacy -f/-d mapping
137
- # For simplicity now, let's assume if routing_type is None it could be legacy template
138
- is_template_routed = (
139
- self.routing_type == "template" or self.routing_type is None
140
- )
141
- if (
142
- is_template_routed and self.size and self.size > 100 * 1024
143
- ): # 100KB threshold
144
- logger.warning(
145
- f"File '{self.path}' ({self.size / 1024:.1f}KB) was routed for template-only access "
146
- f"but its .content is being accessed. This will include the entire file content "
147
- f"in the prompt sent to the AI. For large files intended for analysis or search, "
148
- f"consider using -fc (Code Interpreter) or -fs (File Search) to optimize token usage, "
149
- f"cost, and avoid exceeding model context limits."
150
- )
151
-
152
152
  @property
153
153
  def path(self) -> str:
154
154
  """Get the path relative to security manager's base directory.
@@ -258,6 +258,32 @@ class FileInfo:
258
258
  FileReadError: If the file cannot be read, wrapping the underlying cause
259
259
  (FileNotFoundError, UnicodeDecodeError, etc)
260
260
  """
261
+ # Add warning for large template-only files accessed via .content
262
+ # Use intent-based logic with fallback to routing_type for backward compatibility
263
+ template_only_intents = {FileRoutingIntent.TEMPLATE_ONLY}
264
+
265
+ # Determine if this is template-only routing
266
+ is_template_only = False
267
+ if self.routing_intent is not None:
268
+ # Use intent if available (new logic)
269
+ is_template_only = self.routing_intent in template_only_intents
270
+ else:
271
+ # Fallback to old logic for backward compatibility
272
+ is_template_only = (
273
+ self.routing_type == "template" or self.routing_type is None
274
+ )
275
+
276
+ if (
277
+ is_template_only and self.size and self.size > 100 * 1024
278
+ ): # 100KB threshold
279
+ logger.warning(
280
+ f"File '{self.path}' ({self.size / 1024:.1f}KB) was routed for template-only access "
281
+ f"but its .content is being accessed. This will include the entire file content "
282
+ f"in the prompt sent to the AI. For large files intended for analysis or search, "
283
+ f"consider using -fc (Code Interpreter) or -fs (File Search) to optimize token usage, "
284
+ f"cost, and avoid exceeding model context limits."
285
+ )
286
+
261
287
  if self.__content is None:
262
288
  try:
263
289
  self._read_file()
@@ -384,6 +410,7 @@ class FileInfo:
384
410
  path: str,
385
411
  security_manager: SecurityManager,
386
412
  routing_type: Optional[str] = None,
413
+ routing_intent: Optional[FileRoutingIntent] = None,
387
414
  ) -> "FileInfo":
388
415
  """Create FileInfo instance from path.
389
416
 
@@ -391,6 +418,7 @@ class FileInfo:
391
418
  path: Path to file
392
419
  security_manager: Security manager for path validation
393
420
  routing_type: How the file was routed (e.g., 'template', 'code-interpreter')
421
+ routing_intent: The intended use of the file in the pipeline
394
422
 
395
423
  Returns:
396
424
  FileInfo instance
@@ -399,7 +427,12 @@ class FileInfo:
399
427
  FileNotFoundError: If file does not exist
400
428
  PathSecurityError: If path is not allowed
401
429
  """
402
- return cls(path, security_manager, routing_type=routing_type)
430
+ return cls(
431
+ path,
432
+ security_manager,
433
+ routing_type=routing_type,
434
+ routing_intent=routing_intent,
435
+ )
403
436
 
404
437
  def __str__(self) -> str:
405
438
  """String representation showing path."""
@@ -419,8 +452,10 @@ class FileInfo:
419
452
 
420
453
  Internal methods can modify private attributes, but external access is prevented.
421
454
  """
422
- # Allow setting routing_type if it's not already set (i.e., during __init__)
423
- if name == "routing_type" and not hasattr(self, name):
455
+ # Allow setting routing_type and routing_intent if they're not already set (i.e., during __init__)
456
+ if name in ("routing_type", "routing_intent") and not hasattr(
457
+ self, name
458
+ ):
424
459
  object.__setattr__(self, name, value)
425
460
  return
426
461
 
ostruct/cli/file_utils.py CHANGED
@@ -58,7 +58,7 @@ from ostruct.cli.errors import (
58
58
  PathSecurityError,
59
59
  )
60
60
 
61
- from .file_info import FileInfo
61
+ from .file_info import FileInfo, FileRoutingIntent
62
62
  from .file_list import FileInfoList
63
63
  from .security import SecurityManager
64
64
  from .security.types import SecurityManagerProtocol
@@ -116,6 +116,7 @@ def collect_files_from_pattern(
116
116
  pattern: str,
117
117
  security_manager: SecurityManager,
118
118
  routing_type: Optional[str] = None,
119
+ routing_intent: Optional["FileRoutingIntent"] = None,
119
120
  ) -> List[FileInfo]:
120
121
  """Collect files matching a glob pattern or exact file path.
121
122
 
@@ -123,6 +124,7 @@ def collect_files_from_pattern(
123
124
  pattern: Glob pattern or file path to match
124
125
  security_manager: Security manager for path validation
125
126
  routing_type: How the file was routed
127
+ routing_intent: The intended use of the file in the pipeline
126
128
 
127
129
  Returns:
128
130
  List of FileInfo objects for matched files
@@ -134,7 +136,10 @@ def collect_files_from_pattern(
134
136
  if os.path.isfile(pattern):
135
137
  try:
136
138
  file_info = FileInfo.from_path(
137
- pattern, security_manager, routing_type=routing_type
139
+ pattern,
140
+ security_manager,
141
+ routing_type=routing_type,
142
+ routing_intent=routing_intent,
138
143
  )
139
144
  return [file_info]
140
145
  except PathSecurityError:
@@ -154,7 +159,10 @@ def collect_files_from_pattern(
154
159
  for path in matched_paths:
155
160
  try:
156
161
  file_info = FileInfo.from_path(
157
- path, security_manager, routing_type=routing_type
162
+ path,
163
+ security_manager,
164
+ routing_type=routing_type,
165
+ routing_intent=routing_intent,
158
166
  )
159
167
  files.append(file_info)
160
168
  except PathSecurityError:
@@ -172,6 +180,7 @@ def collect_files_from_directory(
172
180
  recursive: bool = False,
173
181
  allowed_extensions: Optional[List[str]] = None,
174
182
  routing_type: Optional[str] = None,
183
+ routing_intent: Optional[FileRoutingIntent] = None,
175
184
  **kwargs: Any,
176
185
  ) -> List[FileInfo]:
177
186
  """Collect files from a directory.
@@ -182,6 +191,7 @@ def collect_files_from_directory(
182
191
  recursive: Whether to process subdirectories
183
192
  allowed_extensions: List of allowed file extensions (without dot)
184
193
  routing_type: How the file was routed
194
+ routing_intent: The intended use of the file in the pipeline
185
195
  **kwargs: Additional arguments passed to FileInfo.from_path
186
196
 
187
197
  Returns:
@@ -283,6 +293,7 @@ def collect_files_from_directory(
283
293
  abs_path,
284
294
  security_manager=security_manager,
285
295
  routing_type=routing_type,
296
+ routing_intent=routing_intent,
286
297
  **kwargs,
287
298
  )
288
299
  files.append(file_info)
@@ -347,6 +358,7 @@ def collect_files(
347
358
  dir_extensions: Optional[List[str]] = None,
348
359
  security_manager: Optional[SecurityManager] = None,
349
360
  routing_type: Optional[str] = None,
361
+ routing_intent: Optional[FileRoutingIntent] = None,
350
362
  **kwargs: Any,
351
363
  ) -> Dict[str, FileInfoList]:
352
364
  """Collect files from multiple sources.
@@ -359,6 +371,7 @@ def collect_files(
359
371
  dir_extensions: List of file extensions to include in directory processing
360
372
  security_manager: Security manager instance
361
373
  routing_type: How the files were routed (passed to FileInfo)
374
+ routing_intent: The intended use of the file in the pipeline
362
375
  **kwargs: Additional arguments passed to FileInfo.from_path
363
376
 
364
377
  Returns:
@@ -411,6 +424,7 @@ def collect_files(
411
424
  str(path),
412
425
  security_manager=security_manager,
413
426
  routing_type=routing_type,
427
+ routing_intent=routing_intent,
414
428
  **kwargs,
415
429
  )
416
430
  files[name] = FileInfoList(
@@ -433,6 +447,7 @@ def collect_files(
433
447
  str(pattern),
434
448
  security_manager=security_manager,
435
449
  routing_type=routing_type,
450
+ routing_intent=routing_intent,
436
451
  **kwargs,
437
452
  )
438
453
  except PathSecurityError as e:
@@ -476,6 +491,7 @@ def collect_files(
476
491
  recursive=dir_recursive,
477
492
  allowed_extensions=dir_extensions,
478
493
  routing_type=routing_type,
494
+ routing_intent=routing_intent,
479
495
  **kwargs,
480
496
  )
481
497
  except PathSecurityError as e:
@@ -0,0 +1,75 @@
1
+ import json
2
+ import re
3
+ from typing import Tuple
4
+
5
# Matches a ```json fenced block and captures its body in group 1, tolerating
# optional newlines/indentation just inside either fence.
# The capture is non-greedy, so it ends at the FIRST closing ``` it meets --
# including one that happens to sit inside a JSON string value.
# No flags are passed: the pattern contains no ".", "^" or "$", so re.DOTALL
# and re.MULTILINE would be no-ops here.
JSON_RE = re.compile(r"```json\s*\n?([\s\S]*?)\n?\s*```")
10
+
11
+
12
def split_json_and_text(raw: str) -> Tuple[dict, str]:
    """
    Extract JSON from fenced code blocks and return markdown-only text.

    The opening fence is the first ```json marker in ``raw``. Every later ```
    is then tried in turn as the closing fence; the first one that leaves
    parseable JSON between the fences wins. This means a ``` embedded in a
    JSON string value does not terminate the block early.

    Args:
        raw: Raw response text containing fenced JSON and markdown

    Returns:
        Tuple of (parsed_json_dict, markdown_text_after_json). The first item
        is whatever ``json.loads`` produces for the block; the second is the
        text following the closing fence with leading whitespace removed.

    Raises:
        ValueError: If no JSON block found or JSON is invalid. The message is
            "No ```json ... ``` block found" when the opening or the closing
            fence is missing, and "Invalid JSON in fenced block" when closing
            fences exist but none of them yields valid JSON.

    Note:
        Returns first JSON block if multiple exist. Markdown text is everything
        after the JSON block for downstream annotation processing.

    Example:
        >>> content = '''```json
        ... {"result": "success"}
        ... ```
        ...
        ... [Download file.txt](sandbox:/mnt/data/file.txt)'''
        >>> data, markdown = split_json_and_text(content)
        >>> data
        {'result': 'success'}
        >>> markdown.strip()
        '[Download file.txt](sandbox:/mnt/data/file.txt)'
    """
    fence = "```"

    # Locate the opening fence; trailing whitespace/newlines belong to it.
    opening = re.search(r"```json\s*", raw)
    if opening is None:
        raise ValueError("No ```json ... ``` block found")
    content_start = opening.end()

    # Walk the candidate closing fences once, remembering whether any exist,
    # so the two failure modes can be told apart without rescanning the text.
    saw_closing_fence = False
    search_from = content_start
    while True:
        fence_at = raw.find(fence, search_from)
        if fence_at == -1:
            break
        saw_closing_fence = True
        # Candidates never overlap: resume right after this fence.
        search_from = fence_at + len(fence)

        candidate = raw[content_start:fence_at].strip()
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            # This ``` is probably inside a JSON string; try the next one.
            continue
        return data, raw[search_from:].lstrip()

    if not saw_closing_fence:
        # The block was opened but never closed.
        raise ValueError("No ```json ... ``` block found")
    # At least one closing fence existed, but none produced valid JSON.
    raise ValueError("Invalid JSON in fenced block")
@@ -197,7 +197,7 @@ def _get_type_with_constraints(
197
197
  field_type_cls = Union[tuple(union_types)]
198
198
 
199
199
  # Apply constraints based on the base type (non-null type)
200
- if base_type == str:
200
+ if base_type is str:
201
201
  # Add string-specific constraints to field_kwargs
202
202
  if "pattern" in field_schema:
203
203
  field_kwargs["pattern"] = field_schema["pattern"]