ai_pipeline_core-0.1.14-py3-none-any.whl → ai_pipeline_core-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31):
  1. ai_pipeline_core/__init__.py +21 -13
  2. ai_pipeline_core/documents/document.py +202 -51
  3. ai_pipeline_core/documents/document_list.py +148 -24
  4. ai_pipeline_core/documents/flow_document.py +2 -6
  5. ai_pipeline_core/documents/task_document.py +0 -4
  6. ai_pipeline_core/documents/temporary_document.py +1 -8
  7. ai_pipeline_core/flow/config.py +174 -5
  8. ai_pipeline_core/llm/__init__.py +1 -6
  9. ai_pipeline_core/llm/ai_messages.py +137 -4
  10. ai_pipeline_core/llm/client.py +118 -65
  11. ai_pipeline_core/llm/model_options.py +6 -7
  12. ai_pipeline_core/llm/model_response.py +17 -16
  13. ai_pipeline_core/llm/model_types.py +3 -7
  14. ai_pipeline_core/logging/__init__.py +0 -2
  15. ai_pipeline_core/logging/logging_config.py +0 -6
  16. ai_pipeline_core/logging/logging_mixin.py +2 -10
  17. ai_pipeline_core/pipeline.py +54 -68
  18. ai_pipeline_core/prefect.py +12 -3
  19. ai_pipeline_core/prompt_manager.py +14 -7
  20. ai_pipeline_core/settings.py +13 -5
  21. ai_pipeline_core/simple_runner/__init__.py +1 -11
  22. ai_pipeline_core/simple_runner/cli.py +13 -12
  23. ai_pipeline_core/simple_runner/simple_runner.py +34 -189
  24. ai_pipeline_core/storage/__init__.py +8 -0
  25. ai_pipeline_core/storage/storage.py +628 -0
  26. ai_pipeline_core/tracing.py +234 -30
  27. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/METADATA +35 -20
  28. ai_pipeline_core-0.2.1.dist-info/RECORD +38 -0
  29. ai_pipeline_core-0.1.14.dist-info/RECORD +0 -36
  30. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/WHEEL +0 -0
  31. {ai_pipeline_core-0.1.14.dist-info → ai_pipeline_core-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -3,7 +3,8 @@
3
3
  @public
4
4
  """
5
5
 
6
- from typing import Any, Iterable, SupportsIndex, Union, overload
6
+ from copy import deepcopy
7
+ from typing import Any, Callable, Iterable, SupportsIndex, Union, overload
7
8
 
8
9
  from typing_extensions import Self
9
10
 
@@ -17,8 +18,8 @@ class DocumentList(list[Document]):
17
18
 
18
19
  Specialized list with validation and filtering for documents.
19
20
 
20
- Best Practice: Use default constructor in 90% of cases. Only enable
21
- validate_same_type or validate_duplicates when you explicitly need them.
21
+ Best Practice: Use default constructor by default, unless instructed otherwise.
22
+ Only enable validate_same_type or validate_duplicates when you explicitly need them.
22
23
 
23
24
  Example:
24
25
  >>> # RECOMMENDED - default constructor for most cases
@@ -37,6 +38,7 @@ class DocumentList(list[Document]):
37
38
  documents: list[Document] | None = None,
38
39
  validate_same_type: bool = False,
39
40
  validate_duplicates: bool = False,
41
+ frozen: bool = False,
40
42
  ) -> None:
41
43
  """Initialize DocumentList.
42
44
 
@@ -46,12 +48,15 @@ class DocumentList(list[Document]):
46
48
  documents: Initial list of documents.
47
49
  validate_same_type: Enforce same document type.
48
50
  validate_duplicates: Prevent duplicate filenames.
51
+ frozen: If True, list is immutable from creation.
49
52
  """
50
53
  super().__init__()
51
54
  self._validate_same_type = validate_same_type
52
55
  self._validate_duplicates = validate_duplicates
56
+ self._frozen = False # Initialize as unfrozen to allow initial population
53
57
  if documents:
54
58
  self.extend(documents)
59
+ self._frozen = frozen # Set frozen state after initial population
55
60
 
56
61
  def _validate_no_duplicates(self) -> None:
57
62
  """Check for duplicate document names.
@@ -109,18 +114,51 @@ class DocumentList(list[Document]):
109
114
  self._validate_no_description_files()
110
115
  self._validate_types()
111
116
 
117
+ def freeze(self) -> None:
118
+ """Permanently freeze the list, preventing modifications.
119
+
120
+ Once frozen, the list cannot be unfrozen.
121
+ """
122
+ self._frozen = True
123
+
124
+ def copy(self) -> "DocumentList":
125
+ """Create an unfrozen deep copy of the list.
126
+
127
+ Returns:
128
+ New unfrozen DocumentList with deep-copied documents.
129
+ """
130
+ copied_docs = deepcopy(list(self))
131
+ return DocumentList(
132
+ documents=copied_docs,
133
+ validate_same_type=self._validate_same_type,
134
+ validate_duplicates=self._validate_duplicates,
135
+ frozen=False, # Copies are always unfrozen
136
+ )
137
+
138
+ def _check_frozen(self) -> None:
139
+ """Check if list is frozen and raise if it is.
140
+
141
+ Raises:
142
+ RuntimeError: If the list is frozen.
143
+ """
144
+ if self._frozen:
145
+ raise RuntimeError("Cannot modify frozen DocumentList")
146
+
112
147
  def append(self, document: Document) -> None:
113
148
  """Add a document to the end of the list."""
149
+ self._check_frozen()
114
150
  super().append(document)
115
151
  self._validate()
116
152
 
117
153
  def extend(self, documents: Iterable[Document]) -> None:
118
154
  """Add multiple documents to the list."""
155
+ self._check_frozen()
119
156
  super().extend(documents)
120
157
  self._validate()
121
158
 
122
159
  def insert(self, index: SupportsIndex, document: Document) -> None:
123
160
  """Insert a document at the specified position."""
161
+ self._check_frozen()
124
162
  super().insert(index, document)
125
163
  self._validate()
126
164
 
@@ -132,6 +170,7 @@ class DocumentList(list[Document]):
132
170
 
133
171
  def __setitem__(self, index: Union[SupportsIndex, slice], value: Any) -> None:
134
172
  """Set item or slice with validation."""
173
+ self._check_frozen()
135
174
  super().__setitem__(index, value)
136
175
  self._validate()
137
176
 
@@ -141,10 +180,48 @@ class DocumentList(list[Document]):
141
180
  Returns:
142
181
  Self: This DocumentList after modification.
143
182
  """
183
+ self._check_frozen()
144
184
  result = super().__iadd__(other)
145
185
  self._validate()
146
186
  return result
147
187
 
188
+ def __delitem__(self, index: Union[SupportsIndex, slice]) -> None:
189
+ """Delete item or slice from list."""
190
+ self._check_frozen()
191
+ super().__delitem__(index)
192
+
193
+ def pop(self, index: SupportsIndex = -1) -> Document:
194
+ """Remove and return item at index.
195
+
196
+ Returns:
197
+ Document removed from the list.
198
+ """
199
+ self._check_frozen()
200
+ return super().pop(index)
201
+
202
+ def remove(self, document: Document) -> None:
203
+ """Remove first occurrence of document."""
204
+ self._check_frozen()
205
+ super().remove(document)
206
+
207
+ def clear(self) -> None:
208
+ """Remove all items from list."""
209
+ self._check_frozen()
210
+ super().clear()
211
+
212
+ def reverse(self) -> None:
213
+ """Reverse list in place."""
214
+ self._check_frozen()
215
+ super().reverse()
216
+
217
+ def sort(self, *, key: Callable[[Document], Any] | None = None, reverse: bool = False) -> None:
218
+ """Sort list in place."""
219
+ self._check_frozen()
220
+ if key is None:
221
+ super().sort(reverse=reverse) # type: ignore[call-arg]
222
+ else:
223
+ super().sort(key=key, reverse=reverse)
224
+
148
225
  @overload
149
226
  def filter_by(self, arg: str) -> "DocumentList": ...
150
227
 
@@ -164,6 +241,9 @@ class DocumentList(list[Document]):
164
241
 
165
242
  @public
166
243
 
244
+ ALWAYS returns a DocumentList (which may be empty), never raises an exception
245
+ for no matches. Use this when you want to process all matching documents.
246
+
167
247
  Args:
168
248
  arg: Can be one of:
169
249
  - str: Single document name to filter by
@@ -174,7 +254,9 @@ class DocumentList(list[Document]):
174
254
  (list, tuple, set, generator, or any iterable)
175
255
 
176
256
  Returns:
177
- New DocumentList with filtered documents.
257
+ New DocumentList with filtered documents (may be empty).
258
+ - Returns ALL matching documents
259
+ - Empty DocumentList if no matches found
178
260
 
179
261
  Raises:
180
262
  TypeError: If arg is not a valid type (not str, type, or iterable),
@@ -182,12 +264,19 @@ class DocumentList(list[Document]):
182
264
  AttributeError: If arg is expected to be iterable but doesn't support iteration.
183
265
 
184
266
  Example:
185
- >>> docs.filter_by("file.txt") # Filter by single name
186
- >>> docs.filter_by(MyDocument) # Filter by single type
187
- >>> docs.filter_by([Doc1, Doc2]) # Filter by multiple types (list)
188
- >>> docs.filter_by({"file1.txt", "file2.txt"}) # Filter by multiple names (set)
189
- >>> docs.filter_by((SubDoc, AnotherDoc)) # Filter by multiple types (tuple)
190
- >>> docs.filter_by(name for name in ["a.txt", "b.txt"]) # Generator expression
267
+ >>> # Returns list with all matching documents
268
+ >>> matching_docs = docs.filter_by("file.txt") # May be empty
269
+ >>> for doc in matching_docs:
270
+ ... process(doc)
271
+ >>>
272
+ >>> # Filter by type - returns all instances
273
+ >>> config_docs = docs.filter_by(ConfigDocument)
274
+ >>> print(f"Found {len(config_docs)} config documents")
275
+ >>>
276
+ >>> # Filter by multiple names
277
+ >>> important_docs = docs.filter_by(["config.yaml", "settings.json"])
278
+ >>> if not important_docs: # Check if empty
279
+ ... print("No important documents found")
191
280
  """
192
281
  if isinstance(arg, str):
193
282
  # Filter by single name
@@ -257,38 +346,73 @@ class DocumentList(list[Document]):
257
346
  def get_by(self, arg: type[Document], required: bool = True) -> Document | None: ...
258
347
 
259
348
  def get_by(self, arg: str | type[Document], required: bool = True) -> Document | None:
260
- """Get a single document by name or type.
349
+ """Get EXACTLY ONE document by name or type.
261
350
 
262
351
  @public
263
352
 
353
+ IMPORTANT: This method expects to find exactly one matching document.
354
+ - If no matches and required=True: raises ValueError
355
+ - If no matches and required=False: returns None
356
+ - If multiple matches: ALWAYS raises ValueError (ambiguous)
357
+
358
+ When required=True (default), you do NOT need to check for None:
359
+ >>> doc = docs.get_by("config.yaml") # Will raise if not found
360
+ >>> # No need for: if doc is not None <- This is redundant!
361
+ >>> print(doc.content) # Safe to use directly
362
+
264
363
  Args:
265
364
  arg: Document name (str) or document type.
266
- required: If True, raises ValueError when not found. If False, returns None.
365
+ required: If True (default), raises ValueError when not found.
366
+ If False, returns None when not found.
267
367
 
268
368
  Returns:
269
- The first matching document, or None if not found and required=False.
369
+ The single matching document, or None if not found and required=False.
270
370
 
271
371
  Raises:
272
- ValueError: If required=True and document not found.
372
+ ValueError: If required=True and document not found, OR if multiple
373
+ documents match (ambiguous result).
273
374
  TypeError: If arg is not a string or Document type.
274
375
 
275
376
  Example:
276
- >>> doc = docs.get_by("file.txt") # Get by name, raises if not found
277
- >>> doc = docs.get_by(MyDocument, required=False) # Returns None if not found
377
+ >>> # CORRECT - No need to check for None when required=True (default)
378
+ >>> doc = docs.get_by("file.txt") # Raises if not found
379
+ >>> print(doc.content) # Safe to use directly
380
+ >>>
381
+ >>> # When using required=False, check for None
382
+ >>> doc = docs.get_by("optional.txt", required=False)
383
+ >>> if doc is not None:
384
+ ... print(doc.content)
385
+ >>>
386
+ >>> # Will raise if multiple documents have same type
387
+ >>> # Use filter_by() instead if you want all matches
388
+ >>> try:
389
+ ... doc = docs.get_by(ConfigDocument) # Error if 2+ configs
390
+ >>> except ValueError as e:
391
+ ... configs = docs.filter_by(ConfigDocument) # Get all instead
278
392
  """
279
393
  if isinstance(arg, str):
280
- # Get by name
281
- for doc in self:
282
- if doc.name == arg:
283
- return doc
394
+ # Get by name - collect all matches to check for duplicates
395
+ matches = [doc for doc in self if doc.name == arg]
396
+ if len(matches) > 1:
397
+ raise ValueError(
398
+ f"Multiple documents found with name '{arg}'. "
399
+ f"Found {len(matches)} matches. Use filter_by() to get all matches."
400
+ )
401
+ if matches:
402
+ return matches[0]
284
403
  if required:
285
404
  raise ValueError(f"Document with name '{arg}' not found")
286
405
  return None
287
406
  elif isinstance(arg, type): # type: ignore[reportUnnecessaryIsInstance]
288
- # Get by type (including subclasses)
289
- for doc in self:
290
- if isinstance(doc, arg):
291
- return doc
407
+ # Get by type (including subclasses) - collect all matches
408
+ matches = [doc for doc in self if isinstance(doc, arg)]
409
+ if len(matches) > 1:
410
+ raise ValueError(
411
+ f"Multiple documents found of type '{arg.__name__}'. "
412
+ f"Found {len(matches)} matches. Use filter_by() to get all matches."
413
+ )
414
+ if matches:
415
+ return matches[0]
292
416
  if required:
293
417
  raise ValueError(f"Document of type '{arg.__name__}' not found")
294
418
  return None
@@ -24,24 +24,20 @@ class FlowDocument(Document):
24
24
  - Persisted to file system between pipeline steps
25
25
  - Survives across multiple flow runs
26
26
  - Used for flow inputs and outputs
27
- - Saved in directories named after the document's canonical name
27
+ - Saved in directories organized by the document's type/name
28
28
 
29
29
  Creating FlowDocuments:
30
30
  Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
31
31
  See Document.create() for detailed usage examples.
32
32
 
33
33
  Persistence:
34
- Documents are saved to: {output_dir}/{canonical_name}/{filename}
34
+ Documents are saved under an output directory path associated with the document's type/name.
35
35
  For example: output/my_doc/data.json
36
36
 
37
37
  Note:
38
38
  - Cannot instantiate FlowDocument directly - must subclass
39
39
  - Used with FlowConfig to define flow input/output types
40
40
  - No additional abstract methods to implement
41
-
42
- See Also:
43
- TaskDocument: For temporary documents within task execution
44
- TemporaryDocument: For documents that are never persisted
45
41
  """
46
42
 
47
43
  def __init__(
@@ -43,10 +43,6 @@ class TaskDocument(Document):
43
43
  - Not saved by simple_runner utilities
44
44
  - Reduces I/O overhead for temporary data
45
45
  - No additional abstract methods to implement
46
-
47
- See Also:
48
- FlowDocument: For documents that persist across flow runs
49
- TemporaryDocument: Alternative for non-persistent documents
50
46
  """
51
47
 
52
48
  def __init__(
@@ -1,7 +1,5 @@
1
1
  """Temporary document implementation for non-persistent data.
2
2
 
3
- @public
4
-
5
3
  This module provides the TemporaryDocument class for documents that
6
4
  are never persisted, regardless of context.
7
5
  """
@@ -15,8 +13,6 @@ from .document import Document
15
13
  class TemporaryDocument(Document):
16
14
  r"""Concrete document class for data that is never persisted.
17
15
 
18
- @public
19
-
20
16
  TemporaryDocument is a final (non-subclassable) document type for
21
17
  data that should never be saved to disk, regardless of whether it's
22
18
  used in a flow or task context. Unlike FlowDocument and TaskDocument
@@ -28,6 +24,7 @@ class TemporaryDocument(Document):
28
24
  - Cannot be subclassed (annotated with Python's @final decorator in code)
29
25
  - Useful for transient data like API responses or intermediate calculations
30
26
  - Ignored by simple_runner save operations
27
+ - Useful for tests and debugging
31
28
 
32
29
  Creating TemporaryDocuments:
33
30
  Same as Document - use `create()` for automatic conversion, `__init__` for bytes.
@@ -48,10 +45,6 @@ class TemporaryDocument(Document):
48
45
  - This is a final class and cannot be subclassed
49
46
  - Use when you explicitly want to prevent persistence
50
47
  - Useful for sensitive data that shouldn't be written to disk
51
-
52
- See Also:
53
- FlowDocument: For documents that persist across flow runs
54
- TaskDocument: For documents temporary within task execution
55
48
  """
56
49
 
57
50
  def __init_subclass__(cls, **kwargs: Any) -> None:
@@ -10,11 +10,16 @@ Best Practice:
10
10
  to ensure type safety and proper validation of output documents.
11
11
  """
12
12
 
13
+ import json
13
14
  from abc import ABC
14
15
  from typing import Any, ClassVar, Iterable
15
16
 
16
- from ai_pipeline_core.documents import DocumentList, FlowDocument
17
+ from ai_pipeline_core.documents import Document, DocumentList, FlowDocument
17
18
  from ai_pipeline_core.exceptions import DocumentValidationError
19
+ from ai_pipeline_core.logging import get_pipeline_logger
20
+ from ai_pipeline_core.storage import Storage
21
+
22
+ logger = get_pipeline_logger(__name__)
18
23
 
19
24
 
20
25
  class FlowConfig(ABC):
@@ -51,8 +56,10 @@ class FlowConfig(ABC):
51
56
  ... OUTPUT_DOCUMENT_TYPE = ProcessedDocument # Different type!
52
57
  >>>
53
58
  >>> # Use in @pipeline_flow - RECOMMENDED PATTERN
54
- >>> @pipeline_flow(name="processing")
55
- >>> async def process(config: ProcessingFlowConfig, docs: DocumentList) -> DocumentList:
59
+ >>> @pipeline_flow(config=ProcessingFlowConfig, name="processing")
60
+ >>> async def process(
61
+ ... project_name: str, docs: DocumentList, flow_options: FlowOptions
62
+ ... ) -> DocumentList:
56
63
  ... outputs = []
57
64
  ... # ... processing logic ...
58
65
  ... return config.create_and_validate_output(outputs)
@@ -289,8 +296,10 @@ class FlowConfig(ABC):
289
296
  DocumentValidationError: If output type doesn't match OUTPUT_DOCUMENT_TYPE.
290
297
 
291
298
  Example:
292
- >>> @pipeline_flow(name="my_flow")
293
- >>> async def process_flow(config: MyFlowConfig, ...) -> DocumentList:
299
+ >>> @pipeline_flow(config=MyFlowConfig, name="my_flow")
300
+ >>> async def process_flow(
301
+ ... project_name: str, documents: DocumentList, flow_options: FlowOptions
302
+ ... ) -> DocumentList:
294
303
  >>> outputs = []
295
304
  >>> # ... processing logic ...
296
305
  >>> outputs.append(OutputDoc(...))
@@ -312,3 +321,163 @@ class FlowConfig(ABC):
312
321
  documents = DocumentList(list(output)) # type: ignore[arg-type]
313
322
  cls.validate_output_documents(documents)
314
323
  return documents
324
+
325
+ @classmethod
326
+ async def load_documents(
327
+ cls,
328
+ uri: str,
329
+ ) -> DocumentList:
330
+ """Load documents from storage matching INPUT_DOCUMENT_TYPES.
331
+
332
+ Loads documents from a storage location based on the class's INPUT_DOCUMENT_TYPES.
333
+ Supports both local filesystem and Google Cloud Storage backends.
334
+ Automatically loads metadata (.description.md and .sources.json) when present.
335
+
336
+ Args:
337
+ uri: Storage URI (file://, gs://, or local path)
338
+
339
+ Returns:
340
+ DocumentList containing loaded documents matching INPUT_DOCUMENT_TYPES
341
+
342
+ Example:
343
+ >>> # Load from local filesystem
344
+ >>> docs = await MyFlowConfig.load_documents("./data")
345
+ >>>
346
+ >>> # Load from GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
347
+ >>> docs = await MyFlowConfig.load_documents("gs://bucket/data")
348
+ """
349
+ # Use INPUT_DOCUMENT_TYPES if not specified
350
+ storage = await Storage.from_uri(uri)
351
+ loaded_documents = DocumentList()
352
+
353
+ # Process each document type
354
+ for doc_type in cls.INPUT_DOCUMENT_TYPES:
355
+ canonical_name = doc_type.canonical_name()
356
+ doc_storage = storage.with_base(canonical_name)
357
+
358
+ # Check if subdirectory exists
359
+ if not await doc_storage.exists(""):
360
+ logger.debug(f"Subdirectory {canonical_name} not found, skipping")
361
+ continue
362
+
363
+ # List files in subdirectory
364
+ objects = await doc_storage.list("", recursive=False, include_dirs=False)
365
+
366
+ # Create lookup set for metadata files
367
+ object_keys = {obj.key for obj in objects}
368
+
369
+ # Filter out metadata files
370
+ doc_files = [
371
+ obj
372
+ for obj in objects
373
+ if not obj.key.endswith(Document.DESCRIPTION_EXTENSION)
374
+ and not obj.key.endswith(Document.SOURCES_EXTENSION)
375
+ ]
376
+
377
+ for obj in doc_files:
378
+ try:
379
+ # Load document content
380
+ content = await doc_storage.read_bytes(obj.key)
381
+
382
+ # Load metadata if present
383
+ description = None
384
+ sources: list[str] = []
385
+
386
+ # Check for description in objects list
387
+ desc_path = f"{obj.key}{Document.DESCRIPTION_EXTENSION}"
388
+ if desc_path in object_keys:
389
+ try:
390
+ description = await doc_storage.read_text(desc_path)
391
+ except Exception as e:
392
+ logger.warning(f"Failed to load description for {obj.key}: {e}")
393
+
394
+ # Check for sources in objects list
395
+ sources_path = f"{obj.key}{Document.SOURCES_EXTENSION}"
396
+ if sources_path in object_keys:
397
+ try:
398
+ sources_text = await doc_storage.read_text(sources_path)
399
+ sources = json.loads(sources_text)
400
+ except Exception as e:
401
+ logger.warning(f"Failed to load sources for {obj.key}: {e}")
402
+
403
+ # Create document instance
404
+ doc = doc_type(
405
+ name=obj.key,
406
+ content=content,
407
+ description=description,
408
+ sources=sources,
409
+ )
410
+
411
+ loaded_documents.append(doc)
412
+ logger.debug(f"Loaded {doc_type.__name__} document: {obj.key}")
413
+ except Exception as e:
414
+ logger.error(f"Failed to load {doc_type.__name__} document {obj.key}: {e}")
415
+
416
+ logger.info(f"Loaded {len(loaded_documents)} documents from {uri}")
417
+ return loaded_documents
418
+
419
+ @classmethod
420
+ async def save_documents(
421
+ cls,
422
+ uri: str,
423
+ documents: DocumentList,
424
+ *,
425
+ validate_output_type: bool = True,
426
+ ) -> None:
427
+ """Save documents to storage with metadata.
428
+
429
+ Saves FlowDocument instances to a storage location with their content
430
+ and metadata files (Document.DESCRIPTION_EXTENSION and Document.SOURCES_EXTENSION).
431
+ Non-FlowDocument instances (TaskDocument, TemporaryDocument) are skipped.
432
+
433
+ Args:
434
+ uri: Storage URI (file://, gs://, or local path)
435
+ documents: DocumentList to save
436
+ validate_output_type: If True, validate documents match cls.OUTPUT_DOCUMENT_TYPE
437
+
438
+ Raises:
439
+ DocumentValidationError: If validate_output_type=True and documents don't match
440
+ OUTPUT_DOCUMENT_TYPE
441
+
442
+ Example:
443
+ >>> # Save to local filesystem
444
+ >>> await MyFlowConfig.save_documents("./output", docs)
445
+ >>>
446
+ >>> # Save to GCS (uses GCS_SERVICE_ACCOUNT_FILE from settings if configured)
447
+ >>> await MyFlowConfig.save_documents("gs://bucket/output", docs)
448
+ """
449
+ # Validate output type if requested
450
+ if validate_output_type:
451
+ cls.validate_output_documents(documents)
452
+
453
+ storage = await Storage.from_uri(uri)
454
+ saved_count = 0
455
+
456
+ for doc in documents:
457
+ # Skip non-FlowDocument instances
458
+ if not isinstance(doc, FlowDocument):
459
+ logger.warning(f"Skipping non-FlowDocument: {type(doc).__name__}")
460
+ continue
461
+
462
+ # Get canonical name for subdirectory
463
+ canonical_name = doc.canonical_name()
464
+ doc_storage = storage.with_base(canonical_name)
465
+
466
+ # Save document content
467
+ await doc_storage.write_bytes(doc.name, doc.content)
468
+ saved_count += 1
469
+
470
+ # Save description if present
471
+ if doc.description:
472
+ desc_path = f"{doc.name}{Document.DESCRIPTION_EXTENSION}"
473
+ await doc_storage.write_text(desc_path, doc.description)
474
+
475
+ # Save sources if present
476
+ if doc.sources:
477
+ sources_path = f"{doc.name}{Document.SOURCES_EXTENSION}"
478
+ sources_json = json.dumps(doc.sources, indent=2)
479
+ await doc_storage.write_text(sources_path, sources_json)
480
+
481
+ logger.debug(f"Saved {type(doc).__name__} document: {doc.name}")
482
+
483
+ logger.info(f"Saved {saved_count} documents to {uri}")
@@ -8,8 +8,6 @@ from .ai_messages import AIMessages, AIMessageType
8
8
  from .client import (
9
9
  generate,
10
10
  generate_structured,
11
- generate_with_retry_for_testing,
12
- process_messages_for_testing,
13
11
  )
14
12
  from .model_options import ModelOptions
15
13
  from .model_response import ModelResponse, StructuredModelResponse
@@ -19,12 +17,9 @@ __all__ = [
19
17
  "AIMessages",
20
18
  "AIMessageType",
21
19
  "ModelName",
22
- "ModelOptions",
23
20
  "ModelResponse",
21
+ "ModelOptions",
24
22
  "StructuredModelResponse",
25
23
  "generate",
26
24
  "generate_structured",
27
- # Internal functions exposed for testing only
28
- "process_messages_for_testing",
29
- "generate_with_retry_for_testing",
30
25
  ]