ai-pipeline-core 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/documents/mime_type.py

@@ -67,19 +67,8 @@ def detect_mime_type(content: bytes, name: str) -> str:
     Only the first 1024 bytes are analyzed for content detection.
     Extension-based detection is O(1) lookup.
 
-    Note:
-        Extension-based detection is preferred for text formats as
-        content analysis can sometimes misidentify structured text.
-
-    Example:
-        >>> detect_mime_type(b'{"key": "value"}', "data.json")
-        'application/json'
-        >>> detect_mime_type(b'Hello World', "text.txt")
-        'text/plain'
-        >>> detect_mime_type(b'', "empty.txt")
-        'text/plain'
-        >>> detect_mime_type(b'\\x89PNG', "image.xyz")
-        'image/png'  # Magic detects PNG despite wrong extension
+    Extension-based detection is preferred for text formats as
+    content analysis can sometimes misidentify structured text.
     """
     # Check for empty content
     if len(content) == 0:
@@ -99,40 +88,13 @@ def detect_mime_type(content: bytes, name: str) -> str:
             return mime
     except (AttributeError, OSError, magic.MagicException) as e:
         logger.warning(f"MIME detection failed for {name}: {e}")
-    except Exception as e:
-        logger.error(f"Unexpected error in MIME detection for {name}: {e}")
+    except Exception:
+        logger.exception(f"Unexpected error in MIME detection for {name}")
 
     # Final fallback based on extension or default
     return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
 
 
-def mime_type_from_extension(name: str) -> str:
-    """Get MIME type based solely on file extension.
-
-    Simple extension-based MIME type detection without content analysis.
-    This is a legacy function maintained for backward compatibility.
-
-    Args:
-        name: Filename with extension.
-
-    Returns:
-        MIME type based on extension, or 'application/octet-stream'
-        if extension is unknown.
-
-    Note:
-        Prefer detect_mime_type() for more accurate detection.
-        This function only checks the file extension.
-
-    Example:
-        >>> mime_type_from_extension("document.pdf")
-        'application/pdf'
-        >>> mime_type_from_extension("unknown.xyz")
-        'application/octet-stream'
-    """
-    ext = name.lower().split(".")[-1] if "." in name else ""
-    return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
-
-
 def is_text_mime_type(mime_type: str) -> bool:
     """Check if MIME type represents text-based content.
 
@@ -153,13 +115,6 @@ def is_text_mime_type(mime_type: str) -> bool:
     - application/yaml
     - application/x-yaml
 
-    Example:
-        >>> is_text_mime_type('text/plain')
-        True
-        >>> is_text_mime_type('application/json')
-        True
-        >>> is_text_mime_type('image/png')
-        False
     """
     text_types = [
         "text/",
@@ -181,15 +136,8 @@ def is_json_mime_type(mime_type: str) -> bool:
     Returns:
         True if MIME type is 'application/json', False otherwise.
 
-    Note:
-        Only matches exact 'application/json', not variants like
-        'application/ld+json' or 'application/vnd.api+json'.
-
-    Example:
-        >>> is_json_mime_type('application/json')
-        True
-        >>> is_json_mime_type('text/json')  # Not standard JSON MIME
-        False
+    Only matches exact 'application/json', not variants like
+    'application/ld+json' or 'application/vnd.api+json'.
     """
     return mime_type == "application/json"
 
@@ -209,13 +157,8 @@ def is_yaml_mime_type(mime_type: str) -> bool:
     - application/yaml (standard)
     - application/x-yaml (legacy)
 
-    Example:
-        >>> is_yaml_mime_type('application/yaml')
-        True
-        >>> is_yaml_mime_type('application/x-yaml')
-        True
     """
-    return mime_type == "application/yaml" or mime_type == "application/x-yaml"
+    return mime_type in {"application/yaml", "application/x-yaml"}
 
 
 def is_pdf_mime_type(mime_type: str) -> bool:
@@ -227,15 +170,8 @@ def is_pdf_mime_type(mime_type: str) -> bool:
     Returns:
         True if MIME type is 'application/pdf', False otherwise.
 
-    Note:
-        PDF documents require special handling in the LLM module
-        and are supported by certain vision-capable models.
-
-    Example:
-        >>> is_pdf_mime_type('application/pdf')
-        True
-        >>> is_pdf_mime_type('text/plain')
-        False
+    PDF documents require special handling in the LLM module
+    and are supported by certain vision-capable models.
     """
     return mime_type == "application/pdf"
 
@@ -257,15 +193,8 @@ def is_image_mime_type(mime_type: str) -> bool:
     - image/webp
     - image/svg+xml
 
-    Note:
-        Image documents are automatically encoded for vision-capable
-        LLM models in the AIMessages.document_to_prompt() method.
-
-    Example:
-        >>> is_image_mime_type('image/png')
-        True
-        >>> is_image_mime_type('application/pdf')
-        False
+    Image documents are automatically encoded for vision-capable
+    LLM models in the AIMessages.document_to_prompt() method.
     """
     return mime_type.startswith("image/")
 
@@ -285,8 +214,6 @@ def is_llm_supported_image(mime_type: str) -> bool:
     Unsupported image formats (gif, bmp, tiff, svg, etc.) need conversion
     to PNG before sending to the LLM.
 
-    @public
-
 
     Args:
         mime_type: MIME type string to check.
ai_pipeline_core/documents/utils.py

@@ -5,15 +5,14 @@ canonical key generation, and hash validation used throughout the document system.
 """
 
 import re
-from typing import Any, Iterable, Type
+from collections.abc import Iterable
+from typing import Any
 from urllib.parse import urlparse
 
 
 def sanitize_url(url: str) -> str:
     """Sanitize URL or query string for use in filenames.
 
-    @public
-
     Removes or replaces characters that are invalid in filenames.
 
     Args:
@@ -63,15 +62,13 @@ def camel_to_snake(name: str) -> str:
 
 
 def canonical_name_key(
-    obj_or_name: Type[Any] | str,
+    obj_or_name: type[Any] | str,
     *,
     max_parent_suffixes: int = 3,
    extra_suffixes: Iterable[str] = (),
 ) -> str:
     """Produce a canonical snake_case key from a class or name.
 
-    @public
-
     Process:
     1) Starting with the class name (or given string),
     2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
@@ -120,8 +117,6 @@ def canonical_name_key(
 def is_document_sha256(value: str) -> bool:
     """Check if a string is a valid base32-encoded SHA256 hash with proper entropy.
 
-    @public
-
     This function validates that a string is not just formatted like a SHA256 hash,
     but actually has the entropy characteristics of a real hash. It checks:
     1. Correct length (52 characters without padding)
@@ -174,7 +169,4 @@ def is_document_sha256(value: str) -> bool:
     # Require at least 8 unique characters (out of 32 possible in base32)
     # This prevents patterns like "AAAAAAA..." from being identified as real hashes
     unique_chars = len(set(value))
-    if unique_chars < 8:
-        return False
-
-    return True
+    return unique_chars >= 8
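The collapsed return in is_document_sha256 keeps the contract described in its docstring: 52 base32 characters with at least 8 distinct ones. A small sketch (module path assumed from the files-changed list):

    from ai_pipeline_core.documents.utils import is_document_sha256

    # 52 valid base32 characters but only one distinct character: rejected by the entropy check.
    print(is_document_sha256("A" * 52))  # False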
ai_pipeline_core/exceptions.py

@@ -1,97 +1,45 @@
 """Exception hierarchy for AI Pipeline Core.
 
-@public
-
 This module defines the exception hierarchy used throughout the AI Pipeline Core library.
 All exceptions inherit from PipelineCoreError, providing a consistent error handling interface.
 """
 
 
 class PipelineCoreError(Exception):
-    """Base exception for all AI Pipeline Core errors.
-
-    @public
-    """
-
-    pass
+    """Base exception for all AI Pipeline Core errors."""
 
 
 class DocumentError(PipelineCoreError):
-    """Base exception for document-related errors.
-
-    @public
-    """
-
-    pass
+    """Base exception for document-related errors."""
 
 
 class DocumentValidationError(DocumentError):
-    """Raised when document validation fails.
-
-    @public
-    """
-
-    pass
+    """Raised when document validation fails."""
 
 
 class DocumentSizeError(DocumentValidationError):
-    """Raised when document content exceeds MAX_CONTENT_SIZE limit.
-
-    @public
-    """
-
-    pass
+    """Raised when document content exceeds MAX_CONTENT_SIZE limit."""
 
 
 class DocumentNameError(DocumentValidationError):
-    """Raised when document name contains invalid characters or patterns.
-
-    @public
-    """
-
-    pass
+    """Raised when document name contains invalid characters or patterns."""
 
 
 class LLMError(PipelineCoreError):
-    """Raised when LLM generation fails after all retries.
-
-    @public
-    """
-
-    pass
+    """Raised when LLM generation fails after all retries."""
 
 
 class PromptError(PipelineCoreError):
-    """Base exception for prompt template errors.
-
-    @public
-    """
-
-    pass
+    """Base exception for prompt template errors."""
 
 
 class PromptRenderError(PromptError):
-    """Raised when Jinja2 template rendering fails.
-
-    @public
-    """
-
-    pass
+    """Raised when Jinja2 template rendering fails."""
 
 
 class PromptNotFoundError(PromptError):
-    """Raised when prompt template file is not found in search paths.
-
-    @public
-    """
-
-    pass
+    """Raised when prompt template file is not found in search paths."""
 
 
 class MimeTypeError(DocumentError):
-    """Raised when MIME type detection or validation fails.
-
-    @public
-    """
-
-    pass
+    """Raised when MIME type detection or validation fails."""
ai_pipeline_core/images/__init__.py

@@ -1,67 +1,53 @@
 """Image processing utilities for LLM vision models.
 
-@public
-
 Splits large images, compresses to JPEG, and respects model-specific constraints.
 Designed for website screenshots, document pages, and other visual content
 sent to vision-capable LLMs.
-
-Quick Start:
-    >>> from ai_pipeline_core.images import process_image, ImagePreset
-    >>>
-    >>> result = process_image(screenshot_bytes)
-    >>> for part in result:
-    ...     send_to_llm(part.data, context=part.label)
-    >>>
-    >>> result = process_image(screenshot_bytes, preset=ImagePreset.GEMINI)
 """
 
 from enum import StrEnum
 
 from pydantic import BaseModel, Field
 
-from ai_pipeline_core.documents import Document, TemporaryDocument
+from ai_pipeline_core.documents import Document
 
 from ._processing import execute_split, load_and_normalize, plan_split
 
 __all__ = [
+    "ImageDocument",
+    "ImagePart",
     "ImagePreset",
     "ImageProcessingConfig",
-    "ImagePart",
-    "ProcessedImage",
     "ImageProcessingError",
+    "ProcessedImage",
     "process_image",
     "process_image_to_documents",
 ]
 
 
+class ImageDocument(Document):  # noqa: RUF067
+    """Concrete document for processed image parts."""
+
+
 # ---------------------------------------------------------------------------
 # Configuration
 # ---------------------------------------------------------------------------
 
 
-class ImagePreset(StrEnum):
-    """Presets for LLM vision model constraints.
-
-    @public
-    """
+class ImagePreset(StrEnum):  # noqa: RUF067
+    """Presets for LLM vision model constraints."""
 
     GEMINI = "gemini"
     CLAUDE = "claude"
     GPT4V = "gpt4v"
 
 
-class ImageProcessingConfig(BaseModel):
+class ImageProcessingConfig(BaseModel):  # noqa: RUF067
     """Configuration for image processing.
 
-    @public
-
     Use ``for_preset`` for standard configurations or construct directly for
     custom constraints.
 
-    Example:
-        >>> config = ImageProcessingConfig.for_preset(ImagePreset.GEMINI)
-        >>> config = ImageProcessingConfig(max_dimension=2000, jpeg_quality=80)
     """
 
     model_config = {"frozen": True}
@@ -98,14 +84,11 @@ class ImageProcessingConfig(BaseModel):
 
     @classmethod
     def for_preset(cls, preset: ImagePreset) -> "ImageProcessingConfig":
-        """Create configuration from a model preset.
-
-        @public
-        """
+        """Create configuration from a model preset."""
         return _PRESETS[preset]
 
 
-_PRESETS: dict[ImagePreset, ImageProcessingConfig] = {
+_PRESETS: dict[ImagePreset, ImageProcessingConfig] = {  # noqa: RUF067
     ImagePreset.GEMINI: ImageProcessingConfig(
         max_dimension=3000,
         max_pixels=9_000_000,
@@ -129,11 +112,8 @@ _PRESETS: dict[ImagePreset, ImageProcessingConfig] = {
 # ---------------------------------------------------------------------------
 
 
-class ImagePart(BaseModel):
-    """A single processed image part.
-
-    @public
-    """
+class ImagePart(BaseModel):  # noqa: RUF067
+    """A single processed image part."""
 
     model_config = {"frozen": True}
 
@@ -147,20 +127,15 @@ class ImagePart(BaseModel):
 
     @property
     def label(self) -> str:
-        """Human-readable label for LLM context, 1-indexed.
-
-        @public
-        """
+        """Human-readable label for LLM context, 1-indexed."""
         if self.total == 1:
             return "Full image"
         return f"Part {self.index + 1}/{self.total}"
 
 
-class ProcessedImage(BaseModel):
+class ProcessedImage(BaseModel):  # noqa: RUF067
     """Result of image processing.
 
-    @public
-
     Iterable: ``for part in result`` iterates over parts.
     """
 
@@ -176,10 +151,7 @@ class ProcessedImage(BaseModel):
 
     @property
     def compression_ratio(self) -> float:
-        """Output size / input size (lower means more compression).
-
-        @public
-        """
+        """Output size / input size (lower means more compression)."""
         if self.original_bytes <= 0:
             return 1.0
         return self.output_bytes / self.original_bytes
@@ -199,11 +171,8 @@ class ProcessedImage(BaseModel):
 # ---------------------------------------------------------------------------
 
 
-class ImageProcessingError(Exception):
-    """Image processing failed.
-
-    @public
-    """
+class ImageProcessingError(Exception):  # noqa: RUF067
+    """Image processing failed."""
 
 
 # ---------------------------------------------------------------------------
@@ -211,15 +180,13 @@ class ImageProcessingError(Exception):
 # ---------------------------------------------------------------------------
 
 
-def process_image(
+def process_image(  # noqa: RUF067
     image: bytes | Document,
     preset: ImagePreset = ImagePreset.GEMINI,
     config: ImageProcessingConfig | None = None,
 ) -> ProcessedImage:
     """Process an image for LLM vision models.
 
-    @public
-
     Splits tall images vertically with overlap, trims width if needed, and
     compresses to JPEG. The default preset is **GEMINI** (3 000 px, 9 M pixels).
 
@@ -234,10 +201,6 @@ def process_image(
     Raises:
         ImageProcessingError: If the image cannot be decoded or processed.
 
-    Example:
-        >>> result = process_image(screenshot_bytes)
-        >>> for part in result:
-        ...     print(part.label, len(part.data))
     """
     effective = config if config is not None else ImageProcessingConfig.for_preset(preset)
 
@@ -248,7 +211,7 @@
     elif isinstance(image, bytes):  # type: ignore[reportUnnecessaryIsInstance]
         raw = image
     else:
-        raise ImageProcessingError(f"Unsupported image input type: {type(image)}")
+        raise ImageProcessingError(f"Unsupported image input type: {type(image)}")  # pyright: ignore[reportUnreachable]
 
     if not raw:
         raise ImageProcessingError("Empty image data")
@@ -306,42 +269,26 @@
     )
 
 
-def process_image_to_documents(
+def process_image_to_documents(  # noqa: RUF067
     image: bytes | Document,
     preset: ImagePreset = ImagePreset.GEMINI,
     config: ImageProcessingConfig | None = None,
     name_prefix: str = "image",
-    sources: list[str] | None = None,
-) -> list[TemporaryDocument]:
-    """Process an image and return parts as ``TemporaryDocument`` list.
-
-    @public
+    sources: tuple[str, ...] | None = None,
+) -> list[ImageDocument]:
+    """Process an image and return parts as ImageDocument list.
 
     Convenience wrapper around ``process_image`` for direct integration
     with ``AIMessages``.
-
-    Args:
-        image: Raw image bytes or a Document.
-        preset: Model preset (ignored when *config* is provided).
-        config: Custom configuration.
-        name_prefix: Prefix for generated document names.
-        sources: Optional provenance references attached to each document.
-
-    Returns:
-        List of ``TemporaryDocument`` instances with JPEG image data.
-
-    Example:
-        >>> docs = process_image_to_documents(screenshot_bytes)
-        >>> messages = AIMessages(docs)
     """
     result = process_image(image, preset=preset, config=config)
 
-    # Resolve sources
-    doc_sources: list[str] = list(sources or [])
+    source_list: list[str] = list(sources or ())
     if isinstance(image, Document):
-        doc_sources.append(image.sha256)
+        source_list.append(image.sha256)
+    doc_sources = tuple(source_list) if source_list else None
 
-    documents: list[TemporaryDocument] = []
+    documents: list[ImageDocument] = []
     for part in result.parts:
         if len(result.parts) == 1:
             name = f"{name_prefix}.jpg"
@@ -351,11 +298,11 @@ def process_image_to_documents(
             desc = part.label
 
         documents.append(
-            TemporaryDocument.create(
+            ImageDocument.create(
                 name=name,
                 content=part.data,
                 description=desc,
-                sources=doc_sources or None,
+                sources=doc_sources,
             )
         )
 
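For 0.3.4 callers: process_image_to_documents now returns ImageDocument instances (TemporaryDocument is removed) and sources is a tuple rather than a list. A sketch based on the removed Quick Start and the new signature above (screenshot_bytes is a placeholder; passing the document list to AIMessages follows the removed docstring example):

    from ai_pipeline_core.images import ImagePreset, process_image_to_documents
    from ai_pipeline_core.llm import AIMessages

    docs = process_image_to_documents(screenshot_bytes, preset=ImagePreset.GEMINI, name_prefix="page")
    messages = AIMessages(docs)  # each part is an ImageDocument labeled "Part i/n" in its description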
ai_pipeline_core/images/_processing.py

@@ -21,7 +21,7 @@ class SplitPlan:
     warnings: list[str]
 
 
-def plan_split(
+def plan_split(  # noqa: PLR0917
     width: int,
     height: int,
     max_dimension: int,
@@ -71,10 +71,7 @@ def plan_split(
 
     # Auto-reduce if exceeds max_parts
     if num_parts > max_parts:
-        warnings.append(
-            f"Image requires {num_parts} parts but max is {max_parts}. "
-            f"Reducing to {max_parts} parts with larger step."
-        )
+        warnings.append(f"Image requires {num_parts} parts but max is {max_parts}. Reducing to {max_parts} parts with larger step.")
         num_parts = max_parts
         if num_parts > 1:
             step = (height - tile_h) // (num_parts - 1)
@@ -97,10 +94,7 @@ def load_and_normalize(data: bytes) -> Image.Image:
     img.load()
 
     if img.width * img.height > PIL_MAX_PIXELS:
-        raise ValueError(
-            f"Image too large: {img.width}x{img.height} = {img.width * img.height:,} pixels "
-            f"(limit: {PIL_MAX_PIXELS:,})"
-        )
+        raise ValueError(f"Image too large: {img.width}x{img.height} = {img.width * img.height:,} pixels (limit: {PIL_MAX_PIXELS:,})")
 
     # Fix EXIF orientation (important for mobile photos)
     img = ImageOps.exif_transpose(img)
@@ -110,7 +104,7 @@
 def encode_jpeg(img: Image.Image, quality: int) -> bytes:
     """Encode PIL Image as JPEG bytes."""
     # Convert to RGB if needed (JPEG doesn't support alpha)
-    if img.mode not in ("RGB", "L"):
+    if img.mode not in {"RGB", "L"}:
         img = img.convert("RGB")
 
     buf = BytesIO()
@@ -135,7 +129,7 @@ def execute_split(
     width = plan.trim_width
 
     # Convert to RGB once for JPEG
-    if img.mode not in ("RGB", "L"):
+    if img.mode not in {"RGB", "L"}:
         img = img.convert("RGB")
 
     parts: list[tuple[bytes, int, int, int, int]] = []
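These _processing helpers are internal, but the hunks show their signatures. A sketch of the load/encode path (raw_bytes is a placeholder; private APIs may change without notice):

    from ai_pipeline_core.images._processing import encode_jpeg, load_and_normalize

    img = load_and_normalize(raw_bytes)  # rejects images over PIL_MAX_PIXELS, fixes EXIF orientation
    jpeg = encode_jpeg(img, quality=85)  # non-RGB/L modes are converted to RGB before JPEG encoding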
ai_pipeline_core/llm/__init__.py

@@ -1,7 +1,8 @@
 """Large Language Model integration via LiteLLM proxy.
 
 This package provides OpenAI API-compatible LLM interactions with built-in retry logic,
-LMNR tracing, and structured output generation using Pydantic models.
+LMNR tracing, and structured output generation using Pydantic models. Supports per-call
+observability via purpose and expected_cost parameters for span naming and cost tracking.
 """
 
 from .ai_messages import AIMessages, AIMessageType
@@ -10,15 +11,16 @@ from .client import (
     generate_structured,
 )
 from .model_options import ModelOptions
-from .model_response import ModelResponse, StructuredModelResponse
+from .model_response import Citation, ModelResponse, StructuredModelResponse
 from .model_types import ModelName
 
 __all__ = [
-    "AIMessages",
     "AIMessageType",
+    "AIMessages",
+    "Citation",
     "ModelName",
-    "ModelResponse",
     "ModelOptions",
+    "ModelResponse",
     "StructuredModelResponse",
     "generate",
     "generate_structured",