ai-pipeline-core 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/PKG-INFO +3 -3
  2. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/README.md +1 -1
  3. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/document.py +59 -5
  5. ai_pipeline_core-0.1.6/ai_pipeline_core/documents/mime_type.py +110 -0
  6. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/tracing.py +4 -0
  7. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/pyproject.toml +3 -3
  8. ai_pipeline_core-0.1.4/ai_pipeline_core/documents/mime_type.py +0 -78
  9. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/.gitignore +0 -0
  10. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/LICENSE +0 -0
  11. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/__init__.py +0 -0
  12. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/document_list.py +0 -0
  13. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/flow_document.py +0 -0
  14. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/task_document.py +0 -0
  15. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/utils.py +0 -0
  16. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/exceptions.py +0 -0
  17. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/flow/__init__.py +0 -0
  18. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/flow/config.py +0 -0
  19. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/__init__.py +0 -0
  20. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/ai_messages.py +0 -0
  21. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/client.py +0 -0
  22. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_options.py +0 -0
  23. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_response.py +0 -0
  24. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_types.py +0 -0
  25. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/__init__.py +0 -0
  26. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging.yml +0 -0
  27. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging_config.py +0 -0
  28. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  29. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/prompt_manager.py +0 -0
  30. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/py.typed +0 -0
  31. {ai_pipeline_core-0.1.4 → ai_pipeline_core-0.1.6}/ai_pipeline_core/settings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -20,7 +20,7 @@ Classifier: Typing :: Typed
20
20
  Requires-Python: >=3.12
21
21
  Requires-Dist: httpx>=0.28.1
22
22
  Requires-Dist: jinja2>=3.1.6
23
- Requires-Dist: lmnr>=0.7.4
23
+ Requires-Dist: lmnr>=0.7.5
24
24
  Requires-Dist: openai>=1.99.9
25
25
  Requires-Dist: prefect>=3.4.13
26
26
  Requires-Dist: pydantic-settings>=2.10.1
@@ -471,7 +471,7 @@ Built with:
471
471
 
472
472
  ## Stability Notice
473
473
 
474
- **Current Version**: 0.1.2
474
+ **Current Version**: 0.1.6
475
475
  **Status**: Internal Preview
476
476
  **API Stability**: Unstable - Breaking changes expected
477
477
  **Recommended Use**: Learning and reference only
@@ -429,7 +429,7 @@ Built with:
429
429
 
430
430
  ## Stability Notice
431
431
 
432
- **Current Version**: 0.1.2
432
+ **Current Version**: 0.1.6
433
433
  **Status**: Internal Preview
434
434
  **API Stability**: Unstable - Breaking changes expected
435
435
  **Recommended Use**: Learning and reference only
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.2"
19
+ __version__ = "0.1.6"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
6
6
  from base64 import b32encode
7
7
  from enum import StrEnum
8
8
  from functools import cached_property
9
- from typing import Any, ClassVar, Literal, Self
9
+ from typing import Any, ClassVar, Literal, Self, TypeVar
10
10
 
11
11
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
12
12
  from ruamel.yaml import YAML
@@ -19,13 +19,16 @@ from .mime_type import (
19
19
  is_image_mime_type,
20
20
  is_pdf_mime_type,
21
21
  is_text_mime_type,
22
+ is_yaml_mime_type,
22
23
  )
23
24
 
25
+ TModel = TypeVar("TModel", bound=BaseModel)
26
+
24
27
 
25
28
  class Document(BaseModel, ABC):
26
29
  """Abstract base class for all documents"""
27
30
 
28
- MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
31
+ MAX_CONTENT_SIZE: ClassVar[int] = 25 * 1024 * 1024 # 25MB default
29
32
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
30
33
  MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
31
34
 
@@ -105,7 +108,7 @@ class Document(BaseModel, ABC):
105
108
  except TypeError:
106
109
  raise DocumentNameError(f"{cls.__name__}.FILES must be an Enum of string values")
107
110
 
108
- if name not in allowed:
111
+ if len(allowed) > 0 and name not in allowed:
109
112
  allowed_str = ", ".join(sorted(allowed))
110
113
  raise DocumentNameError(f"Invalid filename '{name}'. Allowed names: {allowed_str}")
111
114
 
@@ -207,15 +210,40 @@ class Document(BaseModel, ABC):
207
210
  """Parse document as JSON"""
208
211
  return json.loads(self.as_text())
209
212
 
213
+ def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
214
+ """Parse document as a pydantic model and return the validated instance"""
215
+ data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
216
+ return model_type.model_validate(data)
217
+
210
218
  def as_markdown_list(self) -> list[str]:
211
219
  """Parse document as a markdown list"""
212
220
  return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
213
221
 
214
222
  @classmethod
215
- def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
223
+ def create(
224
+ cls,
225
+ name: str,
226
+ description: str | None,
227
+ content: bytes | str | BaseModel | list[str] | Any,
228
+ ) -> Self:
216
229
  """Create a document from a name, description, and content"""
217
- if isinstance(content, str):
230
+ is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
231
+ is_json_extension = name.endswith(".json")
232
+ is_markdown_extension = name.endswith(".md")
233
+ is_str_list = isinstance(content, list) and all(isinstance(item, str) for item in content)
234
+ if isinstance(content, bytes):
235
+ pass
236
+ elif isinstance(content, str):
218
237
  content = content.encode("utf-8")
238
+ elif is_str_list and is_markdown_extension:
239
+ return cls.create_as_markdown_list(name, description, content) # type: ignore[arg-type]
240
+ elif is_yaml_extension:
241
+ return cls.create_as_yaml(name, description, content)
242
+ elif is_json_extension:
243
+ return cls.create_as_json(name, description, content)
244
+ else:
245
+ raise ValueError(f"Unsupported content type: {type(content)} for {name}")
246
+
219
247
  return cls(name=name, description=description, content=content)
220
248
 
221
249
  @classmethod
@@ -230,6 +258,32 @@ class Document(BaseModel, ABC):
230
258
  content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
231
259
  return cls.create(name, description, content)
232
260
 
261
+ @classmethod
262
+ def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
263
+ """Create a document from a name, description, and JSON data"""
264
+ assert name.endswith(".json"), f"Document name must end with .json: {name}"
265
+ if isinstance(data, BaseModel):
266
+ data = data.model_dump(mode="json")
267
+ content = json.dumps(data, indent=2).encode("utf-8")
268
+ return cls.create(name, description, content)
269
+
270
+ @classmethod
271
+ def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
272
+ """Create a document from a name, description, and YAML data"""
273
+ assert name.endswith(".yaml") or name.endswith(".yml"), (
274
+ f"Document name must end with .yaml or .yml: {name}"
275
+ )
276
+ if isinstance(data, BaseModel):
277
+ data = data.model_dump()
278
+ yaml = YAML()
279
+ yaml.indent(mapping=2, sequence=4, offset=2)
280
+ from io import BytesIO
281
+
282
+ stream = BytesIO()
283
+ yaml.dump(data, stream)
284
+ content = stream.getvalue()
285
+ return cls.create(name, description, content)
286
+
233
287
  def serialize_model(self) -> dict[str, Any]:
234
288
  """Serialize document to a dictionary with proper encoding."""
235
289
  result = {
@@ -0,0 +1,110 @@
1
+ """MIME type detection utilities for documents"""
2
+
3
+ import magic
4
+
5
+ from ai_pipeline_core.logging import get_pipeline_logger
6
+
7
+ logger = get_pipeline_logger(__name__)
8
+
9
+ # Extension to MIME type mapping for common formats
10
+ # These are formats where extension-based detection is more reliable
11
+ EXTENSION_MIME_MAP = {
12
+ "md": "text/markdown",
13
+ "txt": "text/plain",
14
+ "pdf": "application/pdf",
15
+ "png": "image/png",
16
+ "jpg": "image/jpeg",
17
+ "jpeg": "image/jpeg",
18
+ "gif": "image/gif",
19
+ "bmp": "image/bmp",
20
+ "webp": "image/webp",
21
+ "json": "application/json",
22
+ "yaml": "application/yaml",
23
+ "yml": "application/yaml",
24
+ "xml": "text/xml",
25
+ "html": "text/html",
26
+ "htm": "text/html",
27
+ "py": "text/x-python",
28
+ "css": "text/css",
29
+ "js": "application/javascript",
30
+ "ts": "application/typescript",
31
+ "tsx": "application/typescript",
32
+ "jsx": "application/javascript",
33
+ }
34
+
35
+
36
+ def detect_mime_type(content: bytes, name: str) -> str:
37
+ """Detect MIME type from content and filename
38
+
39
+ Uses a hybrid approach:
40
+ 1. Check for empty content
41
+ 2. Try extension-based detection for known formats
42
+ 3. Fall back to magic content detection
43
+ 4. Final fallback to application/octet-stream
44
+ """
45
+
46
+ # Check for empty content
47
+ if len(content) == 0:
48
+ return "application/x-empty"
49
+
50
+ # Try extension-based detection first for known formats
51
+ # This is more reliable for text formats that magic might misidentify
52
+ ext = name.lower().split(".")[-1] if "." in name else ""
53
+ if ext in EXTENSION_MIME_MAP:
54
+ return EXTENSION_MIME_MAP[ext]
55
+
56
+ # Try content-based detection with magic
57
+ try:
58
+ mime = magic.from_buffer(content[:1024], mime=True)
59
+ # If magic returns a valid mime type, use it
60
+ if mime and mime != "application/octet-stream":
61
+ return mime
62
+ except (AttributeError, OSError, magic.MagicException) as e:
63
+ logger.warning(f"MIME detection failed for {name}: {e}")
64
+ except Exception as e:
65
+ logger.error(f"Unexpected error in MIME detection for {name}: {e}")
66
+
67
+ # Final fallback based on extension or default
68
+ return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
69
+
70
+
71
+ def mime_type_from_extension(name: str) -> str:
72
+ """Get MIME type based on file extension
73
+
74
+ Legacy function kept for compatibility
75
+ """
76
+ ext = name.lower().split(".")[-1] if "." in name else ""
77
+ return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
78
+
79
+
80
+ def is_text_mime_type(mime_type: str) -> bool:
81
+ """Check if MIME type represents text content"""
82
+ text_types = [
83
+ "text/",
84
+ "application/json",
85
+ "application/xml",
86
+ "application/javascript",
87
+ "application/yaml",
88
+ "application/x-yaml",
89
+ ]
90
+ return any(mime_type.startswith(t) for t in text_types)
91
+
92
+
93
+ def is_json_mime_type(mime_type: str) -> bool:
94
+ """Check if MIME type is JSON"""
95
+ return mime_type == "application/json"
96
+
97
+
98
+ def is_yaml_mime_type(mime_type: str) -> bool:
99
+ """Check if MIME type is YAML"""
100
+ return mime_type == "application/yaml" or mime_type == "application/x-yaml"
101
+
102
+
103
+ def is_pdf_mime_type(mime_type: str) -> bool:
104
+ """Check if MIME type is PDF"""
105
+ return mime_type == "application/pdf"
106
+
107
+
108
+ def is_image_mime_type(mime_type: str) -> bool:
109
+ """Check if MIME type is an image"""
110
+ return mime_type.startswith("image/")
@@ -103,6 +103,7 @@ def trace(
103
103
  ignore_inputs: list[str] | None = None,
104
104
  input_formatter: Callable[..., str] | None = None,
105
105
  output_formatter: Callable[..., str] | None = None,
106
+ preserve_global_context: bool = True,
106
107
  ) -> Callable[[Callable[P, R]], Callable[P, R]] | Callable[P, R]:
107
108
  """Decorator that wires Laminar tracing and observation into a function.
108
109
 
@@ -136,6 +137,7 @@ def trace(
136
137
  _ignore_inputs = ignore_inputs
137
138
  _input_formatter = input_formatter
138
139
  _output_formatter = output_formatter
140
+ _preserve_global_context = preserve_global_context
139
141
 
140
142
  # --- Check debug_only flag and environment variable ---
141
143
  if debug_only and os.getenv("LMNR_DEBUG", "").lower() != "true":
@@ -173,6 +175,8 @@ def trace(
173
175
  observe_params["input_formatter"] = _input_formatter
174
176
  if _output_formatter is not None:
175
177
  observe_params["output_formatter"] = _output_formatter
178
+ if _preserve_global_context:
179
+ observe_params["preserve_global_context"] = _preserve_global_context
176
180
 
177
181
  return observe_params
178
182
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.1.4"
3
+ version = "0.1.6"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -22,7 +22,7 @@ classifiers = [
22
22
  dependencies = [
23
23
  "httpx>=0.28.1",
24
24
  "Jinja2>=3.1.6",
25
- "lmnr>=0.7.4",
25
+ "lmnr>=0.7.5",
26
26
  "openai>=1.99.9",
27
27
  "prefect>=3.4.13",
28
28
  "pydantic-settings>=2.10.1",
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
140
140
  reportMissingParameterType = "warning"
141
141
 
142
142
  [tool.bumpversion]
143
- current_version = "0.1.4"
143
+ current_version = "0.1.6"
144
144
  commit = true
145
145
  tag = true
146
146
  tag_name = "v{new_version}"
@@ -1,78 +0,0 @@
1
- """MIME type detection utilities for documents"""
2
-
3
- import magic
4
-
5
- from ai_pipeline_core.logging import get_pipeline_logger
6
-
7
- logger = get_pipeline_logger(__name__)
8
-
9
-
10
- def detect_mime_type(content: bytes, name: str) -> str:
11
- """Detect MIME type from content using python-magic"""
12
-
13
- try:
14
- if name.endswith(".md") and content.decode("utf-8"):
15
- return "text/markdown"
16
- except UnicodeDecodeError:
17
- pass
18
-
19
- if len(content) <= 4:
20
- return "application/x-empty"
21
-
22
- try:
23
- mime = magic.from_buffer(content[:1024], mime=True)
24
- return mime
25
- except (AttributeError, OSError, magic.MagicException) as e:
26
- logger.warning(f"MIME detection failed for {name}: {e}, falling back to extension")
27
- return mime_type_from_extension(name)
28
- except Exception as e:
29
- logger.error(f"Unexpected error in MIME detection for {name}: {e}")
30
- return mime_type_from_extension(name)
31
-
32
-
33
- def mime_type_from_extension(name: str) -> str:
34
- """Get MIME type based on file extension"""
35
- ext = name.lower().split(".")[-1] if "." in name else ""
36
-
37
- mime_map = {
38
- "md": "text/markdown",
39
- "txt": "text/plain",
40
- "pdf": "application/pdf",
41
- "png": "image/png",
42
- "jpg": "image/jpeg",
43
- "jpeg": "image/jpeg",
44
- "gif": "image/gif",
45
- "bmp": "image/bmp",
46
- "webp": "image/webp",
47
- "json": "application/json",
48
- "yaml": "application/yaml",
49
- "yml": "application/yaml",
50
- "xml": "text/xml",
51
- "html": "text/html",
52
- "htm": "text/html",
53
- }
54
-
55
- return mime_map.get(ext, "application/octet-stream")
56
-
57
-
58
- def is_text_mime_type(mime_type: str) -> bool:
59
- """Check if MIME type represents text content"""
60
- text_types = [
61
- "text/",
62
- "application/json",
63
- "application/xml",
64
- "application/javascript",
65
- "application/yaml",
66
- "application/x-yaml",
67
- ]
68
- return any(mime_type.startswith(t) for t in text_types)
69
-
70
-
71
- def is_pdf_mime_type(mime_type: str) -> bool:
72
- """Check if MIME type is PDF"""
73
- return mime_type == "application/pdf"
74
-
75
-
76
- def is_image_mime_type(mime_type: str) -> bool:
77
- """Check if MIME type is an image"""
78
- return mime_type.startswith("image/")