ai-pipeline-core 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/PKG-INFO +2 -2
  2. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/README.md +1 -1
  3. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/document.py +57 -3
  5. ai_pipeline_core-0.1.6/ai_pipeline_core/documents/mime_type.py +110 -0
  6. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/pyproject.toml +2 -2
  7. ai_pipeline_core-0.1.5/ai_pipeline_core/documents/mime_type.py +0 -78
  8. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/.gitignore +0 -0
  9. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/LICENSE +0 -0
  10. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/__init__.py +0 -0
  11. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/document_list.py +0 -0
  12. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/flow_document.py +0 -0
  13. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/task_document.py +0 -0
  14. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/documents/utils.py +0 -0
  15. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/exceptions.py +0 -0
  16. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/flow/__init__.py +0 -0
  17. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/flow/config.py +0 -0
  18. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/__init__.py +0 -0
  19. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/ai_messages.py +0 -0
  20. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/client.py +0 -0
  21. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_options.py +0 -0
  22. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_response.py +0 -0
  23. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/llm/model_types.py +0 -0
  24. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/__init__.py +0 -0
  25. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging.yml +0 -0
  26. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging_config.py +0 -0
  27. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  28. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/prompt_manager.py +0 -0
  29. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/py.typed +0 -0
  30. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/settings.py +0 -0
  31. {ai_pipeline_core-0.1.5 → ai_pipeline_core-0.1.6}/ai_pipeline_core/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -471,7 +471,7 @@ Built with:
471
471
 
472
472
  ## Stability Notice
473
473
 
474
- **Current Version**: 0.1.5
474
+ **Current Version**: 0.1.6
475
475
  **Status**: Internal Preview
476
476
  **API Stability**: Unstable - Breaking changes expected
477
477
  **Recommended Use**: Learning and reference only
@@ -429,7 +429,7 @@ Built with:
429
429
 
430
430
  ## Stability Notice
431
431
 
432
- **Current Version**: 0.1.5
432
+ **Current Version**: 0.1.6
433
433
  **Status**: Internal Preview
434
434
  **API Stability**: Unstable - Breaking changes expected
435
435
  **Recommended Use**: Learning and reference only
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.4"
19
+ __version__ = "0.1.6"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
6
6
  from base64 import b32encode
7
7
  from enum import StrEnum
8
8
  from functools import cached_property
9
- from typing import Any, ClassVar, Literal, Self
9
+ from typing import Any, ClassVar, Literal, Self, TypeVar
10
10
 
11
11
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
12
12
  from ruamel.yaml import YAML
@@ -19,8 +19,11 @@ from .mime_type import (
19
19
  is_image_mime_type,
20
20
  is_pdf_mime_type,
21
21
  is_text_mime_type,
22
+ is_yaml_mime_type,
22
23
  )
23
24
 
25
+ TModel = TypeVar("TModel", bound=BaseModel)
26
+
24
27
 
25
28
  class Document(BaseModel, ABC):
26
29
  """Abstract base class for all documents"""
@@ -207,15 +210,40 @@ class Document(BaseModel, ABC):
207
210
  """Parse document as JSON"""
208
211
  return json.loads(self.as_text())
209
212
 
213
+ def as_pydantic_model(self, model_type: type[TModel]) -> TModel:
214
+ """Parse document as a pydantic model and return the validated instance"""
215
+ data = self.as_yaml() if is_yaml_mime_type(self.mime_type) else self.as_json()
216
+ return model_type.model_validate(data)
217
+
210
218
  def as_markdown_list(self) -> list[str]:
211
219
  """Parse document as a markdown list"""
212
220
  return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
213
221
 
214
222
  @classmethod
215
- def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
223
+ def create(
224
+ cls,
225
+ name: str,
226
+ description: str | None,
227
+ content: bytes | str | BaseModel | list[str] | Any,
228
+ ) -> Self:
216
229
  """Create a document from a name, description, and content"""
217
- if isinstance(content, str):
230
+ is_yaml_extension = name.endswith(".yaml") or name.endswith(".yml")
231
+ is_json_extension = name.endswith(".json")
232
+ is_markdown_extension = name.endswith(".md")
233
+ is_str_list = isinstance(content, list) and all(isinstance(item, str) for item in content)
234
+ if isinstance(content, bytes):
235
+ pass
236
+ elif isinstance(content, str):
218
237
  content = content.encode("utf-8")
238
+ elif is_str_list and is_markdown_extension:
239
+ return cls.create_as_markdown_list(name, description, content) # type: ignore[arg-type]
240
+ elif is_yaml_extension:
241
+ return cls.create_as_yaml(name, description, content)
242
+ elif is_json_extension:
243
+ return cls.create_as_json(name, description, content)
244
+ else:
245
+ raise ValueError(f"Unsupported content type: {type(content)} for {name}")
246
+
219
247
  return cls(name=name, description=description, content=content)
220
248
 
221
249
  @classmethod
@@ -230,6 +258,32 @@ class Document(BaseModel, ABC):
230
258
  content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
231
259
  return cls.create(name, description, content)
232
260
 
261
+ @classmethod
262
+ def create_as_json(cls, name: str, description: str | None, data: Any) -> Self:
263
+ """Create a document from a name, description, and JSON data"""
264
+ assert name.endswith(".json"), f"Document name must end with .json: {name}"
265
+ if isinstance(data, BaseModel):
266
+ data = data.model_dump(mode="json")
267
+ content = json.dumps(data, indent=2).encode("utf-8")
268
+ return cls.create(name, description, content)
269
+
270
+ @classmethod
271
+ def create_as_yaml(cls, name: str, description: str | None, data: Any) -> Self:
272
+ """Create a document from a name, description, and YAML data"""
273
+ assert name.endswith(".yaml") or name.endswith(".yml"), (
274
+ f"Document name must end with .yaml or .yml: {name}"
275
+ )
276
+ if isinstance(data, BaseModel):
277
+ data = data.model_dump()
278
+ yaml = YAML()
279
+ yaml.indent(mapping=2, sequence=4, offset=2)
280
+ from io import BytesIO
281
+
282
+ stream = BytesIO()
283
+ yaml.dump(data, stream)
284
+ content = stream.getvalue()
285
+ return cls.create(name, description, content)
286
+
233
287
  def serialize_model(self) -> dict[str, Any]:
234
288
  """Serialize document to a dictionary with proper encoding."""
235
289
  result = {
@@ -0,0 +1,110 @@
1
+ """MIME type detection utilities for documents"""
2
+
3
+ import magic
4
+
5
+ from ai_pipeline_core.logging import get_pipeline_logger
6
+
7
+ logger = get_pipeline_logger(__name__)
8
+
9
+ # Extension to MIME type mapping for common formats
10
+ # These are formats where extension-based detection is more reliable
11
+ EXTENSION_MIME_MAP = {
12
+ "md": "text/markdown",
13
+ "txt": "text/plain",
14
+ "pdf": "application/pdf",
15
+ "png": "image/png",
16
+ "jpg": "image/jpeg",
17
+ "jpeg": "image/jpeg",
18
+ "gif": "image/gif",
19
+ "bmp": "image/bmp",
20
+ "webp": "image/webp",
21
+ "json": "application/json",
22
+ "yaml": "application/yaml",
23
+ "yml": "application/yaml",
24
+ "xml": "text/xml",
25
+ "html": "text/html",
26
+ "htm": "text/html",
27
+ "py": "text/x-python",
28
+ "css": "text/css",
29
+ "js": "application/javascript",
30
+ "ts": "application/typescript",
31
+ "tsx": "application/typescript",
32
+ "jsx": "application/javascript",
33
+ }
34
+
35
+
36
+ def detect_mime_type(content: bytes, name: str) -> str:
37
+ """Detect MIME type from content and filename
38
+
39
+ Uses a hybrid approach:
40
+ 1. Check for empty content
41
+ 2. Try extension-based detection for known formats
42
+ 3. Fall back to magic content detection
43
+ 4. Final fallback to application/octet-stream
44
+ """
45
+
46
+ # Check for empty content
47
+ if len(content) == 0:
48
+ return "application/x-empty"
49
+
50
+ # Try extension-based detection first for known formats
51
+ # This is more reliable for text formats that magic might misidentify
52
+ ext = name.lower().split(".")[-1] if "." in name else ""
53
+ if ext in EXTENSION_MIME_MAP:
54
+ return EXTENSION_MIME_MAP[ext]
55
+
56
+ # Try content-based detection with magic
57
+ try:
58
+ mime = magic.from_buffer(content[:1024], mime=True)
59
+ # If magic returns a valid mime type, use it
60
+ if mime and mime != "application/octet-stream":
61
+ return mime
62
+ except (AttributeError, OSError, magic.MagicException) as e:
63
+ logger.warning(f"MIME detection failed for {name}: {e}")
64
+ except Exception as e:
65
+ logger.error(f"Unexpected error in MIME detection for {name}: {e}")
66
+
67
+ # Final fallback based on extension or default
68
+ return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
69
+
70
+
71
+ def mime_type_from_extension(name: str) -> str:
72
+ """Get MIME type based on file extension
73
+
74
+ Legacy function kept for compatibility
75
+ """
76
+ ext = name.lower().split(".")[-1] if "." in name else ""
77
+ return EXTENSION_MIME_MAP.get(ext, "application/octet-stream")
78
+
79
+
80
+ def is_text_mime_type(mime_type: str) -> bool:
81
+ """Check if MIME type represents text content"""
82
+ text_types = [
83
+ "text/",
84
+ "application/json",
85
+ "application/xml",
86
+ "application/javascript",
87
+ "application/yaml",
88
+ "application/x-yaml",
89
+ ]
90
+ return any(mime_type.startswith(t) for t in text_types)
91
+
92
+
93
+ def is_json_mime_type(mime_type: str) -> bool:
94
+ """Check if MIME type is JSON"""
95
+ return mime_type == "application/json"
96
+
97
+
98
+ def is_yaml_mime_type(mime_type: str) -> bool:
99
+ """Check if MIME type is YAML"""
100
+ return mime_type == "application/yaml" or mime_type == "application/x-yaml"
101
+
102
+
103
+ def is_pdf_mime_type(mime_type: str) -> bool:
104
+ """Check if MIME type is PDF"""
105
+ return mime_type == "application/pdf"
106
+
107
+
108
+ def is_image_mime_type(mime_type: str) -> bool:
109
+ """Check if MIME type is an image"""
110
+ return mime_type.startswith("image/")
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.1.5"
3
+ version = "0.1.6"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
140
140
  reportMissingParameterType = "warning"
141
141
 
142
142
  [tool.bumpversion]
143
- current_version = "0.1.5"
143
+ current_version = "0.1.6"
144
144
  commit = true
145
145
  tag = true
146
146
  tag_name = "v{new_version}"
@@ -1,78 +0,0 @@
1
- """MIME type detection utilities for documents"""
2
-
3
- import magic
4
-
5
- from ai_pipeline_core.logging import get_pipeline_logger
6
-
7
- logger = get_pipeline_logger(__name__)
8
-
9
-
10
- def detect_mime_type(content: bytes, name: str) -> str:
11
- """Detect MIME type from content using python-magic"""
12
-
13
- try:
14
- if name.endswith(".md") and content.decode("utf-8"):
15
- return "text/markdown"
16
- except UnicodeDecodeError:
17
- pass
18
-
19
- if len(content) <= 4:
20
- return "application/x-empty"
21
-
22
- try:
23
- mime = magic.from_buffer(content[:1024], mime=True)
24
- return mime
25
- except (AttributeError, OSError, magic.MagicException) as e:
26
- logger.warning(f"MIME detection failed for {name}: {e}, falling back to extension")
27
- return mime_type_from_extension(name)
28
- except Exception as e:
29
- logger.error(f"Unexpected error in MIME detection for {name}: {e}")
30
- return mime_type_from_extension(name)
31
-
32
-
33
- def mime_type_from_extension(name: str) -> str:
34
- """Get MIME type based on file extension"""
35
- ext = name.lower().split(".")[-1] if "." in name else ""
36
-
37
- mime_map = {
38
- "md": "text/markdown",
39
- "txt": "text/plain",
40
- "pdf": "application/pdf",
41
- "png": "image/png",
42
- "jpg": "image/jpeg",
43
- "jpeg": "image/jpeg",
44
- "gif": "image/gif",
45
- "bmp": "image/bmp",
46
- "webp": "image/webp",
47
- "json": "application/json",
48
- "yaml": "application/yaml",
49
- "yml": "application/yaml",
50
- "xml": "text/xml",
51
- "html": "text/html",
52
- "htm": "text/html",
53
- }
54
-
55
- return mime_map.get(ext, "application/octet-stream")
56
-
57
-
58
- def is_text_mime_type(mime_type: str) -> bool:
59
- """Check if MIME type represents text content"""
60
- text_types = [
61
- "text/",
62
- "application/json",
63
- "application/xml",
64
- "application/javascript",
65
- "application/yaml",
66
- "application/x-yaml",
67
- ]
68
- return any(mime_type.startswith(t) for t in text_types)
69
-
70
-
71
- def is_pdf_mime_type(mime_type: str) -> bool:
72
- """Check if MIME type is PDF"""
73
- return mime_type == "application/pdf"
74
-
75
-
76
- def is_image_mime_type(mime_type: str) -> bool:
77
- """Check if MIME type is an image"""
78
- return mime_type.startswith("image/")