ai-pipeline-core 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/PKG-INFO +11 -9
  2. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/README.md +10 -8
  3. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/__init__.py +1 -1
  4. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/document.py +38 -10
  5. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/flow_document.py +7 -1
  6. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/task_document.py +7 -1
  7. ai_pipeline_core-0.1.3/ai_pipeline_core/documents/utils.py +85 -0
  8. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/client.py +1 -1
  9. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/pyproject.toml +2 -2
  10. ai_pipeline_core-0.1.1/ai_pipeline_core/documents/utils.py +0 -33
  11. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/.gitignore +0 -0
  12. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/LICENSE +0 -0
  13. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/__init__.py +0 -0
  14. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/document_list.py +0 -0
  15. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/documents/mime_type.py +0 -0
  16. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/exceptions.py +0 -0
  17. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/flow/__init__.py +0 -0
  18. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/flow/config.py +0 -0
  19. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/__init__.py +0 -0
  20. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/ai_messages.py +0 -0
  21. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_options.py +0 -0
  22. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_response.py +0 -0
  23. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/llm/model_types.py +0 -0
  24. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/__init__.py +0 -0
  25. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging.yml +0 -0
  26. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging_config.py +0 -0
  27. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  28. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/prompt_manager.py +0 -0
  29. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/py.typed +0 -0
  30. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/settings.py +0 -0
  31. {ai_pipeline_core-0.1.1 → ai_pipeline_core-0.1.3}/ai_pipeline_core/tracing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-pipeline-core
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Core utilities for AI-powered processing pipelines using prefect
5
5
  Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
6
6
  Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
@@ -438,9 +438,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
438
438
  ## Documentation
439
439
 
440
440
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
441
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
442
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
443
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
441
+
442
+ ### dependencies_docs/ Directory
443
+ > [!NOTE]
444
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
445
+
446
+ **AI Assistant Dependency Guides:**
447
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
448
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
449
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
444
450
 
445
451
  ## License
446
452
 
@@ -465,13 +471,9 @@ Built with:
465
471
 
466
472
  ## Stability Notice
467
473
 
468
- **Current Version**: 0.1.1
474
+ **Current Version**: 0.1.2
469
475
  **Status**: Internal Preview
470
476
  **API Stability**: Unstable - Breaking changes expected
471
477
  **Recommended Use**: Learning and reference only
472
478
 
473
479
  For production use, please fork this repository and maintain your own stable version.
474
-
475
- ---
476
-
477
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -396,9 +396,15 @@ For learning purposes, see [CLAUDE.md](CLAUDE.md) for our comprehensive coding s
396
396
  ## Documentation
397
397
 
398
398
  - [CLAUDE.md](CLAUDE.md) - Detailed coding standards and architecture guide
399
- - [Prefect Integration](docs/prefect.md) - Prefect patterns and best practices
400
- - [Deployment Guide](docs/prefect_deployment.md) - Production deployment
401
- - [Prefect Logging](docs/prefect_logging.md) - Logging configuration guide
399
+
400
+ ### dependencies_docs/ Directory
401
+ > [!NOTE]
402
+ > The `dependencies_docs/` directory contains guides for AI assistants (like Claude Code) on how to interact with the project's external dependencies and tooling, NOT user documentation for ai-pipeline-core itself. These files are excluded from repository listings to avoid confusion.
403
+
404
+ **AI Assistant Dependency Guides:**
405
+ - [Prefect Integration](dependencies_docs/prefect.md) - Prefect patterns and best practices for AI assistants
406
+ - [Deployment Guide](dependencies_docs/prefect_deployment.md) - Production deployment guide for AI assistants
407
+ - [Prefect Logging](dependencies_docs/prefect_logging.md) - Logging configuration guide for AI assistants
402
408
 
403
409
  ## License
404
410
 
@@ -423,13 +429,9 @@ Built with:
423
429
 
424
430
  ## Stability Notice
425
431
 
426
- **Current Version**: 0.1.1
432
+ **Current Version**: 0.1.2
427
433
  **Status**: Internal Preview
428
434
  **API Stability**: Unstable - Breaking changes expected
429
435
  **Recommended Use**: Learning and reference only
430
436
 
431
437
  For production use, please fork this repository and maintain your own stable version.
432
-
433
- ---
434
-
435
- **Remember**: The best code is no code. The second best is minimal, typed, async code that does exactly what's needed.
@@ -16,7 +16,7 @@ from .prompt_manager import PromptManager
16
16
  from .settings import settings
17
17
  from .tracing import trace
18
18
 
19
- __version__ = "0.1.1"
19
+ __version__ = "0.1.2"
20
20
 
21
21
  __all__ = [
22
22
  "Document",
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import hashlib
3
3
  import json
4
+ import re
4
5
  from abc import ABC, abstractmethod
5
6
  from base64 import b32encode
6
7
  from enum import StrEnum
@@ -10,6 +11,7 @@ from typing import Any, ClassVar, Literal, Self
10
11
  from pydantic import BaseModel, ConfigDict, field_serializer, field_validator
11
12
  from ruamel.yaml import YAML
12
13
 
14
+ from ai_pipeline_core.documents.utils import canonical_name_key
13
15
  from ai_pipeline_core.exceptions import DocumentNameError, DocumentSizeError
14
16
 
15
17
  from .mime_type import (
@@ -25,6 +27,13 @@ class Document(BaseModel, ABC):
25
27
 
26
28
  MAX_CONTENT_SIZE: ClassVar[int] = 10 * 1024 * 1024 # 10MB default
27
29
  DESCRIPTION_EXTENSION: ClassVar[str] = ".description.md"
30
+ MARKDOWN_LIST_SEPARATOR: ClassVar[str] = "\n\n---\n\n"
31
+
32
+ def __init__(self, **data: Any) -> None:
33
+ """Prevent direct instantiation of abstract Document class."""
34
+ if type(self) is Document:
35
+ raise TypeError("Cannot instantiate abstract Document class directly")
36
+ super().__init__(**data)
28
37
 
29
38
  # Optional enum of allowed file names. Subclasses may set this.
30
39
  # This is used to validate the document name.
@@ -179,10 +188,10 @@ class Document(BaseModel, ABC):
179
188
  """Check if document is an image"""
180
189
  return is_image_mime_type(self.mime_type)
181
190
 
182
- @property
183
- def should_be_cached(self) -> bool:
184
- """Check if document should be cached"""
185
- return False
191
+ @classmethod
192
+ def canonical_name(cls) -> str:
193
+ """Get the canonical name of the document"""
194
+ return canonical_name_key(cls)
186
195
 
187
196
  def as_text(self) -> str:
188
197
  """Parse document as text"""
@@ -192,15 +201,34 @@ class Document(BaseModel, ABC):
192
201
 
193
202
  def as_yaml(self) -> Any:
194
203
  """Parse document as YAML"""
195
- if not self.is_text:
196
- raise ValueError(f"Document is not text: {self.name}")
197
- return YAML().load(self.content.decode("utf-8")) # type: ignore
204
+ return YAML().load(self.as_text())
198
205
 
199
206
  def as_json(self) -> Any:
200
207
  """Parse document as JSON"""
201
- if not self.is_text:
202
- raise ValueError(f"Document is not text: {self.name}")
203
- return json.loads(self.content.decode("utf-8"))
208
+ return json.loads(self.as_text())
209
+
210
+ def as_markdown_list(self) -> list[str]:
211
+ """Parse document as a markdown list"""
212
+ return self.as_text().split(self.MARKDOWN_LIST_SEPARATOR)
213
+
214
+ @classmethod
215
+ def create(cls, name: str, description: str | None, content: bytes | str) -> Self:
216
+ """Create a document from a name, description, and content"""
217
+ if isinstance(content, str):
218
+ content = content.encode("utf-8")
219
+ return cls(name=name, description=description, content=content)
220
+
221
+ @classmethod
222
+ def create_as_markdown_list(cls, name: str, description: str | None, items: list[str]) -> Self:
223
+ """Create a document from a name, description, and list of strings"""
224
+ # remove other list separators (lines that are only the separator + whitespace)
225
+ separator = Document.MARKDOWN_LIST_SEPARATOR.strip()
226
+ pattern = re.compile(rf"^[ \t]*{re.escape(separator)}[ \t]*(?:\r?\n|$)", flags=re.MULTILINE)
227
+ # Normalize CRLF/CR to LF before cleaning to ensure consistent behavior
228
+ normalized_items = [re.sub(r"\r\n?", "\n", item) for item in items]
229
+ cleaned_items = [pattern.sub("", item) for item in normalized_items]
230
+ content = Document.MARKDOWN_LIST_SEPARATOR.join(cleaned_items)
231
+ return cls.create(name, description, content)
204
232
 
205
233
  def serialize_model(self) -> dict[str, Any]:
206
234
  """Serialize document to a dictionary with proper encoding."""
@@ -1,6 +1,6 @@
1
1
  """Flow-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -15,6 +15,12 @@ class FlowDocument(Document):
15
15
  Compared to TaskDocument, FlowDocument are persistent across Prefect flow runs.
16
16
  """
17
17
 
18
+ def __init__(self, **data: Any) -> None:
19
+ """Prevent direct instantiation of abstract FlowDocument class."""
20
+ if type(self) is FlowDocument:
21
+ raise TypeError("Cannot instantiate abstract FlowDocument class directly")
22
+ super().__init__(**data)
23
+
18
24
  @final
19
25
  def get_base_type(self) -> Literal["flow"]:
20
26
  """Get the document type."""
@@ -1,6 +1,6 @@
1
1
  """Task-specific document base class."""
2
2
 
3
- from typing import Literal, final
3
+ from typing import Any, Literal, final
4
4
 
5
5
  from .document import Document
6
6
 
@@ -16,6 +16,12 @@ class TaskDocument(Document):
16
16
  They are used for intermediate results that are not needed after the task completes.
17
17
  """
18
18
 
19
+ def __init__(self, **data: Any) -> None:
20
+ """Prevent direct instantiation of abstract TaskDocument class."""
21
+ if type(self) is TaskDocument:
22
+ raise TypeError("Cannot instantiate abstract TaskDocument class directly")
23
+ super().__init__(**data)
24
+
19
25
  @final
20
26
  def get_base_type(self) -> Literal["task"]:
21
27
  """Get the document type."""
@@ -0,0 +1,85 @@
1
+ import re
2
+ from typing import Any, Iterable, Type
3
+ from urllib.parse import urlparse
4
+
5
+
6
+ def sanitize_url(url: str) -> str:
7
+ """
8
+ Sanitize URL or query string for use in filenames.
9
+ Removes or replaces characters that are invalid in filenames.
10
+ """
11
+ # Remove protocol if it's a URL
12
+ if url.startswith(("http://", "https://")):
13
+ parsed = urlparse(url)
14
+ # Use domain + path
15
+ url = parsed.netloc + parsed.path
16
+
17
+ # Replace invalid filename characters
18
+ sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
19
+
20
+ # Replace multiple underscores with single one
21
+ sanitized = re.sub(r"_+", "_", sanitized)
22
+
23
+ # Remove leading/trailing underscores and dots
24
+ sanitized = sanitized.strip("_.")
25
+
26
+ # Limit length to prevent too long filenames
27
+ if len(sanitized) > 100:
28
+ sanitized = sanitized[:100]
29
+
30
+ # Ensure we have something
31
+ if not sanitized:
32
+ sanitized = "unnamed"
33
+
34
+ return sanitized
35
+
36
+
37
+ def camel_to_snake(name: str) -> str:
38
+ """Convert CamelCase (incl. acronyms) to snake_case."""
39
+ s1 = re.sub(r"(.)([A-Z][a-z0-9]+)", r"\1_\2", name)
40
+ s2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1)
41
+ return s2.replace("__", "_").strip("_").lower()
42
+
43
+
44
+ def canonical_name_key(
45
+ obj_or_name: Type[Any] | str,
46
+ *,
47
+ max_parent_suffixes: int = 3,
48
+ extra_suffixes: Iterable[str] = (),
49
+ ) -> str:
50
+ """
51
+ Produce a canonical snake_case key from a class or name by:
52
+ 1) Starting with the class name (or given string),
53
+ 2) Stripping any trailing parent class names (up to `max_parent_suffixes` from the MRO),
54
+ 3) Stripping any `extra_suffixes`,
55
+ 4) Converting to snake_case.
56
+
57
+ Examples (given typical MROs):
58
+ FinalReportDocument(WorkflowDocument -> Document) -> 'final_report'
59
+ FooWorkflowDocument(WorkflowDocument -> Document) -> 'foo'
60
+ BarFlow(Config -> Base -> Flow) -> 'bar'
61
+ """
62
+ name = obj_or_name.__name__ if isinstance(obj_or_name, type) else str(obj_or_name)
63
+
64
+ # From MRO, collect up to N parent names to consider as removable suffixes
65
+ suffixes: list[str] = []
66
+ if isinstance(obj_or_name, type):
67
+ for base in obj_or_name.mro()[1 : 1 + max_parent_suffixes]:
68
+ if base is object:
69
+ continue
70
+ suffixes.append(base.__name__)
71
+
72
+ # Add any custom suffixes the caller wants to strip (e.g., 'Config')
73
+ suffixes.extend(extra_suffixes)
74
+
75
+ # Iteratively trim the longest matching suffix first
76
+ trimmed = True
77
+ while trimmed and suffixes:
78
+ trimmed = False
79
+ for sfx in sorted(set(suffixes), key=len, reverse=True):
80
+ if sfx and name.endswith(sfx):
81
+ name = name[: -len(sfx)]
82
+ trimmed = True
83
+ break
84
+
85
+ return camel_to_snake(name)
@@ -162,7 +162,7 @@ async def generate(
162
162
  T = TypeVar("T", bound=BaseModel)
163
163
 
164
164
 
165
- @trace
165
+ @trace(ignore_inputs=["context"])
166
166
  async def generate_structured(
167
167
  model: ModelName,
168
168
  response_format: type[T],
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ai-pipeline-core"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "Core utilities for AI-powered processing pipelines using prefect"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -140,7 +140,7 @@ reportIncompatibleVariableOverride = "error"
140
140
  reportMissingParameterType = "warning"
141
141
 
142
142
  [tool.bumpversion]
143
- current_version = "0.1.1"
143
+ current_version = "0.1.3"
144
144
  commit = true
145
145
  tag = true
146
146
  tag_name = "v{new_version}"
@@ -1,33 +0,0 @@
1
- import re
2
- from urllib.parse import urlparse
3
-
4
-
5
- def sanitize_url(url: str) -> str:
6
- """
7
- Sanitize URL or query string for use in filenames.
8
- Removes or replaces characters that are invalid in filenames.
9
- """
10
- # Remove protocol if it's a URL
11
- if url.startswith(("http://", "https://")):
12
- parsed = urlparse(url)
13
- # Use domain + path
14
- url = parsed.netloc + parsed.path
15
-
16
- # Replace invalid filename characters
17
- sanitized = re.sub(r'[<>:"/\\|?*]', "_", url)
18
-
19
- # Replace multiple underscores with single one
20
- sanitized = re.sub(r"_+", "_", sanitized)
21
-
22
- # Remove leading/trailing underscores and dots
23
- sanitized = sanitized.strip("_.")
24
-
25
- # Limit length to prevent too long filenames
26
- if len(sanitized) > 100:
27
- sanitized = sanitized[:100]
28
-
29
- # Ensure we have something
30
- if not sanitized:
31
- sanitized = "unnamed"
32
-
33
- return sanitized