ai-pipeline-core 0.2.6.tar.gz → 0.2.7.tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (42)
  1. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/PKG-INFO +1 -1
  2. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/__init__.py +1 -1
  3. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/ai_messages.py +5 -2
  4. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/client.py +13 -19
  5. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_options.py +3 -3
  6. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_response.py +1 -1
  7. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/model_types.py +1 -1
  8. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/deploy.py +20 -35
  9. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/pyproject.toml +2 -2
  10. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/.gitignore +0 -0
  11. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/LICENSE +0 -0
  12. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/README.md +0 -0
  13. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/__init__.py +0 -0
  14. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/document.py +0 -0
  15. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/document_list.py +0 -0
  16. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/flow_document.py +0 -0
  17. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/mime_type.py +0 -0
  18. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/task_document.py +0 -0
  19. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/temporary_document.py +0 -0
  20. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/documents/utils.py +0 -0
  21. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/exceptions.py +0 -0
  22. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/__init__.py +0 -0
  23. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/config.py +0 -0
  24. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/flow/options.py +0 -0
  25. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/llm/__init__.py +0 -0
  26. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/__init__.py +0 -0
  27. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging.yml +0 -0
  28. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging_config.py +0 -0
  29. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/logging/logging_mixin.py +0 -0
  30. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/pipeline.py +0 -0
  31. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/prefect.py +0 -0
  32. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/prompt_manager.py +0 -0
  33. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/py.typed +0 -0
  34. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/settings.py +0 -0
  35. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/__init__.py +0 -0
  36. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/cli.py +0 -0
  37. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/simple_runner/simple_runner.py +0 -0
  38. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/storage/__init__.py +0 -0
  39. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/storage/storage.py +0 -0
  40. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/tracing.py +0 -0
  41. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/__init__.py +0 -0
  42. {ai_pipeline_core-0.2.6 → ai_pipeline_core-0.2.7}/ai_pipeline_core/utils/remote_deployment.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-pipeline-core
-Version: 0.2.6
+Version: 0.2.7
 Summary: Core utilities for AI-powered processing pipelines using prefect
 Project-URL: Homepage, https://github.com/bbarwik/ai-pipeline-core
 Project-URL: Repository, https://github.com/bbarwik/ai-pipeline-core
ai_pipeline_core/__init__.py
@@ -118,7 +118,7 @@ from .prompt_manager import PromptManager
 from .settings import Settings
 from .tracing import TraceInfo, TraceLevel, set_trace_cost, trace

-__version__ = "0.2.6"
+__version__ = "0.2.7"

 __all__ = [
     # Config/Settings
ai_pipeline_core/llm/ai_messages.py
@@ -260,11 +260,14 @@ class AIMessages(list[AIMessageType]):

         for message in self:
             if isinstance(message, str):
-                messages.append({"role": "user", "content": message})
+                messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
             elif isinstance(message, Document):
                 messages.append({"role": "user", "content": AIMessages.document_to_prompt(message)})
             elif isinstance(message, ModelResponse):  # type: ignore
-                messages.append({"role": "assistant", "content": message.content})
+                messages.append({
+                    "role": "assistant",
+                    "content": [{"type": "text", "text": message.content}],
+                })
             else:
                 raise ValueError(f"Unsupported message type: {type(message)}")

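For reference, a minimal sketch of the payload shape the updated to_prompt() conversion produces, with illustrative values (the class and method names are taken from the hunks in this diff):

    # Strings and assistant replies are now wrapped as structured text parts
    # instead of bare strings, so per-part attributes (e.g. cache_control)
    # can be attached to them later.
    [
        {"role": "user", "content": [{"type": "text", "text": "Summarize the report"}]},
        {"role": "assistant", "content": [{"type": "text", "text": "The report covers..."}]},
    ]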
ai_pipeline_core/llm/client.py
@@ -39,7 +39,7 @@ def _process_messages(
     context: AIMessages,
     messages: AIMessages,
     system_prompt: str | None = None,
-    cache_ttl: str | None = "5m",
+    cache_ttl: str | None = "300s",
 ) -> list[ChatCompletionMessageParam]:
     """Process and format messages for LLM API consumption.

@@ -51,7 +51,7 @@ def _process_messages(
         context: Messages to be cached (typically expensive/static content).
         messages: Regular messages without caching (dynamic queries).
         system_prompt: Optional system instructions for the model.
-        cache_ttl: Cache TTL for context messages (e.g. "120s", "5m", "1h").
+        cache_ttl: Cache TTL for context messages (e.g. "120s", "300s", "1h").
             Set to None or empty string to disable caching.

     Returns:
@@ -86,12 +86,17 @@ def _process_messages(
     # Use AIMessages.to_prompt() for context
     context_messages = context.to_prompt()

-    # Apply caching to last context message if cache_ttl is set
+    # Apply caching to last context message and last content part if cache_ttl is set
     if cache_ttl:
         context_messages[-1]["cache_control"] = {  # type: ignore
             "type": "ephemeral",
             "ttl": cache_ttl,
         }
+        assert isinstance(context_messages[-1]["content"], list)  # type: ignore
+        context_messages[-1]["content"][-1]["cache_control"] = {  # type: ignore
+            "type": "ephemeral",
+            "ttl": cache_ttl,
+        }

     processed_messages.extend(context_messages)

@@ -237,6 +242,10 @@ async def _generate_with_retry(
     if not context and not messages:
         raise ValueError("Either context or messages must be provided")

+    if "gemini" in model.lower() and context.approximate_tokens_count < 5000:
+        # Bug fix for minimum explicit context size for Gemini models
+        options.cache_ttl = None
+
     processed_messages = _process_messages(
         context, messages, options.system_prompt, options.cache_ttl
     )
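Taken together, these changes mean that when caching is active the ephemeral marker now appears both on the last context message and on its last content part, and caching is skipped entirely for small Gemini contexts. A rough sketch of the resulting last context message (values illustrative, structure follows the hunks above):

    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "<large static context document>",
                # new in 0.2.7: part-level cache marker
                "cache_control": {"type": "ephemeral", "ttl": "300s"},
            }
        ],
        # existing message-level cache marker
        "cache_control": {"type": "ephemeral", "ttl": "300s"},
    }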
@@ -374,26 +383,11 @@ async def generate(
         ... ])
         >>> response = await llm.generate("gpt-5", messages=messages)

-    Configuration via LiteLLM Proxy:
-        >>> # Configure temperature in litellm_config.yaml:
-        >>> # model_list:
-        >>> #   - model_name: gpt-5
-        >>> #     litellm_params:
-        >>> #       model: openai/gpt-4o
-        >>> #       temperature: 0.3
-        >>> #       max_tokens: 1000
-        >>>
-        >>> # Configure retry logic in proxy:
-        >>> # general_settings:
-        >>> #   master_key: sk-1234
-        >>> #   max_retries: 5
-        >>> #   retry_delay: 15
-
     Performance:
         - Context caching saves ~50-90% tokens on repeated calls
         - First call: full token cost
         - Subsequent calls (within cache TTL): only messages tokens
-        - Default cache TTL is 5m (production-optimized)
+        - Default cache TTL is 300s/5 minutes (production-optimized)
         - Default retry logic: 3 attempts with 10s delay (production-optimized)

     Caching:
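A hedged usage sketch based on the docstring fragments visible in this diff; generate()'s full signature is not shown here, so the keyword names (context, messages, options) and the import paths are assumptions:

    from ai_pipeline_core import llm
    from ai_pipeline_core.llm import AIMessages, ModelOptions  # import paths assumed

    async def ask(question: str, reference_text: str) -> str:
        # Static, expensive content goes into the cached context; the dynamic
        # question goes into messages, so repeat calls within the cache TTL
        # only pay for the question tokens.
        response = await llm.generate(
            "gpt-5",
            context=AIMessages([reference_text]),   # keyword names assumed
            messages=AIMessages([question]),
            options=ModelOptions(cache_ttl="300s"),
        )
        return response.content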
ai_pipeline_core/llm/model_options.py
@@ -45,7 +45,7 @@ class ModelOptions(BaseModel):

         timeout: Maximum seconds to wait for response (default: 300).

-        cache_ttl: Cache TTL for context messages (default: "5m").
+        cache_ttl: Cache TTL for context messages (default: "300s").
             String format like "60s", "5m", or None to disable caching.
             Applied to the last context message for efficient token reuse.

@@ -165,7 +165,7 @@ class ModelOptions(BaseModel):
         - search_context_size only works with search models
         - reasoning_effort only works with models that support explicit reasoning
         - response_format is set internally by generate_structured()
-        - cache_ttl accepts formats like "120s", "5m" (default), "1h" or None to disable caching
+        - cache_ttl accepts formats like "120s", "5m", "1h" or None (default: "300s")
         - stop sequences are limited to 4 by most providers
         - user identifier helps track costs per end-user (max 256 chars)
         - extra_body allows passing provider-specific parameters
@@ -179,7 +179,7 @@ class ModelOptions(BaseModel):
     retries: int = 3
     retry_delay_seconds: int = 20
     timeout: int = 600
-    cache_ttl: str | None = "5m"
+    cache_ttl: str | None = "300s"
     service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
     max_completion_tokens: int | None = None
     stop: str | list[str] | None = None
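For quick reference, the cache_ttl values described in the notes above, as a short sketch (only the fields shown in this diff are assumed; the import path is taken from the file list):

    from ai_pipeline_core.llm.model_options import ModelOptions  # path assumed

    default_opts = ModelOptions()                # cache_ttl now defaults to "300s"
    long_cache = ModelOptions(cache_ttl="1h")    # "120s", "5m", "1h" style strings accepted
    no_cache = ModelOptions(cache_ttl=None)      # None disables context caching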
ai_pipeline_core/llm/model_response.py
@@ -266,7 +266,7 @@ class ModelResponse(ChatCompletion):

         other_fields = self.__dict__
         for key, value in other_fields.items():
-            if key in ["_model_options", "_metadata", "choices", "usage"]:
+            if key in ["_model_options", "_metadata", "choices"]:
                 continue
             try:
                 metadata[f"response.raw.{key}"] = json.dumps(value, indent=2, default=str)
ai_pipeline_core/llm/model_types.py
@@ -20,7 +20,7 @@ ModelName: TypeAlias = (
     "grok-4",
     # Small models
     "gemini-2.5-flash",
-    "gpt-5-nano",
+    "gpt-5-mini",
     "grok-4-fast",
     # Search models
     "gemini-2.5-flash-search",
ai_pipeline_core/utils/deploy.py
@@ -7,8 +7,8 @@ This script:
 3. Creates/updates a Prefect deployment using the RunnerDeployment pattern

 Requirements:
-    - .env file with PREFECT_API_URL and optionally PREFECT_API_KEY
-    - .env file with PREFECT_GCS_BUCKET
+    - Settings configured with PREFECT_API_URL and optionally PREFECT_API_KEY
+    - Settings configured with PREFECT_GCS_BUCKET
     - pyproject.toml with project name and version
     - Local package installed for flow metadata extraction

@@ -18,7 +18,6 @@ Usage:

 import argparse
 import asyncio
-import os
 import subprocess
 import sys
 import tomllib
@@ -34,14 +33,6 @@ from prefect.flows import load_flow_from_entrypoint
 from ai_pipeline_core.settings import settings
 from ai_pipeline_core.storage import Storage

-# ============================================================================
-# Configuration
-# ============================================================================
-
-WORK_POOL_NAME = settings.prefect_work_pool_name
-DEFAULT_WORK_QUEUE = settings.prefect_work_queue_name
-PREDEFINED_BUCKET = settings.prefect_gcs_bucket
-
 # ============================================================================
 # Deployer Class
 # ============================================================================
@@ -57,7 +48,7 @@ class Deployer:
     def __init__(self):
         """Initialize deployer."""
         self.config = self._load_config()
-        self._setup_prefect_env()
+        self._validate_prefect_settings()

     def _load_config(self) -> dict[str, Any]:
         """Load and normalize project configuration from pyproject.toml.
@@ -65,10 +56,10 @@ class Deployer:
         Returns:
             Configuration dictionary with project metadata and deployment settings.
         """
-        if not PREDEFINED_BUCKET:
+        if not settings.prefect_gcs_bucket:
             self._die(
-                "PREFECT_GCS_BUCKET not found in .env file.\n"
-                "Create a .env file with:\n"
+                "PREFECT_GCS_BUCKET not configured in settings.\n"
+                "Configure via environment variable or .env file:\n"
                 "  PREFECT_GCS_BUCKET=your-bucket-name"
             )

@@ -97,33 +88,23 @@ class Deployer:
             "name": name,
             "package": package_name,
             "version": version,
-            "bucket": PREDEFINED_BUCKET,
+            "bucket": settings.prefect_gcs_bucket,
             "folder": f"flows/{flow_folder}",
             "tarball": f"{package_name}-{version}.tar.gz",
-            "work_pool": WORK_POOL_NAME,
-            "work_queue": DEFAULT_WORK_QUEUE,
+            "work_pool": settings.prefect_work_pool_name,
+            "work_queue": settings.prefect_work_queue_name,
         }

-    def _setup_prefect_env(self):
-        """Configure Prefect environment variables from .env file."""
-        self.api_url = os.getenv("PREFECT_API_URL")
+    def _validate_prefect_settings(self):
+        """Validate that required Prefect settings are configured."""
+        self.api_url = settings.prefect_api_url
         if not self.api_url:
             self._die(
-                "PREFECT_API_URL not found in .env file.\n"
-                "Create a .env file with:\n"
+                "PREFECT_API_URL not configured in settings.\n"
+                "Configure via environment variable or .env file:\n"
                 "  PREFECT_API_URL=https://api.prefect.cloud/api/accounts/.../workspaces/..."
             )

-        os.environ["PREFECT_API_URL"] = self.api_url
-
-        # Optional: API key for authentication
-        if api_key := os.getenv("PREFECT_API_KEY"):
-            os.environ["PREFECT_API_KEY"] = api_key
-
-        # Optional: Alternative auth method
-        if api_auth := os.getenv("PREFECT_API_AUTH_STRING"):
-            os.environ["PREFECT_API_AUTH_STRING"] = api_auth
-
     def _run(self, cmd: str, check: bool = True) -> Optional[str]:
         """Execute shell command and return output.

@@ -345,12 +326,16 @@
     python -m ai_pipeline_core.utils.deploy

 Prerequisites:
-    - .env file with PREFECT_API_URL (and optionally PREFECT_API_KEY)
-    - .env file with PREFECT_GCS_BUCKET
+    - Settings configured with PREFECT_API_URL (and optionally PREFECT_API_KEY)
+    - Settings configured with PREFECT_GCS_BUCKET
     - pyproject.toml with project name and version
     - Package installed locally: pip install -e .
     - GCP authentication configured (via service account or default credentials)
     - Work pool created in Prefect UI or CLI
+
+Settings can be configured via:
+    - Environment variables (e.g., export PREFECT_API_URL=...)
+    - .env file in the current directory
 """,
         )

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ai-pipeline-core"
-version = "0.2.6"
+version = "0.2.7"
 description = "Core utilities for AI-powered processing pipelines using prefect"
 readme = "README.md"
 license = {text = "MIT"}
@@ -178,7 +178,7 @@ reportIncompatibleVariableOverride = "error"
 reportMissingParameterType = "warning"

 [tool.bumpversion]
-current_version = "0.2.6"
+current_version = "0.2.7"
 commit = true
 tag = true
 tag_name = "v{new_version}"