ai-pipeline-core 0.3.4-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ai_pipeline_core/__init__.py +64 -158
  2. ai_pipeline_core/deployment/__init__.py +6 -18
  3. ai_pipeline_core/deployment/base.py +392 -212
  4. ai_pipeline_core/deployment/contract.py +6 -10
  5. ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
  6. ai_pipeline_core/deployment/helpers.py +16 -17
  7. ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
  8. ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
  9. ai_pipeline_core/docs_generator/__init__.py +54 -0
  10. ai_pipeline_core/docs_generator/__main__.py +5 -0
  11. ai_pipeline_core/docs_generator/cli.py +196 -0
  12. ai_pipeline_core/docs_generator/extractor.py +324 -0
  13. ai_pipeline_core/docs_generator/guide_builder.py +644 -0
  14. ai_pipeline_core/docs_generator/trimmer.py +35 -0
  15. ai_pipeline_core/docs_generator/validator.py +114 -0
  16. ai_pipeline_core/document_store/__init__.py +13 -0
  17. ai_pipeline_core/document_store/_summary.py +9 -0
  18. ai_pipeline_core/document_store/_summary_worker.py +170 -0
  19. ai_pipeline_core/document_store/clickhouse.py +492 -0
  20. ai_pipeline_core/document_store/factory.py +38 -0
  21. ai_pipeline_core/document_store/local.py +312 -0
  22. ai_pipeline_core/document_store/memory.py +85 -0
  23. ai_pipeline_core/document_store/protocol.py +68 -0
  24. ai_pipeline_core/documents/__init__.py +12 -14
  25. ai_pipeline_core/documents/_context_vars.py +85 -0
  26. ai_pipeline_core/documents/_hashing.py +52 -0
  27. ai_pipeline_core/documents/attachment.py +85 -0
  28. ai_pipeline_core/documents/context.py +128 -0
  29. ai_pipeline_core/documents/document.py +318 -1434
  30. ai_pipeline_core/documents/mime_type.py +11 -84
  31. ai_pipeline_core/documents/utils.py +4 -12
  32. ai_pipeline_core/exceptions.py +10 -62
  33. ai_pipeline_core/images/__init__.py +32 -85
  34. ai_pipeline_core/images/_processing.py +5 -11
  35. ai_pipeline_core/llm/__init__.py +6 -4
  36. ai_pipeline_core/llm/ai_messages.py +102 -90
  37. ai_pipeline_core/llm/client.py +229 -183
  38. ai_pipeline_core/llm/model_options.py +12 -84
  39. ai_pipeline_core/llm/model_response.py +53 -99
  40. ai_pipeline_core/llm/model_types.py +8 -23
  41. ai_pipeline_core/logging/__init__.py +2 -7
  42. ai_pipeline_core/logging/logging.yml +1 -1
  43. ai_pipeline_core/logging/logging_config.py +27 -37
  44. ai_pipeline_core/logging/logging_mixin.py +15 -41
  45. ai_pipeline_core/observability/__init__.py +32 -0
  46. ai_pipeline_core/observability/_debug/__init__.py +30 -0
  47. ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
  48. ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
  49. ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +133 -75
  50. ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
  51. ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
  52. ai_pipeline_core/observability/_debug/_types.py +75 -0
  53. ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
  54. ai_pipeline_core/observability/_document_tracking.py +146 -0
  55. ai_pipeline_core/observability/_initialization.py +194 -0
  56. ai_pipeline_core/observability/_logging_bridge.py +57 -0
  57. ai_pipeline_core/observability/_summary.py +81 -0
  58. ai_pipeline_core/observability/_tracking/__init__.py +6 -0
  59. ai_pipeline_core/observability/_tracking/_client.py +178 -0
  60. ai_pipeline_core/observability/_tracking/_internal.py +28 -0
  61. ai_pipeline_core/observability/_tracking/_models.py +138 -0
  62. ai_pipeline_core/observability/_tracking/_processor.py +158 -0
  63. ai_pipeline_core/observability/_tracking/_service.py +311 -0
  64. ai_pipeline_core/observability/_tracking/_writer.py +229 -0
  65. ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
  66. ai_pipeline_core/pipeline/__init__.py +10 -0
  67. ai_pipeline_core/pipeline/decorators.py +915 -0
  68. ai_pipeline_core/pipeline/options.py +16 -0
  69. ai_pipeline_core/prompt_manager.py +16 -102
  70. ai_pipeline_core/settings.py +26 -31
  71. ai_pipeline_core/testing.py +9 -0
  72. ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
  73. ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
  74. ai_pipeline_core/debug/__init__.py +0 -26
  75. ai_pipeline_core/documents/document_list.py +0 -420
  76. ai_pipeline_core/documents/flow_document.py +0 -112
  77. ai_pipeline_core/documents/task_document.py +0 -117
  78. ai_pipeline_core/documents/temporary_document.py +0 -74
  79. ai_pipeline_core/flow/__init__.py +0 -9
  80. ai_pipeline_core/flow/config.py +0 -494
  81. ai_pipeline_core/flow/options.py +0 -75
  82. ai_pipeline_core/pipeline.py +0 -718
  83. ai_pipeline_core/prefect.py +0 -63
  84. ai_pipeline_core/prompt_builder/__init__.py +0 -5
  85. ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
  86. ai_pipeline_core/prompt_builder/global_cache.py +0 -78
  87. ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
  88. ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
  89. ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
  90. ai_pipeline_core/storage/__init__.py +0 -8
  91. ai_pipeline_core/storage/storage.py +0 -628
  92. ai_pipeline_core/utils/__init__.py +0 -8
  93. ai_pipeline_core-0.3.4.dist-info/METADATA +0 -569
  94. ai_pipeline_core-0.3.4.dist-info/RECORD +0 -57
  95. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
  96. {ai_pipeline_core-0.3.4.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/observability/_tracking/_writer.py (new file, +229)
@@ -0,0 +1,229 @@
+"""Background writer thread for ClickHouse inserts and summary jobs."""
+
+import asyncio
+import contextlib
+from collections.abc import Callable, Coroutine
+from dataclasses import dataclass, field
+from threading import Event, Thread
+
+from lmnr.opentelemetry_lib.tracing import context as laminar_context
+from opentelemetry import context as otel_context
+from opentelemetry.context import Context
+from pydantic import BaseModel
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from ._client import ClickHouseClient
+from ._models import SummaryRowBuilder
+
+type SpanSummaryFn = Callable[[str, str, str], Coroutine[None, None, str]]
+
+logger = get_pipeline_logger(__name__)
+
+
+@dataclass(frozen=True)
+class InsertBatch:
+    """Batch of rows to insert into a single table."""
+
+    table: str
+    rows: list[BaseModel] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class SummaryJob:
+    """Job requesting an LLM-generated summary for a span."""
+
+    span_id: str
+    label: str
+    output_hint: str
+    summary_model: str = "gemini-3-flash"
+    parent_otel_context: Context | None = field(default=None, hash=False, compare=False)
+    parent_laminar_context: Context | None = field(default=None, hash=False, compare=False)
+
+
+_SENTINEL = object()
+
+
+class ClickHouseWriter:
+    """Background writer that batches inserts and processes summary jobs.
+
+    Uses a dedicated thread with its own asyncio event loop. External callers
+    push work via ``write()`` and ``write_job()``, which use
+    ``loop.call_soon_threadsafe()``. The writer drains the queue in batches
+    for efficiency.
+    """
+
+    def __init__(
+        self,
+        client: ClickHouseClient,
+        *,
+        summary_row_builder: SummaryRowBuilder | None = None,
+        span_summary_fn: SpanSummaryFn | None = None,
+        batch_size: int = 100,
+        flush_interval_seconds: float = 2.0,
+    ) -> None:
+        """Store config. Does NOT start the writer thread."""
+        self._client = client
+        self._summary_row_builder = summary_row_builder
+        self._span_summary_fn = span_summary_fn
+        self._batch_size = batch_size
+        self._flush_interval = flush_interval_seconds
+
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._queue: asyncio.Queue[InsertBatch | SummaryJob | object] | None = None
+        self._thread: Thread | None = None
+        self._shutdown = False
+        self._disabled = False
+        self._ready = Event()
+
+    def start(self) -> None:
+        """Start the background writer thread."""
+        if self._thread is not None:
+            return
+        self._thread = Thread(target=self._thread_main, name="ch-writer", daemon=True)
+        self._thread.start()
+        if not self._ready.wait(timeout=10.0):
+            logger.warning("ClickHouse writer thread did not start within 10 seconds")
+
+    def _thread_main(self) -> None:
+        """Entry point for the background thread: creates the event loop and runs."""
+        self._loop = asyncio.new_event_loop()
+        self._queue = asyncio.Queue()
+        self._ready.set()
+        try:
+            self._loop.run_until_complete(self._run())
+        finally:
+            self._loop.close()
+            self._loop = None
+
+    async def _run(self) -> None:
+        """Main async loop: connect, create tables, then drain the queue."""
+        assert self._queue is not None, "_run() must be called after _queue is initialized"
+
+        if not await self._connect_with_retry():
+            return
+
+        pending: dict[str, list[BaseModel]] = {}
+        pending_jobs: list[SummaryJob] = []
+
+        while True:
+            try:
+                item = await asyncio.wait_for(self._queue.get(), timeout=self._flush_interval)
+            except TimeoutError:
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                continue
+
+            if item is _SENTINEL:
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                break
+
+            if isinstance(item, InsertBatch):
+                pending.setdefault(item.table, []).extend(item.rows)
+                if sum(len(v) for v in pending.values()) >= self._batch_size:
+                    self._flush_batches(pending)
+            elif isinstance(item, SummaryJob):
+                pending_jobs.append(item)
+            elif isinstance(item, Event):
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                item.set()
+
+    async def _connect_with_retry(self, max_retries: int = 5, base_delay: float = 1.0) -> bool:
+        """Attempt to connect to ClickHouse with exponential backoff.
+
+        Returns True on success, False if all retries are exhausted (writer is disabled).
+        """
+        for attempt in range(max_retries):
+            try:
+                self._client.connect()
+                self._client.ensure_tables()
+                return True
+            except Exception as e:
+                delay = base_delay * (2**attempt)
+                if attempt < max_retries - 1:
+                    logger.warning(f"ClickHouse connection attempt {attempt + 1}/{max_retries} failed: {e}. Retrying in {delay:.0f}s")
+                    await asyncio.sleep(delay)
+                else:
+                    logger.warning(f"ClickHouse connection failed after {max_retries} attempts, tracking disabled: {e}")
+                    self._disabled = True
+                    return False
+        return False  # unreachable
+
+    def _flush_batches(self, pending: dict[str, list[BaseModel]]) -> None:
+        """Flush all pending inserts to ClickHouse."""
+        flushed: list[str] = []
+        for table, rows in list(pending.items()):
+            if rows:
+                try:
+                    self._client._insert_rows(table, rows)  # pyright: ignore[reportPrivateUsage]
+                    flushed.append(table)
+                except Exception as e:
+                    logger.warning(f"Failed to insert {len(rows)} rows into {table}: {e}")
+        for table in flushed:
+            del pending[table]
+
+    async def _process_summary_jobs(self, jobs: list[SummaryJob]) -> None:
+        """Process all pending summary jobs in parallel."""
+        if not jobs:
+            return
+        await asyncio.gather(*[self._process_summary_job(job) for job in jobs])
+        jobs.clear()
+
+    async def _process_summary_job(self, job: SummaryJob) -> None:
+        """Process a span summary generation job."""
+        try:
+            if self._span_summary_fn:
+                otel_token = otel_context.attach(job.parent_otel_context) if job.parent_otel_context is not None else None
+                laminar_token = laminar_context.attach_context(job.parent_laminar_context) if job.parent_laminar_context is not None else None
+                try:
+                    summary = await self._span_summary_fn(job.label, job.output_hint, job.summary_model)
+                finally:
+                    if laminar_token is not None:
+                        laminar_context.detach_context(laminar_token)
+                    if otel_token is not None:
+                        otel_context.detach(otel_token)
+                if summary and self._summary_row_builder:
+                    row = self._summary_row_builder.build_span_summary_update(job.span_id, summary)
+                    if row:
+                        self._client.update_span(row)
+        except Exception as e:
+            logger.warning(f"Summary job failed: {e}")
+
+    def write(self, table: str, rows: list[BaseModel]) -> None:
+        """Enqueue rows for insertion. Thread-safe, non-blocking."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        batch = InsertBatch(table=table, rows=rows)
+        with contextlib.suppress(RuntimeError):
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, batch)
+
+    def write_job(self, job: SummaryJob) -> None:
+        """Enqueue a summary job. Thread-safe, non-blocking."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        with contextlib.suppress(RuntimeError):
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, job)
+
+    def flush(self, timeout: float = 30.0) -> None:
+        """Block until all queued items (including summary jobs) are processed."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        barrier = Event()
+        try:
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, barrier)
+        except RuntimeError:
+            return
+        barrier.wait(timeout=timeout)
+
+    def shutdown(self, timeout: float = 60.0) -> None:
+        """Signal shutdown and wait for the writer thread to finish."""
+        if self._shutdown:
+            return
+        self._shutdown = True
+        if self._loop is not None and self._queue is not None:
+            with contextlib.suppress(RuntimeError):
+                self._loop.call_soon_threadsafe(self._queue.put_nowait, _SENTINEL)
+        if self._thread is not None:
+            self._thread.join(timeout=timeout)
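
For orientation, here is a minimal sketch of how calling code might drive the new writer, using only the public surface visible in this hunk (start, write, write_job, flush, shutdown). The ClickHouseClient construction, the SpanRow model, the "spans" table name, and the summarize coroutine are illustrative assumptions for the sketch; the real row models and client configuration live in _models.py and _client.py, which are not shown in this diff.

    from pydantic import BaseModel

    from ai_pipeline_core.observability._tracking._client import ClickHouseClient
    from ai_pipeline_core.observability._tracking._writer import ClickHouseWriter, SummaryJob


    class SpanRow(BaseModel):
        # Hypothetical row model for this sketch; real ones are defined in _models.py.
        span_id: str
        name: str


    async def summarize(label: str, output_hint: str, model: str) -> str:
        # Stand-in for the LLM call that produces a short span summary.
        return f"{label}: {output_hint[:40]}"


    client = ClickHouseClient()  # assumed construction; see _client.py for real config
    writer = ClickHouseWriter(client, span_summary_fn=summarize)
    writer.start()  # spawns the daemon thread and waits for its event loop to be ready

    # Producers on any thread hand work to the writer loop without blocking:
    writer.write("spans", [SpanRow(span_id="abc123", name="generate")])
    writer.write_job(SummaryJob(span_id="abc123", label="generate", output_hint="ok"))

    writer.flush(timeout=30.0)  # barrier Event: returns once earlier items are processed
    writer.shutdown(timeout=60.0)  # sentinel + thread join; a final flush runs in-loop

Because the flush barrier and the shutdown sentinel travel through the same queue as ordinary batches, ordering falls out for free: everything enqueued before the barrier is flushed before flush() returns, and the sentinel triggers one last flush before the loop exits. With the defaults in this release, a failed ClickHouse connection is retried after waits of 1, 2, 4, and 8 seconds across five attempts, after which the writer disables itself and silently drops subsequent writes.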