ai-pipeline-core 0.3.3-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_pipeline_core/__init__.py +70 -144
- ai_pipeline_core/deployment/__init__.py +6 -18
- ai_pipeline_core/deployment/base.py +392 -212
- ai_pipeline_core/deployment/contract.py +6 -10
- ai_pipeline_core/{utils → deployment}/deploy.py +50 -69
- ai_pipeline_core/deployment/helpers.py +16 -17
- ai_pipeline_core/{progress.py → deployment/progress.py} +23 -24
- ai_pipeline_core/{utils/remote_deployment.py → deployment/remote.py} +11 -14
- ai_pipeline_core/docs_generator/__init__.py +54 -0
- ai_pipeline_core/docs_generator/__main__.py +5 -0
- ai_pipeline_core/docs_generator/cli.py +196 -0
- ai_pipeline_core/docs_generator/extractor.py +324 -0
- ai_pipeline_core/docs_generator/guide_builder.py +644 -0
- ai_pipeline_core/docs_generator/trimmer.py +35 -0
- ai_pipeline_core/docs_generator/validator.py +114 -0
- ai_pipeline_core/document_store/__init__.py +13 -0
- ai_pipeline_core/document_store/_summary.py +9 -0
- ai_pipeline_core/document_store/_summary_worker.py +170 -0
- ai_pipeline_core/document_store/clickhouse.py +492 -0
- ai_pipeline_core/document_store/factory.py +38 -0
- ai_pipeline_core/document_store/local.py +312 -0
- ai_pipeline_core/document_store/memory.py +85 -0
- ai_pipeline_core/document_store/protocol.py +68 -0
- ai_pipeline_core/documents/__init__.py +12 -14
- ai_pipeline_core/documents/_context_vars.py +85 -0
- ai_pipeline_core/documents/_hashing.py +52 -0
- ai_pipeline_core/documents/attachment.py +85 -0
- ai_pipeline_core/documents/context.py +128 -0
- ai_pipeline_core/documents/document.py +318 -1434
- ai_pipeline_core/documents/mime_type.py +37 -82
- ai_pipeline_core/documents/utils.py +4 -12
- ai_pipeline_core/exceptions.py +10 -62
- ai_pipeline_core/images/__init__.py +32 -85
- ai_pipeline_core/images/_processing.py +5 -11
- ai_pipeline_core/llm/__init__.py +6 -4
- ai_pipeline_core/llm/ai_messages.py +106 -81
- ai_pipeline_core/llm/client.py +267 -158
- ai_pipeline_core/llm/model_options.py +12 -84
- ai_pipeline_core/llm/model_response.py +53 -99
- ai_pipeline_core/llm/model_types.py +8 -23
- ai_pipeline_core/logging/__init__.py +2 -7
- ai_pipeline_core/logging/logging.yml +1 -1
- ai_pipeline_core/logging/logging_config.py +27 -37
- ai_pipeline_core/logging/logging_mixin.py +15 -41
- ai_pipeline_core/observability/__init__.py +32 -0
- ai_pipeline_core/observability/_debug/__init__.py +30 -0
- ai_pipeline_core/observability/_debug/_auto_summary.py +94 -0
- ai_pipeline_core/{debug/config.py → observability/_debug/_config.py} +11 -7
- ai_pipeline_core/{debug/content.py → observability/_debug/_content.py} +134 -75
- ai_pipeline_core/{debug/processor.py → observability/_debug/_processor.py} +16 -17
- ai_pipeline_core/{debug/summary.py → observability/_debug/_summary.py} +113 -37
- ai_pipeline_core/observability/_debug/_types.py +75 -0
- ai_pipeline_core/{debug/writer.py → observability/_debug/_writer.py} +126 -196
- ai_pipeline_core/observability/_document_tracking.py +146 -0
- ai_pipeline_core/observability/_initialization.py +194 -0
- ai_pipeline_core/observability/_logging_bridge.py +57 -0
- ai_pipeline_core/observability/_summary.py +81 -0
- ai_pipeline_core/observability/_tracking/__init__.py +6 -0
- ai_pipeline_core/observability/_tracking/_client.py +178 -0
- ai_pipeline_core/observability/_tracking/_internal.py +28 -0
- ai_pipeline_core/observability/_tracking/_models.py +138 -0
- ai_pipeline_core/observability/_tracking/_processor.py +158 -0
- ai_pipeline_core/observability/_tracking/_service.py +311 -0
- ai_pipeline_core/observability/_tracking/_writer.py +229 -0
- ai_pipeline_core/{tracing.py → observability/tracing.py} +139 -335
- ai_pipeline_core/pipeline/__init__.py +10 -0
- ai_pipeline_core/pipeline/decorators.py +915 -0
- ai_pipeline_core/pipeline/options.py +16 -0
- ai_pipeline_core/prompt_manager.py +16 -102
- ai_pipeline_core/settings.py +26 -31
- ai_pipeline_core/testing.py +9 -0
- ai_pipeline_core-0.4.0.dist-info/METADATA +807 -0
- ai_pipeline_core-0.4.0.dist-info/RECORD +76 -0
- ai_pipeline_core/debug/__init__.py +0 -26
- ai_pipeline_core/documents/document_list.py +0 -420
- ai_pipeline_core/documents/flow_document.py +0 -112
- ai_pipeline_core/documents/task_document.py +0 -117
- ai_pipeline_core/documents/temporary_document.py +0 -74
- ai_pipeline_core/flow/__init__.py +0 -9
- ai_pipeline_core/flow/config.py +0 -494
- ai_pipeline_core/flow/options.py +0 -75
- ai_pipeline_core/pipeline.py +0 -718
- ai_pipeline_core/prefect.py +0 -63
- ai_pipeline_core/prompt_builder/__init__.py +0 -5
- ai_pipeline_core/prompt_builder/documents_prompt.jinja2 +0 -23
- ai_pipeline_core/prompt_builder/global_cache.py +0 -78
- ai_pipeline_core/prompt_builder/new_core_documents_prompt.jinja2 +0 -6
- ai_pipeline_core/prompt_builder/prompt_builder.py +0 -253
- ai_pipeline_core/prompt_builder/system_prompt.jinja2 +0 -41
- ai_pipeline_core/storage/__init__.py +0 -8
- ai_pipeline_core/storage/storage.py +0 -628
- ai_pipeline_core/utils/__init__.py +0 -8
- ai_pipeline_core-0.3.3.dist-info/METADATA +0 -569
- ai_pipeline_core-0.3.3.dist-info/RECORD +0 -57
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/WHEEL +0 -0
- {ai_pipeline_core-0.3.3.dist-info → ai_pipeline_core-0.4.0.dist-info}/licenses/LICENSE +0 -0
ai_pipeline_core/observability/_tracking/_writer.py (new file)

@@ -0,0 +1,229 @@
+"""Background writer thread for ClickHouse inserts and summary jobs."""
+
+import asyncio
+import contextlib
+from collections.abc import Callable, Coroutine
+from dataclasses import dataclass, field
+from threading import Event, Thread
+
+from lmnr.opentelemetry_lib.tracing import context as laminar_context
+from opentelemetry import context as otel_context
+from opentelemetry.context import Context
+from pydantic import BaseModel
+
+from ai_pipeline_core.logging import get_pipeline_logger
+
+from ._client import ClickHouseClient
+from ._models import SummaryRowBuilder
+
+type SpanSummaryFn = Callable[[str, str, str], Coroutine[None, None, str]]
+
+logger = get_pipeline_logger(__name__)
+
+
+@dataclass(frozen=True)
+class InsertBatch:
+    """Batch of rows to insert into a single table."""
+
+    table: str
+    rows: list[BaseModel] = field(default_factory=list)
+
+
+@dataclass(frozen=True)
+class SummaryJob:
+    """Job requesting LLM-generated summary for a span."""
+
+    span_id: str
+    label: str
+    output_hint: str
+    summary_model: str = "gemini-3-flash"
+    parent_otel_context: Context | None = field(default=None, hash=False, compare=False)
+    parent_laminar_context: Context | None = field(default=None, hash=False, compare=False)
+
+
+_SENTINEL = object()
+
+
+class ClickHouseWriter:
+    """Background writer that batches inserts and processes summary jobs.
+
+    Uses a dedicated thread with its own asyncio event loop. External callers
+    push work via ``write()`` and ``write_job()`` which use
+    ``loop.call_soon_threadsafe()``. The writer drains the queue in batches
+    for efficiency.
+    """
+
+    def __init__(
+        self,
+        client: ClickHouseClient,
+        *,
+        summary_row_builder: SummaryRowBuilder | None = None,
+        span_summary_fn: SpanSummaryFn | None = None,
+        batch_size: int = 100,
+        flush_interval_seconds: float = 2.0,
+    ) -> None:
+        """Store config. Does NOT start the writer thread."""
+        self._client = client
+        self._summary_row_builder = summary_row_builder
+        self._span_summary_fn = span_summary_fn
+        self._batch_size = batch_size
+        self._flush_interval = flush_interval_seconds
+
+        self._loop: asyncio.AbstractEventLoop | None = None
+        self._queue: asyncio.Queue[InsertBatch | SummaryJob | object] | None = None
+        self._thread: Thread | None = None
+        self._shutdown = False
+        self._disabled = False
+        self._ready = Event()
+
+    def start(self) -> None:
+        """Start the background writer thread."""
+        if self._thread is not None:
+            return
+        self._thread = Thread(target=self._thread_main, name="ch-writer", daemon=True)
+        self._thread.start()
+        if not self._ready.wait(timeout=10.0):
+            logger.warning("ClickHouse writer thread did not start within 10 seconds")
+
+    def _thread_main(self) -> None:
+        """Entry point for background thread — creates event loop and runs."""
+        self._loop = asyncio.new_event_loop()
+        self._queue = asyncio.Queue()
+        self._ready.set()
+        try:
+            self._loop.run_until_complete(self._run())
+        finally:
+            self._loop.close()
+            self._loop = None
+
+    async def _run(self) -> None:
+        """Main async loop: connect, create tables, then drain queue."""
+        assert self._queue is not None, "_run() must be called after _queue is initialized"
+
+        if not await self._connect_with_retry():
+            return
+
+        pending: dict[str, list[BaseModel]] = {}
+        pending_jobs: list[SummaryJob] = []
+
+        while True:
+            try:
+                item = await asyncio.wait_for(self._queue.get(), timeout=self._flush_interval)
+            except TimeoutError:
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                continue
+
+            if item is _SENTINEL:
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                break
+
+            if isinstance(item, InsertBatch):
+                pending.setdefault(item.table, []).extend(item.rows)
+                if sum(len(v) for v in pending.values()) >= self._batch_size:
+                    self._flush_batches(pending)
+            elif isinstance(item, SummaryJob):
+                pending_jobs.append(item)
+            elif isinstance(item, Event):
+                self._flush_batches(pending)
+                await self._process_summary_jobs(pending_jobs)
+                item.set()
+
+    async def _connect_with_retry(self, max_retries: int = 5, base_delay: float = 1.0) -> bool:
+        """Attempt to connect to ClickHouse with exponential backoff.
+
+        Returns True on success, False if all retries exhausted (writer is disabled).
+        """
+        for attempt in range(max_retries):
+            try:
+                self._client.connect()
+                self._client.ensure_tables()
+                return True
+            except Exception as e:
+                delay = base_delay * (2**attempt)
+                if attempt < max_retries - 1:
+                    logger.warning(f"ClickHouse connection attempt {attempt + 1}/{max_retries} failed: {e}. Retrying in {delay:.0f}s")
+                    await asyncio.sleep(delay)
+                else:
+                    logger.warning(f"ClickHouse connection failed after {max_retries} attempts, tracking disabled: {e}")
+                    self._disabled = True
+                    return False
+        return False  # unreachable
+
+    def _flush_batches(self, pending: dict[str, list[BaseModel]]) -> None:
+        """Flush all pending inserts to ClickHouse."""
+        flushed: list[str] = []
+        for table, rows in list(pending.items()):
+            if rows:
+                try:
+                    self._client._insert_rows(table, rows)  # pyright: ignore[reportPrivateUsage]
+                    flushed.append(table)
+                except Exception as e:
+                    logger.warning(f"Failed to insert {len(rows)} rows into {table}: {e}")
+        for table in flushed:
+            del pending[table]
+
+    async def _process_summary_jobs(self, jobs: list[SummaryJob]) -> None:
+        """Process all pending summary jobs in parallel."""
+        if not jobs:
+            return
+        await asyncio.gather(*[self._process_summary_job(job) for job in jobs])
+        jobs.clear()
+
+    async def _process_summary_job(self, job: SummaryJob) -> None:
+        """Process a span summary generation job."""
+        try:
+            if self._span_summary_fn:
+                otel_token = otel_context.attach(job.parent_otel_context) if job.parent_otel_context is not None else None
+                laminar_token = laminar_context.attach_context(job.parent_laminar_context) if job.parent_laminar_context is not None else None
+                try:
+                    summary = await self._span_summary_fn(job.label, job.output_hint, job.summary_model)
+                finally:
+                    if laminar_token is not None:
+                        laminar_context.detach_context(laminar_token)
+                    if otel_token is not None:
+                        otel_context.detach(otel_token)
+                if summary and self._summary_row_builder:
+                    row = self._summary_row_builder.build_span_summary_update(job.span_id, summary)
+                    if row:
+                        self._client.update_span(row)
+        except Exception as e:
+            logger.warning(f"Summary job failed: {e}")
+
+    def write(self, table: str, rows: list[BaseModel]) -> None:
+        """Enqueue rows for insertion. Thread-safe, non-blocking."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        batch = InsertBatch(table=table, rows=rows)
+        with contextlib.suppress(RuntimeError):
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, batch)
+
+    def write_job(self, job: SummaryJob) -> None:
+        """Enqueue a summary job. Thread-safe, non-blocking."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        with contextlib.suppress(RuntimeError):
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, job)
+
+    def flush(self, timeout: float = 30.0) -> None:
+        """Block until all queued items (including summary jobs) are processed."""
+        if self._disabled or self._shutdown or self._loop is None or self._queue is None:
+            return
+        barrier = Event()
+        try:
+            self._loop.call_soon_threadsafe(self._queue.put_nowait, barrier)
+        except RuntimeError:
+            return
+        barrier.wait(timeout=timeout)
+
+    def shutdown(self, timeout: float = 60.0) -> None:
+        """Signal shutdown and wait for the writer thread to finish."""
+        if self._shutdown:
+            return
+        self._shutdown = True
+        if self._loop is not None and self._queue is not None:
+            with contextlib.suppress(RuntimeError):
+                self._loop.call_soon_threadsafe(self._queue.put_nowait, _SENTINEL)
+        if self._thread is not None:
+            self._thread.join(timeout=timeout)