flock-core 0.5.20__py3-none-any.whl → 0.5.22__py3-none-any.whl

This diff shows the content of publicly released versions of this package, as published to their public registries. It is provided for informational purposes only and reflects the changes between those versions.


@@ -0,0 +1,3 @@
+ """Streaming engine shared utilities."""
+
+ __all__ = ["sinks"]
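This package `__init__` re-exports only the `sinks` submodule. For orientation, a consumer would import it as a namespace, sketched below; the parent package path is not shown in this diff, so the one used here is an assumption:

    # Hypothetical import path -- the diff does not reveal where this package lives.
    from flock.engines.streaming import sinks

    print(sinks.__all__)  # ['RichSink', 'StreamSink', 'WebSocketSink']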
@@ -0,0 +1,489 @@
+ """Shared streaming sink implementations for DSPy execution.
+
+ This module provides a composable sink pattern for consuming streaming output
+ from DSPy programs and routing it to different presentation layers (Rich terminal,
+ WebSocket dashboard, etc.).
+
+ Architecture
+ ------------
+ The StreamSink protocol defines a minimal interface that all sinks must implement.
+ Sinks receive normalized streaming events (status messages, tokens, final predictions)
+ and handle presentation-specific logic (Rich display updates, WebSocket broadcasts, etc.).
+
+ Sinks are designed to be:
+ - Composable: Multiple sinks can consume the same stream in parallel
+ - Isolated: Each sink maintains its own state and error handling
+ - Testable: Sinks can be tested independently with mock dependencies
+
+ Error Handling Contract
+ -----------------------
+ Sinks SHOULD NOT raise exceptions during normal streaming operations. Instead:
+ - Log errors and continue processing remaining events
+ - Use defensive programming (null checks, try/except where appropriate)
+ - Only raise exceptions for unrecoverable errors (e.g., invalid configuration)
+
+ The streaming loop treats sink exceptions as fatal and will abort the stream.
+ For fault tolerance, sinks should catch and log their own errors.
+
+ Example Usage
+ -------------
+ Basic WebSocket-only streaming:
+
+     async def ws_broadcast(event: StreamingOutputEvent) -> None:
+         await websocket_manager.broadcast(event)
+
+     def event_factory(output_type, content, seq, is_final):
+         return StreamingOutputEvent(
+             correlation_id="123",
+             agent_name="agent",
+             run_id="run-1",
+             output_type=output_type,
+             content=content,
+             sequence=seq,
+             is_final=is_final,
+         )
+
+     sink = WebSocketSink(ws_broadcast=ws_broadcast, event_factory=event_factory)
+
+     async for value in stream:
+         kind, text, field, final = normalize_value(value)
+         if kind == "status":
+             await sink.on_status(text)
+         elif kind == "token":
+             await sink.on_token(text, field)
+         elif kind == "prediction":
+             await sink.on_final(final, token_count)
+             break
+
+     await sink.flush()
+
+ Dual-sink composition (CLI with WebSocket):
+
+     sinks = []
+     if rich_enabled:
+         sinks.append(RichSink(...))
+     if ws_enabled:
+         sinks.append(WebSocketSink(...))
+
+     # Dispatch to all sinks
+     for sink in sinks:
+         await sink.on_token(text, field)
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ from collections.abc import Awaitable, Callable, MutableMapping, Sequence
+ from typing import (
+     Any,
+     Protocol,
+     runtime_checkable,
+ )
+
+ from pydantic import BaseModel
+
+ from flock.dashboard.events import StreamingOutputEvent
+ from flock.logging.logging import get_logger
+
+
+ logger = get_logger(__name__)
+
+
+ @runtime_checkable
+ class StreamSink(Protocol):
+     """Minimal sink protocol for consuming normalized stream events.
+
+     Sinks receive streaming events from DSPy execution and handle
+     presentation-specific logic (Rich display, WebSocket broadcast, etc.).
+
+     Implementations must be idempotent for on_final() to handle edge cases
+     where the stream loop might call it multiple times.
+
+     Error Handling
+     --------------
+     Implementations SHOULD catch and log their own errors rather than raising,
+     to prevent one sink failure from aborting the entire stream. Only raise
+     exceptions for unrecoverable errors during initialization/configuration.
+     """
+
+     async def on_status(self, text: str) -> None:
+         """Process a status message from the LLM.
+
+         Status messages are typically intermediate reasoning steps or
+         progress indicators (e.g., "Analyzing input...", "Generating response...").
+
+         Args:
+             text: Status message text (may include newlines)
+
+         Note:
+             Empty text should be ignored. Implementations should handle
+             this gracefully without raising.
+         """
+         ...
+
+     async def on_token(self, text: str, signature_field: str | None) -> None:
+         """Process a single token from the LLM output stream.
+
+         Tokens are emitted as the LLM generates text. signature_field indicates
+         which output field this token belongs to (for multi-field signatures).
+
+         Args:
+             text: Token text (typically a single word or word fragment)
+             signature_field: Name of the signature field being streamed,
+                 or None if the token doesn't belong to a specific field
+
+         Note:
+             Empty text should be ignored. The field "description" is typically
+             skipped as it's the input prompt, not output.
+         """
+         ...
+
+     async def on_final(self, result: Any, tokens_emitted: int) -> None:
+         """Process the final prediction result.
+
+         Called once when streaming completes successfully. Contains the
+         complete DSPy Prediction object with all output fields populated.
+
+         Args:
+             result: DSPy Prediction object with output fields
+             tokens_emitted: Total number of tokens emitted during streaming
+
+         Note:
+             Implementations MUST be idempotent - this may be called multiple
+             times in edge cases. Use a finalization guard flag if necessary.
+         """
+         ...
+
+     async def flush(self) -> None:
+         """Flush any pending async operations.
+
+         Called after streaming completes to ensure all async tasks
+         (e.g., WebSocket broadcasts) complete before returning.
+
+         Implementations should await any background tasks and handle
+         errors gracefully (log but don't raise).
+
+         Note:
+             For synchronous sinks (e.g., Rich terminal), this is a no-op.
+         """
+         ...
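Because `StreamSink` is a runtime-checkable `Protocol`, a test double needs no inheritance; it only has to provide the four methods. A sketch of a collecting sink for unit tests (hypothetical, not part of the module):

    class CollectingSink:
        # Records every event so tests can assert on ordering and content.
        def __init__(self) -> None:
            self.events: list[tuple[str, object]] = []

        async def on_status(self, text: str) -> None:
            self.events.append(("status", text))

        async def on_token(self, text: str, signature_field: str | None) -> None:
            self.events.append(("token", (text, signature_field)))

        async def on_final(self, result: object, tokens_emitted: int) -> None:
            self.events.append(("final", tokens_emitted))

        async def flush(self) -> None:
            pass

    assert isinstance(CollectingSink(), StreamSink)  # structural check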
+
+
+ class RichSink(StreamSink):
+     """Rich terminal sink responsible for mutating live display data.
+
+     This sink updates a mutable display_data dictionary that represents
+     the artifact being streamed. It accumulates status messages and tokens
+     in buffers, then replaces them with final structured data when streaming
+     completes.
+
+     The sink integrates with Rich's Live display context, calling a refresh
+     callback after each update to trigger terminal re-rendering.
+
+     Display Data Flow
+     -----------------
+     1. Initialization: display_data contains empty payload fields and "streaming..." timestamp
+     2. on_status(): Accumulates status messages in a buffer, updates display_data["status"]
+     3. on_token(): Accumulates tokens in field-specific buffers, updates display_data["payload"]["_streaming"]
+     4. on_final(): Replaces streaming buffers with final Prediction fields, removes "status", adds real timestamp
+     5. flush(): No-op (Rich rendering is synchronous)
+
+     Error Handling
+     --------------
+     - refresh_panel() errors are caught and logged, never raised
+     - Idempotent: on_final() checks _finalized flag to prevent double-finalization
+     - Defensive: Uses setdefault() and get() to handle missing dictionary keys
+
+     Thread Safety
+     -------------
+     NOT thread-safe. Assumes single-threaded async execution within a single
+     Rich Live context. Multiple concurrent streams should use separate RichSink instances.
+
+     Example
+     -------
+         display_data = OrderedDict([("id", "artifact-123"), ("payload", {}), ...])
+         stream_buffers = defaultdict(list)
+
+         def refresh():
+             live.update(formatter.format_result(display_data, ...))
+
+         sink = RichSink(
+             display_data=display_data,
+             stream_buffers=stream_buffers,
+             status_field="_status",
+             signature_order=["output", "summary"],
+             formatter=formatter,
+             theme_dict=theme,
+             styles=styles,
+             agent_label="Agent - gpt-4",
+             refresh_panel=refresh,
+             timestamp_factory=lambda: datetime.now(UTC).isoformat(),
+         )
+
+         await sink.on_status("Processing...")
+         await sink.on_token("Hello", "output")
+         await sink.on_final(prediction, tokens_emitted=5)
+         await sink.flush()
+     """
+
+     def __init__(
+         self,
+         *,
+         display_data: MutableMapping[str, Any],
+         stream_buffers: MutableMapping[str, list[str]],
+         status_field: str,
+         signature_order: Sequence[str],
+         formatter: Any | None,
+         theme_dict: dict[str, Any] | None,
+         styles: dict[str, Any] | None,
+         agent_label: str | None,
+         refresh_panel: Callable[[], None],
+         timestamp_factory: Callable[[], str],
+     ) -> None:
+         self._display_data = display_data
+         self._stream_buffers = stream_buffers
+         self._status_field = status_field
+         self._signature_order = list(signature_order)
+         self._formatter = formatter
+         self._theme_dict = theme_dict
+         self._styles = styles
+         self._agent_label = agent_label
+         self._refresh_panel = refresh_panel
+         self._timestamp_factory = timestamp_factory
+         self._final_display = (
+             formatter,
+             display_data,
+             theme_dict,
+             styles,
+             agent_label,
+         )
+         # Ensure buffers exist for status updates
+         self._stream_buffers.setdefault(status_field, [])
+         self._finalized = False
+
+     def _refresh(self) -> None:
+         try:
+             self._refresh_panel()
+         except Exception:
+             logger.debug("Rich sink refresh panel callable failed", exc_info=True)
+
+     async def on_status(self, text: str) -> None:
+         if not text:
+             return
+
+         buffer = self._stream_buffers.setdefault(self._status_field, [])
+         buffer.append(f"{text}\n")
+         self._display_data["status"] = "".join(buffer)
+         self._refresh()
+
+     async def on_token(self, text: str, signature_field: str | None) -> None:
+         if not text:
+             return
+
+         if signature_field and signature_field != "description":
+             buffer_key = f"_stream_{signature_field}"
+             buffer = self._stream_buffers.setdefault(buffer_key, [])
+             buffer.append(str(text))
+             payload = self._display_data.setdefault("payload", {})
+             payload["_streaming"] = "".join(buffer)
+         else:
+             buffer = self._stream_buffers.setdefault(self._status_field, [])
+             buffer.append(str(text))
+             self._display_data["status"] = "".join(buffer)
+
+         self._refresh()
+
+     async def on_final(self, result: Any, tokens_emitted: int) -> None:  # noqa: ARG002
+         if self._finalized:
+             return
+
+         payload_section: MutableMapping[str, Any] = self._display_data.setdefault(
+             "payload", {}
+         )
+         payload_section.clear()
+
+         for field_name in self._signature_order:
+             if field_name == "description":
+                 continue
+             if not hasattr(result, field_name):
+                 continue
+
+             value = getattr(result, field_name)
+             if isinstance(value, list):
+                 payload_section[field_name] = [
+                     item.model_dump() if isinstance(item, BaseModel) else item
+                     for item in value
+                 ]
+             elif isinstance(value, BaseModel):
+                 payload_section[field_name] = value.model_dump()
+             else:
+                 payload_section[field_name] = value
+
+         self._display_data["created_at"] = self._timestamp_factory()
+         self._display_data.pop("status", None)
+         payload_section.pop("_streaming", None)
+         self._refresh()
+         self._finalized = True
+
+     async def flush(self) -> None:
+         # Rich sink has no async resources to drain.
+         return None
+
+     @property
+     def final_display_data(
+         self,
+     ) -> tuple[
+         Any,
+         MutableMapping[str, Any],
+         dict[str, Any] | None,
+         dict[str, Any] | None,
+         str | None,
+     ]:
+         return self._final_display
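To make the display-data flow concrete, here is a self-contained sketch that drives `RichSink` with a no-op refresh and a `SimpleNamespace` standing in for a DSPy Prediction (test scaffolding, not package code):

    import asyncio
    from collections import defaultdict
    from types import SimpleNamespace

    display_data: dict = {"id": "artifact-123", "payload": {}}
    sink = RichSink(
        display_data=display_data,
        stream_buffers=defaultdict(list),
        status_field="_status",
        signature_order=["output"],
        formatter=None,
        theme_dict=None,
        styles=None,
        agent_label=None,
        refresh_panel=lambda: None,  # no live terminal in this sketch
        timestamp_factory=lambda: "2025-01-01T00:00:00+00:00",
    )

    async def demo() -> None:
        await sink.on_token("Hel", "output")
        await sink.on_token("lo", "output")
        assert display_data["payload"]["_streaming"] == "Hello"
        await sink.on_final(SimpleNamespace(output="Hello"), tokens_emitted=2)
        assert display_data["payload"] == {"output": "Hello"}  # buffer replaced by final field

    asyncio.run(demo())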
343
+
344
+
345
+ class WebSocketSink(StreamSink):
346
+ """WebSocket-only sink that mirrors dashboard streaming behaviour.
347
+
348
+ This sink broadcasts StreamingOutputEvent messages via WebSocket for
349
+ real-time dashboard updates. It uses fire-and-forget task scheduling
350
+ to avoid blocking the streaming loop while ensuring all events are
351
+ delivered via flush().
352
+
353
+ Event Sequence
354
+ --------------
355
+ Each event gets a monotonically increasing sequence number for ordering:
356
+ - on_status("Loading"): seq=0, output_type="log", content="Loading\\n"
357
+ - on_token("Hello", field): seq=1, output_type="llm_token", content="Hello"
358
+ - on_token(" world", field): seq=2, output_type="llm_token", content=" world"
359
+ - on_final(pred, 2): seq=3, output_type="log", content="\\nAmount of output tokens: 2", is_final=True
360
+ - seq=4, output_type="log", content="--- End of output ---", is_final=True
361
+
362
+ The two terminal events are required for dashboard compatibility and must
363
+ appear in this exact order with is_final=True.
364
+
365
+ Task Management
366
+ ---------------
367
+ Events are broadcast using asyncio.create_task() to avoid blocking the
368
+ streaming loop. Tasks are tracked in a set and awaited during flush()
369
+ to ensure delivery before the stream completes.
370
+
371
+ Task lifecycle:
372
+ 1. _schedule() creates task and adds to _tasks set
373
+ 2. Task completion callback removes it from _tasks
374
+ 3. flush() awaits remaining tasks with error handling
375
+
376
+ Error Handling
377
+ --------------
378
+ - Scheduling errors: Logged and ignored (event dropped)
379
+ - Broadcast errors: Caught during flush(), logged but don't raise
380
+ - Idempotent: on_final() checks _finalized flag to prevent duplicate terminal events
381
+
382
+ Thread Safety
383
+ -------------
384
+ NOT thread-safe. Assumes single-threaded async execution. Multiple
385
+ concurrent streams should use separate WebSocketSink instances.
386
+
387
+ Example
388
+ -------
389
+ async def broadcast(event: StreamingOutputEvent):
390
+ await websocket_manager.send_json(event.model_dump())
391
+
392
+ def event_factory(output_type, content, seq, is_final):
393
+ return StreamingOutputEvent(
394
+ correlation_id="corr-123",
395
+ agent_name="analyzer",
396
+ run_id="run-456",
397
+ output_type=output_type,
398
+ content=content,
399
+ sequence=seq,
400
+ is_final=is_final,
401
+ artifact_id="artifact-789",
402
+ artifact_type="Report",
403
+ )
404
+
405
+ sink = WebSocketSink(ws_broadcast=broadcast, event_factory=event_factory)
406
+
407
+ await sink.on_status("Processing input")
408
+ await sink.on_token("Analysis", "output")
409
+ await sink.on_final(prediction, tokens_emitted=1)
410
+ await sink.flush() # Ensures all broadcasts complete
411
+ """
412
+
413
+ def __init__(
414
+ self,
415
+ *,
416
+ ws_broadcast: Callable[[StreamingOutputEvent], Awaitable[None]] | None,
417
+ event_factory: Callable[[str, str, int, bool], StreamingOutputEvent],
418
+ ) -> None:
419
+ self._ws_broadcast = ws_broadcast
420
+ self._event_factory = event_factory
421
+ self._sequence = 0
422
+ self._tasks: set[asyncio.Task[Any]] = set()
423
+ self._finalized = False
424
+
425
+ def _schedule(
426
+ self,
427
+ output_type: str,
428
+ content: str,
429
+ *,
430
+ is_final: bool,
431
+ advance_sequence: bool = True,
432
+ ) -> None:
433
+ if not self._ws_broadcast:
434
+ return
435
+
436
+ event = self._event_factory(output_type, content, self._sequence, is_final)
437
+ try:
438
+ task = asyncio.create_task(self._ws_broadcast(event))
439
+ except Exception as exc: # pragma: no cover - scheduling should rarely fail
440
+ logger.warning(f"Failed to schedule streaming event: {exc}")
441
+ return
442
+
443
+ self._tasks.add(task)
444
+ task.add_done_callback(self._tasks.discard)
445
+
446
+ if advance_sequence:
447
+ self._sequence += 1
448
+
449
+ async def on_status(self, text: str) -> None:
450
+ if not text:
451
+ return
452
+ self._schedule("log", f"{text}\n", is_final=False)
453
+
454
+ async def on_token(self, text: str, signature_field: str | None) -> None: # noqa: ARG002
455
+ if not text:
456
+ return
457
+ self._schedule("llm_token", text, is_final=False)
458
+
459
+ async def on_final(self, result: Any, tokens_emitted: int) -> None: # noqa: ARG002
460
+ if self._finalized:
461
+ return
462
+
463
+ self._schedule(
464
+ "log",
465
+ f"\nAmount of output tokens: {tokens_emitted}",
466
+ is_final=True,
467
+ )
468
+ self._schedule(
469
+ "log",
470
+ "--- End of output ---",
471
+ is_final=True,
472
+ )
473
+
474
+ self._finalized = True
475
+
476
+ async def flush(self) -> None:
477
+ if not self._tasks:
478
+ return
479
+
480
+ pending = list(self._tasks)
481
+ self._tasks.clear()
482
+
483
+ results = await asyncio.gather(*pending, return_exceptions=True)
484
+ for result in results:
485
+ if isinstance(result, Exception):
486
+ logger.warning(f"Streaming broadcast task failed: {result}")
487
+
488
+
489
+ __all__ = ["RichSink", "StreamSink", "WebSocketSink"]
@@ -0,0 +1,49 @@
+ """Semantic subscriptions for Flock.
+
+ This module provides semantic matching capabilities using sentence-transformers.
+ It's an optional feature that requires installing the [semantic] extra:
+
+     uv add flock-core[semantic]
+
+ If sentence-transformers is not installed, semantic features will gracefully
+ degrade and core Flock functionality remains unaffected.
+ """
+
+ # Try to import semantic features
+ try:
+     from sentence_transformers import SentenceTransformer  # noqa: F401
+
+     from .context_provider import SemanticContextProvider
+     from .embedding_service import EmbeddingService
+
+     SEMANTIC_AVAILABLE = True
+ except ImportError as e:
+     SEMANTIC_AVAILABLE = False
+     _import_error = e
+
+     # Provide helpful error message when features are used
+     class EmbeddingService:  # type: ignore
+         """Placeholder when semantic extras not installed."""
+
+         @staticmethod
+         def get_instance(*args, **kwargs):
+             raise ImportError(
+                 "Semantic features require sentence-transformers. "
+                 "Install with: uv add flock-core[semantic]"
+             ) from _import_error
+
+     class SemanticContextProvider:  # type: ignore
+         """Placeholder when semantic extras not installed."""
+
+         def __init__(self, *args, **kwargs):
+             raise ImportError(
+                 "Semantic features require sentence-transformers. "
+                 "Install with: uv add flock-core[semantic]"
+             ) from _import_error
+
+
+ __all__ = [
+     "SEMANTIC_AVAILABLE",
+     "EmbeddingService",
+     "SemanticContextProvider",
+ ]
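Callers can branch on `SEMANTIC_AVAILABLE` instead of catching `ImportError` at every call site. A sketch, with the module path assumed since the diff does not show it:

    # Hypothetical module path -- only the exported names are taken from the diff.
    from flock.semantic import SEMANTIC_AVAILABLE, SemanticContextProvider

    if SEMANTIC_AVAILABLE:
        provider = SemanticContextProvider()  # real implementation (extras installed)
    else:
        provider = None  # degrade gracefully; constructing would raise a helpful ImportError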