graphrefly 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. graphrefly/__init__.py +160 -0
  2. graphrefly/compat/__init__.py +18 -0
  3. graphrefly/compat/async_utils.py +228 -0
  4. graphrefly/compat/asyncio_runner.py +89 -0
  5. graphrefly/compat/trio_runner.py +81 -0
  6. graphrefly/core/__init__.py +142 -0
  7. graphrefly/core/clock.py +20 -0
  8. graphrefly/core/dynamic_node.py +749 -0
  9. graphrefly/core/guard.py +277 -0
  10. graphrefly/core/meta.py +149 -0
  11. graphrefly/core/node.py +963 -0
  12. graphrefly/core/protocol.py +460 -0
  13. graphrefly/core/runner.py +107 -0
  14. graphrefly/core/subgraph_locks.py +296 -0
  15. graphrefly/core/sugar.py +138 -0
  16. graphrefly/core/versioning.py +193 -0
  17. graphrefly/extra/__init__.py +313 -0
  18. graphrefly/extra/adapters.py +2149 -0
  19. graphrefly/extra/backoff.py +287 -0
  20. graphrefly/extra/backpressure.py +113 -0
  21. graphrefly/extra/checkpoint.py +307 -0
  22. graphrefly/extra/composite.py +303 -0
  23. graphrefly/extra/cron.py +133 -0
  24. graphrefly/extra/data_structures.py +707 -0
  25. graphrefly/extra/resilience.py +727 -0
  26. graphrefly/extra/sources.py +766 -0
  27. graphrefly/extra/tier1.py +1067 -0
  28. graphrefly/extra/tier2.py +1802 -0
  29. graphrefly/graph/__init__.py +31 -0
  30. graphrefly/graph/graph.py +2249 -0
  31. graphrefly/integrations/__init__.py +1 -0
  32. graphrefly/integrations/fastapi.py +767 -0
  33. graphrefly/patterns/__init__.py +5 -0
  34. graphrefly/patterns/ai.py +2132 -0
  35. graphrefly/patterns/cqrs.py +515 -0
  36. graphrefly/patterns/memory.py +639 -0
  37. graphrefly/patterns/messaging.py +553 -0
  38. graphrefly/patterns/orchestration.py +536 -0
  39. graphrefly/patterns/reactive_layout/__init__.py +81 -0
  40. graphrefly/patterns/reactive_layout/measurement_adapters.py +276 -0
  41. graphrefly/patterns/reactive_layout/reactive_block_layout.py +434 -0
  42. graphrefly/patterns/reactive_layout/reactive_layout.py +943 -0
  43. graphrefly/py.typed +1 -0
  44. graphrefly-0.1.0.dist-info/METADATA +253 -0
  45. graphrefly-0.1.0.dist-info/RECORD +47 -0
  46. graphrefly-0.1.0.dist-info/WHEEL +4 -0
  47. graphrefly-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2149 @@
1
+ """Protocol, system, and ingest adapters (roadmap 5.2, 5.3b).
2
+
3
+ Each adapter wraps an external protocol or system as a reactive :class:`~graphrefly.core.node.Node`
4
+ built on :func:`~graphrefly.core.node.node` -- no second protocol.
5
+
6
+ **Moved from sources.py:** ``from_http``, ``from_websocket`` / ``to_websocket``,
7
+ ``from_webhook``, ``to_sse``, ``from_mcp``, ``from_git_hook``, ``from_event_emitter``,
8
+ ``from_fs_watch``, ``sse_frame``, ``HttpBundle``.
9
+
10
+ **New (5.3b):** ``from_otel``, ``from_syslog`` / ``parse_syslog``, ``from_statsd`` /
11
+ ``parse_statsd``, ``from_prometheus`` / ``parse_prometheus_text``, ``from_kafka`` /
12
+ ``to_kafka``, ``from_redis_stream`` / ``to_redis_stream``, ``from_csv`` / ``from_ndjson``,
13
+ ``from_clickhouse_watch``.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import csv
19
+ import json
20
+ import os
21
+ import re
22
+ import threading
23
+ import urllib.error
24
+ import urllib.request
25
+ from contextlib import suppress
26
+ from dataclasses import dataclass
27
+ from datetime import UTC, datetime
28
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
29
+
30
+ if TYPE_CHECKING:
31
+ from collections.abc import Callable, Iterable, Iterator
32
+
33
+ from graphrefly.core.clock import wall_clock_ns
34
+ from graphrefly.core.node import Node, NodeActions, node
35
+ from graphrefly.core.protocol import Messages, MessageType, batch
36
+ from graphrefly.extra.resilience import WithStatusBundle, with_status
37
+
38
+
39
+ def _msg_val(m: tuple[Any, ...]) -> Any:
40
+ assert len(m) >= 2
41
+ return m[1]
42
+
43
+
44
+ @dataclass(frozen=True, slots=True)
45
+ class SinkTransportError:
46
+ """Error context for sink transport failures (to_kafka, to_redis_stream)."""
47
+
48
+ stage: str
49
+ error: Exception
50
+ value: Any
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # HttpBundle / from_http (moved from sources.py)
55
+ # ---------------------------------------------------------------------------
56
+
57
+
58
@dataclass(frozen=True, slots=True)
class HttpBundle(WithStatusBundle):
    """Bundle returned by :func:`from_http`.

    Extends the status bundle (pass-through node plus status/error companions)
    with two bookkeeping nodes maintained by the HTTP source.
    """

    # Counter node; from_http increments it by one for each successful fetch.
    fetch_count: Node[int]
    # Wall-clock timestamp (nanoseconds) of the most recent successful fetch.
    last_updated: Node[int]
64
+
65
+
66
def from_http(
    url: str,
    *,
    method: str = "GET",
    headers: dict[str, str] | None = None,
    body: Any = None,
    transform: Callable[[Any], Any] | None = None,
    timeout_ns: int = 30_000_000_000,
    **kwargs: Any,
) -> HttpBundle:
    """Create a one-shot reactive HTTP source with lifecycle tracking.

    Uses :func:`urllib.request.urlopen` internally to remain zero-dependency.
    Performs a single fetch when subscribed, then completes. For periodic
    fetching, compose with ``switch_map`` and a time source.

    Args:
        url: The URL to fetch.
        method: HTTP method (default ``"GET"``).
        headers: Optional request headers.
        body: Optional request body. ``str`` is sent UTF-8 encoded, ``bytes`` /
            ``bytearray`` are sent unchanged, anything else is JSON-encoded. A
            JSON-encoded body gets ``Content-Type: application/json`` unless the
            caller supplied a ``Content-Type`` header.
        transform: Optional function to transform raw response bytes
            (signature: ``Callable[[bytes], Any]``). Default: ``json.loads``.
        timeout_ns: Request timeout in **nanoseconds** (default ``30s``).
        **kwargs: Passed to :func:`~graphrefly.core.node.node` as options.

    Returns:
        An :class:`HttpBundle` wrapping the primary node and companions.

    Example:
        ```python
        from graphrefly.extra.adapters import from_http
        from graphrefly.extra.tier2 import switch_map
        from graphrefly.extra import from_timer

        # One-shot:
        api = from_http("https://api.example.com/data")

        # Periodic polling via reactive composition:
        polled = switch_map(lambda _: from_http(url))(from_timer(0, period=5.0))
        ```
    Notes:
        This source is implemented with ``threading.Thread`` + ``urllib`` and does
        not currently support external cancellation signals (TS ``AbortSignal`` parity
        is deferred). Unsubscribe prevents any late emissions from being forwarded.
    """
    from graphrefly.core.sugar import state

    ns_per_sec = 1_000_000_000
    base_name = kwargs.get("name", "http")
    fetch_count = state(0, name=f"{base_name}/fetch_count")
    last_updated = state(0, name=f"{base_name}/last_updated")

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # Single-element list so the worker thread observes the teardown flag.
        active = [True]

        def task() -> None:
            if not active[0]:
                return
            try:
                data_bytes: bytes | None = None
                json_encoded = False
                if body is not None:
                    if isinstance(body, str):
                        data_bytes = body.encode("utf-8")
                    elif isinstance(body, (bytes, bytearray)):
                        data_bytes = bytes(body)
                    else:
                        data_bytes = json.dumps(body).encode("utf-8")
                        json_encoded = True

                req = urllib.request.Request(url, data=data_bytes, method=method)
                for k, v in (headers or {}).items():
                    req.add_header(k, v)
                # urllib would otherwise label the payload as
                # application/x-www-form-urlencoded. Request stores header names
                # capitalized, hence the "Content-type" spelling in the lookup.
                if json_encoded and not req.has_header("Content-type"):
                    req.add_header("Content-Type", "application/json")

                with urllib.request.urlopen(req, timeout=timeout_ns / ns_per_sec) as response:
                    if not active[0]:
                        return
                    raw_data = response.read()
                res_data = transform(raw_data) if transform is not None else json.loads(raw_data)

                if not active[0]:
                    return

                # Companions and the value are updated in one batch.
                with batch():
                    current_count = fetch_count.get()
                    next_count = (current_count if isinstance(current_count, int) else 0) + 1
                    fetch_count.down([(MessageType.DATA, next_count)])
                    last_updated.down([(MessageType.DATA, wall_clock_ns())])
                    actions.emit(res_data)
                    actions.down([(MessageType.COMPLETE,)])

            except BaseException as err:
                # Failures after unsubscribe are dropped; otherwise surface as ERROR.
                if not active[0]:
                    return
                actions.down([(MessageType.ERROR, err)])

        t = threading.Thread(target=task, daemon=True)
        t.start()

        def cleanup() -> None:
            active[0] = False

        return cleanup

    out = node(
        start,
        describe_kind="http",
        complete_when_deps_complete=False,
        **kwargs,
    )
    tracked = with_status(out)

    return HttpBundle(
        node=tracked.node,
        status=tracked.status,
        error=tracked.error,
        fetch_count=fetch_count,
        last_updated=last_updated,
    )
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # from_event_emitter (moved from sources.py)
186
+ # ---------------------------------------------------------------------------
187
+
188
+
189
def from_event_emitter(
    emitter: Any,
    event_name: str,
    *,
    add_method: str = "add_listener",
    remove_method: str = "remove_listener",
) -> Node[Any]:
    """Turn listener callbacks from an emitter object into a source node.

    A listener is attached through ``getattr(emitter, add_method)`` when the node
    starts. Each callback invocation becomes one DATA emission: a single positional
    argument is emitted as-is, any other arity is emitted as the argument tuple.
    Teardown detaches the listener via ``remove_method`` (errors while detaching
    are ignored).
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        listening = [True]

        def on_event(*payload: Any) -> None:
            if listening[0]:
                actions.emit(payload[0] if len(payload) == 1 else payload)

        attach = getattr(emitter, add_method)
        attach(event_name, on_event)

        def teardown() -> None:
            # Flip the flag first so a callback racing with removal is ignored.
            listening[0] = False
            with suppress(Exception):
                detach = getattr(emitter, remove_method)
                detach(event_name, on_event)

        return teardown

    return node(start, describe_kind="from_event_emitter", complete_when_deps_complete=False)
223
+
224
+
225
+ # ---------------------------------------------------------------------------
226
+ # from_fs_watch helpers (moved from sources.py)
227
+ # ---------------------------------------------------------------------------
228
+
229
+
230
+ def _glob_to_regex(pattern: str) -> re.Pattern[str]:
231
+ out: list[str] = ["^"]
232
+ i = 0
233
+ while i < len(pattern):
234
+ ch = pattern[i]
235
+ if ch == "*":
236
+ if i + 1 < len(pattern) and pattern[i + 1] == "*":
237
+ out.append(".*")
238
+ i += 2
239
+ continue
240
+ out.append("[^/]*")
241
+ i += 1
242
+ continue
243
+ out.append(re.escape(ch))
244
+ i += 1
245
+ out.append("$")
246
+ return re.compile("".join(out))
247
+
248
+
249
+ def _matches_any(path: str, patterns: list[re.Pattern[str]]) -> bool:
250
+ return any(p.search(path) is not None for p in patterns)
251
+
252
+
253
+ def _build_watchdog_backend(
254
+ paths: list[str],
255
+ recursive: bool,
256
+ on_event: Callable[[str, str, str, str | None, str | None], None],
257
+ on_error: Callable[[BaseException], None],
258
+ ) -> tuple[list[Any], Callable[[], None]]:
259
+ try:
260
+ from watchdog.events import FileSystemEventHandler # type: ignore[import-not-found]
261
+ from watchdog.observers import Observer # type: ignore[import-not-found]
262
+ except Exception as err: # pragma: no cover - exercised via monkeypatch in tests
263
+ msg = (
264
+ "from_fs_watch requires watchdog (no polling fallback by design). "
265
+ "Install with `uv add watchdog`."
266
+ )
267
+ raise RuntimeError(msg) from err
268
+
269
+ class _Handler(FileSystemEventHandler): # type: ignore[misc]
270
+ def __init__(self, root: str) -> None:
271
+ super().__init__()
272
+ self._root = root
273
+
274
+ def on_any_event(self, event: Any) -> None:
275
+ if getattr(event, "is_directory", False):
276
+ return
277
+ try:
278
+ event_type = str(getattr(event, "event_type", "change"))
279
+ src_path = getattr(event, "src_path", None)
280
+ dest_path = getattr(event, "dest_path", None)
281
+ path = str(dest_path or src_path or getattr(event, "path", ""))
282
+ if path:
283
+ on_event(
284
+ event_type,
285
+ path,
286
+ self._root,
287
+ str(src_path) if src_path else None,
288
+ str(dest_path) if dest_path else None,
289
+ )
290
+ except BaseException as err: # pragma: no cover - defensive callback path
291
+ on_error(err)
292
+
293
+ observers: list[Any] = []
294
+ try:
295
+ for p in paths:
296
+ observer = Observer()
297
+ observer.schedule(_Handler(str(os.path.abspath(p))), p, recursive=recursive)
298
+ observer.daemon = True
299
+ observer.start()
300
+ observers.append(observer)
301
+ except Exception:
302
+ for observer in observers:
303
+ with suppress(Exception):
304
+ observer.stop()
305
+ for observer in observers:
306
+ with suppress(Exception):
307
+ observer.join(timeout=1.0)
308
+ raise
309
+
310
+ def stop() -> None:
311
+ for observer in observers:
312
+ observer.stop()
313
+ for observer in observers:
314
+ observer.join(timeout=1.0)
315
+
316
+ return observers, stop
317
+
318
+
319
def from_fs_watch(
    paths: str | list[str],
    *,
    recursive: bool = True,
    debounce: float = 0.1,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Watch filesystem changes and emit debounced events.

    This source intentionally uses event-driven OS watchers only (no polling fallback).

    Each emitted DATA value is a dict with ``type`` (``"change"``, ``"create"``,
    ``"delete"`` or ``"rename"``), ``path``, ``root``, ``relative_path`` and
    ``timestamp_ns``, plus ``src_path`` / ``dest_path`` when the backend reports them.
    Paths use forward slashes. Events are coalesced per path: only the latest event
    for a path within a debounce window is emitted.

    Args:
        paths: One path or a list of paths to watch.
        recursive: Whether sub-directories are watched too.
        debounce: Quiet period in seconds before pending events are flushed.
        include: Glob patterns; when given, only matching paths are reported.
        exclude: Glob patterns to drop. ``None`` applies the defaults
            (``node_modules``, ``.git``, ``dist``); an explicit empty list disables
            exclusion.
        **kwargs: Passed to :func:`~graphrefly.core.node.node` as options.

    Raises:
        ValueError: If no path is given.
    """
    path_list = [paths] if isinstance(paths, str) else list(paths)
    if not path_list:
        msg = "from_fs_watch expects at least one path"
        raise ValueError(msg)
    include_patterns = [_glob_to_regex(p) for p in (include or [])]
    # Only fall back to the defaults when the caller did not pass ``exclude`` at all,
    # so ``exclude=[]`` can be used to watch everything.
    default_excludes = ["**/node_modules/**", "**/.git/**", "**/dist/**"]
    exclude_patterns = [
        _glob_to_regex(p) for p in (default_excludes if exclude is None else exclude)
    ]

    type_aliases = {
        "modified": "change",
        "change": "change",
        "changed": "change",
        "created": "create",
        "create": "create",
        "deleted": "delete",
        "delete": "delete",
        "moved": "rename",
        "rename": "rename",
        "renamed": "rename",
    }

    def normalize_type(event_type: str) -> str:
        # Unknown backend event types are reported as a generic change.
        return type_aliases.get(event_type.lower(), "change")

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        lock = threading.Lock()
        pending: dict[str, dict[str, Any]] = {}
        timer: list[threading.Timer | None] = [None]
        active = [True]
        # Bumped on teardown/error so a timer that fires late can tell it is stale.
        generation = [0]

        def _noop_stop_backend() -> None:
            return

        stop_backend_ref: list[Callable[[], None]] = [_noop_stop_backend]

        def flush(token: int) -> None:
            batch_msgs: Messages = []
            with lock:
                timer[0] = None
                if not active[0] or not pending:
                    return
                if token != generation[0]:
                    pending.clear()
                    return
                batch_msgs = [(MessageType.DATA, evt.copy()) for evt in pending.values()]
                pending.clear()
            with lock:
                if not active[0] or token != generation[0]:
                    return
            # Deliver outside the lock so downstream work cannot block the watcher.
            actions.down(batch_msgs)

        def queue_event(
            event_type: str,
            raw_path: str,
            root: str,
            src_path: str | None,
            dest_path: str | None,
        ) -> None:
            normalized_path = os.path.abspath(raw_path).replace("\\", "/")
            normalized_root = os.path.abspath(root).replace("\\", "/")
            rel_path = os.path.relpath(normalized_path, normalized_root).replace("\\", "/")
            included = (
                not include_patterns
                or _matches_any(normalized_path, include_patterns)
                or _matches_any(rel_path, include_patterns)
            )
            if not included:
                return
            excluded = _matches_any(normalized_path, exclude_patterns) or _matches_any(
                rel_path, exclude_patterns
            )
            if excluded:
                return
            event = {
                "type": normalize_type(event_type),
                "path": normalized_path,
                "root": normalized_root,
                "relative_path": rel_path,
                "timestamp_ns": wall_clock_ns(),
            }
            if src_path is not None:
                event["src_path"] = os.path.abspath(src_path).replace("\\", "/")
            if dest_path is not None:
                event["dest_path"] = os.path.abspath(dest_path).replace("\\", "/")
            with lock:
                if not active[0]:
                    return
                pending[normalized_path] = event
                # Restart the debounce window on every new event.
                if timer[0] is not None:
                    timer[0].cancel()
                token = generation[0]
                t = threading.Timer(debounce, lambda: flush(token))
                t.daemon = True
                t.start()
                timer[0] = t

        def emit_error(err: BaseException) -> None:
            with lock:
                if not active[0]:
                    return
                active[0] = False
                generation[0] += 1
                if timer[0] is not None:
                    timer[0].cancel()
                    timer[0] = None
                pending.clear()
            # A failure while stopping the backend must not swallow the original
            # error: the ERROR message is delivered regardless.
            with suppress(Exception):
                stop_backend_ref[0]()
            actions.down([(MessageType.ERROR, err)])

        _observers, stop_backend = _build_watchdog_backend(
            path_list,
            recursive,
            queue_event,
            emit_error,
        )
        stop_backend_ref[0] = stop_backend

        def cleanup() -> None:
            with lock:
                active[0] = False
                generation[0] += 1
                if timer[0] is not None:
                    timer[0].cancel()
                    timer[0] = None
                pending.clear()
            stop_backend()

        return cleanup

    return node(start, describe_kind="from_fs_watch", complete_when_deps_complete=False, **kwargs)
460
+
461
+
462
+ # ---------------------------------------------------------------------------
463
+ # from_webhook (moved from sources.py)
464
+ # ---------------------------------------------------------------------------
465
+
466
+
467
def from_webhook(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """Bridge HTTP webhook callbacks into a GraphReFly source.

    The ``register`` callback wires your runtime/framework callback into GraphReFly and may return
    cleanup. It receives three functions: ``emit(payload)``, ``error(err)``, and ``complete()``.

    ``error`` and ``complete`` are terminal: after either has been called, further calls to
    any of the three functions are ignored. If ``register`` itself raises, the exception is
    reported as ``ERROR`` (unless a terminal message was already sent) and the source is
    deactivated.

    This mirrors the source-adapter style of :func:`from_event_emitter`, but targets HTTP webhook
    handlers from frameworks like FastAPI or Flask.

    Example (FastAPI):
        ```python
        from fastapi import FastAPI, Request
        from graphrefly.extra import from_webhook

        app = FastAPI()
        bridge: dict[str, object] = {}

        def register(emit, error, complete):
            bridge["emit"] = emit
            bridge["error"] = error
            bridge["complete"] = complete
            return None

        webhook_node = from_webhook(register)

        @app.post("/webhook")
        async def webhook(request: Request):
            payload = await request.json()
            bridge["emit"](payload)
            return {"ok": True}
        ```

    Example (Flask):
        ```python
        from flask import Flask, jsonify, request
        from graphrefly.extra import from_webhook

        app = Flask(__name__)
        bridge: dict[str, object] = {}

        def register(emit, error, complete):
            bridge["emit"] = emit
            bridge["error"] = error
            bridge["complete"] = complete
            return None

        webhook_node = from_webhook(register)

        @app.post("/webhook")
        def webhook():
            try:
                bridge["emit"](request.get_json(force=True))
                return jsonify({"ok": True}), 200
            except Exception as exc:
                bridge["error"](exc)
                return jsonify({"ok": False}), 500
        ```
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = [True]

        def emit(payload: Any) -> None:
            if not active[0]:
                return
            actions.emit(payload)

        def error(err: BaseException | Any) -> None:
            if not active[0]:
                return
            active[0] = False
            actions.down([(MessageType.ERROR, err)])

        def complete() -> None:
            if not active[0]:
                return
            active[0] = False
            actions.down([(MessageType.COMPLETE,)])

        try:
            cleanup = register(emit, error, complete)
        except BaseException as err:
            cleanup = None
            # Route through error() so the source is deactivated and at most one
            # terminal message is sent, even if register leaked the callbacks or
            # already terminated the source before raising.
            error(err)

        def stop() -> None:
            active[0] = False
            if cleanup is not None:
                cleanup()

        return stop

    return node(start, describe_kind="from_webhook", complete_when_deps_complete=False)
569
+
570
+
571
+ # ---------------------------------------------------------------------------
572
+ # from_websocket / to_websocket (moved from sources.py)
573
+ # ---------------------------------------------------------------------------
574
+
575
+
576
def from_websocket(
    socket: Any | None = None,
    *,
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ]
    | None = None,
    add_method: str = "add_listener",
    remove_method: str = "remove_listener",
    message_event: str = "message",
    error_event: str = "error",
    close_event: str = "close",
    parse: Callable[[Any], Any] | None = None,
    close_on_cleanup: bool = False,
) -> Node[Any]:
    """Expose WebSocket traffic as a GraphReFly source node.

    Two wiring modes are supported. With ``register`` (preferred in Python because it is
    runtime-agnostic) the callback receives ``emit``, ``error`` and ``complete`` and must
    return a cleanup callable. Otherwise listeners for ``message_event``, ``error_event``
    and ``close_event`` are attached to ``socket`` through ``add_method`` and detached
    through ``remove_method``.

    Incoming payloads are unwrapped from a ``data`` attribute or ``"data"`` key when
    present, then passed through ``parse`` if given. An error event or a failure while
    handling a message ends the source with ``ERROR``; a close event ends it with
    ``COMPLETE``. Cleanup runs at most once.

    ``register`` must be atomic: either fully register and return a cleanup callable,
    or raise before any listener side effects.

    Raises:
        ValueError: If neither ``socket`` nor ``register`` is provided.
    """
    if socket is None and register is None:
        msg = "from_websocket requires either socket or register"
        raise ValueError(msg)

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        guard = threading.Lock()
        live = [True]
        torn_down = [False]
        teardown: Callable[[], None] | None = None

        def _teardown_once() -> None:
            # Claim the teardown under the lock, run it outside the lock.
            with guard:
                if torn_down[0]:
                    return
                torn_down[0] = True
                claimed = teardown
            if claimed is None:
                return
            with suppress(Exception):
                claimed()

        def _finish(msgs: Messages) -> bool:
            # Only the first terminal transition wins; later ones report False.
            with guard:
                was_live = live[0]
                live[0] = False
            if not was_live:
                return False
            _teardown_once()
            actions.down(msgs)
            return True

        def _is_live() -> bool:
            with guard:
                return live[0]

        def _unwrap(value: Any) -> Any:
            if hasattr(value, "data"):
                return value.data
            if isinstance(value, dict) and "data" in value:
                return value["data"]
            return value

        def emit(payload: Any) -> None:
            if not _is_live():
                return
            try:
                inner = _unwrap(payload)
                if not _is_live():
                    return
                actions.emit(inner if parse is None else parse(inner))
            except Exception as err:
                _finish([(MessageType.ERROR, err)])

        def error(err: BaseException | Any) -> None:
            # Non-exception error payloads are wrapped so ERROR always carries an exception.
            failure = err if isinstance(err, BaseException) else RuntimeError(str(err))
            _finish([(MessageType.ERROR, failure)])

        def complete() -> None:
            _finish([(MessageType.COMPLETE,)])

        if register is not None:
            try:
                teardown = register(emit, error, complete)
                if teardown is None:
                    raise RuntimeError(
                        "from_websocket register contract violation: "
                        "register must return cleanup callable"
                    )
            except Exception as err:
                _finish([(MessageType.ERROR, err)])
        else:
            assert socket is not None
            attached: list[tuple[str, Callable[..., None]]] = []

            def on_message(*args: Any) -> None:
                emit(args[0] if len(args) == 1 else args)

            def on_error(*args: Any) -> None:
                error(args[0] if len(args) == 1 else args)

            def on_close(*_args: Any) -> None:
                complete()

            wiring = (
                (message_event, on_message),
                (error_event, on_error),
                (close_event, on_close),
            )
            try:
                for event_name, listener in wiring:
                    getattr(socket, add_method)(event_name, listener)
                    attached.append((event_name, listener))
            except Exception as err:
                # Partial registration: undo what was attached, then fail the source.
                for event_name, listener in attached:
                    with suppress(Exception):
                        getattr(socket, remove_method)(event_name, listener)
                _finish([(MessageType.ERROR, err)])

            def _detach() -> None:
                for event_name, listener in attached:
                    with suppress(Exception):
                        getattr(socket, remove_method)(event_name, listener)
                if close_on_cleanup:
                    with suppress(Exception):
                        socket.close()

            # Installed only after the listeners are wired, matching the point at
            # which teardown becomes available to _teardown_once.
            teardown = _detach

        def stop() -> None:
            with guard:
                live[0] = False
            _teardown_once()

        return stop

    return node(start, describe_kind="from_websocket", complete_when_deps_complete=False)
722
+
723
+
724
def to_websocket(
    source: Node[Any],
    socket: Any | None = None,
    *,
    send: Callable[[Any], None] | None = None,
    close: Callable[..., None] | None = None,
    serialize: Callable[[Any], Any] | None = None,
    close_on_complete: bool = True,
    close_on_error: bool = True,
    close_code: int | None = None,
    close_reason: str | None = None,
    on_transport_error: Callable[[dict[str, Any]], None] | None = None,
) -> Callable[[], None]:
    """Subscribe to ``source`` and push every DATA payload to a WebSocket-like transport.

    ``send`` / ``close`` default to the methods of ``socket`` when not given explicitly.
    Payloads go through ``serialize`` when provided; otherwise ``str`` / bytes-like values
    are sent unchanged and everything else is JSON-encoded (falling back to ``str(value)``
    when JSON encoding raises ``TypeError``).

    On COMPLETE / ERROR the transport is closed once, subject to ``close_on_complete`` /
    ``close_on_error``. Failures during serialization, send or close are not raised; they
    are passed to ``on_transport_error`` as a dict with ``stage``, ``error`` and ``message``
    keys. A serialize or send failure stops processing of the current message batch.

    Returns:
        The unsubscribe callable returned by ``source.subscribe``.

    Raises:
        ValueError: If neither ``socket`` nor ``send`` is provided.
    """
    if send is None:
        if socket is None:
            msg = "to_websocket requires socket or send"
            raise ValueError(msg)
        send = socket.send
    if close is None and socket is not None and hasattr(socket, "close"):
        close = socket.close

    already_closed = [False]

    def _encode(value: Any) -> Any:
        if serialize is not None:
            return serialize(value)
        if isinstance(value, (str, bytes, bytearray, memoryview)):
            return value
        try:
            return json.dumps(value)
        except TypeError:
            return str(value)

    def _notify(stage: str, err: Exception, message: tuple[Any, ...] | None) -> None:
        # Reporting is best-effort: a faulty callback must not break the sink.
        if on_transport_error is None:
            return
        with suppress(Exception):
            on_transport_error({"stage": stage, "error": err, "message": message})

    def _shutdown(message: tuple[Any, ...]) -> None:
        if close is None or already_closed[0]:
            return
        already_closed[0] = True
        bare_close = close_code is None and close_reason is None
        if not bare_close:
            try:
                close(close_code, close_reason)
                return
            except TypeError:
                # Some close callables don't accept code/reason; retry without arguments.
                pass
            except Exception as err:
                _notify("close", err, message)
                return
        try:
            close()
        except Exception as err:
            _notify("close", err, message)

    def _forward(message: tuple[Any, ...]) -> bool:
        """Serialize and send one DATA message; False means the batch must stop."""
        try:
            payload = _encode(message[1] if len(message) > 1 else None)
        except Exception as err:
            _notify("serialize", err, message)
            return False
        try:
            send(payload)
        except Exception as err:
            _notify("send", err, message)
            return False
        return True

    def sink(msgs: Messages) -> None:
        for message in msgs:
            kind = message[0]
            if kind is MessageType.DATA:
                if not _forward(message):
                    return
                continue
            if close is None:
                continue
            wants_close = (kind is MessageType.COMPLETE and close_on_complete) or (
                kind is MessageType.ERROR and close_on_error
            )
            if wants_close:
                _shutdown(message)

    return source.subscribe(sink)
813
+
814
+
815
+ # ---------------------------------------------------------------------------
816
+ # SSE (moved from sources.py)
817
+ # ---------------------------------------------------------------------------
818
+
819
+
820
+ def sse_frame(event: str, data: str | None = None) -> str:
821
+ out = f"event: {event}\n"
822
+ if data is not None:
823
+ # Preserve trailing empty lines (matches TS split(/\\r?\\n/) framing behavior).
824
+ normalized = data.replace("\r\n", "\n")
825
+ for line in normalized.split("\n"):
826
+ out += f"data: {line}\n"
827
+ return f"{out}\n"
828
+
829
+
830
def to_sse(
    source: Node[Any],
    *,
    serialize: Callable[[Any], str] | None = None,
    data_event: str = "data",
    error_event: str = "error",
    complete_event: str = "complete",
    include_resolved: bool = False,
    include_dirty: bool = False,
    keepalive_s: float | None = None,
    cancel_event: threading.Event | None = None,
    event_name_resolver: Callable[[Any], str] | None = None,
) -> Iterator[str]:
    """Convert node messages into standard SSE text frames.

    This is a sink adapter implemented as a thin subscription bridge over GraphReFly
    messages. The returned iterator yields framed SSE chunks (``event: ...`` and
    ``data: ...`` lines, separated by a blank line).

    Args:
        source: Node to subscribe to. Subscription happens when iteration starts.
        serialize: Encoder for non-string payloads. Strings are always sent as-is;
            without ``serialize``, exceptions use ``str()`` and other values are
            JSON-encoded (falling back to ``str()`` on ``TypeError``).
        data_event: Event name for DATA frames.
        error_event: Event name for the ERROR frame (ends the stream).
        complete_event: Event name for the COMPLETE frame (ends the stream).
        include_resolved: Forward RESOLVED messages instead of dropping them.
        include_dirty: Forward DIRTY messages instead of dropping them.
        keepalive_s: When positive, emit a ``: keepalive`` comment at this interval
            (seconds).
        cancel_event: When set, ends the stream without a terminal frame.
        event_name_resolver: Maps other message types to event names
            (default ``str(type)``).
    """

    import queue

    # How often the cancel watcher re-checks whether the stream already ended.
    cancel_poll_s = 0.1

    q: queue.Queue[str | None] = queue.Queue()
    done = threading.Event()

    def encode(value: Any) -> str:
        if isinstance(value, str):
            return value
        if serialize is not None:
            return serialize(value)
        if isinstance(value, BaseException):
            return str(value)
        try:
            return json.dumps(value)
        except TypeError:
            return str(value)

    def sink(msgs: Messages) -> None:
        if done.is_set():
            return
        for msg in msgs:
            t = msg[0]
            if t is MessageType.DATA:
                q.put(sse_frame(data_event, encode(msg[1] if len(msg) > 1 else None)))
                continue
            if t is MessageType.ERROR:
                q.put(sse_frame(error_event, encode(msg[1] if len(msg) > 1 else None)))
                done.set()
                q.put(None)  # sentinel: tells the consumer loop to stop
                return
            if t is MessageType.COMPLETE:
                q.put(sse_frame(complete_event))
                done.set()
                q.put(None)
                return
            if t is MessageType.RESOLVED and not include_resolved:
                continue
            if t is MessageType.DIRTY and not include_dirty:
                continue
            event = event_name_resolver(t) if event_name_resolver is not None else str(t)
            data = encode(msg[1]) if len(msg) > 1 else None
            q.put(sse_frame(event, data))

    unsub = source.subscribe(sink)

    keepalive_stop = threading.Event()
    keepalive_thread: threading.Thread | None = None
    if keepalive_s is not None and keepalive_s > 0:

        def keepalive_loop() -> None:
            while not keepalive_stop.wait(keepalive_s):
                if done.is_set():
                    return
                q.put(": keepalive\n\n")

        keepalive_thread = threading.Thread(target=keepalive_loop, daemon=True)
        keepalive_thread.start()

    cancel_thread: threading.Thread | None = None
    if cancel_event is not None:

        def cancel_loop() -> None:
            # Wait with a timeout instead of blocking forever: if the stream ends
            # while cancel_event is never set (e.g. a long-lived shutdown event),
            # an unbounded wait would leak this thread for every finished stream.
            while not cancel_event.wait(cancel_poll_s):
                if done.is_set():
                    return
            if done.is_set():
                return
            done.set()
            q.put(None)

        cancel_thread = threading.Thread(target=cancel_loop, daemon=True)
        cancel_thread.start()

    try:
        while True:
            chunk = q.get()
            if chunk is None:
                break
            yield chunk
    finally:
        # Runs on normal end and when the consumer closes the generator early.
        done.set()
        keepalive_stop.set()
        if keepalive_thread is not None:
            keepalive_thread.join(timeout=0.05)
        if cancel_thread is not None:
            cancel_thread.join(timeout=0.05)
        unsub()
935
+
936
+
937
+ # ---------------------------------------------------------------------------
938
+ # MCP (moved from sources.py)
939
+ # ---------------------------------------------------------------------------
940
+
941
+
942
def from_mcp(
    client: Any,
    *,
    method: str = "notifications/message",
    on_disconnect: Callable[[Callable[[Any], None]], None] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Expose an MCP client's server-push notifications as a reactive producer.

    The caller keeps ownership of the ``Client`` connection (``connect`` /
    ``close``). This adapter only installs a notification handler for *method*
    and re-emits every notification payload as ``DATA``.

    **Disconnect detection:** the MCP SDK has no built-in disconnect event.
    Supply ``on_disconnect`` to bridge an external signal (e.g. the transport's
    ``close`` event) so the source can emit ``ERROR`` and stop forwarding.

    Args:
        client: Any object offering ``set_notification_handler(method, handler)``
            (duck-typed -- no SDK dependency).
        method: MCP notification method to listen for. Default
            ``"notifications/message"``.
        on_disconnect: Optional registrar ``(cb) -> None``; invoke ``cb(err)``
            once the transport disconnects. When ``err`` is ``None`` a generic
            ``Exception("MCP client disconnected")`` is emitted instead.
        **kwargs: Forwarded unchanged to ``node(...)``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per server
        notification.

    Example:
        ```python
        from graphrefly.extra import from_mcp
        tools = from_mcp(client, method="notifications/tools/list_changed")
        ```
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        live = True

        def forward(notification: Any) -> None:
            # Drop notifications that arrive after teardown or disconnect.
            if not live:
                return
            actions.emit(notification)

        def disconnected(err: Any = None) -> None:
            nonlocal live
            # Only the first disconnect signal is reported.
            if not live:
                return
            live = False
            if err is None:
                err = Exception("MCP client disconnected")
            actions.down([(MessageType.ERROR, err)])

        def cleanup() -> None:
            nonlocal live
            live = False
            # Swap in a no-op handler: the duck-typed client API shown here has
            # no "remove handler" call.
            client.set_notification_handler(method, lambda _n: None)

        client.set_notification_handler(method, forward)
        if on_disconnect is not None:
            on_disconnect(disconnected)
        return cleanup

    return node(start, describe_kind="producer", **kwargs)
1003
+
1004
+
1005
+ # ---------------------------------------------------------------------------
1006
+ # from_git_hook (moved from sources.py)
1007
+ # ---------------------------------------------------------------------------
1008
+
1009
+
1010
def from_git_hook(
    repo_path: str,
    *,
    poll_ms: int = 5000,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Detect git commits by polling and surface them as a reactive source.

    ``git rev-parse HEAD`` is run on an interval; whenever the SHA differs from
    the last one seen, a structured ``GitEvent`` dict is emitted. Nothing is
    written to the repository -- no hook script is installed.

    **Limitations:** polling cannot tell commit, merge and rebase apart, so
    ``hook`` is always ``"post-commit"``. If several commits land between two
    polls their changed files are aggregated, while ``message``/``author``
    describe only the newest commit.

    Emitted dict keys: ``hook``, ``commit``, ``files``, ``message``, ``author``,
    ``timestamp_ns``.

    Cross-repo usage::

        merge([from_git_hook(ts_repo), from_git_hook(py_repo)])

    Args:
        repo_path: Absolute path to the git repository root.
        poll_ms: Polling interval in milliseconds. Default ``5000``.
        include: Glob patterns -- keep only matching changed files.
        exclude: Glob patterns -- drop matching changed files.
        **kwargs: Forwarded unchanged to ``node(...)``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per detected
        HEAD change.
    """
    import subprocess

    keep_patterns = [_glob_to_regex(pattern) for pattern in (include or [])]
    drop_patterns = [_glob_to_regex(pattern) for pattern in (exclude or [])]

    def _git(cmd: list[str]) -> str:
        # check=True turns a non-zero git exit status into CalledProcessError.
        completed = subprocess.run(  # noqa: S603
            cmd,
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True,
        )
        return completed.stdout.strip()

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        polling = True
        pending: threading.Timer | None = None

        # P4: Seed with current HEAD; route errors through the protocol.
        try:
            seen_sha = _git(["git", "rev-parse", "HEAD"])
        except Exception as err:
            actions.down([(MessageType.ERROR, err)])
            return lambda: None

        def cleanup() -> None:
            nonlocal polling, pending
            polling = False
            handle = pending
            if handle is not None:
                handle.cancel()
                pending = None

        def fail(err: Exception) -> None:
            # Report the error only while still subscribed, then stop polling.
            if polling:
                actions.down([(MessageType.ERROR, err)])
            cleanup()

        def schedule() -> None:
            nonlocal pending
            if not polling:
                return
            handle = threading.Timer(poll_ms / 1000.0, check)
            handle.daemon = True
            pending = handle
            handle.start()

        def poll_once() -> None:
            nonlocal seen_sha
            if not polling:
                return
            try:
                head = _git(["git", "rev-parse", "HEAD"])
            except Exception as err:
                fail(err)
                return

            # Nothing new (or torn down meanwhile): just queue the next poll.
            if not polling or head == seen_sha:
                schedule()
                return

            try:
                listing = _git(["git", "diff", "--name-only", f"{seen_sha}..{head}"])
                files = [path for path in listing.split("\n") if path]

                if keep_patterns:
                    files = [path for path in files if _matches_any(path, keep_patterns)]
                if drop_patterns:
                    files = [path for path in files if not _matches_any(path, drop_patterns)]

                # P2: Target captured head SHA, not implicit HEAD.
                message = _git(["git", "log", "-1", "--format=%s", head])
                author = _git(["git", "log", "-1", "--format=%an", head])
            except Exception as err:
                fail(err)
                return

            if not polling:
                return
            # P5: Emit before advancing the last-seen SHA.
            event = {
                "hook": "post-commit",
                "commit": head,
                "files": files,
                "message": message,
                "author": author,
                "timestamp_ns": wall_clock_ns(),
            }
            actions.emit(event)
            seen_sha = head
            schedule()

        def check() -> None:
            # P7: Top-level guard -- any unexpected exception tears down cleanly.
            try:
                poll_once()
            except Exception as err:
                fail(err)

        schedule()
        return cleanup

    return node(start, describe_kind="producer", **kwargs)
1146
+
1147
+
1148
+ # ===========================================================================
1149
+ # 5.3b -- Ingest adapters (universal source layer)
1150
+ # ===========================================================================
1151
+
1152
+
1153
+ # ---------------------------------------------------------------------------
1154
+ # OpenTelemetry (OTLP/HTTP)
1155
+ # ---------------------------------------------------------------------------
1156
+
1157
+
1158
@dataclass(frozen=True, slots=True)
class OTelBundle:
    """Bundle returned by :func:`from_otel` -- one node per signal type.

    The dataclass is frozen, so the three node references cannot be rebound
    after construction.
    """

    # Node carrying trace spans.
    traces: Node[Any]
    # Node carrying metric data points.
    metrics: Node[Any]
    # Node carrying log records.
    logs: Node[Any]
1165
+
1166
+
1167
def from_otel(
    register: Callable[
        [dict[str, Callable[..., None]]],
        Callable[[], None] | None,
    ],
) -> OTelBundle:
    """OTLP/HTTP receiver exposing traces, metrics and logs as separate nodes.

    The HTTP server belongs to the caller. ``register`` is called once, right
    away, with the signal handlers so the caller can wire its OTLP POST
    endpoints to them. Each signal type has its own
    :class:`~graphrefly.core.node.Node`, letting downstream code subscribe
    selectively.

    The ``on_traces`` / ``on_metrics`` / ``on_logs`` handlers each take a list
    and push one ``DATA`` per element inside a single ``batch()``. ``on_error``
    sends ``ERROR`` to all three nodes and silences the handlers afterwards.
    The cleanup returned by ``register`` runs once the nodes' teardown
    callbacks have fired three times in total.

    Args:
        register: Callback receiving a dict with ``on_traces``, ``on_metrics``,
            ``on_logs``, ``on_error`` handler functions. Must return a cleanup
            callable or ``None``.

    Returns:
        :class:`OTelBundle` -- ``{ traces, metrics, logs }`` nodes.

    Example:
        ```python
        from graphrefly.extra.adapters import from_otel

        otel = from_otel(lambda h: (
            # wire your HTTP routes to h["on_traces"], h["on_metrics"], h["on_logs"]
            None
        ))
        ```
    """
    accepting = True
    teardowns = 0
    undo_register: Callable[[], None] | None = None

    def _run_register_cleanup() -> None:
        nonlocal undo_register
        fn = undo_register
        if fn is None:
            return
        # Clear the slot first so the cleanup can never run twice.
        undo_register = None
        fn()

    def _make_signal_node() -> Node[Any]:
        def start(_deps: list[Any], _actions: NodeActions) -> Callable[[], None]:
            def cleanup() -> None:
                nonlocal accepting, teardowns
                teardowns += 1
                if teardowns < 3:
                    return
                accepting = False
                _run_register_cleanup()

            return cleanup

        return node(start, describe_kind="producer", complete_when_deps_complete=False)

    traces = _make_signal_node()
    metrics = _make_signal_node()
    logs = _make_signal_node()

    def _push_each(target: Node[Any], items: list[Any]) -> None:
        # Shared body of the three signal handlers.
        if not accepting:
            return
        with batch():
            for item in items:
                target.down([(MessageType.DATA, item)])

    def _on_traces(spans: list[Any]) -> None:
        _push_each(traces, spans)

    def _on_metrics(ms: list[Any]) -> None:
        _push_each(metrics, ms)

    def _on_logs(ls: list[Any]) -> None:
        _push_each(logs, ls)

    def _on_error(err: BaseException | Any) -> None:
        nonlocal accepting
        if not accepting:
            return
        accepting = False
        for signal_node in (traces, metrics, logs):
            signal_node.down([(MessageType.ERROR, err)])

    handlers: dict[str, Callable[..., None]] = {
        "on_traces": _on_traces,
        "on_metrics": _on_metrics,
        "on_logs": _on_logs,
        "on_error": _on_error,
    }
    undo_register = register(handlers)

    return OTelBundle(traces=traces, metrics=metrics, logs=logs)
1261
+
1262
+
1263
+ # ---------------------------------------------------------------------------
1264
+ # Syslog (RFC 5424)
1265
+ # ---------------------------------------------------------------------------
1266
+
1267
+
1268
def parse_syslog(raw: str) -> dict[str, Any]:
    """Parse a raw RFC 5424 syslog line into a structured dict.

    Format: ``<PRI>VERSION TIMESTAMP HOSTNAME APP-NAME PROCID MSGID MSG``

    ``VERSION`` is optional here. It is only recognised as 1-3 digits followed
    by whitespace, so a version-less line whose timestamp starts with a digit
    keeps its full timestamp, and multi-digit versions do not shift the
    remaining fields. Everything after ``MSGID`` (including any RFC 5424
    STRUCTURED-DATA) is returned verbatim, stripped, as ``message``.

    Args:
        raw: One syslog line.

    Returns:
        A dict with keys ``facility``, ``severity``, ``timestamp``, ``hostname``,
        ``app_name``, ``proc_id``, ``msg_id``, ``message``, ``timestamp_ns``.
        ``facility``/``severity`` are derived from PRI as ``pri >> 3`` and
        ``pri & 7``.

        Falls back gracefully for unparseable input: the whole stripped line
        becomes ``message``, ``facility``/``severity`` default to ``1``/``6``,
        the text fields to ``"-"`` and ``timestamp`` to the current UTC time in
        ISO format.
    """
    match = re.match(
        # The version must be followed by whitespace; a bare ``\d?`` would eat
        # the leading digit of a timestamp when the version is missing.
        r"^<(\d{1,3})>(?:\d{1,3}\s+)?\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*(.*)",
        raw,
        re.S,
    )
    if not match:
        now_ns = wall_clock_ns()
        timestamp = datetime.fromtimestamp(now_ns / 1e9, tz=UTC).isoformat()
        return {
            "facility": 1,
            "severity": 6,
            "timestamp": timestamp,
            "hostname": "-",
            "app_name": "-",
            "proc_id": "-",
            "msg_id": "-",
            "message": raw.strip(),
            "timestamp_ns": now_ns,
        }
    pri = int(match.group(1))
    return {
        "facility": pri >> 3,
        "severity": pri & 7,
        "timestamp": match.group(2),
        "hostname": match.group(3),
        "app_name": match.group(4),
        "proc_id": match.group(5),
        "msg_id": match.group(6),
        "message": (match.group(7) or "").strip(),
        "timestamp_ns": wall_clock_ns(),
    }
1305
+
1306
+
1307
def from_syslog(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """RFC 5424 syslog receiver as a reactive source.

    Reuses the :func:`from_webhook` registration pattern. The caller owns the
    UDP/TCP socket and parses raw lines via :func:`parse_syslog` before calling
    ``emit``.

    Args:
        register: Callback wiring socket to ``emit``/``error``/``complete`` handlers.
            It may return a cleanup callable or ``None``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per syslog message.
    """
    # Pure delegation: no parsing happens here, the caller is expected to pass
    # already-parsed records to ``emit``.
    return from_webhook(register)
1330
+
1331
+
1332
+ # ---------------------------------------------------------------------------
1333
+ # StatsD / DogStatsD
1334
+ # ---------------------------------------------------------------------------
1335
+
1336
# Maps the wire type code of a StatsD/DogStatsD line (the segment after the
# first ``|``) to the metric type name reported by ``parse_statsd``.
_STATSD_TYPES: dict[str, str] = {
    "c": "counter",
    "g": "gauge",
    "ms": "timer",
    "h": "histogram",
    "s": "set",
    "d": "distribution",
}
1344
+
1345
+
1346
def parse_statsd(line: str) -> dict[str, Any]:
    """Parse a raw StatsD/DogStatsD line into a structured dict.

    Format: ``metric.name:value|type|@sampleRate|#tag1:val1,tag2:val2``

    Notes:
        * A missing type segment is treated as ``"c"``; unknown type codes are
          reported as ``"counter"``.
        * Set metrics (``|s``) carry string identifiers rather than numbers, so
          their ``value`` is always ``0``.
        * Tag values may themselves contain ``:`` (e.g. ``url:http://host``);
          only the first colon separates key from value.

    Args:
        line: One StatsD datagram line.

    Returns:
        A dict with keys ``name``, ``value``, ``type``, ``tags``,
        ``timestamp_ns`` and, only when an ``@rate`` segment is present,
        ``sample_rate``.

    Raises:
        ValueError: If the line has no ``name:value`` pair, the name is empty,
            or the value / sample rate is not a valid float.
    """
    parts = line.split("|")
    name_value = parts[0] if parts else ""
    split = name_value.split(":")
    if len(split) < 2 or not split[0]:
        msg = f"Invalid StatsD line: {line}"
        raise ValueError(msg)
    name = split[0].strip()
    value_str = split[1].strip()
    type_code = parts[1].strip() if len(parts) > 1 else "c"
    metric_type = _STATSD_TYPES.get(type_code, "counter")
    # Set types use string identifiers, not numeric values.
    value: float = 0 if type_code == "s" else float(value_str)

    sample_rate: float | None = None
    tags: dict[str, str] = {}

    for part in parts[2:]:
        segment = part.strip()
        if segment.startswith("@"):
            sample_rate = float(segment[1:])
        elif segment.startswith("#"):
            for tag in segment[1:].split(","):
                # partition keeps everything after the first colon, so values
                # such as URLs or "k:a:b" are not truncated.
                tag_key, _, tag_value = tag.partition(":")
                if tag_key:
                    tags[tag_key] = tag_value

    result: dict[str, Any] = {
        "name": name,
        "value": value,
        "type": metric_type,
        "tags": tags,
        "timestamp_ns": wall_clock_ns(),
    }
    if sample_rate is not None:
        result["sample_rate"] = sample_rate
    return result
1395
+
1396
+
1397
def from_statsd(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """StatsD/DogStatsD UDP receiver as a reactive source.

    Reuses the :func:`from_webhook` registration pattern. The caller owns the
    UDP socket and parses raw lines via :func:`parse_statsd` before calling ``emit``.

    Args:
        register: Callback wiring socket to ``emit``/``error``/``complete`` handlers.
            It may return a cleanup callable or ``None``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per metric line.
    """
    # Pure delegation: no parsing happens here, the caller is expected to pass
    # already-parsed metrics to ``emit``.
    return from_webhook(register)
1419
+
1420
+
1421
+ # ---------------------------------------------------------------------------
1422
+ # Prometheus scrape
1423
+ # ---------------------------------------------------------------------------
1424
+
1425
+
1426
def _find_label_set_end(line: str, open_idx: int) -> int:
    """Return the index of the ``}`` closing the label set opened at *open_idx*.

    Braces inside double-quoted label values (including after backslash
    escapes) are ignored. Returns ``-1`` when no closing brace is found outside
    quotes.
    """
    in_quotes = False
    escaped = False
    for idx in range(open_idx + 1, len(line)):
        ch = line[idx]
        if escaped:
            escaped = False
        elif in_quotes and ch == "\\":
            escaped = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif ch == "}" and not in_quotes:
            return idx
    return -1


def parse_prometheus_text(text: str) -> list[dict[str, Any]]:
    """Parse Prometheus exposition format text into a list of metric dicts.

    ``# TYPE`` and ``# HELP`` comments are remembered and attached to the
    samples that follow them; other comments and blank lines are ignored.
    Metadata is looked up first under the sample name with a trailing
    ``_total``/``_count``/``_sum``/``_bucket``/``_created``/``_info`` removed,
    then under the full sample name. Sample lines with no name, no value or an
    unterminated label set are skipped.

    Args:
        text: Full body of a ``/metrics`` response.

    Returns:
        One dict per sample with keys ``name``, ``labels``, ``value``,
        ``timestamp_ns`` plus, when available, ``timestamp_ms``, ``type`` and
        ``help``.

    Raises:
        ValueError: If a sample value or timestamp is not a valid float.
    """
    results: list[dict[str, Any]] = []
    types: dict[str, str] = {}
    helps: dict[str, str] = {}

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith(("# TYPE ", "# HELP ")):
            # Both prefixes are 7 characters long: "<name> <payload>" follows.
            target = types if line.startswith("# TYPE ") else helps
            rest = line[7:]
            space_idx = rest.find(" ")
            if space_idx > 0:
                target[rest[:space_idx]] = rest[space_idx + 1 :].strip()
            continue
        if line.startswith("#"):
            continue

        # metric_name{label="value"} 123 timestamp?
        brace_idx = line.find("{")
        if brace_idx >= 0:
            name = line[:brace_idx]
            # Quote-aware search so a "}" inside a label value does not end the
            # label set early; fall back to the plain search for malformed
            # (unbalanced-quote) input.
            close_brace = _find_label_set_end(line, brace_idx)
            if close_brace < 0:
                close_brace = line.find("}", brace_idx)
            if close_brace < 0:
                continue
            label_str = line[brace_idx + 1 : close_brace]
            labels = _parse_prometheus_labels(label_str)
            after = line[close_brace + 1 :].strip().split()
            value_str = after[0] if after else ""
            ts_str = after[1] if len(after) > 1 else None
        else:
            parts = line.split()
            name = parts[0] if parts else ""
            value_str = parts[1] if len(parts) > 1 else ""
            ts_str = parts[2] if len(parts) > 2 else None
            labels = {}

        if not name or not value_str:
            continue

        base_name = re.sub(r"(_total|_count|_sum|_bucket|_created|_info)$", "", name)
        entry: dict[str, Any] = {
            "name": name,
            "labels": labels,
            "value": float(value_str),
            "timestamp_ns": wall_clock_ns(),
        }
        if ts_str:
            entry["timestamp_ms"] = float(ts_str)
        t = types.get(base_name) or types.get(name)
        if t:
            entry["type"] = t
        h = helps.get(base_name) or helps.get(name)
        if h:
            entry["help"] = h
        results.append(entry)

    return results
1496
+
1497
+
1498
def _parse_prometheus_labels(s: str) -> dict[str, str]:
    """Parse the inside of a ``{...}`` label set into a ``name -> value`` dict.

    Args:
        s: Text between the braces, e.g. ``method="GET",code="200"``.

    Returns:
        Label values with exposition-format escapes decoded: ``\\n`` becomes a
        newline, while ``\\\\`` and ``\\"`` (and any other escaped character)
        become the character itself. Later duplicates of a label name win.
    """

    def _unescape(m: re.Match[str]) -> str:
        escaped = m.group(1)
        # "\n" is the one escape that does not stand for the character itself.
        return "\n" if escaped == "n" else escaped

    return {
        m.group(1): re.sub(r"\\(.)", _unescape, m.group(2))
        for m in re.finditer(r'(\w+)="((?:[^"\\]|\\.)*)"', s)
    }
1503
+
1504
+
1505
def from_prometheus(
    endpoint: str,
    *,
    interval_ns: int = 15_000_000_000,
    headers: dict[str, str] | None = None,
    timeout_ns: int = 10_000_000_000,
) -> Node[Any]:
    """Scrape a Prometheus ``/metrics`` endpoint on a reactive timer interval.

    Each scrape parses the exposition format and emits one ``DATA`` per metric line.
    Uses a timer thread internally (reactive timer source, not busy-wait polling).
    The first scrape starts immediately on a background thread; each later one
    is scheduled ``interval_ns`` after the previous scrape finished. The first
    failed scrape emits ``ERROR`` and stops the source, unless the node has
    already been torn down, in which case the failure is dropped silently.

    Args:
        endpoint: URL of the Prometheus metrics endpoint.
        interval_ns: Scrape interval in nanoseconds. Default ``15_000_000_000`` (15s).
        headers: Optional request headers (added after the default
            ``Accept: text/plain``).
        timeout_ns: Request timeout in nanoseconds. Default ``10_000_000_000`` (10s).

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per metric per scrape.
    """
    interval_s = interval_ns / 1e9
    timeout_s = timeout_ns / 1e9

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = [True]
        running = [False]
        timer: list[threading.Timer | None] = [None]

        def scrape() -> None:
            if not active[0]:
                return
            if running[0]:
                # A scrape is still in flight: skip this tick, try again later.
                schedule()
                return
            running[0] = True
            try:
                req = urllib.request.Request(endpoint)
                req.add_header("Accept", "text/plain")
                if headers:
                    for k, v in headers.items():
                        req.add_header(k, v)
                with urllib.request.urlopen(req, timeout=timeout_s) as response:
                    if not active[0]:
                        return
                    text = response.read().decode("utf-8")
                if not active[0]:
                    return
                prom_metrics = parse_prometheus_text(text)
                for m in prom_metrics:
                    if not active[0]:
                        return
                    actions.emit(m)
            except Exception as err:
                # Do not push ERROR into a node that was torn down while the
                # request was in flight (same guard as the sibling sources).
                was_active = active[0]
                active[0] = False
                if was_active:
                    actions.down([(MessageType.ERROR, err)])
                return
            finally:
                running[0] = False
            schedule()

        def schedule() -> None:
            if not active[0]:
                return
            t = threading.Timer(interval_s, scrape)
            t.daemon = True
            # Publish the handle before starting so cleanup() can always cancel
            # the pending timer.
            timer[0] = t
            t.start()

        # Initial scrape in background thread.
        first = threading.Thread(target=scrape, daemon=True)
        first.start()

        def cleanup() -> None:
            active[0] = False
            if timer[0] is not None:
                timer[0].cancel()
                timer[0] = None

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
1587
+
1588
+
1589
+ # ---------------------------------------------------------------------------
1590
+ # Kafka
1591
+ # ---------------------------------------------------------------------------
1592
+
1593
+
1594
@runtime_checkable
class KafkaConsumerLike(Protocol):
    """Duck-typed Kafka consumer (compatible with confluent-kafka, aiokafka).

    Being ``runtime_checkable``, ``isinstance`` only verifies that both methods
    exist, not their signatures.
    """

    # Subscribe the consumer to the given topic names.
    def subscribe(self, topics: list[str]) -> None: ...
    # Run the consume loop, handing each message to *callback*.
    # NOTE(review): from_kafka passes a callback with keyword-only parameters
    # (topic, partition, key, value, headers, offset, timestamp) -- confirm
    # that implementations call it that way.
    def run(self, callback: Callable[..., None]) -> None: ...
1600
+
1601
+
1602
@runtime_checkable
class KafkaProducerLike(Protocol):
    """Duck-typed Kafka producer.

    Being ``runtime_checkable``, ``isinstance`` only verifies that ``send``
    exists, not its signature.
    """

    # Publish one record to *topic*; key and value are keyword-only.
    def send(self, topic: str, *, key: Any = None, value: Any = None) -> None: ...
1607
+
1608
+
1609
def from_kafka(
    consumer: Any,
    topic: str,
    *,
    from_beginning: bool = False,
    deserialize: Callable[[Any], Any] | None = None,
) -> Node[Any]:
    """Kafka consumer as a reactive source.

    Wraps a Kafka-compatible consumer. Each message becomes a ``DATA`` emission
    with structured metadata (topic, partition, key, value, headers, offset,
    timestamp, timestamp_ns). ``consumer.subscribe`` and ``consumer.run`` are
    invoked on a daemon thread; any exception they raise is emitted as ``ERROR``
    while the node is still active.

    Message keys are reported as ``str``: ``bytes``/``bytearray`` keys are
    decoded as UTF-8 (invalid bytes replaced), other non-``None`` keys go
    through ``str()``.

    Args:
        consumer: Kafka consumer instance with ``subscribe`` and ``run`` methods
            (caller owns connect/disconnect lifecycle).
        topic: Topic to consume from.
        from_beginning: Start from beginning of topic. Default ``False``.
            NOTE(review): this flag is currently not read by this function, so
            the start offset must be configured on the consumer itself.
        deserialize: Optional deserializer for message values. Default: ``json.loads``
            with fallback to string.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per Kafka message.
    """
    if deserialize is None:

        def _default_deserialize(buf: Any) -> Any:
            if buf is None:
                return None
            raw = buf if isinstance(buf, (str, bytes)) else str(buf)
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8", errors="replace")
            try:
                return json.loads(raw)
            except (json.JSONDecodeError, ValueError):
                return raw

        deserialize = _default_deserialize

    def _key_to_str(key: Any) -> str | None:
        # str(b"k") would yield "b'k'"; decode binary keys like values instead.
        if key is None:
            return None
        if isinstance(key, (bytes, bytearray)):
            return bytes(key).decode("utf-8", errors="replace")
        return str(key)

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = [True]

        def _run() -> None:
            try:
                consumer.subscribe([topic])

                def on_message(
                    *,
                    topic: str = "",
                    partition: int = 0,
                    key: Any = None,
                    value: Any = None,
                    headers: dict[str, str] | None = None,
                    offset: str = "0",
                    timestamp: str = "",
                ) -> None:
                    # Messages arriving after teardown are dropped.
                    if not active[0]:
                        return
                    actions.emit(
                        {
                            "topic": topic,
                            "partition": partition,
                            "key": _key_to_str(key),
                            "value": deserialize(value),
                            "headers": headers or {},
                            "offset": offset,
                            "timestamp": timestamp,
                            "timestamp_ns": wall_clock_ns(),
                        }
                    )

                consumer.run(on_message)
            except BaseException as err:
                if active[0]:
                    actions.down([(MessageType.ERROR, err)])

        t = threading.Thread(target=_run, daemon=True)
        t.start()

        def cleanup() -> None:
            # Only flips the flag: the consumer's lifecycle belongs to the caller.
            active[0] = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
1693
+
1694
+
1695
def to_kafka(
    source: Node[Any],
    producer: Any,
    topic: str,
    *,
    serialize: Callable[[Any], Any] | None = None,
    key_extractor: Callable[[Any], str | None] | None = None,
    on_transport_error: Callable[[SinkTransportError], None] | None = None,
) -> Callable[[], None]:
    """Kafka producer sink: publish every upstream ``DATA`` value to a topic.

    The sink subscribes to *source* immediately and hands back the matching
    unsubscribe function. Failures while serializing or sending are reported
    through ``on_transport_error`` (when given) and otherwise dropped; the
    ``DATA`` message counts as handled either way.

    Args:
        source: Upstream node to forward.
        producer: Kafka producer instance with a ``send`` method.
        topic: Target topic.
        serialize: Optional serializer. Default: ``json.dumps``.
        key_extractor: Optional function deriving a message key from the value.
        on_transport_error: Optional callback for transport errors. Receives a
            :class:`SinkTransportError` with ``stage`` (``"serialize"`` or
            ``"send"``), ``error``, and ``value``.

    Returns:
        An unsubscribe ``Callable[[], None]`` to tear down the sink.
    """
    encode = json.dumps if serialize is None else serialize

    def _report(stage: str, err: Exception, value: Any) -> None:
        if on_transport_error is None:
            return
        on_transport_error(SinkTransportError(stage=stage, error=err, value=value))

    def _on_message(msg: Any, _index: int, _actions: NodeActions) -> bool:
        # Only DATA is intercepted; every other message type is left alone.
        if msg[0] is not MessageType.DATA:
            return False
        value = msg[1] if len(msg) > 1 else None
        key = key_extractor(value) if key_extractor else None
        try:
            payload = encode(value)
        except Exception as err:
            _report("serialize", err, value)
            return True
        try:
            producer.send(topic, key=key, value=payload)
        except Exception as err:
            _report("send", err, value)
        return True

    effect = node(
        [source],
        lambda _deps, _actions: lambda: None,
        describe_kind="effect",
        on_message=_on_message,
    )
    return effect.subscribe(lambda _msgs: None)
1751
+
1752
+
1753
+ # ---------------------------------------------------------------------------
1754
+ # Redis Streams
1755
+ # ---------------------------------------------------------------------------
1756
+
1757
+
1758
@runtime_checkable
class RedisClientLike(Protocol):
    """Duck-typed Redis client (compatible with redis-py, ioredis).

    Being ``runtime_checkable``, ``isinstance`` only verifies that both methods
    exist, not their signatures.
    """

    # Append an entry made of *fields* to the stream *name*.
    def xadd(self, name: str, fields: dict[str, str], **kwargs: Any) -> Any: ...
    # Read entries; *streams* maps each stream key to the ID to read after.
    def xread(self, streams: dict[str, str], **kwargs: Any) -> Any: ...
1764
+
1765
+
1766
def from_redis_stream(
    client: Any,
    key: str,
    *,
    block_ms: int = 5000,
    start_id: str = "$",
    parse: Callable[[dict[str, str]], Any] | None = None,
) -> Node[Any]:
    """Consume a Redis stream reactively via blocking ``XREAD``.

    A daemon thread calls ``client.xread({key: last_id}, block=block_ms)`` in a
    loop and emits one dict per entry with keys ``id``, ``key``, ``data`` and
    ``timestamp_ns``. Any exception raised while still subscribed is emitted as
    ``ERROR`` and ends the loop.

    Args:
        client: Redis client instance with ``xread`` method (caller owns connection).
        key: Redis stream key.
        block_ms: Block timeout in ms for XREAD. Default ``5000``.
        start_id: Start ID. Default ``"$"`` (new entries only).
        parse: Optional parser for raw Redis hash fields. Default: parse the
            ``data`` field as JSON (falling back to its raw value), or return a
            copy of the fields dict when there is no ``data`` field.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per stream entry.
    """

    def _default_parse(fields: dict[str, str]) -> Any:
        if "data" not in fields:
            return dict(fields)
        try:
            return json.loads(fields["data"])
        except (json.JSONDecodeError, ValueError):
            return fields["data"]

    decode = _default_parse if parse is None else parse

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        alive = True
        cursor = start_id

        def poll() -> None:
            nonlocal cursor
            while alive:
                try:
                    reply = client.xread({key: cursor}, block=block_ms)
                    if not alive:
                        return
                    # An empty/None reply means the block timed out: loop again.
                    for _stream_key, entries in reply or ():
                        for entry_id, fields in entries:
                            # Advance the cursor before emitting the entry.
                            cursor = entry_id
                            event = {
                                "id": entry_id,
                                "key": key,
                                "data": decode(fields),
                                "timestamp_ns": wall_clock_ns(),
                            }
                            actions.emit(event)
                except BaseException as err:
                    if alive:
                        actions.down([(MessageType.ERROR, err)])
                    return

        worker = threading.Thread(target=poll, daemon=True)
        worker.start()

        def cleanup() -> None:
            nonlocal alive
            alive = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
1841
+
1842
+
1843
def to_redis_stream(
    source: Node[Any],
    client: Any,
    key: str,
    *,
    serialize: Callable[[Any], dict[str, str]] | None = None,
    max_len: int | None = None,
    on_transport_error: Callable[[SinkTransportError], None] | None = None,
) -> Callable[[], None]:
    """Redis Streams producer sink: append every upstream ``DATA`` to a stream.

    The sink subscribes to *source* immediately and hands back the matching
    unsubscribe function. Failures while serializing or calling ``xadd`` are
    reported through ``on_transport_error`` (when given) and otherwise dropped;
    the ``DATA`` message counts as handled either way.

    Args:
        source: Upstream node to forward.
        client: Redis client instance with an ``xadd`` method.
        key: Redis stream key.
        serialize: Optional serializer returning a dict of string fields.
            Default: ``{"data": json.dumps(value)}``.
        max_len: Optional max stream length (MAXLEN ~); passed to ``xadd`` as
            ``maxlen`` only when not ``None``.
        on_transport_error: Optional callback for transport errors. Receives a
            :class:`SinkTransportError` with ``stage`` (``"serialize"`` or
            ``"send"``), ``error``, and ``value``.

    Returns:
        An unsubscribe ``Callable[[], None]`` to tear down the sink.
    """

    def _default_serialize(v: Any) -> dict[str, str]:
        return {"data": json.dumps(v)}

    to_fields = _default_serialize if serialize is None else serialize
    xadd_options: dict[str, Any] = {} if max_len is None else {"maxlen": max_len}

    def _report(stage: str, err: Exception, value: Any) -> None:
        if on_transport_error is None:
            return
        on_transport_error(SinkTransportError(stage=stage, error=err, value=value))

    def _on_message(msg: Any, _index: int, _actions: NodeActions) -> bool:
        # Only DATA is intercepted; every other message type is left alone.
        if msg[0] is not MessageType.DATA:
            return False
        value = msg[1] if len(msg) > 1 else None
        try:
            fields = to_fields(value)
        except Exception as err:
            _report("serialize", err, value)
            return True
        try:
            client.xadd(key, fields, **xadd_options)
        except Exception as err:
            _report("send", err, value)
        return True

    effect = node(
        [source],
        lambda _deps, _actions: lambda: None,
        describe_kind="effect",
        on_message=_on_message,
    )
    return effect.subscribe(lambda _msgs: None)
1906
+
1907
+
1908
+ # ---------------------------------------------------------------------------
1909
+ # CSV ingest
1910
+ # ---------------------------------------------------------------------------
1911
+
1912
+
1913
def from_csv(
    source: Iterable[str],
    *,
    delimiter: str = ",",
    has_header: bool = True,
    columns: list[str] | None = None,
    parse_line: Callable[[str], list[str]] | None = None,
) -> Node[Any]:
    """Replay CSV lines as a reactive source, one ``DATA`` dict per row.

    *source* is drained on a daemon thread. Rows whose cells are all blank are
    skipped. When a row has fewer cells than there are column names the missing
    cells become ``""``; surplus cells are ignored. ``COMPLETE`` follows the
    last row, and any exception raised while draining is emitted as ``ERROR``
    (both only while the node is still active).

    Column names are chosen in this order: ``columns`` if non-empty, else the
    first non-blank row when ``has_header`` is true, else ``col0``, ``col1``,
    ... sized from the first data row.

    Args:
        source: Iterable of CSV text lines.
        delimiter: Column delimiter. Default ``","``.
        has_header: Whether the first row is a header. Default ``True``.
        columns: Explicit column names (overrides header row).
        parse_line: Optional custom line parser. When provided, each line is passed
            to this function instead of using ``csv.reader``. Must return a list of
            field strings.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per parsed row.
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        draining = True

        def drain() -> None:
            try:
                names: list[str] | None = list(columns) if columns else None
                if parse_line is None:
                    rows = csv.reader(source, delimiter=delimiter)
                else:
                    rows = (parse_line(line) for line in source)

                for row in rows:
                    if not draining:
                        return
                    # Skip rows that contain nothing but whitespace.
                    if all(not cell.strip() for cell in row):
                        continue
                    if names is None:
                        if has_header:
                            names = row
                            continue
                        names = [f"col{i}" for i in range(len(row))]
                    width = len(row)
                    record: dict[str, str] = {
                        name: (row[i] if i < width else "") for i, name in enumerate(names)
                    }
                    actions.emit(record)

                if draining:
                    actions.down([(MessageType.COMPLETE,)])
            except BaseException as err:
                if draining:
                    actions.down([(MessageType.ERROR, err)])

        worker = threading.Thread(target=drain, daemon=True)
        worker.start()

        def cleanup() -> None:
            nonlocal draining
            draining = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
1978
+
1979
+
1980
+ # ---------------------------------------------------------------------------
1981
+ # NDJSON ingest
1982
+ # ---------------------------------------------------------------------------
1983
+
1984
+
1985
def from_ndjson(source: Iterable[str]) -> Node[Any]:
    """Replay newline-delimited JSON as a stream of decoded values.

    ``source`` is drained on a daemon thread started when the producer starts.
    Each line is stripped of surrounding whitespace; blank lines are skipped
    and every other line is decoded with ``json.loads`` and emitted as one
    ``DATA``. ``COMPLETE`` follows once the iterable is exhausted. A line that
    fails to decode (or any other exception while reading) ends the stream
    with ``ERROR``; later lines are not processed.

    Args:
        source: Iterable of NDJSON text lines.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per JSON line.
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # Single-element list so the nested closures can flip the flag
        # without needing ``nonlocal``.
        alive = [True]

        def pump() -> None:
            try:
                for raw in source:
                    if not alive[0]:
                        # Torn down mid-stream: stop silently, no COMPLETE.
                        return
                    text = raw.strip()
                    if text:
                        actions.emit(json.loads(text))
                if alive[0]:
                    actions.down([(MessageType.COMPLETE,)])
            except BaseException as exc:
                if alive[0]:
                    actions.down([(MessageType.ERROR, exc)])

        def cleanup() -> None:
            alive[0] = False

        threading.Thread(target=pump, daemon=True).start()
        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
2025
+
2026
+
2027
+ # ---------------------------------------------------------------------------
2028
+ # ClickHouse live materialized view
2029
+ # ---------------------------------------------------------------------------
2030
+
2031
+
2032
@runtime_checkable
class ClickHouseClientLike(Protocol):
    """Duck-typed ClickHouse client.

    Any object that exposes a ``query`` method satisfies this protocol; no
    inheritance is required. Because the protocol is ``runtime_checkable``,
    ``isinstance`` checks only verify that the method exists, not that its
    signature matches.
    """

    def query(self, query: str, *, format: str = "JSONEachRow") -> Any:
        """Execute ``query`` and return the result in the requested ``format``."""
        ...
2037
+
2038
+
2039
def from_clickhouse_watch(
    client: Any,
    query: str,
    *,
    interval_ns: int = 5_000_000_000,
    format: str = "JSONEachRow",
) -> Node[Any]:
    """Poll a ClickHouse query on a timer and emit its rows.

    The first query runs immediately on a background daemon thread. After each
    poll finishes, a ``threading.Timer`` is armed for the next one, so the gap
    between polls is ``interval_ns`` measured from the end of the previous
    poll (no busy-waiting). Results that are not already a ``list`` are
    materialized with ``list(...)`` before emission.

    Any exception raised by ``client.query`` or while emitting rows stops the
    polling loop and is forwarded downstream as ``ERROR``. Tearing the node
    down cancels the pending timer.

    Args:
        client: ClickHouse client instance with a ``query`` method (caller owns connection).
        query: SQL query to execute on each interval.
        interval_ns: Polling interval in nanoseconds. Default ``5_000_000_000`` (5s).
        format: JSON format to request. Default ``"JSONEachRow"``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per result row per poll.
    """
    interval_s = interval_ns / 1e9

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # Single-element lists act as mutable cells shared by the closures.
        alive = [True]
        busy = [False]
        pending: list[threading.Timer | None] = [None]

        def arm() -> None:
            """Schedule the next poll unless the node has been torn down."""
            if alive[0]:
                handle = threading.Timer(interval_s, poll)
                handle.daemon = True
                handle.start()
                pending[0] = handle

        def poll() -> None:
            """Run one query, emit its rows, then re-arm the timer."""
            if not alive[0]:
                return
            if busy[0]:
                # A previous poll is still in flight; skip this tick and retry later.
                arm()
                return
            busy[0] = True
            try:
                result = client.query(query, format=format)
                if not alive[0]:
                    return
                for row in result if isinstance(result, list) else list(result):
                    if not alive[0]:
                        return
                    actions.emit(row)
            except Exception as exc:
                # A failed poll is terminal: stop polling and surface the error.
                alive[0] = False
                actions.down([(MessageType.ERROR, exc)])
                return
            finally:
                busy[0] = False
            arm()

        def cleanup() -> None:
            alive[0] = False
            handle = pending[0]
            if handle is not None:
                handle.cancel()
                pending[0] = None

        # Initial poll runs in a background thread.
        threading.Thread(target=poll, daemon=True).start()
        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
2112
+
2113
+
2114
+ # ---------------------------------------------------------------------------
2115
+ # __all__
2116
+ # ---------------------------------------------------------------------------
2117
+
2118
# Names exported by ``from <this module> import *``. Kept as a plain list
# literal so static tools can read it; entries are grouped by origin.
__all__ = [
    # Moved from sources.py
    "HttpBundle",
    "from_http",
    "from_event_emitter",
    "from_fs_watch",
    "from_webhook",
    "from_websocket",
    "to_websocket",
    "sse_frame",
    "to_sse",
    "from_mcp",
    "from_git_hook",
    # 5.3b -- Ingest adapters
    "SinkTransportError",
    "OTelBundle",
    "from_otel",
    "parse_syslog",
    "from_syslog",
    "parse_statsd",
    "from_statsd",
    "parse_prometheus_text",
    "from_prometheus",
    "from_kafka",
    "to_kafka",
    "from_redis_stream",
    "to_redis_stream",
    "from_csv",
    "from_ndjson",
    "from_clickhouse_watch",
    "ClickHouseClientLike",
]