graphrefly 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphrefly/__init__.py +160 -0
- graphrefly/compat/__init__.py +18 -0
- graphrefly/compat/async_utils.py +228 -0
- graphrefly/compat/asyncio_runner.py +89 -0
- graphrefly/compat/trio_runner.py +81 -0
- graphrefly/core/__init__.py +142 -0
- graphrefly/core/clock.py +20 -0
- graphrefly/core/dynamic_node.py +749 -0
- graphrefly/core/guard.py +277 -0
- graphrefly/core/meta.py +149 -0
- graphrefly/core/node.py +963 -0
- graphrefly/core/protocol.py +460 -0
- graphrefly/core/runner.py +107 -0
- graphrefly/core/subgraph_locks.py +296 -0
- graphrefly/core/sugar.py +138 -0
- graphrefly/core/versioning.py +193 -0
- graphrefly/extra/__init__.py +313 -0
- graphrefly/extra/adapters.py +2149 -0
- graphrefly/extra/backoff.py +287 -0
- graphrefly/extra/backpressure.py +113 -0
- graphrefly/extra/checkpoint.py +307 -0
- graphrefly/extra/composite.py +303 -0
- graphrefly/extra/cron.py +133 -0
- graphrefly/extra/data_structures.py +707 -0
- graphrefly/extra/resilience.py +727 -0
- graphrefly/extra/sources.py +766 -0
- graphrefly/extra/tier1.py +1067 -0
- graphrefly/extra/tier2.py +1802 -0
- graphrefly/graph/__init__.py +31 -0
- graphrefly/graph/graph.py +2249 -0
- graphrefly/integrations/__init__.py +1 -0
- graphrefly/integrations/fastapi.py +767 -0
- graphrefly/patterns/__init__.py +5 -0
- graphrefly/patterns/ai.py +2132 -0
- graphrefly/patterns/cqrs.py +515 -0
- graphrefly/patterns/memory.py +639 -0
- graphrefly/patterns/messaging.py +553 -0
- graphrefly/patterns/orchestration.py +536 -0
- graphrefly/patterns/reactive_layout/__init__.py +81 -0
- graphrefly/patterns/reactive_layout/measurement_adapters.py +276 -0
- graphrefly/patterns/reactive_layout/reactive_block_layout.py +434 -0
- graphrefly/patterns/reactive_layout/reactive_layout.py +943 -0
- graphrefly/py.typed +1 -0
- graphrefly-0.1.0.dist-info/METADATA +253 -0
- graphrefly-0.1.0.dist-info/RECORD +47 -0
- graphrefly-0.1.0.dist-info/WHEEL +4 -0
- graphrefly-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,2149 @@
|
|
|
1
|
+
"""Protocol, system, and ingest adapters (roadmap 5.2, 5.3b).
|
|
2
|
+
|
|
3
|
+
Each adapter wraps an external protocol or system as a reactive :class:`~graphrefly.core.node.Node`
|
|
4
|
+
built on :func:`~graphrefly.core.node.node` -- no second protocol.
|
|
5
|
+
|
|
6
|
+
**Moved from sources.py:** ``from_http``, ``from_websocket`` / ``to_websocket``,
|
|
7
|
+
``from_webhook``, ``to_sse``, ``from_mcp``, ``from_git_hook``, ``from_event_emitter``,
|
|
8
|
+
``from_fs_watch``, ``sse_frame``, ``HttpBundle``.
|
|
9
|
+
|
|
10
|
+
**New (5.3b):** ``from_otel``, ``from_syslog`` / ``parse_syslog``, ``from_statsd`` /
|
|
11
|
+
``parse_statsd``, ``from_prometheus`` / ``parse_prometheus_text``, ``from_kafka`` /
|
|
12
|
+
``to_kafka``, ``from_redis_stream`` / ``to_redis_stream``, ``from_csv`` / ``from_ndjson``,
|
|
13
|
+
``from_clickhouse_watch``.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import csv
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
import threading
|
|
23
|
+
import urllib.error
|
|
24
|
+
import urllib.request
|
|
25
|
+
from contextlib import suppress
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from datetime import UTC, datetime
|
|
28
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
32
|
+
|
|
33
|
+
from graphrefly.core.clock import wall_clock_ns
|
|
34
|
+
from graphrefly.core.node import Node, NodeActions, node
|
|
35
|
+
from graphrefly.core.protocol import Messages, MessageType, batch
|
|
36
|
+
from graphrefly.extra.resilience import WithStatusBundle, with_status
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _msg_val(m: tuple[Any, ...]) -> Any:
    """Return the payload slot (index 1) of a protocol message tuple.

    The caller is expected to pass a message that carries a payload; a tuple
    shorter than two items violates that internal invariant.
    """
    carries_payload = len(m) >= 2
    assert carries_payload
    return m[1]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True, slots=True)
|
|
45
|
+
class SinkTransportError:
|
|
46
|
+
"""Error context for sink transport failures (to_kafka, to_redis_stream)."""
|
|
47
|
+
|
|
48
|
+
stage: str
|
|
49
|
+
error: Exception
|
|
50
|
+
value: Any
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# HttpBundle / from_http (moved from sources.py)
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(slots=True, frozen=True)
class HttpBundle(WithStatusBundle):
    """Bundle returned by :func:`from_http`.

    Extends the status bundle with two companion nodes that :func:`from_http`
    updates after each successful fetch.
    """

    # Number of successful fetches so far.
    fetch_count: Node[int]
    # Wall-clock timestamp (nanoseconds) of the latest successful fetch.
    last_updated: Node[int]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def from_http(
    url: str,
    *,
    method: str = "GET",
    headers: dict[str, str] | None = None,
    body: Any = None,
    transform: Callable[[Any], Any] | None = None,
    timeout_ns: int = 30_000_000_000,
    **kwargs: Any,
) -> HttpBundle:
    """Create a one-shot reactive HTTP source with lifecycle tracking.

    Uses :func:`urllib.request.urlopen` internally to remain zero-dependency.
    Performs a single fetch when subscribed, then completes. For periodic
    fetching, compose with ``switch_map`` and a time source.

    Args:
        url: The URL to fetch.
        method: HTTP method (default ``"GET"``).
        headers: Optional request headers.
        body: Optional request body. ``str`` is sent UTF-8 encoded, bytes-like
            objects are sent unchanged, and any other value is JSON-encoded.
            JSON-encoded bodies get ``Content-Type: application/json`` unless
            ``headers`` already supplies a content type.
        transform: Optional function to transform raw response bytes
            (signature: ``Callable[[bytes], Any]``). Default: ``json.loads``.
        timeout_ns: Request timeout in **nanoseconds** (default ``30s``).
        **kwargs: Passed to :func:`~graphrefly.core.node.node` as options.

    Returns:
        An :class:`HttpBundle` wrapping the primary node and companions.

    Example:
        ```python
        from graphrefly.extra.adapters import from_http
        from graphrefly.extra.tier2 import switch_map
        from graphrefly.extra import from_timer

        # One-shot:
        api = from_http("https://api.example.com/data")

        # Periodic polling via reactive composition:
        polled = switch_map(lambda _: from_http(url))(from_timer(0, period=5.0))
        ```
    Notes:
        This source is implemented with ``threading.Thread`` + ``urllib`` and does
        not currently support external cancellation signals (TS ``AbortSignal`` parity
        is deferred). Unsubscribe prevents any late emissions from being forwarded.
    """
    from graphrefly.core.sugar import state

    ns_per_sec = 1_000_000_000
    base_name = kwargs.get("name", "http")
    fetch_count = state(0, name=f"{base_name}/fetch_count")
    last_updated = state(0, name=f"{base_name}/last_updated")

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # Single-element list so the worker thread and cleanup share one flag.
        active = [True]

        def task() -> None:
            if not active[0]:
                return
            try:
                # Body encoding stays inside the try so that an unserializable
                # body surfaces as an ERROR message instead of killing the thread.
                data_bytes: bytes | None = None
                json_encoded = False
                if isinstance(body, str):
                    data_bytes = body.encode("utf-8")
                elif isinstance(body, (bytes, bytearray, memoryview)):
                    data_bytes = bytes(body)
                elif body is not None:
                    data_bytes = json.dumps(body).encode("utf-8")
                    json_encoded = True

                req = urllib.request.Request(url, data=data_bytes, method=method)
                for k, v in (headers or {}).items():
                    req.add_header(k, v)
                # urllib labels data as application/x-www-form-urlencoded when no
                # content type is set, which is wrong for a JSON payload.
                # add_header() stores names via str.capitalize(), so this lookup
                # matches a caller-supplied header regardless of its casing.
                if json_encoded and not req.has_header("Content-type"):
                    req.add_header("Content-Type", "application/json")

                with urllib.request.urlopen(req, timeout=timeout_ns / ns_per_sec) as response:
                    if not active[0]:
                        return
                    raw_data = response.read()
                    res_data = transform(raw_data) if transform else json.loads(raw_data)

                if not active[0]:
                    return

                with batch():
                    current_count = fetch_count.get()
                    next_count = (current_count if isinstance(current_count, int) else 0) + 1
                    fetch_count.down([(MessageType.DATA, next_count)])
                    last_updated.down([(MessageType.DATA, wall_clock_ns())])
                    actions.emit(res_data)
                    actions.down([(MessageType.COMPLETE,)])

            except BaseException as err:
                # Drop the failure silently once the subscriber has gone away.
                if not active[0]:
                    return
                actions.down([(MessageType.ERROR, err)])

        t = threading.Thread(target=task, daemon=True)
        t.start()

        def cleanup() -> None:
            # The request itself cannot be aborted; this only mutes late results.
            active[0] = False

        return cleanup

    out = node(
        start,
        describe_kind="http",
        complete_when_deps_complete=False,
        **kwargs,
    )
    tracked = with_status(out)

    return HttpBundle(
        node=tracked.node,
        status=tracked.status,
        error=tracked.error,
        fetch_count=fetch_count,
        last_updated=last_updated,
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# from_event_emitter (moved from sources.py)
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def from_event_emitter(
    emitter: Any,
    event_name: str,
    *,
    add_method: str = "add_listener",
    remove_method: str = "remove_listener",
) -> Node[Any]:
    """Turn events from a listener-style emitter into a reactive source.

    A listener is attached through ``getattr(emitter, add_method)`` when the
    node starts. A single-argument event is emitted as that argument; any other
    arity is emitted as the argument tuple. Teardown detaches the listener via
    ``remove_method`` (best effort) and mutes any late callbacks.
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        listening = True

        def handler(*args: Any) -> None:
            if listening:
                actions.emit(args[0] if len(args) == 1 else args)

        attach = getattr(emitter, add_method)
        attach(event_name, handler)

        def cleanup() -> None:
            nonlocal listening
            listening = False
            # Detaching is best effort: a missing or failing remove method is ignored.
            with suppress(Exception):
                getattr(emitter, remove_method)(event_name, handler)

        return cleanup

    return node(start, describe_kind="from_event_emitter", complete_when_deps_complete=False)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# from_fs_watch helpers (moved from sources.py)
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _glob_to_regex(pattern: str) -> re.Pattern[str]:
    """Compile a minimal glob into an anchored regular expression.

    ``**`` matches any run of characters (including ``/``), a single ``*``
    matches any run without ``/``, and every other character is literal.
    """
    pieces: list[str] = []
    # Splitting on a capturing group keeps the wildcard tokens in the result;
    # "**" is listed first so a double star is never read as two single stars.
    for token in re.split(r"(\*\*|\*)", pattern):
        if token == "**":
            pieces.append(".*")
        elif token == "*":
            pieces.append("[^/]*")
        else:
            pieces.append(re.escape(token))
    return re.compile("^" + "".join(pieces) + "$")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _matches_any(path: str, patterns: list[re.Pattern[str]]) -> bool:
    """Return True when at least one compiled pattern is found in ``path``."""
    for candidate in patterns:
        if candidate.search(path) is not None:
            return True
    return False
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _build_watchdog_backend(
    paths: list[str],
    recursive: bool,
    on_event: Callable[[str, str, str, str | None, str | None], None],
    on_error: Callable[[BaseException], None],
) -> tuple[list[Any], Callable[[], None]]:
    """Start one watchdog observer per path and return them with a stop function.

    Args:
        paths: Directories or files to watch; one observer is started for each.
        recursive: Forwarded to ``Observer.schedule``.
        on_event: Called for every non-directory event as
            ``(event_type, path, root, src_path, dest_path)``, where ``path`` is
            the destination path when present, otherwise the source path.
        on_error: Called with any exception raised while handling an event.

    Returns:
        ``(observers, stop)``. ``stop`` stops every observer and then waits up
        to one second for each to finish.

    Raises:
        RuntimeError: If ``watchdog`` cannot be imported.
        Exception: Whatever starting an observer raised; observers that were
            already started are stopped first.
    """
    try:
        from watchdog.events import FileSystemEventHandler  # type: ignore[import-not-found]
        from watchdog.observers import Observer  # type: ignore[import-not-found]
    except Exception as err:  # pragma: no cover - exercised via monkeypatch in tests
        msg = (
            "from_fs_watch requires watchdog (no polling fallback by design). "
            "Install with `uv add watchdog`."
        )
        raise RuntimeError(msg) from err

    class _Handler(FileSystemEventHandler):  # type: ignore[misc]
        def __init__(self, root: str) -> None:
            super().__init__()
            self._root = root

        def on_any_event(self, event: Any) -> None:
            if getattr(event, "is_directory", False):
                return
            try:
                event_type = str(getattr(event, "event_type", "change"))
                src_path = getattr(event, "src_path", None)
                dest_path = getattr(event, "dest_path", None)
                path = str(dest_path or src_path or getattr(event, "path", ""))
                if path:
                    on_event(
                        event_type,
                        path,
                        self._root,
                        str(src_path) if src_path else None,
                        str(dest_path) if dest_path else None,
                    )
            except BaseException as err:  # pragma: no cover - defensive callback path
                on_error(err)

    observers: list[Any] = []

    def stop() -> None:
        # Best effort: one misbehaving observer must not keep the others running.
        for observer in observers:
            with suppress(Exception):
                observer.stop()
        # stop() may be reached from on_error(), i.e. from inside a handler
        # callback. If that callback runs on the observer's own thread, joining
        # it would raise RuntimeError ("cannot join current thread"), so skip it.
        current = threading.current_thread()
        for observer in observers:
            if observer is current:
                continue
            with suppress(Exception):
                observer.join(timeout=1.0)

    try:
        for p in paths:
            observer = Observer()
            observer.schedule(_Handler(str(os.path.abspath(p))), p, recursive=recursive)
            observer.daemon = True
            observer.start()
            observers.append(observer)
    except Exception:
        # Roll back the observers that did start before re-raising.
        stop()
        raise

    return observers, stop
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def from_fs_watch(
    paths: str | list[str],
    *,
    recursive: bool = True,
    debounce: float = 0.1,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Watch filesystem changes and emit debounced events.

    This source intentionally uses event-driven OS watchers only (no polling fallback).

    Each emitted value is a dict with ``type`` (``"change"``, ``"create"``,
    ``"delete"`` or ``"rename"``), ``path``, ``root``, ``relative_path`` and
    ``timestamp_ns``, plus ``src_path`` / ``dest_path`` when the backend reports
    them. Paths are absolute and use forward slashes. Within one debounce window
    only the latest event per path is kept.

    Args:
        paths: One path or a list of paths to watch.
        recursive: Whether subdirectories are watched.
        debounce: Quiet period in seconds; every new event restarts the timer.
        include: Glob patterns; when non-empty an event must match one of them
            (by absolute or root-relative path) to be emitted.
        exclude: Glob patterns for events to drop. ``None`` applies the defaults
            (``node_modules``, ``.git``, ``dist``); an explicit empty list
            disables exclusion.
        **kwargs: Passed to :func:`~graphrefly.core.node.node` as options.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    path_list = [paths] if isinstance(paths, str) else list(paths)
    if not path_list:
        msg = "from_fs_watch expects at least one path"
        raise ValueError(msg)
    include_patterns = [_glob_to_regex(p) for p in (include or [])]
    # Test against None rather than truthiness so that ``exclude=[]`` really
    # means "exclude nothing" instead of falling back to the defaults.
    default_exclude = ["**/node_modules/**", "**/.git/**", "**/dist/**"]
    exclude_patterns = [
        _glob_to_regex(p) for p in (default_exclude if exclude is None else exclude)
    ]

    def normalize_type(event_type: str) -> str:
        low = event_type.lower()
        if low in {"modified", "change", "changed"}:
            return "change"
        if low in {"created", "create"}:
            return "create"
        if low in {"deleted", "delete"}:
            return "delete"
        if low in {"moved", "rename", "renamed"}:
            return "rename"
        # Unknown backend event types are reported as a generic change.
        return "change"

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        lock = threading.Lock()
        pending: dict[str, dict[str, Any]] = {}
        timer: list[threading.Timer | None] = [None]
        active = [True]
        # Bumped on teardown/error so a timer that already fired can tell it is stale.
        generation = [0]

        def _noop_stop_backend() -> None:
            return

        stop_backend_ref: list[Callable[[], None]] = [_noop_stop_backend]

        def flush(token: int) -> None:
            batch_msgs: Messages = []
            with lock:
                timer[0] = None
                if not active[0] or not pending:
                    return
                if token != generation[0]:
                    pending.clear()
                    return
                batch_msgs = [(MessageType.DATA, evt.copy()) for evt in pending.values()]
                pending.clear()
            # Re-check after releasing the lock; delivery itself happens unlocked.
            with lock:
                if not active[0] or token != generation[0]:
                    return
            actions.down(batch_msgs)

        def queue_event(
            event_type: str,
            raw_path: str,
            root: str,
            src_path: str | None,
            dest_path: str | None,
        ) -> None:
            normalized_path = os.path.abspath(raw_path).replace("\\", "/")
            normalized_root = os.path.abspath(root).replace("\\", "/")
            rel_path = os.path.relpath(normalized_path, normalized_root).replace("\\", "/")
            included = (
                not include_patterns
                or _matches_any(normalized_path, include_patterns)
                or _matches_any(rel_path, include_patterns)
            )
            if not included:
                return
            excluded = _matches_any(normalized_path, exclude_patterns) or _matches_any(
                rel_path, exclude_patterns
            )
            if excluded:
                return
            event = {
                "type": normalize_type(event_type),
                "path": normalized_path,
                "root": normalized_root,
                "relative_path": rel_path,
                "timestamp_ns": wall_clock_ns(),
            }
            if src_path is not None:
                event["src_path"] = os.path.abspath(src_path).replace("\\", "/")
            if dest_path is not None:
                event["dest_path"] = os.path.abspath(dest_path).replace("\\", "/")
            with lock:
                if not active[0]:
                    return
                # Keyed by path: a later event for the same file replaces the earlier one.
                pending[normalized_path] = event
                if timer[0] is not None:
                    timer[0].cancel()
                token = generation[0]
                t = threading.Timer(debounce, lambda: flush(token))
                t.daemon = True
                t.start()
                timer[0] = t

        def emit_error(err: BaseException) -> None:
            with lock:
                if not active[0]:
                    return
                active[0] = False
                generation[0] += 1
                if timer[0] is not None:
                    timer[0].cancel()
                    timer[0] = None
                pending.clear()
            # Stopping the backend is best effort. This runs inside a backend
            # callback, where shutdown can itself fail; such a failure must not
            # swallow the ERROR message subscribers are waiting for.
            with suppress(Exception):
                stop_backend_ref[0]()
            actions.down([(MessageType.ERROR, err)])

        _observers, stop_backend = _build_watchdog_backend(
            path_list,
            recursive,
            queue_event,
            emit_error,
        )
        stop_backend_ref[0] = stop_backend

        def cleanup() -> None:
            with lock:
                active[0] = False
                generation[0] += 1
                if timer[0] is not None:
                    timer[0].cancel()
                    timer[0] = None
                pending.clear()
            stop_backend()

        return cleanup

    return node(start, describe_kind="from_fs_watch", complete_when_deps_complete=False, **kwargs)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
# ---------------------------------------------------------------------------
|
|
463
|
+
# from_webhook (moved from sources.py)
|
|
464
|
+
# ---------------------------------------------------------------------------
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def from_webhook(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """Bridge HTTP webhook callbacks into a GraphReFly source.

    The ``register`` callback wires your runtime/framework callback into GraphReFly and may return
    cleanup. It receives three functions: ``emit(payload)``, ``error(err)``, and ``complete()``.

    ``error`` and ``complete`` are terminal: after either has been called, or after ``register``
    itself raised (which is reported as an ERROR message), further calls to any of the three
    functions are ignored.

    This mirrors the source-adapter style of :func:`from_event_emitter`, but targets HTTP webhook
    handlers from frameworks like FastAPI or Flask.

    Example (FastAPI):
        ```python
        from fastapi import FastAPI, Request
        from graphrefly.extra import from_webhook

        app = FastAPI()
        bridge: dict[str, object] = {}

        def register(emit, error, complete):
            bridge["emit"] = emit
            bridge["error"] = error
            bridge["complete"] = complete
            return None

        webhook_node = from_webhook(register)

        @app.post("/webhook")
        async def webhook(request: Request):
            payload = await request.json()
            bridge["emit"](payload)
            return {"ok": True}
        ```

    Example (Flask):
        ```python
        from flask import Flask, jsonify, request
        from graphrefly.extra import from_webhook

        app = Flask(__name__)
        bridge: dict[str, object] = {}

        def register(emit, error, complete):
            bridge["emit"] = emit
            bridge["error"] = error
            bridge["complete"] = complete
            return None

        webhook_node = from_webhook(register)

        @app.post("/webhook")
        def webhook():
            try:
                bridge["emit"](request.get_json(force=True))
                return jsonify({"ok": True}), 200
            except Exception as exc:
                bridge["error"](exc)
                return jsonify({"ok": False}), 500
        ```
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # Single-element list so all closures share one mutable flag.
        active = [True]

        def emit(payload: Any) -> None:
            if not active[0]:
                return
            actions.emit(payload)

        def error(err: BaseException | Any) -> None:
            if not active[0]:
                return
            active[0] = False
            actions.down([(MessageType.ERROR, err)])

        def complete() -> None:
            if not active[0]:
                return
            active[0] = False
            actions.down([(MessageType.COMPLETE,)])

        try:
            cleanup = register(emit, error, complete)
        except BaseException as err:
            # register() may have stored the callbacks before it failed. Mark the
            # source inactive first so they cannot emit after this ERROR, matching
            # what error() and complete() do.
            active[0] = False
            actions.down([(MessageType.ERROR, err)])
            cleanup = None

        def stop() -> None:
            active[0] = False
            if cleanup is not None:
                cleanup()

        return stop

    return node(start, describe_kind="from_webhook", complete_when_deps_complete=False)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
# ---------------------------------------------------------------------------
|
|
572
|
+
# from_websocket / to_websocket (moved from sources.py)
|
|
573
|
+
# ---------------------------------------------------------------------------
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def from_websocket(
    socket: Any | None = None,
    *,
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ]
    | None = None,
    add_method: str = "add_listener",
    remove_method: str = "remove_listener",
    message_event: str = "message",
    error_event: str = "error",
    close_event: str = "close",
    parse: Callable[[Any], Any] | None = None,
    close_on_cleanup: bool = False,
) -> Node[Any]:
    """Expose a WebSocket as a reactive source.

    Two wiring modes are supported. With ``register`` (preferred in Python because it is
    runtime-agnostic) you receive ``emit``, ``error`` and ``complete`` and must return a cleanup
    callable. Otherwise listeners for ``message_event``, ``error_event`` and ``close_event`` are
    attached to ``socket`` through ``add_method`` and detached through ``remove_method``.

    ``register`` must be atomic: it either registers fully and returns a cleanup callable, or
    raises before causing any listener side effects.

    Incoming payloads are unwrapped from a ``data`` attribute or ``"data"`` key when present and
    then passed through ``parse`` if given. The first error or close terminates the source and
    runs cleanup exactly once; ``close_on_cleanup`` additionally closes a directly supplied socket.

    Raises:
        ValueError: If neither ``socket`` nor ``register`` is provided.
    """
    if socket is None and register is None:
        msg = "from_websocket requires either socket or register"
        raise ValueError(msg)

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        guard = threading.Lock()
        is_active = True
        cleanup_done = False
        teardown: Callable[[], None] | None = None

        def run_teardown_once() -> None:
            nonlocal cleanup_done
            with guard:
                if cleanup_done:
                    return
                cleanup_done = True
                chosen = teardown
            # Invoked outside the lock; failures during teardown are ignored.
            if chosen is None:
                return
            with suppress(Exception):
                chosen()

        def finish(msgs: Messages) -> bool:
            nonlocal is_active
            with guard:
                if not is_active:
                    return False
                is_active = False
            run_teardown_once()
            actions.down(msgs)
            return True

        def unwrap(value: Any) -> Any:
            if hasattr(value, "data"):
                return value.data
            if isinstance(value, dict) and "data" in value:
                return value["data"]
            return value

        def emit(payload: Any) -> None:
            with guard:
                if not is_active:
                    return
            try:
                raw = unwrap(payload)
                # Second check: the source may have terminated while unwrapping.
                with guard:
                    still_active = is_active
                if not still_active:
                    return
                actions.emit(raw if parse is None else parse(raw))
            except Exception as err:
                finish([(MessageType.ERROR, err)])

        def error(err: BaseException | Any) -> None:
            # Non-exception error payloads are wrapped so ERROR always carries an exception.
            failure = err if isinstance(err, BaseException) else RuntimeError(str(err))
            finish([(MessageType.ERROR, failure)])

        def complete() -> None:
            finish([(MessageType.COMPLETE,)])

        if register is None:
            assert socket is not None
            listeners: list[tuple[str, Callable[..., None]]] = []

            def on_message(*args: Any) -> None:
                emit(args[0] if len(args) == 1 else args)

            def on_error(*args: Any) -> None:
                error(args[0] if len(args) == 1 else args)

            def on_close(*_args: Any) -> None:
                complete()

            wiring = (
                (message_event, on_message),
                (error_event, on_error),
                (close_event, on_close),
            )
            try:
                for event_name, fn in wiring:
                    getattr(socket, add_method)(event_name, fn)
                    listeners.append((event_name, fn))
            except Exception as err:
                # Undo whatever was attached before the failure, then report it.
                for event_name, fn in listeners:
                    with suppress(Exception):
                        getattr(socket, remove_method)(event_name, fn)
                finish([(MessageType.ERROR, err)])

            def detach() -> None:
                for event_name, fn in listeners:
                    with suppress(Exception):
                        getattr(socket, remove_method)(event_name, fn)
                if close_on_cleanup:
                    with suppress(Exception):
                        socket.close()

            teardown = detach
        else:
            try:
                teardown = register(emit, error, complete)
                if teardown is None:
                    raise RuntimeError(
                        "from_websocket register contract violation: "
                        "register must return cleanup callable"
                    )
            except Exception as err:
                finish([(MessageType.ERROR, err)])

        def stop() -> None:
            nonlocal is_active
            with guard:
                is_active = False
            run_teardown_once()

        return stop

    return node(start, describe_kind="from_websocket", complete_when_deps_complete=False)
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def to_websocket(
    source: Node[Any],
    socket: Any | None = None,
    *,
    send: Callable[[Any], None] | None = None,
    close: Callable[..., None] | None = None,
    serialize: Callable[[Any], Any] | None = None,
    close_on_complete: bool = True,
    close_on_error: bool = True,
    close_code: int | None = None,
    close_reason: str | None = None,
    on_transport_error: Callable[[dict[str, Any]], None] | None = None,
) -> Callable[[], None]:
    """Forward upstream DATA payloads to a WebSocket-like transport.

    Transport failures from serialization/send/close are reported through
    ``on_transport_error`` as a dict with ``stage``, ``error``, and ``message`` keys.

    Without ``serialize``, ``str`` and bytes-like payloads are sent unchanged and
    everything else is JSON-encoded, falling back to ``str(value)`` on ``TypeError``.

    When serializing or sending a payload fails, the remaining DATA messages of
    that batch are dropped, but a COMPLETE/ERROR in the same batch still closes
    the transport (subject to ``close_on_complete`` / ``close_on_error``). The
    transport is closed at most once.

    Args:
        source: Node whose messages are forwarded.
        socket: Object providing ``send`` and optionally ``close``.
        send: Explicit send callable; takes precedence over ``socket.send``.
        close: Explicit close callable; defaults to ``socket.close`` when available.
        serialize: Optional payload serializer.
        close_on_complete: Close the transport when the source completes.
        close_on_error: Close the transport when the source errors.
        close_code: Optional code passed to ``close``.
        close_reason: Optional reason passed to ``close``.
        on_transport_error: Optional failure callback; exceptions it raises are ignored.

    Returns:
        The value returned by ``source.subscribe``.

    Raises:
        ValueError: If neither ``socket`` nor ``send`` is given.
    """
    if send is None:
        if socket is None:
            msg = "to_websocket requires socket or send"
            raise ValueError(msg)
        send = socket.send
    if close is None and socket is not None and hasattr(socket, "close"):
        close = socket.close

    def _serialize(value: Any) -> Any:
        if serialize is not None:
            return serialize(value)
        if isinstance(value, (str, bytes, bytearray, memoryview)):
            return value
        try:
            return json.dumps(value)
        except TypeError:
            return str(value)

    # Single-element list so the flag can be flipped from the nested helper.
    closed = [False]

    def _report_transport_error(
        stage: str, err: Exception, message: tuple[Any, ...] | None
    ) -> None:
        if on_transport_error is None:
            return
        with suppress(Exception):
            on_transport_error({"stage": stage, "error": err, "message": message})

    def _close(message: tuple[Any, ...]) -> None:
        if close is None or closed[0]:
            return
        closed[0] = True
        if close_code is None and close_reason is None:
            try:
                close()
            except Exception as err:
                _report_transport_error("close", err, message)
            return
        try:
            close(close_code, close_reason)
        except TypeError:
            # Some close callables don't accept code/reason.
            try:
                close()
            except Exception as err:
                _report_transport_error("close", err, message)
        except Exception as err:
            _report_transport_error("close", err, message)

    def sink(msgs: Messages) -> None:
        # Set after the first failed serialize/send so no further payloads of
        # this batch are attempted, while terminal messages are still honoured.
        data_blocked = False
        for msg in msgs:
            t = msg[0]
            if t is MessageType.DATA:
                if data_blocked:
                    continue
                try:
                    payload = _serialize(msg[1] if len(msg) > 1 else None)
                except Exception as err:
                    _report_transport_error("serialize", err, msg)
                    data_blocked = True
                    continue
                try:
                    send(payload)
                except Exception as err:
                    _report_transport_error("send", err, msg)
                    data_blocked = True
            elif (t is MessageType.COMPLETE and close_on_complete) or (
                t is MessageType.ERROR and close_on_error
            ):
                _close(msg)

    return source.subscribe(sink)
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
# ---------------------------------------------------------------------------
|
|
816
|
+
# SSE (moved from sources.py)
|
|
817
|
+
# ---------------------------------------------------------------------------
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def sse_frame(event: str, data: str | None = None) -> str:
|
|
821
|
+
out = f"event: {event}\n"
|
|
822
|
+
if data is not None:
|
|
823
|
+
# Preserve trailing empty lines (matches TS split(/\\r?\\n/) framing behavior).
|
|
824
|
+
normalized = data.replace("\r\n", "\n")
|
|
825
|
+
for line in normalized.split("\n"):
|
|
826
|
+
out += f"data: {line}\n"
|
|
827
|
+
return f"{out}\n"
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def to_sse(
    source: Node[Any],
    *,
    serialize: Callable[[Any], str] | None = None,
    data_event: str = "data",
    error_event: str = "error",
    complete_event: str = "complete",
    include_resolved: bool = False,
    include_dirty: bool = False,
    keepalive_s: float | None = None,
    cancel_event: threading.Event | None = None,
    event_name_resolver: Callable[[Any], str] | None = None,
) -> Iterator[str]:
    """Convert node messages into standard SSE text frames.

    This is a generator: the subscription to *source* is made on the first
    iteration, and released (together with the helper threads) when the generator
    finishes or is closed.

    Args:
        source: Node to subscribe to.
        serialize: Optional encoder for non-string payloads. Without it, exceptions
            become ``str(exc)`` and other values go through ``json.dumps``
            (``str(value)`` on ``TypeError``). String payloads are never re-encoded.
        data_event: Event name used for ``DATA`` messages.
        error_event: Event name used for ``ERROR`` messages (ends the stream).
        complete_event: Event name used for ``COMPLETE`` messages (ends the stream).
        include_resolved: Also emit frames for ``RESOLVED`` messages.
        include_dirty: Also emit frames for ``DIRTY`` messages.
        keepalive_s: When positive, a ``": keepalive"`` comment frame is queued
            every ``keepalive_s`` seconds by a daemon thread.
        cancel_event: When set, the stream ends without a terminal frame.
        event_name_resolver: Maps any other message type to an event name;
            defaults to ``str(message_type)``.

    Yields:
        Framed SSE chunks (``event: ...`` / ``data: ...`` lines followed by a blank
        line) and keepalive comment frames.
    """

    import queue

    # ``None`` is the end-of-stream sentinel.
    q: queue.Queue[str | None] = queue.Queue()
    done = threading.Event()

    def encode(value: Any) -> str:
        if isinstance(value, str):
            return value
        if serialize is not None:
            return serialize(value)
        if isinstance(value, BaseException):
            return str(value)
        try:
            return json.dumps(value)
        except TypeError:
            return str(value)

    def sink(msgs: Messages) -> None:
        if done.is_set():
            return
        for msg in msgs:
            t = msg[0]
            if t is MessageType.DATA:
                q.put(sse_frame(data_event, encode(msg[1] if len(msg) > 1 else None)))
                continue
            if t is MessageType.ERROR:
                q.put(sse_frame(error_event, encode(msg[1] if len(msg) > 1 else None)))
                done.set()
                q.put(None)
                return
            if t is MessageType.COMPLETE:
                q.put(sse_frame(complete_event))
                done.set()
                q.put(None)
                return
            if t is MessageType.RESOLVED and not include_resolved:
                continue
            if t is MessageType.DIRTY and not include_dirty:
                continue
            event = event_name_resolver(t) if event_name_resolver is not None else str(t)
            data = encode(msg[1]) if len(msg) > 1 else None
            q.put(sse_frame(event, data))

    unsub = source.subscribe(sink)

    keepalive_stop = threading.Event()
    keepalive_thread: threading.Thread | None = None
    if keepalive_s is not None and keepalive_s > 0:

        def keepalive_loop() -> None:
            while not keepalive_stop.wait(keepalive_s):
                if done.is_set():
                    return
                q.put(": keepalive\n\n")

        keepalive_thread = threading.Thread(target=keepalive_loop, daemon=True)
        keepalive_thread.start()

    cancel_thread: threading.Thread | None = None
    if cancel_event is not None:
        # How often the watcher re-checks ``done`` while waiting for cancellation.
        cancel_poll_s = 0.1

        def cancel_loop() -> None:
            # Wait with a timeout instead of blocking forever: a stream that ends
            # normally (or whose consumer goes away) must not leave a watcher thread
            # parked until ``cancel_event`` is eventually set. ``wait`` still returns
            # immediately once the event fires, so cancellation latency is unchanged.
            while not done.is_set():
                if cancel_event.wait(cancel_poll_s):
                    if not done.is_set():
                        done.set()
                        q.put(None)
                    return

        cancel_thread = threading.Thread(target=cancel_loop, daemon=True)
        cancel_thread.start()

    try:
        while True:
            chunk = q.get()
            if chunk is None:
                break
            yield chunk
    finally:
        done.set()
        keepalive_stop.set()
        if keepalive_thread is not None:
            keepalive_thread.join(timeout=0.05)
        if cancel_thread is not None:
            cancel_thread.join(timeout=0.05)
        unsub()
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
# ---------------------------------------------------------------------------
|
|
938
|
+
# MCP (moved from sources.py)
|
|
939
|
+
# ---------------------------------------------------------------------------
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
def from_mcp(
    client: Any,
    *,
    method: str = "notifications/message",
    on_disconnect: Callable[[Callable[[Any], None]], None] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Expose an MCP client's server-push notifications as a reactive source.

    ``from_mcp`` does not connect or close the client; it only installs a
    notification handler for *method* and forwards every notification it receives
    via ``actions.emit``. On teardown the handler is replaced with a no-op.

    **Disconnect detection:** the original author notes that the MCP SDK has no
    built-in disconnect event. Supply ``on_disconnect`` to wire in an external
    signal; when its callback fires, the source sends ``ERROR`` downstream and
    stops forwarding notifications.

    Args:
        client: Any object with a ``set_notification_handler(method, handler)``
            method (duck-typed -- no SDK dependency).
        method: MCP notification method to listen for. Default
            ``"notifications/message"``.
        on_disconnect: Optional registrar ``(cb) -> None``. Call ``cb(err)`` when the
            transport disconnects; ``cb()`` with no error reports a generic
            ``Exception("MCP client disconnected")``.
        **kwargs: Passed through to ``node``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per notification.

    Example:
        ```python
        from graphrefly.extra import from_mcp
        tools = from_mcp(client, method="notifications/tools/list_changed")
        ```
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        # True while notifications should still be forwarded.
        listening = True

        def forward(notification: Any) -> None:
            if listening:
                actions.emit(notification)

        client.set_notification_handler(method, forward)

        if on_disconnect is not None:

            def handle_disconnect(err: Any = None) -> None:
                nonlocal listening
                if not listening:
                    return
                listening = False
                failure = Exception("MCP client disconnected") if err is None else err
                actions.down([(MessageType.ERROR, failure)])

            on_disconnect(handle_disconnect)

        def stop() -> None:
            nonlocal listening
            listening = False
            # Swap in a no-op so late notifications are ignored by the client too.
            client.set_notification_handler(method, lambda _n: None)

        return stop

    return node(start, describe_kind="producer", **kwargs)
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
# ---------------------------------------------------------------------------
|
|
1006
|
+
# from_git_hook (moved from sources.py)
|
|
1007
|
+
# ---------------------------------------------------------------------------
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def from_git_hook(
    repo_path: str,
    *,
    poll_ms: int = 5000,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    **kwargs: Any,
) -> Node[Any]:
    """Watch a git repository for new commits and emit them reactively.

    HEAD is sampled with ``git rev-parse HEAD`` every ``poll_ms`` milliseconds on a
    daemon timer. When it differs from the last value seen, a ``GitEvent`` dict is
    emitted. Only read-only git commands are run -- no hook scripts are installed.

    **Limitations:** polling cannot tell a commit from a merge or rebase, so
    ``hook`` is always ``"post-commit"``. If several commits land between two polls,
    ``files`` covers the whole range while ``message`` / ``author`` describe only
    the newest commit.

    The emitted dict has keys: ``hook``, ``commit``, ``files``, ``message``,
    ``author``, ``timestamp_ns``.

    Any git failure is sent downstream as ``ERROR`` and stops the polling loop.

    Cross-repo usage::

        merge([from_git_hook(ts_repo), from_git_hook(py_repo)])

    Args:
        repo_path: Path of the repository; used as the working directory of every
            git invocation.
        poll_ms: Polling interval in milliseconds. Default ``5000``.
        include: Glob patterns -- keep only changed files matching one of them.
        exclude: Glob patterns -- drop changed files matching one of them.
        **kwargs: Passed through to ``node``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per detected
        HEAD change.
    """
    import subprocess

    include_patterns = [_glob_to_regex(pattern) for pattern in (include or [])]
    exclude_patterns = [_glob_to_regex(pattern) for pattern in (exclude or [])]

    def _git(cmd: list[str]) -> str:
        completed = subprocess.run(  # noqa: S603
            cmd,
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True,
        )
        return completed.stdout.strip()

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = True
        pending: threading.Timer | None = None

        # P4: Seed with current HEAD; route errors through the protocol.
        try:
            last_seen = _git(["git", "rev-parse", "HEAD"])
        except Exception as err:
            actions.down([(MessageType.ERROR, err)])
            return lambda: None

        def fail(err: Exception) -> None:
            # Report the failure (unless already torn down), then stop polling.
            if active:
                actions.down([(MessageType.ERROR, err)])
            cleanup()

        def check() -> None:
            # P7: Top-level guard -- any unexpected exception tears down cleanly.
            try:
                _check_inner()
            except Exception as err:
                fail(err)

        def _check_inner() -> None:
            nonlocal last_seen
            if not active:
                return

            try:
                head = _git(["git", "rev-parse", "HEAD"])
            except Exception as err:
                fail(err)
                return

            if not active or head == last_seen:
                schedule()
                return

            try:
                changed = _git(["git", "diff", "--name-only", f"{last_seen}..{head}"])
                files = [path for path in changed.split("\n") if path]
                if include_patterns:
                    files = [path for path in files if _matches_any(path, include_patterns)]
                if exclude_patterns:
                    files = [path for path in files if not _matches_any(path, exclude_patterns)]

                # P2: Target captured head SHA, not implicit HEAD.
                message = _git(["git", "log", "-1", "--format=%s", head])
                author = _git(["git", "log", "-1", "--format=%an", head])
            except Exception as err:
                fail(err)
                return

            if not active:
                return

            # P5: Emit before advancing last_seen.
            event = {
                "hook": "post-commit",
                "commit": head,
                "files": files,
                "message": message,
                "author": author,
                "timestamp_ns": wall_clock_ns(),
            }
            actions.emit(event)
            last_seen = head
            schedule()

        def schedule() -> None:
            nonlocal pending
            if not active:
                return
            next_poll = threading.Timer(poll_ms / 1000.0, check)
            next_poll.daemon = True
            pending = next_poll
            next_poll.start()

        def cleanup() -> None:
            nonlocal active, pending
            active = False
            current = pending
            if current is not None:
                current.cancel()
                pending = None

        schedule()
        return cleanup

    return node(start, describe_kind="producer", **kwargs)
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
# ===========================================================================
|
|
1149
|
+
# 5.3b -- Ingest adapters (universal source layer)
|
|
1150
|
+
# ===========================================================================
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
# ---------------------------------------------------------------------------
|
|
1154
|
+
# OpenTelemetry (OTLP/HTTP)
|
|
1155
|
+
# ---------------------------------------------------------------------------
|
|
1156
|
+
|
|
1157
|
+
|
|
1158
|
+
@dataclass(frozen=True, slots=True)
|
|
1159
|
+
class OTelBundle:
|
|
1160
|
+
"""Bundle returned by :func:`from_otel` -- one node per signal type."""
|
|
1161
|
+
|
|
1162
|
+
traces: Node[Any]
|
|
1163
|
+
metrics: Node[Any]
|
|
1164
|
+
logs: Node[Any]
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
def from_otel(
    register: Callable[
        [dict[str, Callable[..., None]]],
        Callable[[], None] | None,
    ],
) -> OTelBundle:
    """OTLP/HTTP receiver -- accepts traces, metrics, and logs as separate reactive nodes.

    The caller owns the HTTP server. ``from_otel`` calls ``register`` once,
    immediately, with the handlers that feed the three signal nodes. Each signal
    type gets its own :class:`~graphrefly.core.node.Node` so downstream can
    subscribe selectively.

    Lifecycle: ``on_error`` pushes ``ERROR`` into all three nodes and disables the
    handlers. The cleanup returned by ``register`` runs (at most once) when all
    three nodes are in the torn-down state at the same time; a node that is started
    again stops counting as torn down.

    Args:
        register: Callback receiving a dict with ``on_traces``, ``on_metrics``,
            ``on_logs``, ``on_error`` handler functions. Must return a cleanup
            callable or ``None``. The three data handlers take a list of items and
            emit one ``DATA`` per item inside a single ``batch()``.

    Returns:
        :class:`OTelBundle` -- ``{ traces, metrics, logs }`` nodes.

    Example:
        ```python
        from graphrefly.extra.adapters import from_otel

        otel = from_otel(lambda h: (
            # wire your HTTP routes to h["on_traces"], h["on_metrics"], h["on_logs"]
            None
        ))
        ```
    """
    signal_count = 3
    active = [True]
    # Slots of the signal nodes whose most recent lifecycle event was a teardown.
    # A set (rather than a running counter) keeps one node that is torn down
    # several times from being mistaken for "every node is gone".
    torn_down: set[int] = set()
    register_cleanup: list[Callable[[], None] | None] = [None]

    def _run_register_cleanup() -> None:
        fn = register_cleanup[0]
        if fn is not None:
            # Clear first so the cleanup can never run twice.
            register_cleanup[0] = None
            fn()

    def _make_signal_node(slot: int) -> Node[Any]:
        def start(_deps: list[Any], _actions: NodeActions) -> Callable[[], None]:
            # A (re)started node is live again and must not count towards shutdown.
            torn_down.discard(slot)

            def cleanup() -> None:
                torn_down.add(slot)
                if len(torn_down) >= signal_count:
                    active[0] = False
                    _run_register_cleanup()

            return cleanup

        return node(start, describe_kind="producer", complete_when_deps_complete=False)

    traces = _make_signal_node(0)
    metrics = _make_signal_node(1)
    logs = _make_signal_node(2)

    def _on_traces(spans: list[Any]) -> None:
        if not active[0]:
            return
        with batch():
            for s in spans:
                traces.down([(MessageType.DATA, s)])

    def _on_metrics(ms: list[Any]) -> None:
        if not active[0]:
            return
        with batch():
            for m in ms:
                metrics.down([(MessageType.DATA, m)])

    def _on_logs(ls: list[Any]) -> None:
        if not active[0]:
            return
        with batch():
            for lg in ls:
                logs.down([(MessageType.DATA, lg)])

    def _on_error(err: BaseException | Any) -> None:
        if not active[0]:
            return
        # Disable the handlers first so only one error is ever fanned out.
        active[0] = False
        for n in (traces, metrics, logs):
            n.down([(MessageType.ERROR, err)])

    register_cleanup[0] = register(
        {
            "on_traces": _on_traces,
            "on_metrics": _on_metrics,
            "on_logs": _on_logs,
            "on_error": _on_error,
        }
    )

    return OTelBundle(traces=traces, metrics=metrics, logs=logs)
|
|
1261
|
+
|
|
1262
|
+
|
|
1263
|
+
# ---------------------------------------------------------------------------
|
|
1264
|
+
# Syslog (RFC 5424)
|
|
1265
|
+
# ---------------------------------------------------------------------------
|
|
1266
|
+
|
|
1267
|
+
|
|
1268
|
+
def parse_syslog(raw: str) -> dict[str, Any]:
    """Parse a raw RFC 5424 syslog line into a structured dict.

    Format: ``<PRI>VERSION TIMESTAMP HOSTNAME APP-NAME PROCID MSGID MSG``

    The VERSION field is optional and may be one to three digits; it is only
    recognised when followed by whitespace, so a timestamp that starts with a
    digit is never mistaken for a version. STRUCTURED-DATA is not split out:
    everything after MSGID ends up in ``message``.

    Args:
        raw: One syslog line.

    Returns:
        A dict with keys ``facility``, ``severity``, ``timestamp``, ``hostname``,
        ``app_name``, ``proc_id``, ``msg_id``, ``message``, ``timestamp_ns``.
        ``facility`` / ``severity`` are ``PRI >> 3`` and ``PRI & 7``;
        ``timestamp_ns`` is the parse time from ``wall_clock_ns()``.

        Input that does not match the format falls back to facility ``1``,
        severity ``6``, ``"-"`` for the header fields, the current time as an ISO
        timestamp, and the stripped raw text as ``message``.
    """
    match = re.match(
        # ``(?:\d{1,3}\s+)?`` consumes VERSION only when whitespace follows it;
        # the old ``\d?`` swallowed the first digit of a version-less timestamp.
        r"^<(\d{1,3})>(?:\d{1,3}\s+)?\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*(.*)",
        raw,
        re.S,
    )
    if not match:
        now_ns = wall_clock_ns()
        timestamp = datetime.fromtimestamp(now_ns / 1e9, tz=UTC).isoformat()
        return {
            "facility": 1,
            "severity": 6,
            "timestamp": timestamp,
            "hostname": "-",
            "app_name": "-",
            "proc_id": "-",
            "msg_id": "-",
            "message": raw.strip(),
            "timestamp_ns": now_ns,
        }
    pri = int(match.group(1))
    return {
        "facility": pri >> 3,
        "severity": pri & 7,
        "timestamp": match.group(2),
        "hostname": match.group(3),
        "app_name": match.group(4),
        "proc_id": match.group(5),
        "msg_id": match.group(6),
        "message": (match.group(7) or "").strip(),
        "timestamp_ns": wall_clock_ns(),
    }
|
|
1305
|
+
|
|
1306
|
+
|
|
1307
|
+
def from_syslog(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """Expose an RFC 5424 syslog receiver as a reactive source.

    This is a thin alias over :func:`from_webhook`: ``register`` is handed to it
    unchanged. The caller owns the UDP/TCP socket and is expected to turn raw lines
    into dicts with :func:`parse_syslog` before passing them to ``emit``.

    Args:
        register: Callback that receives the ``emit`` / ``error`` / ``complete``
            handlers (in that order) and wires them to the socket. May return a
            cleanup callable or ``None``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per syslog message.
    """
    syslog_source = from_webhook(register)
    return syslog_source
|
|
1330
|
+
|
|
1331
|
+
|
|
1332
|
+
# ---------------------------------------------------------------------------
|
|
1333
|
+
# StatsD / DogStatsD
|
|
1334
|
+
# ---------------------------------------------------------------------------
|
|
1335
|
+
|
|
1336
|
+
_STATSD_TYPES: dict[str, str] = {
|
|
1337
|
+
"c": "counter",
|
|
1338
|
+
"g": "gauge",
|
|
1339
|
+
"ms": "timer",
|
|
1340
|
+
"h": "histogram",
|
|
1341
|
+
"s": "set",
|
|
1342
|
+
"d": "distribution",
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
def parse_statsd(line: str) -> dict[str, Any]:
    """Parse a raw StatsD/DogStatsD line into a structured dict.

    Format: ``metric.name:value|type|@sampleRate|#tag1:val1,tag2:val2``

    Args:
        line: One metric line.

    Returns:
        A dict with keys ``name``, ``value``, ``type``, ``tags``, ``timestamp_ns``
        and, when an ``@rate`` section is present, ``sample_rate``.

        * ``type`` is looked up in ``_STATSD_TYPES``; a missing or unknown type
          code is reported as ``"counter"``.
        * ``value`` is a float. For set metrics (type code ``s``) it is always
          ``0`` because the set member is an identifier, not a number.
        * ``tags`` maps each tag name to its value (``""`` for bare tags). Only the
          first ``:`` separates name from value, so values may contain colons.

    Raises:
        ValueError: If the line has no ``name:value`` section, the name is empty,
            or the value / sample rate is not a valid float.
    """
    parts = line.split("|")
    # ``str.split`` always yields at least one element, so ``parts[0]`` is safe.
    split = parts[0].split(":")
    if len(split) < 2 or not split[0]:
        msg = f"Invalid StatsD line: {line}"
        raise ValueError(msg)
    name = split[0].strip()
    value_str = split[1].strip()
    type_code = parts[1].strip() if len(parts) > 1 else "c"
    metric_type = _STATSD_TYPES.get(type_code, "counter")
    # Set types use string identifiers, not numeric values.
    value: float = 0 if type_code == "s" else float(value_str)

    sample_rate: float | None = None
    tags: dict[str, str] = {}

    for part in parts[2:]:
        section = part.strip()
        if section.startswith("@"):
            sample_rate = float(section[1:])
        elif section.startswith("#"):
            for tag in section[1:].split(","):
                # partition keeps everything after the first colon, so a tag such
                # as ``url:http://host`` is no longer truncated to ``http``.
                key, _, tag_value = tag.partition(":")
                if key:
                    tags[key] = tag_value

    result: dict[str, Any] = {
        "name": name,
        "value": value,
        "type": metric_type,
        "tags": tags,
        "timestamp_ns": wall_clock_ns(),
    }
    if sample_rate is not None:
        result["sample_rate"] = sample_rate
    return result
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
def from_statsd(
    register: Callable[
        [
            Callable[[Any], None],
            Callable[[BaseException | Any], None],
            Callable[[], None],
        ],
        Callable[[], None] | None,
    ],
) -> Node[Any]:
    """Expose a StatsD/DogStatsD UDP receiver as a reactive source.

    This is a thin alias over :func:`from_webhook`: ``register`` is handed to it
    unchanged. The caller owns the UDP socket and is expected to turn raw lines
    into dicts with :func:`parse_statsd` before passing them to ``emit``.

    Args:
        register: Callback that receives the ``emit`` / ``error`` / ``complete``
            handlers (in that order) and wires them to the socket. May return a
            cleanup callable or ``None``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per metric line.
    """
    statsd_source = from_webhook(register)
    return statsd_source
|
|
1419
|
+
|
|
1420
|
+
|
|
1421
|
+
# ---------------------------------------------------------------------------
|
|
1422
|
+
# Prometheus scrape
|
|
1423
|
+
# ---------------------------------------------------------------------------
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
def _prometheus_label_block_end(line: str, open_idx: int) -> int:
    """Return the index of the ``}`` closing the label block opened at *open_idx*.

    Braces inside double-quoted label values (and backslash-escaped characters
    inside those quotes) are skipped. Returns ``-1`` when no closing brace exists.
    """
    in_quotes = False
    skip_next = False
    for idx in range(open_idx + 1, len(line)):
        ch = line[idx]
        if skip_next:
            skip_next = False
        elif in_quotes and ch == "\\":
            skip_next = True
        elif ch == '"':
            in_quotes = not in_quotes
        elif ch == "}" and not in_quotes:
            return idx
    return -1


def parse_prometheus_text(text: str) -> list[dict[str, Any]]:
    """Parse Prometheus exposition format text into a list of metric dicts.

    ``# TYPE`` and ``# HELP`` lines are remembered and attached to the samples
    that follow them; other comments and blank lines are ignored. Sample lines
    that lack a name, a value, or a closing ``}`` are skipped.

    Args:
        text: Full exposition payload (lines separated by ``\\n``).

    Returns:
        One dict per sample with keys ``name``, ``labels``, ``value``,
        ``timestamp_ns`` (parse time) and, when available, ``timestamp_ms``,
        ``type`` and ``help``. Type/help are looked up first under the sample name
        with a trailing ``_total`` / ``_count`` / ``_sum`` / ``_bucket`` /
        ``_created`` / ``_info`` removed, then under the full name.

    Raises:
        ValueError: If a sample value or timestamp is not a valid float.
    """
    results: list[dict[str, Any]] = []
    types: dict[str, str] = {}
    helps: dict[str, str] = {}

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("# TYPE "):
            metric, sep, kind = line[7:].partition(" ")
            if sep and metric:
                types[metric] = kind.strip()
            continue
        if line.startswith("# HELP "):
            metric, sep, description = line[7:].partition(" ")
            if sep and metric:
                helps[metric] = description.strip()
            continue
        if line.startswith("#"):
            continue

        # metric_name{label="value"} 123 timestamp?
        brace_idx = line.find("{")
        if brace_idx >= 0:
            name = line[:brace_idx]
            # Quote-aware search: a plain find("}") stops at a brace that is part
            # of a label value such as path="/users/{id}".
            close_brace = _prometheus_label_block_end(line, brace_idx)
            if close_brace < 0:
                continue
            labels = _parse_prometheus_labels(line[brace_idx + 1 : close_brace])
            after = line[close_brace + 1 :].strip().split()
            value_str = after[0] if after else ""
            ts_str = after[1] if len(after) > 1 else None
        else:
            parts = line.split()
            name = parts[0] if parts else ""
            value_str = parts[1] if len(parts) > 1 else ""
            ts_str = parts[2] if len(parts) > 2 else None
            labels = {}

        if not name or not value_str:
            continue

        base_name = re.sub(r"(_total|_count|_sum|_bucket|_created|_info)$", "", name)
        entry: dict[str, Any] = {
            "name": name,
            "labels": labels,
            "value": float(value_str),
            "timestamp_ns": wall_clock_ns(),
        }
        if ts_str:
            entry["timestamp_ms"] = float(ts_str)
        t = types.get(base_name) or types.get(name)
        if t:
            entry["type"] = t
        h = helps.get(base_name) or helps.get(name)
        if h:
            entry["help"] = h
        results.append(entry)

    return results
|
|
1496
|
+
|
|
1497
|
+
|
|
1498
|
+
def _parse_prometheus_labels(s: str) -> dict[str, str]:
|
|
1499
|
+
labels: dict[str, str] = {}
|
|
1500
|
+
for m in re.finditer(r'(\w+)="((?:[^"\\]|\\.)*)"', s):
|
|
1501
|
+
labels[m.group(1)] = re.sub(r"\\(.)", r"\1", m.group(2))
|
|
1502
|
+
return labels
|
|
1503
|
+
|
|
1504
|
+
|
|
1505
|
+
def from_prometheus(
    endpoint: str,
    *,
    interval_ns: int = 15_000_000_000,
    headers: dict[str, str] | None = None,
    timeout_ns: int = 10_000_000_000,
) -> Node[Any]:
    """Scrape a Prometheus ``/metrics`` endpoint on a timer interval.

    The first scrape runs on a background daemon thread as soon as the node
    starts; each later scrape is scheduled ``interval_ns`` after the previous one
    finishes. Every scrape is parsed with :func:`parse_prometheus_text` and emits
    one ``DATA`` per metric line. A failed scrape sends ``ERROR`` downstream and
    stops the loop; failures that happen after teardown are dropped silently.

    Args:
        endpoint: URL of the Prometheus metrics endpoint.
        interval_ns: Scrape interval in nanoseconds. Default ``15_000_000_000`` (15s).
        headers: Optional request headers, added after ``Accept: text/plain``.
        timeout_ns: Request timeout in nanoseconds. Default ``10_000_000_000`` (10s).

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per metric per scrape.
    """
    interval_s = interval_ns / 1e9
    timeout_s = timeout_ns / 1e9

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = [True]
        running = [False]
        timer: list[threading.Timer | None] = [None]

        def scrape() -> None:
            if not active[0]:
                return
            if running[0]:
                # A scrape is still in flight; try again on the next tick.
                schedule()
                return
            running[0] = True
            try:
                req = urllib.request.Request(endpoint)
                req.add_header("Accept", "text/plain")
                if headers:
                    for k, v in headers.items():
                        req.add_header(k, v)
                with urllib.request.urlopen(req, timeout=timeout_s) as response:
                    if not active[0]:
                        return
                    text = response.read().decode("utf-8")
                if not active[0]:
                    return
                for m in parse_prometheus_text(text):
                    if not active[0]:
                        return
                    actions.emit(m)
            except Exception as err:
                # Only report while still subscribed: a request that fails after
                # cleanup() must not push ERROR into a torn-down node.
                if active[0]:
                    active[0] = False
                    actions.down([(MessageType.ERROR, err)])
                return
            finally:
                running[0] = False
            schedule()

        def schedule() -> None:
            if not active[0]:
                return
            next_scrape = threading.Timer(interval_s, scrape)
            next_scrape.daemon = True
            # Publish the timer before starting it so cleanup() can always cancel it.
            timer[0] = next_scrape
            next_scrape.start()

        # Initial scrape in background thread.
        first_scrape = threading.Thread(target=scrape, daemon=True)
        first_scrape.start()

        def cleanup() -> None:
            active[0] = False
            if timer[0] is not None:
                timer[0].cancel()
                timer[0] = None

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
# ---------------------------------------------------------------------------
|
|
1590
|
+
# Kafka
|
|
1591
|
+
# ---------------------------------------------------------------------------
|
|
1592
|
+
|
|
1593
|
+
|
|
1594
|
+
@runtime_checkable
class KafkaConsumerLike(Protocol):
    """Structural type for a Kafka consumer: anything with ``subscribe`` and ``run``.

    Being ``runtime_checkable``, ``isinstance`` only verifies that both methods
    exist, not their signatures. NOTE(review): the original docstring claims
    compatibility with confluent-kafka and aiokafka -- confirm against those
    client APIs.
    """

    def subscribe(self, topics: list[str]) -> None:
        """Subscribe the consumer to *topics*."""

    def run(self, callback: Callable[..., None]) -> None:
        """Run the consumer with *callback* (presumably invoked per message)."""
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
@runtime_checkable
class KafkaProducerLike(Protocol):
    """Structural type for a Kafka producer: anything exposing a ``send`` method.

    Being ``runtime_checkable``, ``isinstance`` only verifies that ``send``
    exists, not its signature.
    """

    def send(self, topic: str, *, key: Any = None, value: Any = None) -> None:
        """Send a record with optional *key* and *value* to *topic*."""
|
|
1607
|
+
|
|
1608
|
+
|
|
1609
|
+
def from_kafka(
    consumer: Any,
    topic: str,
    *,
    from_beginning: bool = False,
    deserialize: Callable[[Any], Any] | None = None,
) -> Node[Any]:
    """Expose a Kafka-style consumer as a reactive producer node.

    On activation a daemon thread subscribes ``consumer`` to ``topic`` and
    calls ``consumer.run`` with a keyword-only callback. Every callback
    invocation is emitted as one ``DATA`` dict with the keys ``topic``,
    ``partition``, ``key``, ``value``, ``headers``, ``offset``, ``timestamp``
    and ``timestamp_ns``.

    Args:
        consumer: Object providing ``subscribe(list_of_topics)`` and
            ``run(callback)``. The caller owns its connect/disconnect
            lifecycle.
        topic: Topic to subscribe to.
        from_beginning: Start from beginning of topic. Default ``False``.
            NOTE(review): this flag is not referenced anywhere in this
            function body -- confirm how the start position is meant to be
            applied.
        deserialize: Converter applied to each message value. When omitted,
            values are parsed with ``json.loads`` and fall back to the
            decoded text if they are not valid JSON.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per
        message. Any exception escaping ``subscribe``/``run`` (including one
        raised by ``deserialize``) is sent downstream as ``ERROR`` unless the
        node has already been cleaned up.
    """

    def _json_or_text(payload: Any) -> Any:
        # ``None`` passes through untouched (e.g. a message without a value).
        if payload is None:
            return None
        if isinstance(payload, bytes):
            text = payload.decode("utf-8", errors="replace")
        elif isinstance(payload, str):
            text = payload
        else:
            text = str(payload)
        try:
            return json.loads(text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass: not JSON, so
            # hand back the plain text instead.
            return text

    decode = _json_or_text if deserialize is None else deserialize

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        live = [True]  # one-element list so the nested closures can flip it

        def on_message(
            *,
            topic: str = "",
            partition: int = 0,
            key: Any = None,
            value: Any = None,
            headers: dict[str, str] | None = None,
            offset: str = "0",
            timestamp: str = "",
        ) -> None:
            # Messages arriving after cleanup are dropped.
            if not live[0]:
                return
            record = {
                "topic": topic,
                "partition": partition,
                "key": None if key is None else str(key),
                "value": decode(value),
                "headers": headers or {},
                "offset": offset,
                "timestamp": timestamp,
                "timestamp_ns": wall_clock_ns(),
            }
            actions.emit(record)

        def _run() -> None:
            try:
                consumer.subscribe([topic])
                consumer.run(on_message)
            except BaseException as err:
                # Only surface the failure while the node is still active.
                if live[0]:
                    actions.down([(MessageType.ERROR, err)])

        threading.Thread(target=_run, daemon=True).start()

        def cleanup() -> None:
            live[0] = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
1693
|
+
|
|
1694
|
+
|
|
1695
|
+
def to_kafka(
    source: Node[Any],
    producer: Any,
    topic: str,
    *,
    serialize: Callable[[Any], Any] | None = None,
    key_extractor: Callable[[Any], str | None] | None = None,
    on_transport_error: Callable[[SinkTransportError], None] | None = None,
) -> Callable[[], None]:
    """Forward every upstream ``DATA`` value to a Kafka topic.

    An effect node is attached to ``source`` and subscribed immediately; the
    returned callable removes that subscription.

    Args:
        source: Upstream node whose ``DATA`` messages are forwarded.
        producer: Object with a ``send(topic, key=..., value=...)`` method.
        topic: Target topic.
        serialize: Value serializer. Default: ``json.dumps``.
        key_extractor: Optional function deriving the message key from the
            value; the key is ``None`` when no extractor is given. An
            exception raised by the extractor is not caught here.
        on_transport_error: Optional callback receiving a
            :class:`SinkTransportError` (``stage`` is ``"serialize"`` or
            ``"send"``) when serialization or sending raises. Without it
            such failures are dropped silently.

    Returns:
        An unsubscribe ``Callable[[], None]`` to tear down the sink.
    """
    encode = json.dumps if serialize is None else serialize

    def _report(stage: str, err: Exception, value: Any) -> None:
        # Transport failures are reported only when a callback was supplied.
        if on_transport_error is not None:
            on_transport_error(SinkTransportError(stage=stage, error=err, value=value))

    def _on_message(msg: Any, _index: int, _actions: NodeActions) -> bool:
        # Anything other than DATA is left to the node's default handling.
        if msg[0] is not MessageType.DATA:
            return False
        value = msg[1] if len(msg) > 1 else None
        key = key_extractor(value) if key_extractor else None
        try:
            serialized = encode(value)
        except Exception as err:
            _report("serialize", err, value)
            return True
        try:
            producer.send(topic, key=key, value=serialized)
        except Exception as err:
            _report("send", err, value)
        # DATA counts as handled whether or not the send succeeded.
        return True

    effect = node(
        [source],
        lambda _deps, _actions: lambda: None,
        describe_kind="effect",
        on_message=_on_message,
    )
    return effect.subscribe(lambda _msgs: None)
|
|
1751
|
+
|
|
1752
|
+
|
|
1753
|
+
# ---------------------------------------------------------------------------
|
|
1754
|
+
# Redis Streams
|
|
1755
|
+
# ---------------------------------------------------------------------------
|
|
1756
|
+
|
|
1757
|
+
|
|
1758
|
+
@runtime_checkable
class RedisClientLike(Protocol):
    """Structural (duck-typed) shape of a Redis client with stream commands.

    Any object that provides ``xadd`` and ``xread`` with these signatures
    satisfies the protocol; no inheritance is required.
    """

    def xadd(self, name: str, fields: dict[str, str], **kwargs: Any) -> Any:
        """Append an entry made of ``fields`` to the stream ``name``."""
        ...

    def xread(self, streams: dict[str, str], **kwargs: Any) -> Any:
        """Read entries from ``streams`` (a stream-key to last-seen-ID map)."""
        ...
|
|
1764
|
+
|
|
1765
|
+
|
|
1766
|
+
def from_redis_stream(
    client: Any,
    key: str,
    *,
    block_ms: int = 5000,
    start_id: str = "$",
    parse: Callable[[dict[str, str]], Any] | None = None,
) -> Node[Any]:
    """Redis Streams consumer as a reactive source.

    A daemon thread repeatedly calls ``client.xread`` with ``block=block_ms``
    and emits one ``DATA`` dict (``id``, ``key``, ``data``, ``timestamp_ns``)
    per returned entry, remembering the last seen entry ID between calls.

    Args:
        client: Redis client instance with an ``xread`` method (caller owns
            the connection).
        key: Redis stream key.
        block_ms: Block timeout in ms for XREAD. Default ``5000``.
        start_id: Start ID. Default ``"$"`` (new entries only).
        parse: Optional parser for the raw entry fields. Default: parse the
            ``data`` field as JSON (returning the raw field value if it is
            not valid JSON), or return a copy of the fields dict when there
            is no ``data`` field. The default accepts the field name as
            either ``"data"`` or ``b"data"`` so that clients returning
            undecoded bytes are handled as well.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per
        stream entry. Any exception raised while reading or parsing is sent
        downstream as ``ERROR`` and stops the polling loop.
    """
    if parse is None:

        def _default_parse(fields: dict[Any, Any]) -> Any:
            # Clients that do not decode responses hand back bytes keys and
            # values; look the payload up under both spellings.
            if "data" in fields:
                raw = fields["data"]
            elif b"data" in fields:
                raw = fields[b"data"]
            else:
                return dict(fields)
            try:
                # json.loads accepts str as well as UTF-8 encoded bytes.
                return json.loads(raw)
            except ValueError:
                # Covers JSONDecodeError and UnicodeDecodeError: not JSON,
                # so return the payload exactly as received.
                return raw

        parse = _default_parse

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        active = [True]  # flipped by cleanup() to stop the loop
        last_id = [start_id]  # ID passed to the next XREAD call

        def poll() -> None:
            while active[0]:
                try:
                    result = client.xread(
                        {key: last_id[0]},
                        block=block_ms,
                    )
                    # The blocking read may return after cleanup; drop it.
                    if not active[0]:
                        return
                    if result:
                        for _stream_key, entries in result:
                            for entry_id, fields in entries:
                                # Advance first so the next read resumes
                                # after this entry.
                                last_id[0] = entry_id
                                actions.emit(
                                    {
                                        "id": entry_id,
                                        "key": key,
                                        "data": parse(fields),
                                        "timestamp_ns": wall_clock_ns(),
                                    }
                                )
                except BaseException as err:
                    # Errors raised after cleanup are not reported.
                    if active[0]:
                        actions.down([(MessageType.ERROR, err)])
                    return

        t = threading.Thread(target=poll, daemon=True)
        t.start()

        def cleanup() -> None:
            active[0] = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
1841
|
+
|
|
1842
|
+
|
|
1843
|
+
def to_redis_stream(
    source: Node[Any],
    client: Any,
    key: str,
    *,
    serialize: Callable[[Any], dict[str, str]] | None = None,
    max_len: int | None = None,
    on_transport_error: Callable[[SinkTransportError], None] | None = None,
) -> Callable[[], None]:
    """Append every upstream ``DATA`` value to a Redis stream.

    An effect node is attached to ``source`` and subscribed immediately; the
    returned callable removes that subscription.

    Args:
        source: Upstream node whose ``DATA`` messages are forwarded.
        client: Redis client instance with an ``xadd`` method.
        key: Redis stream key.
        serialize: Serializer returning the entry's field dict.
            Default: ``{"data": json.dumps(value)}``.
        max_len: When given, passed to ``xadd`` as the ``maxlen`` keyword.
        on_transport_error: Optional callback receiving a
            :class:`SinkTransportError` (``stage`` is ``"serialize"`` or
            ``"send"``) when serialization or ``xadd`` raises. Without it
            such failures are dropped silently.

    Returns:
        An unsubscribe ``Callable[[], None]`` to tear down the sink.
    """

    def _wrap_as_data_field(v: Any) -> dict[str, str]:
        return {"data": json.dumps(v)}

    to_fields = _wrap_as_data_field if serialize is None else serialize
    # Optional XADD keyword arguments; empty unless a length cap was requested.
    xadd_kwargs: dict[str, Any] = {} if max_len is None else {"maxlen": max_len}

    def _report(stage: str, err: Exception, value: Any) -> None:
        # Transport failures are reported only when a callback was supplied.
        if on_transport_error is not None:
            on_transport_error(SinkTransportError(stage=stage, error=err, value=value))

    def _on_message(msg: Any, _index: int, _actions: NodeActions) -> bool:
        # Anything other than DATA is left to the node's default handling.
        if msg[0] is not MessageType.DATA:
            return False
        value = msg[1] if len(msg) > 1 else None
        try:
            fields = to_fields(value)
        except Exception as err:
            _report("serialize", err, value)
            return True
        try:
            client.xadd(key, fields, **xadd_kwargs)
        except Exception as err:
            _report("send", err, value)
        # DATA counts as handled whether or not the write succeeded.
        return True

    effect = node(
        [source],
        lambda _deps, _actions: lambda: None,
        describe_kind="effect",
        on_message=_on_message,
    )
    return effect.subscribe(lambda _msgs: None)
|
|
1906
|
+
|
|
1907
|
+
|
|
1908
|
+
# ---------------------------------------------------------------------------
|
|
1909
|
+
# CSV ingest
|
|
1910
|
+
# ---------------------------------------------------------------------------
|
|
1911
|
+
|
|
1912
|
+
|
|
1913
|
+
def from_csv(
    source: Iterable[str],
    *,
    delimiter: str = ",",
    has_header: bool = True,
    columns: list[str] | None = None,
    parse_line: Callable[[str], list[str]] | None = None,
) -> Node[Any]:
    """Replay CSV lines as a stream of row dicts.

    A daemon thread iterates ``source``, emits one ``DATA`` dict per
    non-blank row and sends ``COMPLETE`` once the input is exhausted. Any
    exception raised while reading or parsing is sent as ``ERROR``.

    Args:
        source: Iterable of CSV text lines (file-like object or generator).
        delimiter: Column delimiter used by ``csv.reader``. Default ``","``.
        has_header: Whether the first non-blank row supplies the column
            names. Only consulted when ``columns`` is empty or ``None``.
            Default ``True``.
        columns: Explicit column names. When a non-empty list is given, no
            row is treated as a header: every non-blank row is emitted.
        parse_line: Optional custom line parser. When provided, each line is
            passed to this function instead of using ``csv.reader``. Must
            return a list of field strings.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per
        parsed row. Rows shorter than the header are padded with ``""``;
        cells beyond the header length are dropped. With neither a header
        row nor ``columns``, names ``col0``, ``col1``, ... are generated
        from the width of the first data row.
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        live = [True]  # flipped by cleanup() to stop the drain loop

        def drain() -> None:
            try:
                names: list[str] | None = list(columns) if columns else None
                rows = (
                    csv.reader(source, delimiter=delimiter)
                    if parse_line is None
                    else (parse_line(line) for line in source)
                )
                for row in rows:
                    if not live[0]:
                        return
                    # Skip rows whose cells are all empty or whitespace.
                    if all(not cell.strip() for cell in row):
                        continue
                    if names is None:
                        if has_header:
                            # First non-blank row names the columns.
                            names = row
                            continue
                        names = [f"col{i}" for i in range(len(row))]
                    actions.emit(
                        {
                            name: (row[i] if i < len(row) else "")
                            for i, name in enumerate(names)
                        }
                    )
                if live[0]:
                    actions.down([(MessageType.COMPLETE,)])
            except BaseException as err:
                # Failures after cleanup are not reported.
                if live[0]:
                    actions.down([(MessageType.ERROR, err)])

        threading.Thread(target=drain, daemon=True).start()

        def cleanup() -> None:
            live[0] = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
1978
|
+
|
|
1979
|
+
|
|
1980
|
+
# ---------------------------------------------------------------------------
|
|
1981
|
+
# NDJSON ingest
|
|
1982
|
+
# ---------------------------------------------------------------------------
|
|
1983
|
+
|
|
1984
|
+
|
|
1985
|
+
def from_ndjson(source: Iterable[str]) -> Node[Any]:
    """Replay newline-delimited JSON as a stream of parsed values.

    A daemon thread iterates ``source``, strips each line, skips blank ones
    and emits the ``json.loads`` result of every remaining line as ``DATA``.
    ``COMPLETE`` follows once the input is exhausted. A malformed line (or
    any other exception while reading) is sent as ``ERROR`` and ends the
    replay.

    Args:
        source: Iterable of NDJSON text lines.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per JSON line.
    """

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        live = [True]  # flipped by cleanup() to stop the drain loop

        def drain() -> None:
            try:
                for raw_line in source:
                    if not live[0]:
                        return
                    text = raw_line.strip()
                    if text:
                        actions.emit(json.loads(text))
                if live[0]:
                    actions.down([(MessageType.COMPLETE,)])
            except BaseException as err:
                # Failures after cleanup are not reported.
                if live[0]:
                    actions.down([(MessageType.ERROR, err)])

        threading.Thread(target=drain, daemon=True).start()

        def cleanup() -> None:
            live[0] = False

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
2025
|
+
|
|
2026
|
+
|
|
2027
|
+
# ---------------------------------------------------------------------------
|
|
2028
|
+
# ClickHouse live materialized view
|
|
2029
|
+
# ---------------------------------------------------------------------------
|
|
2030
|
+
|
|
2031
|
+
|
|
2032
|
+
@runtime_checkable
class ClickHouseClientLike(Protocol):
    """Structural (duck-typed) shape of a ClickHouse client.

    Only a ``query`` method taking the SQL text positionally and the output
    ``format`` as a keyword is required.
    """

    def query(self, query: str, *, format: str = "JSONEachRow") -> Any:
        """Execute ``query`` and return its result in the given ``format``."""
        ...
|
|
2037
|
+
|
|
2038
|
+
|
|
2039
|
+
def from_clickhouse_watch(
    client: Any,
    query: str,
    *,
    interval_ns: int = 5_000_000_000,
    format: str = "JSONEachRow",
) -> Node[Any]:
    """Poll a ClickHouse query on a timer and emit its rows.

    The first run happens right away on a daemon thread. After each run
    finishes, a ``threading.Timer`` is armed for the next one, so runs are
    spaced ``interval_ns`` apart measured from the end of the previous run.

    Args:
        client: ClickHouse client instance with a ``query`` method (caller
            owns the connection).
        query: SQL query to execute on each interval.
        interval_ns: Polling interval in nanoseconds. Default
            ``5_000_000_000`` (5s).
        format: Format name passed to ``client.query``. Default
            ``"JSONEachRow"``.

    Returns:
        A :class:`~graphrefly.core.node.Node` emitting one ``DATA`` per
        result row per run. An exception raised by a run is sent as
        ``ERROR`` and stops further polling.
    """
    period_s = interval_ns / 1e9

    def start(_deps: list[Any], actions: NodeActions) -> Callable[[], None]:
        live = [True]  # cleared by cleanup() or by a failed run
        busy = [False]  # True while a run is in progress
        pending: list[threading.Timer | None] = [None]

        def arm() -> None:
            # Queue the next run unless the node has been shut down.
            if not live[0]:
                return
            nxt = threading.Timer(period_s, run_once)
            nxt.daemon = True
            nxt.start()
            pending[0] = nxt

        def run_once() -> None:
            if not live[0]:
                return
            if busy[0]:
                # A previous run is still going; try again one period later.
                arm()
                return
            busy[0] = True
            try:
                result = client.query(query, format=format)
                # The query may return after cleanup; drop its result.
                if not live[0]:
                    return
                rows = result if isinstance(result, list) else list(result)
                for row in rows:
                    if not live[0]:
                        return
                    actions.emit(row)
            except Exception as err:
                # Deactivate first so the arm() in ``finally`` is a no-op.
                live[0] = False
                actions.down([(MessageType.ERROR, err)])
            finally:
                busy[0] = False
                arm()

        # Initial run on a background thread.
        threading.Thread(target=run_once, daemon=True).start()

        def cleanup() -> None:
            live[0] = False
            if pending[0] is not None:
                pending[0].cancel()
                pending[0] = None

        return cleanup

    return node(start, describe_kind="producer", complete_when_deps_complete=False)
|
|
2112
|
+
|
|
2113
|
+
|
|
2114
|
+
# ---------------------------------------------------------------------------
|
|
2115
|
+
# __all__
|
|
2116
|
+
# ---------------------------------------------------------------------------
|
|
2117
|
+
|
|
2118
|
+
# Public API of this module: the names exported by ``from ... import *``.
__all__ = [
    # Moved from sources.py
    "HttpBundle",
    "from_http",
    "from_event_emitter",
    "from_fs_watch",
    "from_webhook",
    "from_websocket",
    "to_websocket",
    "sse_frame",
    "to_sse",
    "from_mcp",
    "from_git_hook",
    # 5.3b -- Ingest adapters
    "SinkTransportError",
    "OTelBundle",
    "from_otel",
    "parse_syslog",
    "from_syslog",
    "parse_statsd",
    "from_statsd",
    "parse_prometheus_text",
    "from_prometheus",
    "KafkaConsumerLike",
    "KafkaProducerLike",
    "from_kafka",
    "to_kafka",
    "RedisClientLike",
    "from_redis_stream",
    "to_redis_stream",
    "from_csv",
    "from_ndjson",
    "from_clickhouse_watch",
    "ClickHouseClientLike",
]
|