agentflow-runtime 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. agentflow_runtime-1.1.0.dist-info/METADATA +55 -0
  2. agentflow_runtime-1.1.0.dist-info/RECORD +100 -0
  3. agentflow_runtime-1.1.0.dist-info/WHEEL +4 -0
  4. agentflow_runtime-1.1.0.dist-info/licenses/LICENSE +21 -0
  5. src/__init__.py +0 -0
  6. src/constants.py +3 -0
  7. src/ingestion/__init__.py +0 -0
  8. src/ingestion/cdc/__init__.py +5 -0
  9. src/ingestion/cdc/normalizer.py +186 -0
  10. src/ingestion/connectors/__init__.py +0 -0
  11. src/ingestion/connectors/mysql_cdc.py +63 -0
  12. src/ingestion/connectors/postgres_cdc.py +68 -0
  13. src/ingestion/producers/__init__.py +0 -0
  14. src/ingestion/producers/event_producer.py +237 -0
  15. src/ingestion/schemas/__init__.py +0 -0
  16. src/ingestion/schemas/events.py +147 -0
  17. src/ingestion/tenant_router.py +80 -0
  18. src/logger.py +41 -0
  19. src/orchestration/__init__.py +0 -0
  20. src/orchestration/dags/__init__.py +0 -0
  21. src/orchestration/dags/daily_batch.py +201 -0
  22. src/processing/__init__.py +0 -0
  23. src/processing/event_replayer.py +250 -0
  24. src/processing/flink_jobs/Dockerfile +55 -0
  25. src/processing/flink_jobs/__init__.py +0 -0
  26. src/processing/flink_jobs/checkpointing.py +32 -0
  27. src/processing/flink_jobs/session_aggregation.py +212 -0
  28. src/processing/flink_jobs/session_aggregator.py +199 -0
  29. src/processing/flink_jobs/stream_processor.py +316 -0
  30. src/processing/iceberg_sink.py +348 -0
  31. src/processing/local_pipeline.py +452 -0
  32. src/processing/outbox.py +273 -0
  33. src/processing/tracing.py +36 -0
  34. src/processing/transformations/__init__.py +0 -0
  35. src/processing/transformations/enrichment.py +125 -0
  36. src/quality/__init__.py +0 -0
  37. src/quality/monitors/__init__.py +0 -0
  38. src/quality/monitors/freshness_monitor.py +166 -0
  39. src/quality/monitors/metrics_collector.py +367 -0
  40. src/quality/validators/__init__.py +0 -0
  41. src/quality/validators/schema_validator.py +119 -0
  42. src/quality/validators/semantic_validator.py +202 -0
  43. src/serving/__init__.py +0 -0
  44. src/serving/api/__init__.py +0 -0
  45. src/serving/api/alert_dispatcher.py +51 -0
  46. src/serving/api/alerts/__init__.py +38 -0
  47. src/serving/api/alerts/dispatcher.py +299 -0
  48. src/serving/api/alerts/escalation.py +290 -0
  49. src/serving/api/alerts/evaluator.py +81 -0
  50. src/serving/api/alerts/history.py +115 -0
  51. src/serving/api/analytics.py +543 -0
  52. src/serving/api/auth/__init__.py +46 -0
  53. src/serving/api/auth/key_rotation.py +400 -0
  54. src/serving/api/auth/manager.py +406 -0
  55. src/serving/api/auth/middleware.py +331 -0
  56. src/serving/api/main.py +390 -0
  57. src/serving/api/middleware/logging.py +41 -0
  58. src/serving/api/middleware/tracing.py +51 -0
  59. src/serving/api/rate_limiter.py +76 -0
  60. src/serving/api/routers/__init__.py +0 -0
  61. src/serving/api/routers/admin.py +150 -0
  62. src/serving/api/routers/admin_ui.py +93 -0
  63. src/serving/api/routers/agent_query.py +639 -0
  64. src/serving/api/routers/alerts.py +134 -0
  65. src/serving/api/routers/batch.py +231 -0
  66. src/serving/api/routers/contracts.py +98 -0
  67. src/serving/api/routers/deadletter.py +337 -0
  68. src/serving/api/routers/lineage.py +218 -0
  69. src/serving/api/routers/search.py +103 -0
  70. src/serving/api/routers/slo.py +231 -0
  71. src/serving/api/routers/stream.py +141 -0
  72. src/serving/api/routers/webhooks.py +93 -0
  73. src/serving/api/security.py +83 -0
  74. src/serving/api/telemetry.py +66 -0
  75. src/serving/api/templates/admin.html +214 -0
  76. src/serving/api/versioning.py +328 -0
  77. src/serving/api/webhook_dispatcher.py +423 -0
  78. src/serving/backends/__init__.py +117 -0
  79. src/serving/backends/clickhouse_backend.py +310 -0
  80. src/serving/backends/duckdb_backend.py +268 -0
  81. src/serving/cache.py +169 -0
  82. src/serving/db_pool.py +105 -0
  83. src/serving/masking.py +122 -0
  84. src/serving/semantic_layer/__init__.py +0 -0
  85. src/serving/semantic_layer/catalog.py +177 -0
  86. src/serving/semantic_layer/contract_registry.py +258 -0
  87. src/serving/semantic_layer/entity_type_registry.py +107 -0
  88. src/serving/semantic_layer/nl_engine.py +189 -0
  89. src/serving/semantic_layer/query/__init__.py +3 -0
  90. src/serving/semantic_layer/query/contracts.py +47 -0
  91. src/serving/semantic_layer/query/engine.py +81 -0
  92. src/serving/semantic_layer/query/entity_queries.py +221 -0
  93. src/serving/semantic_layer/query/metric_queries.py +84 -0
  94. src/serving/semantic_layer/query/nl_queries.py +305 -0
  95. src/serving/semantic_layer/query/sql_builder.py +113 -0
  96. src/serving/semantic_layer/query/sql_guard.py +3 -0
  97. src/serving/semantic_layer/query_engine.py +5 -0
  98. src/serving/semantic_layer/schema_evolution.py +175 -0
  99. src/serving/semantic_layer/search_index.py +337 -0
  100. src/serving/semantic_layer/sql_guard.py +56 -0
@@ -0,0 +1,452 @@
1
+ """Local pipeline: end-to-end data flow without Kafka or Flink.
2
+
3
+ Generates → validates → enriches → writes to DuckDB in real-time.
4
+ Proves the pipeline works end-to-end, locally, with zero infrastructure.
5
+
6
+ Usage:
7
+ python -m src.processing.local_pipeline # default: 10 events/sec
8
+ python -m src.processing.local_pipeline --eps 50 # 50 events/sec
9
+ python -m src.processing.local_pipeline --burst 500 # one-shot: 500 events
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import time
16
+ from datetime import UTC, datetime
17
+ from pathlib import Path
18
+
19
+ import duckdb
20
+ import structlog
21
+ import yaml # type: ignore[import-untyped]
22
+ from pyiceberg.exceptions import NoSuchPropertyException, RESTError, ValidationError
23
+
24
+ from src.ingestion.producers.event_producer import (
25
+ generate_click,
26
+ generate_order,
27
+ generate_payment,
28
+ generate_product,
29
+ )
30
+ from src.logger import configure_logging
31
+ from src.processing.iceberg_sink import IcebergSink
32
+ from src.processing.transformations.enrichment import (
33
+ compute_payment_risk_score,
34
+ enrich_clickstream,
35
+ enrich_order,
36
+ )
37
+ from src.quality.validators.schema_validator import validate_event
38
+ from src.quality.validators.semantic_validator import validate_semantics
39
+
40
# Path of the DuckDB database file the local pipeline writes to.
# Taken from the DUCKDB_PATH environment variable when it is set.
DB_PATH = os.environ.get("DUCKDB_PATH", "agentflow_demo.duckdb")
41
+
42
+
43
def _ensure_tables(conn: duckdb.DuckDBPyConnection):
    """Create every table the local pipeline writes to, if it is missing.

    Runs one ``CREATE TABLE IF NOT EXISTS`` per table, in a fixed order, and
    finishes with an ``ALTER TABLE ... ADD COLUMN IF NOT EXISTS`` for
    ``pipeline_events.tenant_id``.

    Args:
        conn: Open DuckDB connection the statements are executed on.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS orders_v2 (
            order_id VARCHAR PRIMARY KEY,
            user_id VARCHAR,
            status VARCHAR,
            total_amount DECIMAL(10,2),
            currency VARCHAR DEFAULT 'USD',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS products_current (
            product_id VARCHAR PRIMARY KEY,
            name VARCHAR,
            category VARCHAR,
            price DECIMAL(10,2),
            in_stock BOOLEAN DEFAULT TRUE,
            stock_quantity INTEGER DEFAULT 0
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS sessions_aggregated (
            session_id VARCHAR PRIMARY KEY,
            user_id VARCHAR,
            started_at TIMESTAMP,
            ended_at TIMESTAMP,
            duration_seconds FLOAT,
            event_count INTEGER,
            unique_pages INTEGER,
            funnel_stage VARCHAR,
            is_conversion BOOLEAN DEFAULT FALSE
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS users_enriched (
            user_id VARCHAR PRIMARY KEY,
            total_orders INTEGER DEFAULT 0,
            total_spent DECIMAL(10,2) DEFAULT 0,
            first_order_at TIMESTAMP,
            last_order_at TIMESTAMP,
            preferred_category VARCHAR
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS pipeline_events (
            event_id VARCHAR,
            topic VARCHAR,
            tenant_id VARCHAR DEFAULT 'default',
            event_type VARCHAR,
            latency_ms INTEGER,
            processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # NOTE(review): presumably a migration for database files created
        # before pipeline_events had a tenant_id column - confirm.
        "ALTER TABLE pipeline_events ADD COLUMN IF NOT EXISTS tenant_id VARCHAR DEFAULT 'default'",
    )
    for statement in ddl_statements:
        conn.execute(statement)
101
+
102
+
103
+ def _event_tenant(event: dict) -> str:
104
+ source_metadata = event.get("source_metadata", {})
105
+ metadata_tenant = source_metadata.get("tenant") if isinstance(source_metadata, dict) else None
106
+ tenant = event.get("tenant") or metadata_tenant
107
+ return str(tenant) if tenant else "default"
108
+
109
+
110
def _write_dead_letter(
    conn: duckdb.DuckDBPyConnection,
    event: dict,
    event_id: str,
    tenant_id: str,
    event_type: str,
    reason: str,
    iceberg_sink: IcebergSink | None,
) -> None:
    """Record a rejected event in pipeline_events and, if set, the Iceberg dead-letter table."""
    conn.execute(
        """
        INSERT INTO pipeline_events (
            event_id, topic, tenant_id, event_type, latency_ms, processed_at
        )
        VALUES (?, 'events.deadletter', ?, ?, 0, ?)
        """,
        [event_id, tenant_id, event_type, datetime.now(UTC)],
    )
    if iceberg_sink is not None:
        iceberg_sink.write_batch(
            "dead_letter",
            [
                {
                    # Raw values (possibly None) rather than the defaulted
                    # event_id / event_type used for the DuckDB row.
                    "event_id": event.get("event_id"),
                    "event_type": event.get("event_type"),
                    "reason": reason,
                    "source_topic": "events.deadletter",
                    "received_at": datetime.now(UTC),
                    "payload": event,
                }
            ],
        )


def _process_event(
    conn: duckdb.DuckDBPyConnection,
    event: dict,
    iceberg_sink: IcebergSink | None = None,
) -> tuple[bool, str]:
    """Validate, enrich, and store a single event inside one DuckDB transaction.

    Args:
        conn: Open DuckDB connection; a transaction is opened and closed here.
        event: Event payload as a plain dict.
        iceberg_sink: Optional sink that receives a copy of each stored or
            dead-lettered event.

    Returns:
        ``(True, "ok")`` when the event was accepted, otherwise
        ``(False, reason)`` where ``reason`` starts with ``"schema: "`` or
        ``"semantic: "``. Rejected events are still committed as
        ``events.deadletter`` rows.

    Raises:
        Exception: Any unexpected error is re-raised after a best-effort
            ROLLBACK of the open transaction.
    """
    event_type = event.get("event_type", "")
    event_id = event.get("event_id", "unknown")
    tenant_id = _event_tenant(event)

    conn.execute("BEGIN")
    try:
        # Schema validation
        schema_result = validate_event(event)
        if not schema_result.is_valid:
            reason = f"schema: {schema_result.errors[0]}"
            _write_dead_letter(
                conn, event, event_id, tenant_id, event_type, reason, iceberg_sink
            )
            conn.execute("COMMIT")
            return False, reason

        # Semantic validation: only issues with severity "error" reject the event.
        semantic_result = validate_semantics(event)
        error_issues = [i for i in semantic_result.issues if i.severity == "error"]
        if error_issues:
            reason = f"semantic: {error_issues[0].rule}"
            _write_dead_letter(
                conn, event, event_id, tenant_id, event_type, reason, iceberg_sink
            )
            conn.execute("COMMIT")
            return False, reason

        # Enrichment and storage, routed by event type.
        if event_type.startswith("order."):
            event = enrich_order(event)
            _upsert_order(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("orders", [event])
        elif event_type in ("click", "page_view", "add_to_cart"):
            event = enrich_clickstream(event)
            _upsert_session(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("clickstream", [event])
        elif event_type.startswith("payment."):
            # Payments have no DuckDB table here; they only reach Iceberg.
            event = compute_payment_risk_score(event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("payments", [event])
        elif event_type.startswith("product."):
            _upsert_product(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("inventory", [event])

        # Record in pipeline_events; latency is "now" minus the event timestamp,
        # falling back to 0 when the timestamp is missing or unparseable.
        ts = event.get("timestamp", "")
        try:
            event_ts = datetime.fromisoformat(ts)
            if event_ts.tzinfo is None:
                event_ts = event_ts.replace(tzinfo=UTC)
            latency_ms = int((datetime.now(UTC) - event_ts).total_seconds() * 1000)
        except (ValueError, TypeError):
            latency_ms = 0

        conn.execute(
            """
            INSERT INTO pipeline_events (
                event_id, topic, tenant_id, event_type, latency_ms, processed_at
            )
            VALUES (?, 'events.validated', ?, ?, ?, ?)
            """,
            [event_id, tenant_id, event_type, latency_ms, datetime.now(UTC)],
        )
        conn.execute("COMMIT")
        return True, "ok"
    except Exception:
        # Roll back before the error propagates. The ROLLBACK itself can fail
        # (for example when the transaction was already closed by a failed
        # COMMIT); that secondary failure is logged instead of raised so the
        # original pipeline error is the one callers see.
        try:
            conn.execute("ROLLBACK")
        except duckdb.Error as rollback_error:
            structlog.get_logger().warning(
                "pipeline_rollback_failed",
                event_id=event_id,
                error=str(rollback_error),
            )
        raise
226
+
227
+
228
def _upsert_order(conn: duckdb.DuckDBPyConnection, event: dict):
    """Write an order event to ``orders_v2`` and rebuild its user's aggregate row.

    The order row is inserted or replaced by ``order_id``. The matching
    ``users_enriched`` row is then recomputed from all of that user's orders
    whose status is not ``'cancelled'``; ``preferred_category`` is written as
    NULL.

    Args:
        conn: Open DuckDB connection.
        event: Order event; must contain ``order_id``, ``user_id``, ``status``,
            ``total_amount`` and an ISO-format ``timestamp``.
    """
    order_row = [
        event["order_id"],
        event["user_id"],
        event["status"],
        float(event["total_amount"]),
        event.get("currency", "USD"),
        datetime.fromisoformat(event["timestamp"]),
    ]
    order_sql = """
        INSERT OR REPLACE INTO orders_v2
            (order_id, user_id, status, total_amount, currency, created_at)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    conn.execute(order_sql, order_row)

    # Update user aggregate
    aggregate_sql = """
        INSERT OR REPLACE INTO users_enriched
            (user_id, total_orders, total_spent,
             first_order_at, last_order_at, preferred_category)
        SELECT
            user_id,
            COUNT(*) as total_orders,
            SUM(total_amount) as total_spent,
            MIN(created_at),
            MAX(created_at),
            NULL
        FROM orders_v2
        WHERE user_id = ? AND status != 'cancelled'
        GROUP BY user_id
    """
    conn.execute(aggregate_sql, [event["user_id"]])
263
+
264
+
265
def _upsert_product(conn: duckdb.DuckDBPyConnection, event: dict):
    """Insert or replace the ``products_current`` row described by a product event.

    Args:
        conn: Open DuckDB connection.
        event: Product event; must contain ``product_id``, ``name``,
            ``category``, ``price``, ``in_stock`` and ``stock_quantity``.
    """
    # Values are read in column order so a missing key fails on the first
    # absent column.
    product_row = [event[column] for column in ("product_id", "name", "category")]
    product_row.append(float(event["price"]))
    product_row.extend(event[column] for column in ("in_stock", "stock_quantity"))

    upsert_sql = """
        INSERT OR REPLACE INTO products_current
            (product_id, name, category, price, in_stock, stock_quantity)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    conn.execute(upsert_sql, product_row)
281
+
282
+
283
def _upsert_session(conn: duckdb.DuckDBPyConnection, event: dict):
    """Create or advance the ``sessions_aggregated`` row for a clickstream event.

    A new session row starts with one event and the event's page category as
    its funnel stage. For an existing row the event count is incremented and
    the funnel stage moves to the event's page category only when that
    category ranks strictly higher than the stored stage. ``is_conversion``
    is true exactly when the resulting stage is ``"checkout"``.

    Args:
        conn: Open DuckDB connection.
        event: Clickstream event; ``session_id`` defaults to ``"unknown"`` and
            the page category is read from ``event["_derived"]["page_category"]``
            (default ``"other"``).
    """
    session_key = event.get("session_id", "unknown")
    page_category = event.get("_derived", {}).get("page_category", "other")

    # Rank of each page category in the funnel; unknown names rank 0.
    funnel_rank = {
        "checkout": 4,
        "cart": 3,
        "product_detail": 2,
        "search": 1,
        "home": 0,
        "other": 0,
    }
    incoming_rank = funnel_rank.get(page_category, 0)

    row = conn.execute(
        "SELECT funnel_stage, event_count FROM sessions_aggregated WHERE session_id = ?",
        [session_key],
    ).fetchone()

    if not row:
        # First event seen for this session.
        insert_sql = """
            INSERT INTO sessions_aggregated
                (session_id, user_id, started_at, ended_at,
                 duration_seconds, event_count, unique_pages,
                 funnel_stage, is_conversion)
            VALUES (?, ?, ?, NULL, 0, 1, 1, ?, ?)
        """
        conn.execute(
            insert_sql,
            [
                session_key,
                event.get("user_id"),
                datetime.now(UTC),
                page_category,
                page_category == "checkout",
            ],
        )
        return

    stored_stage, stored_count = row[0], row[1]
    current_stage = stored_stage or "bounce"
    seen_events = stored_count or 0

    if incoming_rank > funnel_rank.get(current_stage, 0):
        next_stage = page_category
    else:
        next_stage = current_stage

    update_sql = """
        UPDATE sessions_aggregated
        SET event_count = ?,
            funnel_stage = ?,
            is_conversion = ?
        WHERE session_id = ?
    """
    conn.execute(
        update_sql,
        [seen_events + 1, next_stage, next_stage == "checkout", session_key],
    )
341
+
342
+
343
def _generate_random_event() -> tuple[str, dict]:
    """Generate one random event using the existing producers.

    The producer is picked with fixed probabilities: orders 15%, payments 10%,
    clicks 70%, products 5%.

    Returns:
        ``(topic, event)`` where ``event`` is the produced model round-tripped
        through its JSON dump, i.e. a plain dict.
    """
    import random

    producers = (generate_order, generate_payment, generate_click, generate_product)
    # Cumulative thresholds matching the producers above. Because the last
    # one is 1.0 and the random draw is always below 1.0, a producer is
    # always selected and no fallback branch is needed.
    cumulative_weights = (0.15, 0.25, 0.95, 1.00)

    (produce,) = random.choices(producers, cum_weights=cumulative_weights)
    topic, event = produce()
    return topic, json.loads(event.model_dump_json())
360
+
361
+
362
def _open_iceberg_sink(logger) -> IcebergSink | None:
    """Build the optional Iceberg sink, or return None when unconfigured or unavailable."""
    iceberg_config = os.getenv("AGENTFLOW_ICEBERG_CONFIG")
    if not iceberg_config:
        # Fall back to the conventional config file, relative to the CWD.
        default_iceberg_config = Path("config/iceberg.yaml")
        if default_iceberg_config.exists():
            iceberg_config = str(default_iceberg_config)
    if not iceberg_config:
        return None

    try:
        iceberg_sink = IcebergSink(config_path=iceberg_config)
        iceberg_sink.create_tables_if_not_exist()
    except (
        OSError,
        KeyError,
        ValueError,
        yaml.YAMLError,
        NoSuchPropertyException,
        RESTError,
        ValidationError,
    ) as exc:
        # Iceberg is optional: log and continue with DuckDB only.
        logger.warning(
            "iceberg_sink_unavailable",
            config=iceberg_config,
            error=str(exc),
            exc_info=True,
        )
        return None
    return iceberg_sink


def run(events_per_second: int = 10, burst: int = 0):
    """Run the local pipeline.

    Generates random events and pushes each through ``_process_event`` until
    interrupted (continuous mode) or until ``burst`` events were processed.

    Args:
        events_per_second: Target rate in continuous mode; the loop sleeps
            ``1 / events_per_second`` seconds after each event. Ignored in
            burst mode.
        burst: When positive, process exactly this many events without
            throttling and stop. Zero or negative selects continuous mode.

    Raises:
        ValueError: If continuous mode is requested with a non-positive
            ``events_per_second``.
    """
    # Any non-positive burst means "run until interrupted". Treating negative
    # values like 0 keeps the throttle on instead of spinning unthrottled.
    continuous = burst <= 0
    if continuous and events_per_second <= 0:
        raise ValueError("events_per_second must be a positive integer")
    pause_seconds = 1.0 / events_per_second if continuous else 0.0

    configure_logging()
    logger = structlog.get_logger()
    conn = duckdb.connect(DB_PATH)
    _ensure_tables(conn)
    iceberg_sink = _open_iceberg_sink(logger)

    logger.info(
        "local_pipeline_started",
        db=DB_PATH,
        eps=events_per_second,
        burst=burst,
    )

    total = 0
    valid = 0
    invalid = 0
    start_time = time.monotonic()

    try:
        while continuous or total < burst:
            _, event = _generate_random_event()
            success, _reason = _process_event(conn, event, iceberg_sink=iceberg_sink)

            total += 1
            if success:
                valid += 1
            else:
                invalid += 1

            if total % 100 == 0:
                elapsed = time.monotonic() - start_time
                logger.info(
                    "pipeline_progress",
                    total=total,
                    valid=valid,
                    invalid=invalid,
                    # Same floor as the final summary: avoids dividing by zero
                    # when the clock has not advanced yet.
                    rate=f"{total / max(elapsed, 0.001):.0f} evt/s",
                )

            if continuous:
                time.sleep(pause_seconds)

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop continuous mode.
        pass
    finally:
        elapsed = time.monotonic() - start_time
        conn.close()
        logger.info(
            "local_pipeline_stopped",
            total=total,
            valid=valid,
            invalid=invalid,
            duration_s=round(elapsed, 1),
            avg_rate=f"{total / max(elapsed, 0.001):.0f} evt/s",
        )
445
+
446
+
447
if __name__ == "__main__":
    # Command-line entry point: both options are integers forwarded to run().
    cli = argparse.ArgumentParser(description="AgentFlow local pipeline")
    for flag, default_value, help_text in (
        ("--eps", 10, "Events per second"),
        ("--burst", 0, "One-shot: N events then stop"),
    ):
        cli.add_argument(flag, type=int, default=default_value, help=help_text)
    options = cli.parse_args()
    run(events_per_second=options.eps, burst=options.burst)