agentflow-runtime 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentflow_runtime-1.1.0.dist-info/METADATA +55 -0
- agentflow_runtime-1.1.0.dist-info/RECORD +100 -0
- agentflow_runtime-1.1.0.dist-info/WHEEL +4 -0
- agentflow_runtime-1.1.0.dist-info/licenses/LICENSE +21 -0
- src/__init__.py +0 -0
- src/constants.py +3 -0
- src/ingestion/__init__.py +0 -0
- src/ingestion/cdc/__init__.py +5 -0
- src/ingestion/cdc/normalizer.py +186 -0
- src/ingestion/connectors/__init__.py +0 -0
- src/ingestion/connectors/mysql_cdc.py +63 -0
- src/ingestion/connectors/postgres_cdc.py +68 -0
- src/ingestion/producers/__init__.py +0 -0
- src/ingestion/producers/event_producer.py +237 -0
- src/ingestion/schemas/__init__.py +0 -0
- src/ingestion/schemas/events.py +147 -0
- src/ingestion/tenant_router.py +80 -0
- src/logger.py +41 -0
- src/orchestration/__init__.py +0 -0
- src/orchestration/dags/__init__.py +0 -0
- src/orchestration/dags/daily_batch.py +201 -0
- src/processing/__init__.py +0 -0
- src/processing/event_replayer.py +250 -0
- src/processing/flink_jobs/Dockerfile +55 -0
- src/processing/flink_jobs/__init__.py +0 -0
- src/processing/flink_jobs/checkpointing.py +32 -0
- src/processing/flink_jobs/session_aggregation.py +212 -0
- src/processing/flink_jobs/session_aggregator.py +199 -0
- src/processing/flink_jobs/stream_processor.py +316 -0
- src/processing/iceberg_sink.py +348 -0
- src/processing/local_pipeline.py +452 -0
- src/processing/outbox.py +273 -0
- src/processing/tracing.py +36 -0
- src/processing/transformations/__init__.py +0 -0
- src/processing/transformations/enrichment.py +125 -0
- src/quality/__init__.py +0 -0
- src/quality/monitors/__init__.py +0 -0
- src/quality/monitors/freshness_monitor.py +166 -0
- src/quality/monitors/metrics_collector.py +367 -0
- src/quality/validators/__init__.py +0 -0
- src/quality/validators/schema_validator.py +119 -0
- src/quality/validators/semantic_validator.py +202 -0
- src/serving/__init__.py +0 -0
- src/serving/api/__init__.py +0 -0
- src/serving/api/alert_dispatcher.py +51 -0
- src/serving/api/alerts/__init__.py +38 -0
- src/serving/api/alerts/dispatcher.py +299 -0
- src/serving/api/alerts/escalation.py +290 -0
- src/serving/api/alerts/evaluator.py +81 -0
- src/serving/api/alerts/history.py +115 -0
- src/serving/api/analytics.py +543 -0
- src/serving/api/auth/__init__.py +46 -0
- src/serving/api/auth/key_rotation.py +400 -0
- src/serving/api/auth/manager.py +406 -0
- src/serving/api/auth/middleware.py +331 -0
- src/serving/api/main.py +390 -0
- src/serving/api/middleware/logging.py +41 -0
- src/serving/api/middleware/tracing.py +51 -0
- src/serving/api/rate_limiter.py +76 -0
- src/serving/api/routers/__init__.py +0 -0
- src/serving/api/routers/admin.py +150 -0
- src/serving/api/routers/admin_ui.py +93 -0
- src/serving/api/routers/agent_query.py +639 -0
- src/serving/api/routers/alerts.py +134 -0
- src/serving/api/routers/batch.py +231 -0
- src/serving/api/routers/contracts.py +98 -0
- src/serving/api/routers/deadletter.py +337 -0
- src/serving/api/routers/lineage.py +218 -0
- src/serving/api/routers/search.py +103 -0
- src/serving/api/routers/slo.py +231 -0
- src/serving/api/routers/stream.py +141 -0
- src/serving/api/routers/webhooks.py +93 -0
- src/serving/api/security.py +83 -0
- src/serving/api/telemetry.py +66 -0
- src/serving/api/templates/admin.html +214 -0
- src/serving/api/versioning.py +328 -0
- src/serving/api/webhook_dispatcher.py +423 -0
- src/serving/backends/__init__.py +117 -0
- src/serving/backends/clickhouse_backend.py +310 -0
- src/serving/backends/duckdb_backend.py +268 -0
- src/serving/cache.py +169 -0
- src/serving/db_pool.py +105 -0
- src/serving/masking.py +122 -0
- src/serving/semantic_layer/__init__.py +0 -0
- src/serving/semantic_layer/catalog.py +177 -0
- src/serving/semantic_layer/contract_registry.py +258 -0
- src/serving/semantic_layer/entity_type_registry.py +107 -0
- src/serving/semantic_layer/nl_engine.py +189 -0
- src/serving/semantic_layer/query/__init__.py +3 -0
- src/serving/semantic_layer/query/contracts.py +47 -0
- src/serving/semantic_layer/query/engine.py +81 -0
- src/serving/semantic_layer/query/entity_queries.py +221 -0
- src/serving/semantic_layer/query/metric_queries.py +84 -0
- src/serving/semantic_layer/query/nl_queries.py +305 -0
- src/serving/semantic_layer/query/sql_builder.py +113 -0
- src/serving/semantic_layer/query/sql_guard.py +3 -0
- src/serving/semantic_layer/query_engine.py +5 -0
- src/serving/semantic_layer/schema_evolution.py +175 -0
- src/serving/semantic_layer/search_index.py +337 -0
- src/serving/semantic_layer/sql_guard.py +56 -0
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
"""Local pipeline: end-to-end data flow without Kafka or Flink.
|
|
2
|
+
|
|
3
|
+
Generates → validates → enriches → writes to DuckDB in real-time.
|
|
4
|
+
Proves the pipeline works end-to-end, locally, with zero infrastructure.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python -m src.processing.local_pipeline # default: 10 events/sec
|
|
8
|
+
python -m src.processing.local_pipeline --eps 50 # 50 events/sec
|
|
9
|
+
python -m src.processing.local_pipeline --burst 500 # one-shot: 500 events
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import time
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
import duckdb
|
|
20
|
+
import structlog
|
|
21
|
+
import yaml # type: ignore[import-untyped]
|
|
22
|
+
from pyiceberg.exceptions import NoSuchPropertyException, RESTError, ValidationError
|
|
23
|
+
|
|
24
|
+
from src.ingestion.producers.event_producer import (
|
|
25
|
+
generate_click,
|
|
26
|
+
generate_order,
|
|
27
|
+
generate_payment,
|
|
28
|
+
generate_product,
|
|
29
|
+
)
|
|
30
|
+
from src.logger import configure_logging
|
|
31
|
+
from src.processing.iceberg_sink import IcebergSink
|
|
32
|
+
from src.processing.transformations.enrichment import (
|
|
33
|
+
compute_payment_risk_score,
|
|
34
|
+
enrich_clickstream,
|
|
35
|
+
enrich_order,
|
|
36
|
+
)
|
|
37
|
+
from src.quality.validators.schema_validator import validate_event
|
|
38
|
+
from src.quality.validators.semantic_validator import validate_semantics
|
|
39
|
+
|
|
40
|
+
# Path of the DuckDB database file; the DUCKDB_PATH environment variable overrides the default.
DB_PATH = os.environ.get("DUCKDB_PATH", "agentflow_demo.duckdb")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _ensure_tables(conn: duckdb.DuckDBPyConnection):
    """Create every table the local pipeline writes to, if it is missing.

    Runs one ``CREATE TABLE IF NOT EXISTS`` per table (``orders_v2``,
    ``products_current``, ``sessions_aggregated``, ``users_enriched``,
    ``pipeline_events``) followed by an ``ALTER TABLE ... ADD COLUMN IF NOT
    EXISTS`` for ``pipeline_events.tenant_id``.

    Args:
        conn: Open DuckDB connection the statements are executed on.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS orders_v2 (
            order_id VARCHAR PRIMARY KEY,
            user_id VARCHAR,
            status VARCHAR,
            total_amount DECIMAL(10,2),
            currency VARCHAR DEFAULT 'USD',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """,
        """
        CREATE TABLE IF NOT EXISTS products_current (
            product_id VARCHAR PRIMARY KEY,
            name VARCHAR,
            category VARCHAR,
            price DECIMAL(10,2),
            in_stock BOOLEAN DEFAULT TRUE,
            stock_quantity INTEGER DEFAULT 0
        )
    """,
        """
        CREATE TABLE IF NOT EXISTS sessions_aggregated (
            session_id VARCHAR PRIMARY KEY,
            user_id VARCHAR,
            started_at TIMESTAMP,
            ended_at TIMESTAMP,
            duration_seconds FLOAT,
            event_count INTEGER,
            unique_pages INTEGER,
            funnel_stage VARCHAR,
            is_conversion BOOLEAN DEFAULT FALSE
        )
    """,
        """
        CREATE TABLE IF NOT EXISTS users_enriched (
            user_id VARCHAR PRIMARY KEY,
            total_orders INTEGER DEFAULT 0,
            total_spent DECIMAL(10,2) DEFAULT 0,
            first_order_at TIMESTAMP,
            last_order_at TIMESTAMP,
            preferred_category VARCHAR
        )
    """,
        """
        CREATE TABLE IF NOT EXISTS pipeline_events (
            event_id VARCHAR,
            topic VARCHAR,
            tenant_id VARCHAR DEFAULT 'default',
            event_type VARCHAR,
            latency_ms INTEGER,
            processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """,
        # NOTE(review): presumably a migration for databases whose pipeline_events
        # table predates the tenant_id column - confirm against release history.
        "ALTER TABLE pipeline_events ADD COLUMN IF NOT EXISTS tenant_id VARCHAR DEFAULT 'default'",
    )
    # Order matters only for the final ALTER, which needs pipeline_events to exist.
    for statement in ddl_statements:
        conn.execute(statement)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _event_tenant(event: dict) -> str:
    """Resolve the tenant identifier carried by an event.

    A truthy top-level ``tenant`` value wins. Otherwise the ``tenant`` entry
    of ``source_metadata`` is used, provided ``source_metadata`` is a dict.
    When neither yields a truthy value the tenant is ``"default"``.

    Args:
        event: Event payload as a plain dict.

    Returns:
        The tenant converted with ``str()``, or ``"default"``.
    """
    direct = event.get("tenant")
    if direct:
        return str(direct)

    metadata = event.get("source_metadata", {})
    # Anything that is not a dict (None, string, list, ...) carries no tenant.
    nested = metadata.get("tenant") if isinstance(metadata, dict) else None
    if nested:
        return str(nested)

    return "default"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _process_event(
    conn: duckdb.DuckDBPyConnection,
    event: dict,
    iceberg_sink: IcebergSink | None = None,
) -> tuple[bool, str]:
    """Validate, enrich, and store a single event inside one DuckDB transaction.

    The event passes through schema validation and then semantic validation.
    A schema failure, or any semantic issue whose severity is ``"error"``,
    records the event under the ``events.deadletter`` topic and commits.
    Otherwise the event is routed by ``event_type``:

    * ``order.*``: enriched and upserted into the order/user tables.
    * ``click`` / ``page_view`` / ``add_to_cart``: enriched and folded into
      the session aggregate.
    * ``payment.*``: risk-scored; no DuckDB table is written for it here.
    * ``product.*``: upserted into the product table.

    Every accepted event, including types matching none of the branches, is
    recorded in ``pipeline_events`` under ``events.validated``. When
    ``iceberg_sink`` is given, the event is also written to it.

    Args:
        conn: Open DuckDB connection; this function issues BEGIN/COMMIT/ROLLBACK.
        event: Event payload as a plain dict.
        iceberg_sink: Optional sink that additionally receives the event.

    Returns:
        ``(True, "ok")`` for an accepted event, otherwise ``(False, reason)``
        where ``reason`` starts with ``"schema: "`` or ``"semantic: "``.

    Raises:
        Exception: Whatever processing raised, re-raised after a ROLLBACK attempt.
    """
    event_type = event.get("event_type", "")
    event_id = event.get("event_id", "unknown")
    tenant_id = _event_tenant(event)

    conn.execute("BEGIN")
    try:
        # Schema validation
        schema_result = validate_event(event)
        if not schema_result.is_valid:
            # Build the reason before writing anything so a malformed result
            # cannot fail after the transaction has been committed.
            reason = f"schema: {schema_result.errors[0]}"
            _record_dead_letter(
                conn, event, event_id, tenant_id, event_type, reason, iceberg_sink
            )
            conn.execute("COMMIT")
            return False, reason

        # Semantic validation: only "error" severity rejects the event.
        semantic_result = validate_semantics(event)
        error_issues = [i for i in semantic_result.issues if i.severity == "error"]
        if error_issues:
            reason = f"semantic: {error_issues[0].rule}"
            _record_dead_letter(
                conn, event, event_id, tenant_id, event_type, reason, iceberg_sink
            )
            conn.execute("COMMIT")
            return False, reason

        # Enrichment
        if event_type.startswith("order."):
            event = enrich_order(event)
            _upsert_order(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("orders", [event])
        elif event_type in ("click", "page_view", "add_to_cart"):
            event = enrich_clickstream(event)
            _upsert_session(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("clickstream", [event])
        elif event_type.startswith("payment."):
            event = compute_payment_risk_score(event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("payments", [event])
        elif event_type.startswith("product."):
            _upsert_product(conn, event)
            if iceberg_sink is not None:
                iceberg_sink.write_batch("inventory", [event])

        # Record in pipeline_events
        ts = event.get("timestamp", "")
        try:
            event_ts = datetime.fromisoformat(ts)
            if event_ts.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                event_ts = event_ts.replace(tzinfo=UTC)
            latency_ms = int((datetime.now(UTC) - event_ts).total_seconds() * 1000)
        except (ValueError, TypeError):
            # Missing or unparsable timestamp: latency is recorded as 0.
            latency_ms = 0

        conn.execute(
            """
            INSERT INTO pipeline_events (
                event_id, topic, tenant_id, event_type, latency_ms, processed_at
            )
            VALUES (?, 'events.validated', ?, ?, ?, ?)
            """,
            [event_id, tenant_id, event_type, latency_ms, datetime.now(UTC)],
        )
        conn.execute("COMMIT")
        return True, "ok"
    except Exception:  # nosec B110 - rollback must preserve the original pipeline failure
        # Roll back before the error propagates. A failing ROLLBACK (for
        # example when no transaction is active any more) is logged rather
        # than raised, so the caller always sees the original failure.
        try:
            conn.execute("ROLLBACK")
        except duckdb.Error as rollback_error:
            structlog.get_logger().warning(
                "pipeline_rollback_failed",
                event_id=event_id,
                error=str(rollback_error),
            )
        raise


def _record_dead_letter(
    conn: duckdb.DuckDBPyConnection,
    event: dict,
    event_id: str,
    tenant_id: str,
    event_type: str,
    reason: str,
    iceberg_sink: IcebergSink | None,
) -> None:
    """Record a rejected event in ``pipeline_events`` and, if configured, in Iceberg.

    The caller owns the surrounding transaction; nothing is committed here.
    """
    conn.execute(
        """
        INSERT INTO pipeline_events (
            event_id, topic, tenant_id, event_type, latency_ms, processed_at
        )
        VALUES (?, 'events.deadletter', ?, ?, 0, ?)
        """,
        [event_id, tenant_id, event_type, datetime.now(UTC)],
    )
    if iceberg_sink is not None:
        iceberg_sink.write_batch(
            "dead_letter",
            [
                {
                    # The raw values are forwarded here (None when absent),
                    # not the "unknown"/"" defaults used for the DuckDB row.
                    "event_id": event.get("event_id"),
                    "event_type": event.get("event_type"),
                    "reason": reason,
                    "source_topic": "events.deadletter",
                    "received_at": datetime.now(UTC),
                    "payload": event,
                }
            ],
        )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _upsert_order(conn: duckdb.DuckDBPyConnection, event: dict):
    """Upsert an order row and rebuild the owning user's aggregate row.

    The order is written to ``orders_v2`` with ``INSERT OR REPLACE``. The
    user's ``users_enriched`` row is then recomputed from all of that user's
    orders whose status is not ``'cancelled'``. ``preferred_category`` is
    always written as NULL by this function.

    Args:
        conn: Open DuckDB connection; the caller manages the transaction.
        event: Order event. ``order_id``, ``user_id``, ``status``,
            ``total_amount`` and ``timestamp`` are required; ``currency``
            defaults to ``"USD"``.

    Raises:
        KeyError: If a required field is missing.
        ValueError: If ``total_amount`` or ``timestamp`` cannot be parsed.
    """
    conn.execute(
        """
        INSERT OR REPLACE INTO orders_v2
        (order_id, user_id, status, total_amount, currency, created_at)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        [
            event["order_id"],
            event["user_id"],
            event["status"],
            float(event["total_amount"]),
            event.get("currency", "USD"),
            datetime.fromisoformat(event["timestamp"]),
        ],
    )
    user_id = event["user_id"]

    # Update user aggregate
    conn.execute(
        """
        INSERT OR REPLACE INTO users_enriched
        (user_id, total_orders, total_spent,
         first_order_at, last_order_at, preferred_category)
        SELECT
            user_id,
            COUNT(*) as total_orders,
            SUM(total_amount) as total_spent,
            MIN(created_at),
            MAX(created_at),
            NULL
        FROM orders_v2
        WHERE user_id = ? AND status != 'cancelled'
        GROUP BY user_id
        """,
        [user_id],
    )
    # When the user has no non-cancelled order left, the SELECT above yields
    # no row and INSERT OR REPLACE leaves the previous aggregate in place with
    # stale totals. Remove that row so the table only holds live aggregates.
    conn.execute(
        """
        DELETE FROM users_enriched
        WHERE users_enriched.user_id = ?
          AND NOT EXISTS (
              SELECT 1
              FROM orders_v2
              WHERE orders_v2.user_id = ? AND orders_v2.status != 'cancelled'
          )
        """,
        [user_id, user_id],
    )
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _upsert_product(conn: duckdb.DuckDBPyConnection, event: dict):
    """Insert or replace the ``products_current`` row described by a product event.

    Args:
        conn: Open DuckDB connection the statement is executed on.
        event: Product event; ``product_id``, ``name``, ``category``,
            ``price``, ``in_stock`` and ``stock_quantity`` are all required.

    Raises:
        KeyError: If a required field is missing.
        ValueError: If ``price`` cannot be converted with ``float()``.
    """
    # Fields are read in column order, so a missing key surfaces in that order.
    product_id, name, category = (
        event[key] for key in ("product_id", "name", "category")
    )
    price = float(event["price"])
    in_stock = event["in_stock"]
    stock_quantity = event["stock_quantity"]

    statement = """
        INSERT OR REPLACE INTO products_current
        (product_id, name, category, price, in_stock, stock_quantity)
        VALUES (?, ?, ?, ?, ?, ?)
        """
    conn.execute(
        statement,
        [product_id, name, category, price, in_stock, stock_quantity],
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _upsert_session(conn: duckdb.DuckDBPyConnection, event: dict):
    """Fold a clickstream event into its ``sessions_aggregated`` row.

    The page category is read from ``event["_derived"]["page_category"]``
    (default ``"other"``) and ranked as a funnel stage. A session seen for the
    first time gets a new row with ``event_count`` 1. An existing session has
    its ``event_count`` incremented and its ``funnel_stage`` advanced only
    when the new category ranks strictly higher than the stored one.
    ``is_conversion`` is true exactly when the resulting stage is
    ``"checkout"``.

    Args:
        conn: Open DuckDB connection; the caller manages the transaction.
        event: Enriched clickstream event; ``session_id`` defaults to
            ``"unknown"``.
    """
    # Higher rank means deeper in the funnel; unknown stages rank 0.
    funnel_rank = {
        "checkout": 4,
        "cart": 3,
        "product_detail": 2,
        "search": 1,
        "home": 0,
        "other": 0,
    }

    sid = event.get("session_id", "unknown")
    category = event.get("_derived", {}).get("page_category", "other")
    category_rank = funnel_rank.get(category, 0)

    row = conn.execute(
        "SELECT funnel_stage, event_count FROM sessions_aggregated WHERE session_id = ?",
        [sid],
    ).fetchone()

    if not row:
        # First event of the session: started_at is the processing time, and
        # ended_at / duration_seconds / unique_pages get their initial values.
        conn.execute(
            """
            INSERT INTO sessions_aggregated
            (session_id, user_id, started_at, ended_at,
             duration_seconds, event_count, unique_pages,
             funnel_stage, is_conversion)
            VALUES (?, ?, ?, NULL, 0, 1, 1, ?, ?)
            """,
            [
                sid,
                event.get("user_id"),
                datetime.now(UTC),
                category,
                category == "checkout",
            ],
        )
        return

    stored_stage, stored_count = row[0], row[1]
    previous_stage = stored_stage or "bounce"
    previous_count = stored_count or 0

    if category_rank > funnel_rank.get(previous_stage, 0):
        stage = category
    else:
        stage = previous_stage

    conn.execute(
        """
        UPDATE sessions_aggregated
        SET event_count = ?,
            funnel_stage = ?,
            is_conversion = ?
        WHERE session_id = ?
        """,
        [previous_count + 1, stage, stage == "checkout", sid],
    )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _generate_random_event() -> tuple[str, dict]:
    """Produce one random event from the existing producer functions.

    A single uniform draw selects the producer through cumulative cut-offs:
    orders below 0.15, payments below 0.25, clicks below 0.95, and products
    otherwise.

    Returns:
        ``(topic, event)`` where ``event`` is the producer's model
        round-tripped through JSON into plain Python types.
    """
    import random

    # (upper bound of the draw, producer); bounds are cumulative.
    cumulative = (
        (0.15, generate_order),
        (0.25, generate_payment),
        (0.95, generate_click),
        (1.00, generate_product),
    )
    draw = random.random()
    # generate_product doubles as the fallback if no bound matches.
    producer = next(
        (make for ceiling, make in cumulative if draw < ceiling),
        generate_product,
    )
    topic, model = producer()
    return topic, json.loads(model.model_dump_json())
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def run(events_per_second: int = 10, burst: int = 0):
    """Run the local pipeline: generate, process, and store events in DuckDB.

    Opens the DuckDB database at ``DB_PATH``, ensures the tables exist, sets
    up the optional Iceberg sink, and then processes randomly generated
    events. Progress is logged every 100 events and a summary is logged on
    exit. ``KeyboardInterrupt`` stops the loop cleanly.

    Args:
        events_per_second: Target rate in continuous mode; the loop sleeps
            ``1 / events_per_second`` seconds after each event. Must be
            positive in continuous mode; ignored in burst mode.
        burst: When positive, process exactly this many events without
            throttling and stop. Zero or a negative value selects continuous
            mode, which runs until interrupted.

    Raises:
        ValueError: If continuous mode is selected and ``events_per_second``
            is not positive.
    """
    continuous = burst <= 0
    if continuous and events_per_second <= 0:
        # Fail up front instead of with ZeroDivisionError / a sleep() error
        # after the first event has already been processed.
        raise ValueError("events_per_second must be positive in continuous mode")

    configure_logging()
    logger = structlog.get_logger()
    conn = duckdb.connect(DB_PATH)
    try:
        # Setup runs inside the try so the connection is closed even when
        # table creation or sink initialisation fails.
        _ensure_tables(conn)
        iceberg_sink = _open_iceberg_sink(logger)

        logger.info(
            "local_pipeline_started",
            db=DB_PATH,
            eps=events_per_second,
            burst=burst,
        )

        total = 0
        valid = 0
        invalid = 0
        start_time = time.monotonic()

        try:
            count = burst if burst > 0 else float("inf")
            while total < count:
                _, event = _generate_random_event()
                success, _reason = _process_event(conn, event, iceberg_sink=iceberg_sink)

                total += 1
                if success:
                    valid += 1
                else:
                    invalid += 1

                if total % 100 == 0:
                    elapsed = time.monotonic() - start_time
                    logger.info(
                        "pipeline_progress",
                        total=total,
                        valid=valid,
                        invalid=invalid,
                        # Same guard as the final summary: a coarse clock can
                        # report zero elapsed time.
                        rate=f"{total / max(elapsed, 0.001):.0f} evt/s",
                    )

                if continuous:
                    time.sleep(1.0 / events_per_second)

        except KeyboardInterrupt:
            # Ctrl+C is the normal way to stop continuous mode.
            pass
        finally:
            elapsed = time.monotonic() - start_time
            logger.info(
                "local_pipeline_stopped",
                total=total,
                valid=valid,
                invalid=invalid,
                duration_s=round(elapsed, 1),
                avg_rate=f"{total / max(elapsed, 0.001):.0f} evt/s",
            )
    finally:
        conn.close()


def _open_iceberg_sink(logger) -> IcebergSink | None:
    """Build the optional Iceberg sink, or return None when unconfigured or unavailable.

    The config path comes from ``AGENTFLOW_ICEBERG_CONFIG``; without it,
    ``config/iceberg.yaml`` is used if that file exists. Failures while
    constructing the sink or creating its tables are logged as a warning and
    leave the sink disabled.
    """
    config_path = os.getenv("AGENTFLOW_ICEBERG_CONFIG")
    if not config_path:
        default_config = Path("config/iceberg.yaml")
        if not default_config.exists():
            return None
        config_path = str(default_config)

    try:
        sink = IcebergSink(config_path=config_path)
        sink.create_tables_if_not_exist()
    except (
        OSError,
        KeyError,
        ValueError,
        yaml.YAMLError,
        NoSuchPropertyException,
        RESTError,
        ValidationError,
    ) as exc:
        logger.warning(
            "iceberg_sink_unavailable",
            config=config_path,
            error=str(exc),
            exc_info=True,
        )
        return None
    return sink
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
if __name__ == "__main__":
    # Command-line entry point: --eps sets the rate, --burst selects a one-shot run.
    cli = argparse.ArgumentParser(description="AgentFlow local pipeline")
    for flag, default, help_text in (
        ("--eps", 10, "Events per second"),
        ("--burst", 0, "One-shot: N events then stop"),
    ):
        cli.add_argument(flag, type=int, default=default, help=help_text)
    options = cli.parse_args()
    run(events_per_second=options.eps, burst=options.burst)
|