agentflow-runtime 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. agentflow_runtime-1.1.0.dist-info/METADATA +55 -0
  2. agentflow_runtime-1.1.0.dist-info/RECORD +100 -0
  3. agentflow_runtime-1.1.0.dist-info/WHEEL +4 -0
  4. agentflow_runtime-1.1.0.dist-info/licenses/LICENSE +21 -0
  5. src/__init__.py +0 -0
  6. src/constants.py +3 -0
  7. src/ingestion/__init__.py +0 -0
  8. src/ingestion/cdc/__init__.py +5 -0
  9. src/ingestion/cdc/normalizer.py +186 -0
  10. src/ingestion/connectors/__init__.py +0 -0
  11. src/ingestion/connectors/mysql_cdc.py +63 -0
  12. src/ingestion/connectors/postgres_cdc.py +68 -0
  13. src/ingestion/producers/__init__.py +0 -0
  14. src/ingestion/producers/event_producer.py +237 -0
  15. src/ingestion/schemas/__init__.py +0 -0
  16. src/ingestion/schemas/events.py +147 -0
  17. src/ingestion/tenant_router.py +80 -0
  18. src/logger.py +41 -0
  19. src/orchestration/__init__.py +0 -0
  20. src/orchestration/dags/__init__.py +0 -0
  21. src/orchestration/dags/daily_batch.py +201 -0
  22. src/processing/__init__.py +0 -0
  23. src/processing/event_replayer.py +250 -0
  24. src/processing/flink_jobs/Dockerfile +55 -0
  25. src/processing/flink_jobs/__init__.py +0 -0
  26. src/processing/flink_jobs/checkpointing.py +32 -0
  27. src/processing/flink_jobs/session_aggregation.py +212 -0
  28. src/processing/flink_jobs/session_aggregator.py +199 -0
  29. src/processing/flink_jobs/stream_processor.py +316 -0
  30. src/processing/iceberg_sink.py +348 -0
  31. src/processing/local_pipeline.py +452 -0
  32. src/processing/outbox.py +273 -0
  33. src/processing/tracing.py +36 -0
  34. src/processing/transformations/__init__.py +0 -0
  35. src/processing/transformations/enrichment.py +125 -0
  36. src/quality/__init__.py +0 -0
  37. src/quality/monitors/__init__.py +0 -0
  38. src/quality/monitors/freshness_monitor.py +166 -0
  39. src/quality/monitors/metrics_collector.py +367 -0
  40. src/quality/validators/__init__.py +0 -0
  41. src/quality/validators/schema_validator.py +119 -0
  42. src/quality/validators/semantic_validator.py +202 -0
  43. src/serving/__init__.py +0 -0
  44. src/serving/api/__init__.py +0 -0
  45. src/serving/api/alert_dispatcher.py +51 -0
  46. src/serving/api/alerts/__init__.py +38 -0
  47. src/serving/api/alerts/dispatcher.py +299 -0
  48. src/serving/api/alerts/escalation.py +290 -0
  49. src/serving/api/alerts/evaluator.py +81 -0
  50. src/serving/api/alerts/history.py +115 -0
  51. src/serving/api/analytics.py +543 -0
  52. src/serving/api/auth/__init__.py +46 -0
  53. src/serving/api/auth/key_rotation.py +400 -0
  54. src/serving/api/auth/manager.py +406 -0
  55. src/serving/api/auth/middleware.py +331 -0
  56. src/serving/api/main.py +390 -0
  57. src/serving/api/middleware/logging.py +41 -0
  58. src/serving/api/middleware/tracing.py +51 -0
  59. src/serving/api/rate_limiter.py +76 -0
  60. src/serving/api/routers/__init__.py +0 -0
  61. src/serving/api/routers/admin.py +150 -0
  62. src/serving/api/routers/admin_ui.py +93 -0
  63. src/serving/api/routers/agent_query.py +639 -0
  64. src/serving/api/routers/alerts.py +134 -0
  65. src/serving/api/routers/batch.py +231 -0
  66. src/serving/api/routers/contracts.py +98 -0
  67. src/serving/api/routers/deadletter.py +337 -0
  68. src/serving/api/routers/lineage.py +218 -0
  69. src/serving/api/routers/search.py +103 -0
  70. src/serving/api/routers/slo.py +231 -0
  71. src/serving/api/routers/stream.py +141 -0
  72. src/serving/api/routers/webhooks.py +93 -0
  73. src/serving/api/security.py +83 -0
  74. src/serving/api/telemetry.py +66 -0
  75. src/serving/api/templates/admin.html +214 -0
  76. src/serving/api/versioning.py +328 -0
  77. src/serving/api/webhook_dispatcher.py +423 -0
  78. src/serving/backends/__init__.py +117 -0
  79. src/serving/backends/clickhouse_backend.py +310 -0
  80. src/serving/backends/duckdb_backend.py +268 -0
  81. src/serving/cache.py +169 -0
  82. src/serving/db_pool.py +105 -0
  83. src/serving/masking.py +122 -0
  84. src/serving/semantic_layer/__init__.py +0 -0
  85. src/serving/semantic_layer/catalog.py +177 -0
  86. src/serving/semantic_layer/contract_registry.py +258 -0
  87. src/serving/semantic_layer/entity_type_registry.py +107 -0
  88. src/serving/semantic_layer/nl_engine.py +189 -0
  89. src/serving/semantic_layer/query/__init__.py +3 -0
  90. src/serving/semantic_layer/query/contracts.py +47 -0
  91. src/serving/semantic_layer/query/engine.py +81 -0
  92. src/serving/semantic_layer/query/entity_queries.py +221 -0
  93. src/serving/semantic_layer/query/metric_queries.py +84 -0
  94. src/serving/semantic_layer/query/nl_queries.py +305 -0
  95. src/serving/semantic_layer/query/sql_builder.py +113 -0
  96. src/serving/semantic_layer/query/sql_guard.py +3 -0
  97. src/serving/semantic_layer/query_engine.py +5 -0
  98. src/serving/semantic_layer/schema_evolution.py +175 -0
  99. src/serving/semantic_layer/search_index.py +337 -0
  100. src/serving/semantic_layer/sql_guard.py +56 -0
@@ -0,0 +1,367 @@
1
+ """Collects and exposes pipeline health metrics.
2
+
3
+ Aggregates metrics from Kafka consumer groups, Flink jobs,
4
+ and quality checks into a unified health status.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass
9
+ from datetime import UTC, datetime
10
+ from enum import StrEnum
11
+ from pathlib import Path
12
+
13
+ import duckdb
14
+ import httpx
15
+ import structlog
16
+ import yaml # type: ignore[import-untyped]
17
+ from confluent_kafka import KafkaException
18
+ from prometheus_client import Gauge
19
+ from pyiceberg.exceptions import NoSuchPropertyException, RESTError, ValidationError
20
+
21
+ logger = structlog.get_logger()
22
+
23
+
24
+ class HealthStatus(StrEnum):
25
+ HEALTHY = "healthy"
26
+ DEGRADED = "degraded"
27
+ UNHEALTHY = "unhealthy"
28
+
29
+
30
# Per-component health gauge, labelled by component name.
PIPELINE_HEALTH = Gauge(
    "agentflow_pipeline_health",
    "Pipeline health status (1=healthy, 0.5=degraded, 0=unhealthy)",
    labelnames=["component"],
)

# Consumer-lag gauge keyed by group, topic and partition.
# NOTE(review): nothing in the visible part of this module sets it — confirm
# which caller is expected to publish lag values.
CONSUMER_LAG = Gauge(
    "agentflow_consumer_lag",
    "Kafka consumer group lag",
    labelnames=["group_id", "topic", "partition"],
)
41
+
42
+
43
+ class CheckSource(StrEnum):
44
+ LIVE = "live"
45
+ PLACEHOLDER = "placeholder"
46
+
47
+
48
@dataclass
class ComponentHealth:
    """Result of one component's health check."""

    # Component identifier, e.g. "kafka" or "flink"; also used as the gauge label.
    name: str
    # Health grade assigned by the check.
    status: HealthStatus
    # Human-readable summary of what the check found.
    message: str
    # When the check produced this result.
    last_check: datetime
    # Check-specific numbers (broker counts, pass rate, ...).
    metrics: dict
    # LIVE unless the check explicitly marks the result as a placeholder.
    source: CheckSource = CheckSource.LIVE
56
+
57
+
58
+ @dataclass
59
+ class PipelineHealth:
60
+ overall: HealthStatus
61
+ components: list[ComponentHealth]
62
+ checked_at: datetime
63
+
64
+ def to_dict(self) -> dict:
65
+ return {
66
+ "status": self.overall,
67
+ "checked_at": self.checked_at.isoformat(),
68
+ "components": [
69
+ {
70
+ "name": c.name,
71
+ "status": c.status,
72
+ "message": c.message,
73
+ "metrics": c.metrics,
74
+ "source": c.source.value,
75
+ }
76
+ for c in self.components
77
+ ],
78
+ }
79
+
80
+
81
class HealthCollector:
    """Aggregates health from all pipeline components.

    Each ``_check_*`` method probes one dependency and returns a
    ``ComponentHealth``. ``collect`` runs them in registration order, derives
    the overall status from the worst component and publishes one
    ``PIPELINE_HEALTH`` gauge sample per component.
    """

    def __init__(self):
        # Order here is the order of components in the collected result.
        self._checks: list = [
            self._check_kafka,
            self._check_flink,
            self._check_freshness,
            self._check_quality_score,
            self._check_iceberg,
        ]

    def collect(self) -> PipelineHealth:
        """Run every registered check and summarise the outcome.

        Returns:
            A ``PipelineHealth`` whose ``overall`` is UNHEALTHY if any
            component is unhealthy, else DEGRADED if any is degraded, else
            HEALTHY.

        Side effects:
            Sets the ``PIPELINE_HEALTH`` gauge for each component.
        """
        components = [check() for check in self._checks]

        # Overall status: worst component determines it.
        statuses = {c.status for c in components}
        if HealthStatus.UNHEALTHY in statuses:
            overall = HealthStatus.UNHEALTHY
        elif HealthStatus.DEGRADED in statuses:
            overall = HealthStatus.DEGRADED
        else:
            overall = HealthStatus.HEALTHY

        gauge_values = {"healthy": 1.0, "degraded": 0.5, "unhealthy": 0.0}
        for c in components:
            PIPELINE_HEALTH.labels(component=c.name).set(gauge_values[c.status])

        return PipelineHealth(
            overall=overall,
            components=components,
            checked_at=datetime.now(UTC),
        )

    @staticmethod
    def _fetch_one(db_path: str, sql: str):
        """Run ``sql`` on a read-only DuckDB connection and return the first row.

        The connection is closed even when the query raises, so a failing
        health probe does not leak a handle on the database file.
        """
        conn = duckdb.connect(db_path, read_only=True)
        try:
            return conn.execute(sql).fetchone()
        finally:
            conn.close()

    @staticmethod
    def _to_aware_datetime(value) -> datetime | None:
        """Coerce a query result into a timezone-aware datetime, or None.

        Naive datetimes are taken to be UTC; aware ones keep their own
        offset. ISO-8601 strings are parsed; anything else yields None.
        """
        if isinstance(value, str):
            try:
                value = datetime.fromisoformat(value)
            except ValueError:
                return None
        if not isinstance(value, datetime):
            return None
        if value.tzinfo is None:
            return value.replace(tzinfo=UTC)
        return value

    @staticmethod
    def _freshness_sla_seconds() -> int:
        """Read FRESHNESS_SLA_SECONDS, falling back to 30 when it is not an integer."""
        raw = os.getenv("FRESHNESS_SLA_SECONDS", "30")
        try:
            return int(raw)
        except ValueError:
            logger.warning("freshness_sla_invalid", value=raw, fallback_seconds=30)
            return 30

    def _check_kafka(self) -> ComponentHealth:
        """Check Kafka broker connectivity via cluster metadata.

        Reports broker and topic counts. Consumer lag is not measured here.
        """
        from confluent_kafka.admin import AdminClient

        bootstrap = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
        try:
            admin = AdminClient({"bootstrap.servers": bootstrap})
            cluster_meta = admin.list_topics(timeout=5)
        except (KafkaException, OSError) as exc:
            logger.warning(
                "kafka_check_unavailable",
                bootstrap_servers=bootstrap,
                error=str(exc),
                exc_info=True,
            )
            return ComponentHealth(
                name="kafka",
                status=HealthStatus.UNHEALTHY,
                message=f"Kafka unavailable: {exc}",
                last_check=datetime.now(UTC),
                metrics={"brokers": 0, "topics": 0},
                source=CheckSource.PLACEHOLDER,
            )

        topic_count = len(cluster_meta.topics)
        broker_count = len(cluster_meta.brokers)

        if broker_count == 0:
            return ComponentHealth(
                name="kafka",
                status=HealthStatus.UNHEALTHY,
                message="No brokers available",
                last_check=datetime.now(UTC),
                # Same metric keys as every other Kafka result.
                metrics={"brokers": 0, "topics": topic_count},
            )

        return ComponentHealth(
            name="kafka",
            status=HealthStatus.HEALTHY,
            message=f"{broker_count} brokers, {topic_count} topics",
            last_check=datetime.now(UTC),
            metrics={"brokers": broker_count, "topics": topic_count},
        )

    def _check_flink(self) -> ComponentHealth:
        """Check the Flink JobManager ``/overview`` endpoint and job counts.

        Any failed job, or no running job at all, yields DEGRADED; an
        unreachable or non-JSON endpoint yields UNHEALTHY.
        """
        flink_url = os.getenv("FLINK_JOBMANAGER_URL", "http://localhost:8081")
        try:
            resp = httpx.get(f"{flink_url}/overview", timeout=5)
            resp.raise_for_status()
            data = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            logger.warning(
                "flink_check_unavailable",
                flink_url=flink_url,
                error=str(exc),
                exc_info=True,
            )
            return ComponentHealth(
                name="flink",
                status=HealthStatus.UNHEALTHY,
                message=f"Flink unavailable: {exc}",
                last_check=datetime.now(UTC),
                metrics={"running_jobs": None, "failed_jobs": None},
                source=CheckSource.PLACEHOLDER,
            )

        running = data.get("jobs-running", 0)
        failed = data.get("jobs-failed", 0)

        if failed > 0:
            status = HealthStatus.DEGRADED
            msg = f"{running} running, {failed} failed"
        elif running == 0:
            status = HealthStatus.DEGRADED
            msg = "No running jobs"
        else:
            status = HealthStatus.HEALTHY
            msg = f"{running} jobs running"

        return ComponentHealth(
            name="flink",
            status=status,
            message=msg,
            last_check=datetime.now(UTC),
            metrics={"running_jobs": running, "failed_jobs": failed},
        )

    def _check_freshness(self) -> ComponentHealth:
        """Check data freshness from the most recent pipeline event.

        The age of ``MAX(processed_at)`` is compared with FRESHNESS_SLA_SECONDS:
        within the SLA is HEALTHY, within three times the SLA is DEGRADED,
        anything older is UNHEALTHY. A query failure or an empty table yields
        a DEGRADED placeholder.
        """
        db_path = os.getenv("DUCKDB_PATH", "agentflow_demo.duckdb")
        try:
            row = self._fetch_one(db_path, "SELECT MAX(processed_at) FROM pipeline_events")
        except duckdb.Error as exc:
            logger.warning(
                "freshness_check_unavailable",
                db_path=db_path,
                error=str(exc),
                exc_info=True,
            )
            row = None

        raw_value = row[0] if row else None
        if raw_value:
            last_event = self._to_aware_datetime(raw_value)
            if last_event is None:
                # An unknown age must not be reported as fresh.
                logger.warning("freshness_timestamp_unrecognized", value=repr(raw_value))
                return ComponentHealth(
                    name="freshness",
                    status=HealthStatus.DEGRADED,
                    message=f"Unrecognized processed_at value: {raw_value!r}",
                    last_check=datetime.now(UTC),
                    metrics={"last_event_age_seconds": None},
                    source=CheckSource.PLACEHOLDER,
                )

            age_s = (datetime.now(UTC) - last_event).total_seconds()
            sla = self._freshness_sla_seconds()
            if age_s <= sla:
                status = HealthStatus.HEALTHY
            elif age_s <= sla * 3:
                status = HealthStatus.DEGRADED
            else:
                status = HealthStatus.UNHEALTHY

            return ComponentHealth(
                name="freshness",
                status=status,
                message=f"Last event {age_s:.0f}s ago (SLA: {sla}s)",
                last_check=datetime.now(UTC),
                metrics={
                    "last_event_age_seconds": round(age_s, 1),
                    "sla_seconds": sla,
                },
                source=CheckSource.LIVE,
            )

        return ComponentHealth(
            name="freshness",
            status=HealthStatus.DEGRADED,
            message="No pipeline events found (run local pipeline first)",
            last_check=datetime.now(UTC),
            metrics={"last_event_age_seconds": None},
            source=CheckSource.PLACEHOLDER,
        )

    def _check_quality_score(self) -> ComponentHealth:
        """Check data quality from the dead-letter ratio over the last hour.

        Pass rate is ``(total - dead) / total``: at least 99% is HEALTHY, at
        least 95% is DEGRADED, lower is UNHEALTHY. A query failure or an
        empty window yields a DEGRADED placeholder.
        """
        db_path = os.getenv("DUCKDB_PATH", "agentflow_demo.duckdb")
        try:
            row = self._fetch_one(
                db_path,
                """
                SELECT
                    COUNT(*) as total,
                    COUNT(*) FILTER (
                        WHERE topic = 'events.deadletter'
                    ) as dead
                FROM pipeline_events
                WHERE processed_at >= NOW() - INTERVAL '1 hour'
                """,
            )
        except duckdb.Error as exc:
            logger.warning(
                "quality_check_unavailable",
                db_path=db_path,
                error=str(exc),
                exc_info=True,
            )
            row = None

        if row and row[0] and row[0] > 0:
            total, dead = row[0], row[1]
            pass_rate = (total - dead) / total
            if pass_rate >= 0.99:
                status = HealthStatus.HEALTHY
            elif pass_rate >= 0.95:
                status = HealthStatus.DEGRADED
            else:
                status = HealthStatus.UNHEALTHY

            return ComponentHealth(
                name="quality",
                status=status,
                message=f"Pass rate: {pass_rate:.1%} ({dead}/{total} rejected)",
                last_check=datetime.now(UTC),
                metrics={
                    "pass_rate": round(pass_rate, 4),
                    "total_events": total,
                    "rejected_events": dead,
                },
                source=CheckSource.LIVE,
            )

        return ComponentHealth(
            name="quality",
            status=HealthStatus.DEGRADED,
            message="No pipeline events found (run local pipeline first)",
            last_check=datetime.now(UTC),
            metrics={"pass_rate": None},
            source=CheckSource.PLACEHOLDER,
        )

    def _check_iceberg(self) -> ComponentHealth:
        """Check Iceberg catalog accessibility and row counts.

        A missing config file or a failure while reading row counts yields a
        DEGRADED placeholder rather than UNHEALTHY.
        """
        config_path = Path(os.getenv("AGENTFLOW_ICEBERG_CONFIG", "config/iceberg.yaml"))
        if not config_path.exists():
            return ComponentHealth(
                name="iceberg",
                status=HealthStatus.DEGRADED,
                message="Iceberg config not found",
                last_check=datetime.now(UTC),
                metrics={"row_counts": {}},
                source=CheckSource.PLACEHOLDER,
            )

        try:
            # Imported lazily, inside the try, so an ImportError is handled below.
            from src.processing.iceberg_sink import IcebergSink

            sink = IcebergSink(config_path=config_path)
            row_counts = sink.row_counts()
        except (
            ImportError,
            OSError,
            KeyError,
            ValueError,
            yaml.YAMLError,
            NoSuchPropertyException,
            RESTError,
            ValidationError,
        ) as exc:
            logger.warning(
                "iceberg_check_unavailable",
                config_path=str(config_path),
                error=str(exc),
                exc_info=True,
            )
            return ComponentHealth(
                name="iceberg",
                status=HealthStatus.DEGRADED,
                message=f"Iceberg unavailable: {exc}",
                last_check=datetime.now(UTC),
                metrics={"row_counts": {}},
                source=CheckSource.PLACEHOLDER,
            )

        total_rows = sum(row_counts.values())
        return ComponentHealth(
            name="iceberg",
            status=HealthStatus.HEALTHY,
            message=f"{len(row_counts)} tables, {total_rows} rows",
            last_check=datetime.now(UTC),
            metrics={
                "row_counts": row_counts,
                "total_rows": total_rows,
            },
            source=CheckSource.LIVE,
        )
File without changes
@@ -0,0 +1,119 @@
1
+ """Schema validation for incoming events.
2
+
3
+ Validates events against their Pydantic schemas before they enter the storage layer.
4
+ Returns structured validation results with error details for observability.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import UTC, datetime
9
+
10
+ from pydantic import ValidationError
11
+
12
+ from src.ingestion.schemas.events import (
13
+ CdcEvent,
14
+ ClickstreamEvent,
15
+ OrderEvent,
16
+ PaymentEvent,
17
+ ProductEvent,
18
+ )
19
+
20
+
21
+ @dataclass
22
+ class ValidationResult:
23
+ is_valid: bool
24
+ event_id: str
25
+ event_type: str
26
+ errors: list[dict] = field(default_factory=list)
27
+ validated_at: datetime = field(default_factory=lambda: datetime.now(UTC))
28
+
29
+ def to_dict(self) -> dict:
30
+ return {
31
+ "is_valid": self.is_valid,
32
+ "event_id": self.event_id,
33
+ "event_type": self.event_type,
34
+ "errors": self.errors,
35
+ "validated_at": self.validated_at.isoformat(),
36
+ }
37
+
38
+
39
# Event-type prefix -> Pydantic model. Lookup walks the entries in insertion
# order and takes the first prefix the event type starts with.
_SCHEMA_MAP: dict = {
    "order.": OrderEvent,
    "payment.": PaymentEvent,
    # All clickstream event names share one model.
    **dict.fromkeys(("click", "page_view", "add_to_cart"), ClickstreamEvent),
    "product.": ProductEvent,
}

# Values of the "source" field that mark an event as CDC-originated.
_CDC_SOURCES = {"postgres_cdc", "mysql_cdc"}
50
+
51
+
52
def _get_model_for_event(event_type: str):  # -> BaseModel subclass | None
    """Return the model of the first registered prefix that ``event_type`` starts with.

    An exact match is a special case of a prefix match, so a single
    ``startswith`` test covers both. Returns None when no prefix applies.
    """
    matches = (
        candidate
        for known_prefix, candidate in _SCHEMA_MAP.items()
        if event_type.startswith(known_prefix)
    )
    return next(matches, None)
57
+
58
+
59
def validate_event(raw_event: dict) -> ValidationResult:
    """Validate a single event against its schema.

    CDC events (see ``_is_cdc_event``) are validated against ``CdcEvent``;
    every other event is matched to a model by its ``event_type`` prefix.

    Args:
        raw_event: Raw event dict (already parsed from JSON).

    Returns:
        ValidationResult with is_valid=True if the event passes,
        or is_valid=False with structured error details. An event whose
        type has no registered schema — including a missing or non-string
        ``event_type`` — is reported as ``unknown_event_type``.
    """
    event_id = raw_event.get("event_id", "unknown")
    event_type = raw_event.get("event_type", "unknown")

    if _is_cdc_event(raw_event):
        model = CdcEvent
    elif isinstance(event_type, str):
        model = _get_model_for_event(event_type)
    else:
        # e.g. {"event_type": None}: prefix matching needs a string, so treat
        # the event as having no schema instead of raising AttributeError.
        model = None

    if model is None:
        return ValidationResult(
            is_valid=False,
            event_id=event_id,
            event_type=event_type,
            errors=[{"type": "unknown_event_type", "msg": f"No schema for: {event_type}"}],
        )

    try:
        model.model_validate(raw_event)
    except ValidationError as e:
        # Keep only the fields needed for observability.
        errors = [
            {
                "type": err["type"],
                "loc": list(err["loc"]),
                "msg": err["msg"],
            }
            for err in e.errors()
        ]
        return ValidationResult(
            is_valid=False,
            event_id=event_id,
            event_type=event_type,
            errors=errors,
        )

    return ValidationResult(is_valid=True, event_id=event_id, event_type=event_type)
99
+
100
+
101
def _is_cdc_event(raw_event: dict) -> bool:
    """Return True when the event comes from a CDC source and has the CDC envelope keys."""
    if raw_event.get("source") not in _CDC_SOURCES:
        return False
    # Both envelope keys must be present; their values are not inspected here.
    return "operation" in raw_event and "source_metadata" in raw_event
107
+
108
+
109
def validate_batch(events: list[dict]) -> tuple[list[dict], list[ValidationResult]]:
    """Validate a batch of events. Returns (valid_events, failed_results).

    Valid events are returned as the original dicts; invalid ones are
    returned as their ``ValidationResult``. Input order is preserved in both.
    """
    # Validate each event exactly once, in input order.
    outcomes = [(candidate, validate_event(candidate)) for candidate in events]
    accepted = [candidate for candidate, outcome in outcomes if outcome.is_valid]
    rejected = [outcome for _, outcome in outcomes if not outcome.is_valid]
    return accepted, rejected
@@ -0,0 +1,202 @@
1
+ """Semantic validation: business rules that go beyond schema correctness.
2
+
3
+ Schema validation checks structure. Semantic validation checks meaning:
4
+ - Does the order total actually match line items?
5
+ - Is the payment amount within reasonable bounds?
6
+ - Does the user_id reference a plausible user?
7
+
8
+ These rules catch data quality issues that pass schema validation
9
+ but would cause AI agents to give wrong answers.
10
+ """
11
+
12
+ import argparse
13
+ from dataclasses import dataclass, field
14
+ from datetime import UTC, datetime
15
+ from decimal import Decimal
16
+
17
+
18
+ @dataclass
19
+ class SemanticIssue:
20
+ rule: str
21
+ severity: str # "error" | "warning"
22
+ field: str
23
+ message: str
24
+ actual_value: str | None = None
25
+ expected: str | None = None
26
+
27
+
28
+ @dataclass
29
+ class SemanticResult:
30
+ event_id: str
31
+ event_type: str
32
+ is_clean: bool
33
+ issues: list[SemanticIssue] = field(default_factory=list)
34
+ checked_at: datetime = field(default_factory=lambda: datetime.now(UTC))
35
+
36
+ def to_dict(self) -> dict:
37
+ return {
38
+ "event_id": self.event_id,
39
+ "event_type": self.event_type,
40
+ "is_clean": self.is_clean,
41
+ "issues": [
42
+ {
43
+ "rule": i.rule,
44
+ "severity": i.severity,
45
+ "field": i.field,
46
+ "message": i.message,
47
+ }
48
+ for i in self.issues
49
+ ],
50
+ "checked_at": self.checked_at.isoformat(),
51
+ }
52
+
53
+
54
+ # ── Rule definitions ────────────────────────────────────────────
55
+
56
+
57
def _check_order_total_consistency(event: dict) -> list[SemanticIssue]:
    """Order total must match sum of (quantity * unit_price) for all items.

    Missing ``total_amount``, ``quantity`` or ``unit_price`` count as 0.
    Values go through ``str`` before ``Decimal``. A difference of up to
    0.01 is tolerated.
    """
    line_items = event.get("items", [])
    stated_total = Decimal(str(event.get("total_amount", 0)))

    line_totals = [
        Decimal(str(line.get("quantity", 0))) * Decimal(str(line.get("unit_price", 0)))
        for line in line_items
    ]
    computed_total = sum(line_totals)

    tolerance = Decimal("0.01")
    if abs(stated_total - computed_total) <= tolerance:
        return []

    return [
        SemanticIssue(
            rule="order_total_consistency",
            severity="error",
            field="total_amount",
            message=f"Stated total {stated_total} != computed {computed_total}",
            actual_value=str(stated_total),
            expected=str(computed_total),
        )
    ]
79
+
80
+
81
def _check_payment_amount_bounds(event: dict) -> list[SemanticIssue]:
    """Payment amount should be between $0.50 and $50,000.

    Below the minimum is an error; above the maximum is only a warning.
    A missing ``amount`` counts as 0 and therefore trips the minimum.
    """
    lower_bound = Decimal("0.50")
    upper_bound = Decimal("50000")
    amount = Decimal(str(event.get("amount", 0)))

    if amount < lower_bound:
        return [
            SemanticIssue(
                rule="payment_min_amount",
                severity="error",
                field="amount",
                message=f"Payment amount {amount} below minimum $0.50",
                actual_value=str(amount),
            )
        ]

    if amount > upper_bound:
        return [
            SemanticIssue(
                rule="payment_max_amount",
                severity="warning",
                field="amount",
                message=f"Payment amount {amount} exceeds $50,000 — needs manual review",
                actual_value=str(amount),
            )
        ]

    return []
107
+
108
+
109
def _check_payment_failure_reason(event: dict) -> list[SemanticIssue]:
    """Failed payments must have a failure_reason.

    Only events whose ``status`` is exactly "failed" are checked; an empty
    or missing ``failure_reason`` produces a warning.
    """
    is_failed = event.get("status") == "failed"
    if not is_failed or event.get("failure_reason"):
        return []

    return [
        SemanticIssue(
            rule="payment_failure_reason_required",
            severity="warning",
            field="failure_reason",
            message="Failed payment missing failure_reason",
        )
    ]
122
+
123
+
124
def _check_clickstream_session_id(event: dict) -> list[SemanticIssue]:
    """Clickstream events must have a session_id.

    A missing or falsy ``session_id`` (e.g. an empty string) is an error.
    """
    if event.get("session_id"):
        return []

    return [
        SemanticIssue(
            rule="clickstream_session_required",
            severity="error",
            field="session_id",
            message="Clickstream event missing session_id",
        )
    ]
137
+
138
+
139
def _check_product_price_sanity(event: dict) -> list[SemanticIssue]:
    """Product price should be between $0 and $100,000.

    Both bounds are reported under the ``product_price_sanity`` rule as
    warnings, so neither marks the event as not clean. A missing ``price``
    counts as 0 and passes.

    Args:
        event: Product event dict; ``price`` is converted via ``str`` to
            ``Decimal``.

    Returns:
        A list with at most one ``SemanticIssue``.
    """
    price = Decimal(str(event.get("price", 0)))

    if price < Decimal("0"):
        # The documented lower bound was previously unenforced, so negative
        # prices slipped through unreported.
        return [
            SemanticIssue(
                rule="product_price_sanity",
                severity="warning",
                field="price",
                message=f"Product price {price} is negative",
                actual_value=str(price),
            )
        ]

    if price > Decimal("100000"):
        return [
            SemanticIssue(
                rule="product_price_sanity",
                severity="warning",
                field="price",
                message=f"Product price {price} seems unreasonably high",
                actual_value=str(price),
            )
        ]

    return []
154
+
155
+
156
+ # ── Rule registry ───────────────────────────────────────────────
157
+
158
# Event-type prefix -> rule functions to run. Every prefix that matches an
# event contributes its rules, in insertion order.
_RULES: dict[str, list] = {
    "order.": [_check_order_total_consistency],
    "payment.": [_check_payment_amount_bounds, _check_payment_failure_reason],
    # Each clickstream name gets its own list object, as in a literal mapping.
    **{
        clickstream_name: [_check_clickstream_session_id]
        for clickstream_name in ("click", "page_view", "add_to_cart")
    },
    "product.": [_check_product_price_sanity],
}
166
+
167
+
168
def validate_semantics(event: dict) -> SemanticResult:
    """Run all applicable semantic rules on an event.

    Rules are selected by matching ``event_type`` against the prefixes in
    ``_RULES``; every matching prefix contributes its rules. An event with a
    missing or non-string ``event_type`` matches no rules and is returned
    clean with no issues.

    Args:
        event: Event dict, expected to have passed schema validation.

    Returns:
        SemanticResult whose ``is_clean`` is False only when at least one
        issue has severity "error"; warnings alone leave it True.
    """
    event_id = event.get("event_id", "unknown")
    event_type = event.get("event_type", "unknown")
    all_issues: list[SemanticIssue] = []

    # Prefix matching needs a string; e.g. {"event_type": None} would
    # otherwise raise AttributeError. An exact match is already covered by
    # startswith, so no separate equality test is needed.
    if isinstance(event_type, str):
        for prefix, rules in _RULES.items():
            if event_type.startswith(prefix):
                for rule_fn in rules:
                    all_issues.extend(rule_fn(event))

    has_errors = any(issue.severity == "error" for issue in all_issues)

    return SemanticResult(
        event_id=event_id,
        event_type=event_type,
        is_clean=not has_errors,
        issues=all_issues,
    )
187
+
188
+
189
if __name__ == "__main__":
    # Small CLI smoke test: validates one hard-coded, internally consistent order.
    cli = argparse.ArgumentParser(description="Run semantic validation checks")
    cli.add_argument("--check-all", action="store_true", help="Run all checks on sample data")
    options = cli.parse_args()

    if options.check_all:
        # 2 x 50.00 equals the stated total, so the consistency rule should not fire.
        order_fixture = {
            "event_id": "test-001",
            "event_type": "order.created",
            "total_amount": "100.00",
            "items": [{"quantity": 2, "unit_price": "50.00", "product_id": "P1"}],
        }
        outcome = validate_semantics(order_fixture)
        print(f"Order check: is_clean={outcome.is_clean}, issues={len(outcome.issues)}")
File without changes
File without changes