agentic-threat-hunting-framework 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,518 @@
1
+ """Centralized metrics tracking for hunt execution.
2
+
3
+ Auto-captures ClickHouse query times and Bedrock LLM token usage,
4
+ associating metrics with active hunt sessions when available.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ import os
10
+ import tempfile
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional
15
+
16
+
17
@dataclass
class ClickHouseQueryMetric:
    """Metric for a single ClickHouse query execution."""

    id: str  # sequential metric id, e.g. "ch-001" or "ch-nosess-001"
    timestamp: str  # ISO-8601 UTC timestamp (e.g. "2025-01-01T00:00:00Z")
    sql_hash: str  # SHA256 of SQL for grouping (privacy)
    duration_ms: int  # query execution time in milliseconds
    rows_returned: int  # number of rows in the result set
    status: str  # success | error | timeout
27
+
28
+
29
@dataclass
class BedrockCallMetric:
    """Metric for a single Bedrock LLM call."""

    id: str  # sequential metric id, e.g. "br-001" or "br-nosess-001"
    timestamp: str  # ISO-8601 UTC timestamp (e.g. "2025-01-01T00:00:00Z")
    agent: str  # agent name, e.g. "hypothesis-generator"
    model_id: str  # Bedrock model identifier
    input_tokens: int  # prompt token count
    output_tokens: int  # completion token count
    cost_usd: float  # estimated cost in USD (rounded to 4 places when logged)
    duration_ms: int  # call duration in milliseconds
41
+
42
+
43
@dataclass
class SessionMetrics:
    """Metrics for a single hunt session."""

    session_id: str
    hunt_id: str
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    clickhouse_queries: List[ClickHouseQueryMetric] = field(default_factory=list)
    bedrock_calls: List[BedrockCallMetric] = field(default_factory=list)

    @property
    def clickhouse_totals(self) -> Dict[str, Any]:
        """Aggregate query count, duration, and row totals across this session's queries."""
        queries = self.clickhouse_queries
        total_ms = sum(metric.duration_ms for metric in queries)
        total_rows = sum(metric.rows_returned for metric in queries)
        return {
            "query_count": len(queries),
            "total_duration_ms": total_ms,
            "total_rows_returned": total_rows,
        }

    @property
    def bedrock_totals(self) -> Dict[str, Any]:
        """Aggregate call count, token usage, and cost across this session's LLM calls."""
        calls = self.bedrock_calls
        cost = sum(metric.cost_usd for metric in calls)
        return {
            "call_count": len(calls),
            "total_input_tokens": sum(metric.input_tokens for metric in calls),
            "total_output_tokens": sum(metric.output_tokens for metric in calls),
            # Round to 4 decimal places to avoid float-noise in persisted totals.
            "total_cost_usd": round(cost, 4),
        }
72
+
73
+
74
class MetricsTracker:
    """Singleton tracker for execution metrics.

    Auto-captures ClickHouse query times and Bedrock LLM usage.
    Persists to metrics/execution_metrics.json.

    Usage:
        tracker = MetricsTracker.get_instance()
        tracker.log_clickhouse_query(sql, duration_ms, rows, "success")
        tracker.log_bedrock_call("hypothesis-generator", model_id, in_tok, out_tok, cost, dur)
    """

    _instance: Optional["MetricsTracker"] = None
    _metrics_file: Path = Path("metrics/execution_metrics.json")
    # In-memory cache of the metrics JSON; class-level so every instance shares it.
    _data: Optional[Dict[str, Any]] = None

    def __new__(cls) -> "MetricsTracker":
        """Ensure only one instance exists (singleton pattern)."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def get_instance(cls) -> "MetricsTracker":
        """Get the singleton instance."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def _now_iso(self) -> str:
        """Get the current UTC timestamp in ISO format with a 'Z' suffix."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _hash_sql(self, sql: str) -> str:
        """Create a 16-char SHA256 hash prefix of SQL for grouping (raw SQL is never stored)."""
        return hashlib.sha256(sql.encode()).hexdigest()[:16]

    @staticmethod
    def _parse_ts(value: str) -> datetime:
        """Parse an ISO-8601 timestamp, accepting a trailing 'Z' for UTC.

        Raises:
            ValueError: If the string is not a valid ISO timestamp.
            AttributeError: If value is not a string (e.g. None).
        """
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    def _get_current_context(self) -> tuple[Optional[str], Optional[str]]:
        """Get current hunt_id and session_id from active session.

        Returns:
            Tuple of (hunt_id, session_id), both None if no active session.
        """
        try:
            # Lazy import to avoid circular dependencies
            from athf.core.session_manager import SessionManager

            manager = SessionManager.get_instance()
            session = manager.get_active_session()
            if session:
                return session.hunt_id, session.session_id
        except Exception:
            pass  # No active session or import failed
        return None, None

    def _load_metrics(self) -> Dict[str, Any]:
        """Load metrics from file (cached after first load) or create an empty structure."""
        if self._data is not None:
            return self._data

        if self._metrics_file.exists():
            try:
                with open(self._metrics_file, "r") as f:
                    self._data = json.load(f)
                return self._data
            except (json.JSONDecodeError, IOError):
                pass  # Corrupt or unreadable file: fall through to create new

        # Create empty structure
        self._data = {
            "version": "1.0.0",
            "last_updated": self._now_iso(),
            "hunts": {},
            "sessions": {},
            "no_session": {"clickhouse_queries": [], "bedrock_calls": []},
        }
        return self._data

    def _save_metrics(self) -> None:
        """Save metrics to file with an atomic write (temp file + replace)."""
        if self._data is None:
            return

        self._data["last_updated"] = self._now_iso()

        # Ensure directory exists
        self._metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Atomic write: write to a temp file in the same directory, then swap it in.
        try:
            fd, tmp_path = tempfile.mkstemp(dir=self._metrics_file.parent, suffix=".tmp")
            try:
                with os.fdopen(fd, "w") as f:
                    json.dump(self._data, f, indent=2)
                # os.replace (not os.rename): atomically overwrites an existing
                # target on every platform; os.rename raises on Windows when the
                # destination file already exists.
                os.replace(tmp_path, self._metrics_file)
            except Exception:
                # Clean up temp file on error
                if os.path.exists(tmp_path):
                    os.unlink(tmp_path)
                raise
        except Exception:
            # If atomic write fails, try a direct (non-atomic) write as a last resort
            with open(self._metrics_file, "w") as f:
                json.dump(self._data, f, indent=2)

    def _ensure_session_entry(self, hunt_id: str, session_id: str) -> Dict[str, Any]:
        """Ensure hunt and session entries exist in the data structure; return the session dict."""
        data = self._load_metrics()

        # Ensure hunt entry
        if hunt_id not in data["hunts"]:
            data["hunts"][hunt_id] = {
                "hunt_id": hunt_id,
                "sessions": [],
                "totals": {
                    "clickhouse": {
                        "query_count": 0,
                        "total_duration_ms": 0,
                        "total_rows_returned": 0,
                    },
                    "bedrock": {
                        "call_count": 0,
                        "total_input_tokens": 0,
                        "total_output_tokens": 0,
                        "total_cost_usd": 0.0,
                    },
                },
            }

        # Ensure session entry
        if session_id not in data["sessions"]:
            data["sessions"][session_id] = {
                "session_id": session_id,
                "hunt_id": hunt_id,
                "start_time": self._now_iso(),
                "clickhouse_queries": [],
                "bedrock_calls": [],
            }
            # Add session to hunt's session list
            if session_id not in data["hunts"][hunt_id]["sessions"]:
                data["hunts"][hunt_id]["sessions"].append(session_id)

        session_data: Dict[str, Any] = data["sessions"][session_id]
        return session_data

    def _update_hunt_totals(self, hunt_id: str) -> None:
        """Recalculate hunt totals from all of its sessions (no-op for unknown hunts)."""
        data = self._load_metrics()
        if hunt_id not in data["hunts"]:
            return

        hunt = data["hunts"][hunt_id]
        ch_totals = {"query_count": 0, "total_duration_ms": 0, "total_rows_returned": 0}
        br_totals = {
            "call_count": 0,
            "total_input_tokens": 0,
            "total_output_tokens": 0,
            "total_cost_usd": 0.0,
        }

        for session_id in hunt.get("sessions", []):
            if session_id in data["sessions"]:
                session = data["sessions"][session_id]

                # ClickHouse totals
                for q in session.get("clickhouse_queries", []):
                    ch_totals["query_count"] += 1
                    ch_totals["total_duration_ms"] += q.get("duration_ms", 0)
                    ch_totals["total_rows_returned"] += q.get("rows_returned", 0)

                # Bedrock totals
                for c in session.get("bedrock_calls", []):
                    br_totals["call_count"] += 1
                    br_totals["total_input_tokens"] += c.get("input_tokens", 0)
                    br_totals["total_output_tokens"] += c.get("output_tokens", 0)
                    br_totals["total_cost_usd"] += c.get("cost_usd", 0.0)

        br_totals["total_cost_usd"] = round(br_totals["total_cost_usd"], 4)
        hunt["totals"] = {"clickhouse": ch_totals, "bedrock": br_totals}

    def log_clickhouse_query(
        self,
        sql: str,
        duration_ms: int,
        rows: int,
        status: str = "success",
    ) -> None:
        """Log a ClickHouse query execution.

        Args:
            sql: SQL query executed (hashed for storage)
            duration_ms: Query execution time in milliseconds
            rows: Number of rows returned
            status: Query status (success, error, timeout)
        """
        hunt_id, session_id = self._get_current_context()
        data = self._load_metrics()

        # Choose the target list: active session, or the no_session bucket.
        if session_id and hunt_id:
            session = self._ensure_session_entry(hunt_id, session_id)
            query_num = len(session.get("clickhouse_queries", [])) + 1
            metric_id = f"ch-{query_num:03d}"
            target = session.setdefault("clickhouse_queries", [])
        else:
            query_num = len(data["no_session"].get("clickhouse_queries", [])) + 1
            metric_id = f"ch-nosess-{query_num:03d}"
            target = data["no_session"].setdefault("clickhouse_queries", [])

        metric = {
            "id": metric_id,
            "timestamp": self._now_iso(),
            "sql_hash": self._hash_sql(sql),
            "duration_ms": duration_ms,
            "rows_returned": rows,
            "status": status,
        }
        target.append(metric)

        # Update hunt totals if we have a hunt context
        if hunt_id:
            self._update_hunt_totals(hunt_id)

        self._save_metrics()

    def log_bedrock_call(
        self,
        agent: str,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cost_usd: float,
        duration_ms: int,
    ) -> None:
        """Log a Bedrock LLM call.

        Args:
            agent: Agent name (e.g., "hypothesis-generator")
            model_id: Bedrock model ID
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            cost_usd: Estimated cost in USD
            duration_ms: Call duration in milliseconds
        """
        hunt_id, session_id = self._get_current_context()
        data = self._load_metrics()

        # Choose the target list: active session, or the no_session bucket.
        if session_id and hunt_id:
            session = self._ensure_session_entry(hunt_id, session_id)
            call_num = len(session.get("bedrock_calls", [])) + 1
            metric_id = f"br-{call_num:03d}"
            target = session.setdefault("bedrock_calls", [])
        else:
            call_num = len(data["no_session"].get("bedrock_calls", [])) + 1
            metric_id = f"br-nosess-{call_num:03d}"
            target = data["no_session"].setdefault("bedrock_calls", [])

        metric = {
            "id": metric_id,
            "timestamp": self._now_iso(),
            "agent": agent,
            "model_id": model_id,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_usd": round(cost_usd, 4),
            "duration_ms": duration_ms,
        }
        target.append(metric)

        # Update hunt totals if we have a hunt context
        if hunt_id:
            self._update_hunt_totals(hunt_id)

        self._save_metrics()

    def get_hunt_metrics(self, hunt_id: str) -> Optional[Dict[str, Any]]:
        """Get metrics for a specific hunt.

        Args:
            hunt_id: Hunt identifier (e.g., "H-0019")

        Returns:
            Hunt metrics dict or None if not found
        """
        data = self._load_metrics()
        result: Optional[Dict[str, Any]] = data["hunts"].get(hunt_id)
        return result

    def get_session_metrics(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Get metrics for a specific session.

        Args:
            session_id: Session identifier (e.g., "H-0019-2025-12-30")

        Returns:
            Session metrics dict or None if not found
        """
        data = self._load_metrics()
        result: Optional[Dict[str, Any]] = data["sessions"].get(session_id)
        return result

    def get_aggregate_metrics(self, days: int = 30) -> Dict[str, Any]:
        """Get aggregate metrics summary.

        Args:
            days: Number of days to include (0 for all time)
                  NOTE(review): currently unused — all stored hunts are
                  aggregated regardless of age; kept for interface stability.

        Returns:
            Aggregate metrics dict
        """
        data = self._load_metrics()

        # Calculate totals across all hunts
        ch_totals = {"query_count": 0, "total_duration_ms": 0, "total_rows_returned": 0}
        br_totals = {
            "call_count": 0,
            "total_input_tokens": 0,
            "total_output_tokens": 0,
            "total_cost_usd": 0.0,
        }

        for hunt in data.get("hunts", {}).values():
            totals = hunt.get("totals", {})
            ch = totals.get("clickhouse", {})
            br = totals.get("bedrock", {})

            ch_totals["query_count"] += ch.get("query_count", 0)
            ch_totals["total_duration_ms"] += ch.get("total_duration_ms", 0)
            ch_totals["total_rows_returned"] += ch.get("total_rows_returned", 0)

            br_totals["call_count"] += br.get("call_count", 0)
            br_totals["total_input_tokens"] += br.get("total_input_tokens", 0)
            br_totals["total_output_tokens"] += br.get("total_output_tokens", 0)
            br_totals["total_cost_usd"] += br.get("total_cost_usd", 0.0)

        # Include no-session metrics
        no_sess = data.get("no_session", {})
        ch_totals["query_count"] += len(no_sess.get("clickhouse_queries", []))
        for q in no_sess.get("clickhouse_queries", []):
            ch_totals["total_duration_ms"] += q.get("duration_ms", 0)
            ch_totals["total_rows_returned"] += q.get("rows_returned", 0)

        br_totals["call_count"] += len(no_sess.get("bedrock_calls", []))
        for c in no_sess.get("bedrock_calls", []):
            br_totals["total_input_tokens"] += c.get("input_tokens", 0)
            br_totals["total_output_tokens"] += c.get("output_tokens", 0)
            br_totals["total_cost_usd"] += c.get("cost_usd", 0.0)

        br_totals["total_cost_usd"] = round(br_totals["total_cost_usd"], 4)

        return {
            "hunt_count": len(data.get("hunts", {})),
            "session_count": len(data.get("sessions", {})),
            "clickhouse": ch_totals,
            "bedrock": br_totals,
            "last_updated": data.get("last_updated"),
        }

    def get_metrics_in_time_window(
        self,
        session_id: str,
        start_time: str,
        end_time: str,
    ) -> Dict[str, Any]:
        """Get session metrics including no_session calls within time window.

        This captures Bedrock calls that occurred during the session time window
        but were logged before the session started (e.g., hypothesis generation
        that ran before 'athf session start').

        Args:
            session_id: Session identifier (e.g., "H-0019-2025-12-30")
            start_time: Session start time in ISO format
            end_time: Session end time in ISO format

        Returns:
            Dict with combined metrics:
            {
                "clickhouse": {"query_count": N, "total_duration_ms": N, ...},
                "bedrock": {"call_count": N, "total_input_tokens": N, ...},
            }
        """
        data = self._load_metrics()

        # Initialize totals
        ch_totals = {"query_count": 0, "total_duration_ms": 0, "total_rows_returned": 0}
        br_totals = {
            "call_count": 0,
            "total_input_tokens": 0,
            "total_output_tokens": 0,
            "total_cost_usd": 0.0,
        }

        # Parse time window
        try:
            start_dt = self._parse_ts(start_time)
            end_dt = self._parse_ts(end_time)
        except (ValueError, AttributeError):
            # If time parsing fails, return empty metrics
            return {"clickhouse": ch_totals, "bedrock": br_totals}

        # Get session-specific metrics
        session_metrics = data.get("sessions", {}).get(session_id, {})

        for q in session_metrics.get("clickhouse_queries", []):
            ch_totals["query_count"] += 1
            ch_totals["total_duration_ms"] += q.get("duration_ms", 0)
            ch_totals["total_rows_returned"] += q.get("rows_returned", 0)

        for c in session_metrics.get("bedrock_calls", []):
            br_totals["call_count"] += 1
            br_totals["total_input_tokens"] += c.get("input_tokens", 0)
            br_totals["total_output_tokens"] += c.get("output_tokens", 0)
            br_totals["total_cost_usd"] += c.get("cost_usd", 0.0)

        # Also include no_session metrics within the time window
        no_sess = data.get("no_session", {})

        for q in no_sess.get("clickhouse_queries", []):
            try:
                q_time = self._parse_ts(q.get("timestamp", ""))
            except (ValueError, AttributeError):
                continue  # Skip entries with malformed/missing timestamps
            if start_dt <= q_time <= end_dt:
                ch_totals["query_count"] += 1
                ch_totals["total_duration_ms"] += q.get("duration_ms", 0)
                ch_totals["total_rows_returned"] += q.get("rows_returned", 0)

        for c in no_sess.get("bedrock_calls", []):
            try:
                c_time = self._parse_ts(c.get("timestamp", ""))
            except (ValueError, AttributeError):
                continue  # Skip entries with malformed/missing timestamps
            if start_dt <= c_time <= end_dt:
                br_totals["call_count"] += 1
                br_totals["total_input_tokens"] += c.get("input_tokens", 0)
                br_totals["total_output_tokens"] += c.get("output_tokens", 0)
                br_totals["total_cost_usd"] += c.get("cost_usd", 0.0)

        br_totals["total_cost_usd"] = round(br_totals["total_cost_usd"], 4)

        return {"clickhouse": ch_totals, "bedrock": br_totals}
@@ -0,0 +1,169 @@
1
+ """Query executor for rendering parameterized queries."""
2
+
3
+ import re
4
+ from typing import Any, Dict, List, Tuple
5
+
6
+ import jinja2
7
+
8
+
9
class QueryExecutor:
    """Executor for rendering parameterized SQL queries with Jinja2."""

    def __init__(self) -> None:
        """Set up the shared Jinja2 environment used for every render."""
        # SQL templates, not HTML, so autoescaping stays off.
        self.template_env = jinja2.Environment(  # nosec B701 - SQL templates, not HTML (XSS not applicable)
            variable_start_string="{{",
            variable_end_string="}}",
            autoescape=False,
            trim_blocks=True,
            lstrip_blocks=True,
        )

    def render_query(self, query_template: str, parameters: Dict[str, Any]) -> str:
        """Render query template with Jinja2 placeholder substitution.

        Args:
            query_template: Query SQL template with {{placeholder}} syntax
            parameters: Dictionary of parameter values

        Returns:
            Rendered SQL query string

        Raises:
            ValueError: If the template fails to compile or render
        """
        try:
            compiled = self.template_env.from_string(query_template)
            sql = compiled.render(**parameters)
        except jinja2.TemplateError as exc:
            raise ValueError(f"Template rendering error: {exc}") from exc
        # Collapse blank lines left behind by conditional template sections.
        return re.sub(r"\n\s*\n", "\n", sql).strip()

    def validate_parameters(self, query: Dict[str, Any], parameters: Dict[str, Any]) -> Tuple[bool, List[str]]:
        """Validate provided parameters match query placeholders.

        Args:
            query: Query dictionary with placeholders metadata
            parameters: Dictionary of parameter values

        Returns:
            Tuple of (is_valid, list_of_errors)
        """
        placeholders = query.get("placeholders", {})
        errors: List[str] = []

        # Pass 1: any placeholder without a default must be supplied.
        errors.extend(
            f"Missing required parameter: {name} ({info.get('description', '')})"
            for name, info in placeholders.items()
            if "default" not in info and name not in parameters
        )

        # Pass 2: type-check supplied values against declared placeholder types.
        for name, value in parameters.items():
            spec = placeholders.get(name)
            if spec is None:
                # Extra parameters not declared in the query (like
                # organization_id) are allowed.
                continue

            declared = spec.get("type")
            if declared == "integer" and not isinstance(value, int):
                # Accept anything int() can convert (e.g. "7").
                try:
                    int(value)
                except (TypeError, ValueError):
                    errors.append(f"Parameter {name} must be an integer")
            elif declared == "string" and not isinstance(value, str):
                errors.append(f"Parameter {name} must be a string")

        return (not errors, errors)

    def apply_defaults(self, query: Dict[str, Any], parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Apply default values for missing optional parameters.

        Args:
            query: Query dictionary with placeholders metadata
            parameters: Dictionary of parameter values

        Returns:
            Updated parameters dictionary with defaults applied (input unmodified)
        """
        merged = dict(parameters)
        for name, spec in query.get("placeholders", {}).items():
            if name not in merged and "default" in spec:
                merged[name] = spec["default"]
        return merged

    def execute_query(self, query: Dict[str, Any], parameters: Dict[str, Any]) -> str:
        """Render and return executable SQL query.

        Note: This method only renders queries. Use execute_query_with_params()
        to actually execute queries via the ClickHouse Python client.

        Args:
            query: Query dictionary
            parameters: Dictionary of parameter values

        Returns:
            Rendered SQL query string

        Raises:
            ValueError: If parameter validation or template rendering fails
        """
        # Fill in defaults, then validate before rendering.
        effective = self.apply_defaults(query, parameters)
        is_valid, problems = self.validate_parameters(query, effective)
        if not is_valid:
            raise ValueError(f"Parameter validation failed: {', '.join(problems)}")
        return self.render_query(query.get("query", ""), effective)

    def execute_query_with_params(
        self, query: Dict[str, Any], parameters: Dict[str, Any], format: str = "json", validate: bool = True
    ) -> Dict[str, Any]:
        """Render and execute parameterized query via ClickHouse Python client.

        Args:
            query: Query dictionary from library
            parameters: Parameter values
            format: Output format (json/table/csv) - used for formatting, not query execution
            validate: Whether to validate query before execution (default: True)

        Returns:
            Query results with metadata:
            {
                'columns': List[str],
                'data': List[List[Any]],
                'rows': int,
                'elapsed': str,
                'query': str
            }

        Raises:
            ValueError: If parameter validation fails or query validation fails
            ClickHouseConnectionError: If connection fails
            ClickHouseQueryError: If query execution fails
        """
        # Import here to avoid circular dependencies
        from athf.core.clickhouse_connection import ClickHouseClient
        from athf.core.query_validator import QueryValidator

        # Render the SQL first; this also validates/applies parameters.
        sql = self.execute_query(query, parameters)

        # Optionally run the rendered SQL through the query validator.
        if validate:
            outcome = QueryValidator().validate(sql, target="clickhouse")
            if not outcome.is_valid:
                error_msg = "Query validation failed:\n" + "\n".join(f"  - {e}" for e in outcome.errors)
                raise ValueError(error_msg)

        # Execute via ClickHouseClient and return its result dict unchanged.
        return ClickHouseClient().execute_query(sql, format=format)