agentic-threat-hunting-framework 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/METADATA +1 -1
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/RECORD +19 -11
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/WHEEL +1 -1
- athf/agents/base.py +2 -2
- athf/cli.py +10 -2
- athf/commands/__init__.py +6 -1
- athf/commands/similar.py +2 -2
- athf/core/clickhouse_connection.py +396 -0
- athf/core/metrics_tracker.py +518 -0
- athf/core/query_executor.py +169 -0
- athf/core/query_parser.py +203 -0
- athf/core/query_suggester.py +235 -0
- athf/core/query_validator.py +240 -0
- athf/core/session_manager.py +764 -0
- athf/core/web_search.py +1 -1
- athf/plugin_system.py +48 -0
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/entry_points.txt +0 -0
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/top_level.txt +0 -0
{agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agentic-threat-hunting-framework
-Version: 0.4.0
+Version: 0.5.0
 Summary: Agentic Threat Hunting Framework - Memory and AI for threat hunters
 Author-email: Sydney Marrone <athf@nebulock.io>
 Maintainer-email: Sydney Marrone <athf@nebulock.io>
{agentic_threat_hunting_framework-0.4.0.dist-info → agentic_threat_hunting_framework-0.5.0.dist-info}/RECORD
CHANGED
@@ -1,13 +1,14 @@
-agentic_threat_hunting_framework-0.
+agentic_threat_hunting_framework-0.5.0.dist-info/licenses/LICENSE,sha256=_KObErRfiKoolznt-DF0nJnr3U9Rdh7Z4Ba7G5qqckk,1071
 athf/__init__.py,sha256=OrjZe8P97_BTEkscapnwSsqKSjwXNP9d8-HtGr19Ni0,241
 athf/__version__.py,sha256=wCIQoU9b7qKcSNQiIOgHaD2buzBC-dlQYtvg8X5WS4A,59
-athf/cli.py,sha256=
+athf/cli.py,sha256=108PnDRlaytJj9KzjzcTLljB3DeerMIXZOeAQJrmtPU,5052
+athf/plugin_system.py,sha256=c_9-oUiR6tuYpWpEmeVRayU8-TXlkjvZC3EUxuYWW4M,1515
 athf/agents/__init__.py,sha256=iaSJpvnXm9rz4QS7gBrsaLEjm49uvsMs4BLPOJeyp78,346
-athf/agents/base.py,sha256=
+athf/agents/base.py,sha256=lnVDIOQUOyP-Apa9UM2E1mRXUPnNJ4hVqQXOwVw2u4c,4286
 athf/agents/llm/__init__.py,sha256=qSGA-NaInjsDkMpGQwnTz3S1OgCVlzetpMcDS_to1co,671
 athf/agents/llm/hunt_researcher.py,sha256=dIyD2Izh3zdf62kCHug1DwXFgmWhOMQUTim7qM3UAIs,27071
 athf/agents/llm/hypothesis_generator.py,sha256=XkbJz8IS4zwQjEy-ZD0zy2XW5uRnAy87Lii-5XTY0WU,8564
-athf/commands/__init__.py,sha256
+athf/commands/__init__.py,sha256=-ZOfg6uV1eSh7RDW7dKzdufuYvQTT0KGMF4JB6waHsY,635
 athf/commands/agent.py,sha256=c7ZeZa3OArXyXTgVjmUB2JXa3m9IpLFJ_FEVDhaDLE8,19000
 athf/commands/context.py,sha256=V-at81-OgKcLY-In48-AccTnHfTgdofmnjE8S5kypoI,12678
 athf/commands/env.py,sha256=JPKRsv48cgsIAjSFaGJ1-Nu0nQKGSVg4AbiFxb9jVX4,11887
@@ -15,17 +16,24 @@ athf/commands/hunt.py,sha256=aQdgNddqy_VrxZOkxhuPxIr4KLZtX5a2ZLb9079vLlw,25169
 athf/commands/init.py,sha256=Qn0iETNyuQvM-ySqCeoDz-pPemeuzROX_karQF5yN_o,12685
 athf/commands/investigate.py,sha256=mK_id5vjfN_ukqB_-fyia0FNa0pBmtn0Xv6CKHQI1Qo,24663
 athf/commands/research.py,sha256=FrLph4agaGQ_rIxMh0OQwh1MIGDFtj40zJ3E1ZFwaAw,18112
-athf/commands/similar.py,sha256=
+athf/commands/similar.py,sha256=FTTVr4zzP9bdJrirscp6pOxdQbE8zot6pa20-_TYiuo,11804
 athf/commands/splunk.py,sha256=7n7Jl1ExqZCNxUhG0kAKgAvZMqbIoGSgx2Moq7vAu-Y,11622
 athf/core/__init__.py,sha256=yG7C8ljx3UW4QZoYvDjUxsWHlbS8M-GLGB7Je7rRfqo,31
 athf/core/attack_matrix.py,sha256=QZKKmxckQ6-U7lqVdGUJoj2jEAhP3Juvr3sqaNx2oTw,3238
+athf/core/clickhouse_connection.py,sha256=8thmJvd2pUeeRZmDE7K491NgbC0myNZsdA29ooJRfVM,13561
 athf/core/hunt_manager.py,sha256=PFsg8Ecg94NCpuFZpApo82lyORkgK5IfOIih-7-XsmM,11580
 athf/core/hunt_parser.py,sha256=FUj0yyBIcZnaS9aItMImeBDhegQwpkewIwUMNXW_ZWU,5122
 athf/core/investigation_parser.py,sha256=wbfjnq4gFgIc0a4bHIAnidVNPhbHDpIXWY1SGLk0Xls,6804
+athf/core/metrics_tracker.py,sha256=VYEiO5QVteTtR4ddyHkL61KrO4QVNUDdNaDOVFcHy4Q,18873
+athf/core/query_executor.py,sha256=OtzUkxoOdDC4ZErVIbf0Qov82uHRJ8dJ965r4pLbiVA,6271
+athf/core/query_parser.py,sha256=Uz3ZMpd4YWKLPoge16uKZLlcMQrg49Z0NLXSceg893w,6722
+athf/core/query_suggester.py,sha256=i3P624tXb9uRKGxTpcSZx4ZVbOwnCiJqLnkxQD_UqyA,7736
+athf/core/query_validator.py,sha256=mfwdtLcPZS6ON4AlR-4d8YbQ12cqpnIq6526obdPDx8,9101
 athf/core/research_manager.py,sha256=i4fUjuZJcAik8I4pwbLkQlu6cuxkWDlqaIRQrzAfB0s,14512
+athf/core/session_manager.py,sha256=8Mz082ex87VXPiSFYRFNAb9e3ED6luCy0Q6zilyaz9A,25108
 athf/core/splunk_client.py,sha256=Xib2zVwV2l8eChzqUahI3PZ7Z2XS2wz01sPbF1E0Q18,11611
 athf/core/template_engine.py,sha256=Awp0n9E5Q1dYA35XDKKAd5VJLdpaDl2N967hackUVa8,6010
-athf/core/web_search.py,sha256=
+athf/core/web_search.py,sha256=B9IhmwH7gy2RVA6WSN3L7yGp3Q4L8OsiiwcEvnnZejU,10320
 athf/data/__init__.py,sha256=QtgONloCaS3E9Ow995FMxyy6BbszpfmYeWpySQ2b9Mc,502
 athf/data/docs/CHANGELOG.md,sha256=JKkzzs1n5jSERHFi6fDt6sYEe52MSaY127dfzthkUA8,8655
 athf/data/docs/CLI_REFERENCE.md,sha256=pb76UqkY_WHJMBEXwEmK0TJR8kcGzoBPlJ0WdGMKDQM,54875
@@ -51,8 +59,8 @@ athf/data/prompts/ai-workflow.md,sha256=rZtOcGuAEi35qx7182TwHJEORdz1-RxkZMBVkg61
 athf/data/prompts/basic-prompts.md,sha256=2bunpO35RoBdJWYthXVi40RNl2UWrfwOaFthBLHF5sU,8463
 athf/data/templates/HUNT_LOCK.md,sha256=zXxHaKMWbRDLewLTegYJMbXRM72s9gFFvjdwFfGNeJE,7386
 athf/utils/__init__.py,sha256=aEAPI1xnAsowOtc036cCb9ZOek5nrrfevu8PElhbNgk,30
-agentic_threat_hunting_framework-0.
-agentic_threat_hunting_framework-0.
-agentic_threat_hunting_framework-0.
-agentic_threat_hunting_framework-0.
-agentic_threat_hunting_framework-0.
+agentic_threat_hunting_framework-0.5.0.dist-info/METADATA,sha256=mM_lQGR-f8k7s905FXJ5xucVAoc6hp5yrq8cQmKJ-T0,15949
+agentic_threat_hunting_framework-0.5.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+agentic_threat_hunting_framework-0.5.0.dist-info/entry_points.txt,sha256=GopR2iTiBs-yNMWiUZ2DaFIFglXxWJx1XPjTa3ePtfE,39
+agentic_threat_hunting_framework-0.5.0.dist-info/top_level.txt,sha256=Cxxg6SMLfawDJWBITsciRzq27XV8fiaAor23o9Byoes,5
+agentic_threat_hunting_framework-0.5.0.dist-info/RECORD,,
athf/agents/base.py
CHANGED
@@ -98,7 +98,7 @@ class LLMAgent(Agent[InputT, OutputT]):
             duration_ms: Call duration in milliseconds
         """
         try:
-            from athf.core.metrics_tracker import MetricsTracker
+            from athf.core.metrics_tracker import MetricsTracker

             MetricsTracker.get_instance().log_bedrock_call(
                 agent=agent_name,
@@ -125,7 +125,7 @@ class LLMAgent(Agent[InputT, OutputT]):
             return None

         try:
-            import boto3
+            import boto3

             # Get AWS region from environment or use default
             region = os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1"))
athf/cli.py
CHANGED
@@ -95,8 +95,16 @@ cli.add_command(similar)
 # Agent commands
 cli.add_command(agent)

-# Integration commands
-
+# Integration commands (optional, requires additional dependencies)
+if splunk is not None:
+    cli.add_command(splunk)
+
+# Load and register plugins
+from athf.plugin_system import PluginRegistry
+
+PluginRegistry.load_plugins()
+for name, cmd in PluginRegistry._commands.items():
+    cli.add_command(cmd, name=name)


 @cli.command(hidden=True)
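The hunk above only shows the consumer side of the new plugin mechanism: cli.py calls PluginRegistry.load_plugins() and then registers every entry of PluginRegistry._commands as a Click command. The registration API inside athf/plugin_system.py is not part of this diff, so the following is a hedged sketch of how a plugin command could end up in that mapping; the hello command and the direct write to _commands are illustrative assumptions, not the package's documented mechanism.

import click

from athf.plugin_system import PluginRegistry


@click.command()
def hello() -> None:
    """Hypothetical plugin command."""
    click.echo("hello from a plugin")


# Assumption: _commands maps command name -> click.Command, which is what the
# "for name, cmd in PluginRegistry._commands.items()" loop in cli.py implies.
PluginRegistry._commands["hello"] = hello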
athf/commands/__init__.py
CHANGED
@@ -7,7 +7,12 @@ from athf.commands.init import init
 from athf.commands.investigate import investigate
 from athf.commands.research import research
 from athf.commands.similar import similar
-
+
+# Optional: Splunk integration (requires requests package)
+try:
+    from athf.commands.splunk import splunk
+except ImportError:
+    splunk = None  # type: ignore[assignment]

 __all__ = [
     "init",
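Because the splunk command is now imported behind a try/except ImportError guard, importing athf.commands succeeds even when the optional dependency is missing; callers simply see splunk set to None. A small sketch of checking that sentinel from downstream code (the print messages are illustrative only):

from athf import commands

if commands.splunk is None:
    # Optional dependency missing; per the comment in the diff, the Splunk command needs requests.
    print("Splunk integration unavailable - install requests to enable it")
else:
    print(f"Splunk command registered as: {commands.splunk.name}")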
athf/commands/similar.py
CHANGED
@@ -132,8 +132,8 @@ def _find_similar_hunts(
 ) -> List[Dict[str, Any]]:
     """Find similar hunts using TF-IDF similarity."""
     try:
-        from sklearn.feature_extraction.text import TfidfVectorizer
-        from sklearn.metrics.pairwise import cosine_similarity
+        from sklearn.feature_extraction.text import TfidfVectorizer
+        from sklearn.metrics.pairwise import cosine_similarity
     except ImportError:
         console.print("[red]Error: scikit-learn not installed[/red]")
         console.print("[dim]Install with: pip install scikit-learn[/dim]")
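The similar.py hunk above only touches the scikit-learn imports, but for context, the TF-IDF plus cosine-similarity pattern those imports support works roughly like the sketch below. This is a generic illustration, not the actual _find_similar_hunts body; the hunt texts and query are made up.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

hunts = [
    "Suspicious PowerShell encoded command execution",
    "Encoded PowerShell launched from an Office macro",
    "Unusual outbound DNS tunneling volume",
]
query = "powershell with encoded command line"

# Vectorize hunts and query together so they share one vocabulary.
vectorizer = TfidfVectorizer(stop_words="english")
matrix = vectorizer.fit_transform(hunts + [query])

# Last row is the query; compare it against every hunt description.
query_vec = matrix[len(hunts)]
hunt_vecs = matrix[: len(hunts)]
scores = cosine_similarity(query_vec, hunt_vecs).ravel()
for text, score in sorted(zip(hunts, scores), key=lambda pair: -pair[1]):
    print(f"{score:.2f}  {text}")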
athf/core/clickhouse_connection.py
ADDED
@@ -0,0 +1,396 @@
+"""ClickHouse connection management and configuration."""
+
+import os
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import yaml
+
+
+# Custom exceptions
+class ClickHouseConfigError(Exception):
+    """Configuration validation or loading error."""
+
+    pass
+
+
+class ClickHouseConnectionError(Exception):
+    """Connection establishment or network error."""
+
+    pass
+
+
+class ClickHouseQueryError(Exception):
+    """Query execution error."""
+
+    pass
+
+
+@dataclass
+class ClickHouseConfig:
+    """Configuration for ClickHouse connection.
+
+    Attributes:
+        host: ClickHouse server hostname
+        port: ClickHouse server port (default: 8443 for HTTPS)
+        username: Database username (required)
+        password: Database password (required)
+        database: Default database name (default: "default")
+        secure: Use SSL/TLS encryption (default: True)
+    """
+
+    host: str
+    port: int
+    username: str
+    password: str
+    database: str = "default"
+    secure: bool = True
+
+    @classmethod
+    def load(cls) -> "ClickHouseConfig":
+        """Load configuration from environment variables and config file.
+
+        Precedence order:
+        1. Environment variables (highest priority)
+        2. Config file (~/.athf/clickhouse.yaml)
+        3. Hardcoded defaults (host, port, database only)
+
+        Returns:
+            ClickHouseConfig instance
+
+        Raises:
+            ClickHouseConfigError: If required credentials are missing
+        """
+        config_data: Dict[str, Any] = {}
+
+        # Load from config file first
+        config_file = Path.home() / ".athf" / "clickhouse.yaml"
+        if config_file.exists():
+            try:
+                with open(config_file, "r") as f:
+                    yaml_data = yaml.safe_load(f) or {}
+                clickhouse_section = yaml_data.get("clickhouse", {})
+                config_data.update(clickhouse_section)
+            except Exception as e:
+                raise ClickHouseConfigError(f"Failed to load config file {config_file}: {e}")
+
+        # Override with environment variables
+        if host := os.getenv("CLICKHOUSE_HOST"):
+            config_data["host"] = host
+        if port := os.getenv("CLICKHOUSE_PORT"):
+            config_data["port"] = int(port)
+        if user := os.getenv("CLICKHOUSE_USER"):
+            config_data["username"] = user
+        if password := os.getenv("CLICKHOUSE_PASSWORD"):
+            config_data["password"] = password
+        if database := os.getenv("CLICKHOUSE_DATABASE"):
+            config_data["database"] = database
+        if secure := os.getenv("CLICKHOUSE_SECURE"):
+            config_data["secure"] = secure.lower() in ("true", "1", "yes")
+
+        # Apply defaults
+        config_data.setdefault("host", "ohma99qewu.us-east-1.aws.clickhouse.cloud")
+        config_data.setdefault("port", 8443)
+        config_data.setdefault("database", "default")
+        config_data.setdefault("secure", True)
+
+        # Validate required fields
+        if "username" not in config_data or not config_data["username"]:
+            raise ClickHouseConfigError(
+                "Missing required credential: CLICKHOUSE_USER environment variable not set. "
+                "Set credentials with: export CLICKHOUSE_USER='your_username'"
+            )
+        if "password" not in config_data or not config_data["password"]:
+            raise ClickHouseConfigError(
+                "Missing required credential: CLICKHOUSE_PASSWORD environment variable not set. "
+                "Set credentials with: export CLICKHOUSE_PASSWORD='your_password'"
+            )
+
+        return cls(
+            host=config_data["host"],
+            port=config_data["port"],
+            username=config_data["username"],
+            password=config_data["password"],
+            database=config_data["database"],
+            secure=config_data["secure"],
+        )
+
+    def to_dict(self, mask_password: bool = True) -> Dict[str, Any]:
+        """Convert config to dictionary.
+
+        Args:
+            mask_password: If True, replace password with asterisks
+
+        Returns:
+            Dictionary representation of config
+        """
+        return {
+            "host": self.host,
+            "port": self.port,
+            "username": self.username,
+            "password": "***" if mask_password else self.password,
+            "database": self.database,
+            "secure": self.secure,
+        }
+
+
+class ClickHouseConnectionManager:
+    """Singleton connection manager for ClickHouse queries.
+
+    Manages a single ClickHouse client instance with lazy initialization.
+    Connection is reused across multiple queries within the same process.
+    """
+
+    _instance: Optional["ClickHouseConnectionManager"] = None
+    _client: Optional[Any] = None  # Type: clickhouse_connect.driver.Client
+    _config: Optional[ClickHouseConfig] = None
+
+    def __new__(cls) -> "ClickHouseConnectionManager":
+        """Ensure only one instance exists (singleton pattern)."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    @classmethod
+    def get_instance(cls) -> "ClickHouseConnectionManager":
+        """Get the singleton instance.
+
+        Returns:
+            ClickHouseConnectionManager instance
+        """
+        if cls._instance is None:
+            cls._instance = cls()
+        return cls._instance
+
+    def get_client(self) -> Any:
+        """Get or create ClickHouse client (lazy initialization).
+
+        Returns:
+            ClickHouse client instance
+
+        Raises:
+            ClickHouseConnectionError: If connection fails
+        """
+        if self._client is None:
+            self._client = self._create_client()
+        return self._client
+
+    def _create_client(self) -> Any:
+        """Create ClickHouse client from configuration.
+
+        Returns:
+            ClickHouse client instance
+
+        Raises:
+            ClickHouseConnectionError: If client creation fails
+            ClickHouseConfigError: If configuration is invalid
+        """
+        try:
+            import clickhouse_connect
+        except ImportError:
+            raise ClickHouseConnectionError(
+                "clickhouse-connect not installed. Install with: pip install 'hunt-vault[clickhouse]'"
+            )
+
+        # Load configuration
+        if self._config is None:
+            self._config = ClickHouseConfig.load()
+
+        # Create client with retry logic
+        max_retries = 2
+        retry_delay = 5  # seconds
+
+        for attempt in range(max_retries):
+            try:
+                # Check if running in AWS Lambda (no SSL verification)
+                is_lambda = os.environ.get("AWS_EXECUTION_ENV") or os.environ.get("AWS_LAMBDA_FUNCTION_NAME")
+
+                client = clickhouse_connect.get_client(
+                    host=self._config.host,
+                    port=self._config.port,
+                    username=self._config.username,
+                    password=self._config.password,
+                    database=self._config.database,
+                    secure=self._config.secure,
+                    verify=not bool(is_lambda),  # Disable SSL verification in Lambda
+                )
+                # Test connection with simple query
+                client.command("SELECT 1")
+                return client
+            except Exception as e:
+                if "authentication" in str(e).lower() or "credential" in str(e).lower():
+                    # Authentication failures should not retry
+                    raise ClickHouseConnectionError(
+                        f"Authentication failed: Invalid credentials for user '{self._config.username}'. "
+                        f"Check CLICKHOUSE_USER and CLICKHOUSE_PASSWORD environment variables."
+                    ) from e
+                elif attempt < max_retries - 1:
+                    # Network errors: retry once
+                    time.sleep(retry_delay)
+                    continue
+                else:
+                    # Final attempt failed
+                    raise ClickHouseConnectionError(f"Failed to connect to ClickHouse at {self._config.host}: {e}") from e
+
+        # Should never reach here due to max_retries logic, but for type safety
+        raise ClickHouseConnectionError("Failed to establish connection after retries")
+
+    def get_config(self) -> ClickHouseConfig:
+        """Get current configuration.
+
+        Returns:
+            ClickHouseConfig instance
+
+        Raises:
+            ClickHouseConfigError: If configuration loading fails
+        """
+        if self._config is None:
+            self._config = ClickHouseConfig.load()
+        return self._config
+
+    def close(self) -> None:
+        """Close the current connection.
+
+        Note: Typically not needed for CLI use cases (process termination handles cleanup).
+        Provided for completeness and testing.
+        """
+        if self._client is not None:
+            try:
+                self._client.close()
+            except Exception:  # nosec B110 - cleanup, safe to ignore failures
+                pass  # Best effort close
+            finally:
+                self._client = None
+
+
+class ClickHouseClient:
+    """Wrapper for ClickHouse query execution with formatted output."""
+
+    def __init__(self) -> None:
+        """Initialize ClickHouse client wrapper."""
+        self.manager = ClickHouseConnectionManager.get_instance()
+
+    def execute_query(self, query: str, format: str = "json") -> Dict[str, Any]:
+        """Execute query and return formatted results.
+
+        Args:
+            query: SQL query to execute
+            format: Output format ('json', 'table', 'csv')
+
+        Returns:
+            Dictionary with query results and metadata:
+            {
+                'columns': List[str],
+                'data': List[List[Any]],
+                'rows': int,
+                'elapsed': str,
+                'query': str
+            }
+
+        Raises:
+            ClickHouseQueryError: If query execution fails
+        """
+        try:
+            client = self.manager.get_client()
+            start_time = time.time()
+
+            # Execute query
+            result = client.query(query)
+
+            elapsed = time.time() - start_time
+            elapsed_ms = int(elapsed * 1000)
+
+            # Extract column names and data
+            columns = result.column_names
+            data = result.result_rows
+            rows = len(data)
+
+            # Auto-log metrics to centralized tracker
+            try:
+                from athf.core.metrics_tracker import MetricsTracker
+
+                MetricsTracker.get_instance().log_clickhouse_query(
+                    sql=query,
+                    duration_ms=elapsed_ms,
+                    rows=rows,
+                    status="success",
+                )
+            except Exception:
+                pass  # Never fail query execution due to metrics logging
+
+            return {
+                "columns": columns,
+                "data": data,
+                "rows": rows,
+                "elapsed": f"{elapsed:.3f}s",
+                "query": query,
+            }
+
+        except Exception as e:
+            # Log error metrics
+            try:
+                from athf.core.metrics_tracker import MetricsTracker
+
+                status = "timeout" if "timeout" in str(e).lower() else "error"
+                MetricsTracker.get_instance().log_clickhouse_query(
+                    sql=query,
+                    duration_ms=0,  # Unknown duration on error
+                    rows=0,
+                    status=status,
+                )
+            except Exception:
+                pass  # Never fail due to metrics logging
+
+            # Check for timeout errors
+            if "timeout" in str(e).lower():
+                raise ClickHouseQueryError(
+                    f"Query timeout: {e}\n\n"
+                    "Tips to avoid timeouts:\n"
+                    "  1. Add time bounds: WHERE timestamp >= now() - INTERVAL 7 DAY\n"
+                    "  2. Start with small LIMIT: LIMIT 100\n"
+                    "  3. Filter early: Add WHERE clause before aggregations\n"
+                    '  4. Validate query: athf validate query --sql "..."'
+                ) from e
+            else:
+                raise ClickHouseQueryError(f"Query execution failed: {e}") from e
+
+    def test_connection(self) -> Dict[str, Any]:
+        """Test ClickHouse connection with simple query.
+
+        Returns:
+            Dictionary with connection status and details:
+            {
+                'success': bool,
+                'host': str,
+                'port': int,
+                'database': str,
+                'message': str
+            }
+
+        Raises:
+            ClickHouseConnectionError: If connection test fails
+        """
+        try:
+            client = self.manager.get_client()
+            client.command("SELECT 1")
+
+            config = self.manager.get_config()
+
+            return {
+                "success": True,
+                "host": config.host,
+                "port": config.port,
+                "database": config.database,
+                "message": "Connection successful",
+            }
+        except Exception as e:
+            config = self.manager.get_config()
+            return {
+                "success": False,
+                "host": config.host,
+                "port": config.port,
+                "database": config.database,
+                "message": f"Connection failed: {e}",
+            }
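Per ClickHouseConfig.load() above, connection settings come from CLICKHOUSE_* environment variables or a clickhouse: section in ~/.athf/clickhouse.yaml, with environment variables taking precedence. A minimal usage sketch, assuming clickhouse-connect is installed; the host name and credentials below are placeholders, not real values.

import os

from athf.core.clickhouse_connection import ClickHouseClient

# Equivalent to a ~/.athf/clickhouse.yaml containing:
#   clickhouse:
#     host: your-instance.clickhouse.cloud
#     port: 8443
#     username: hunter
#     password: secret
os.environ["CLICKHOUSE_HOST"] = "your-instance.clickhouse.cloud"
os.environ["CLICKHOUSE_USER"] = "hunter"
os.environ["CLICKHOUSE_PASSWORD"] = "secret"

client = ClickHouseClient()
print(client.test_connection())  # {'success': ..., 'host': ..., 'message': ...}

result = client.execute_query("SELECT 1 AS ok LIMIT 1")
print(result["columns"], result["rows"], result["elapsed"])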