detectkit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. detectkit/__init__.py +17 -0
  2. detectkit/alerting/__init__.py +13 -0
  3. detectkit/alerting/channels/__init__.py +21 -0
  4. detectkit/alerting/channels/base.py +191 -0
  5. detectkit/alerting/channels/email.py +146 -0
  6. detectkit/alerting/channels/factory.py +193 -0
  7. detectkit/alerting/channels/mattermost.py +53 -0
  8. detectkit/alerting/channels/slack.py +55 -0
  9. detectkit/alerting/channels/telegram.py +110 -0
  10. detectkit/alerting/channels/webhook.py +139 -0
  11. detectkit/alerting/orchestrator.py +368 -0
  12. detectkit/cli/__init__.py +1 -0
  13. detectkit/cli/commands/__init__.py +1 -0
  14. detectkit/cli/commands/init.py +282 -0
  15. detectkit/cli/commands/run.py +427 -0
  16. detectkit/cli/commands/test_alert.py +184 -0
  17. detectkit/cli/main.py +186 -0
  18. detectkit/config/__init__.py +30 -0
  19. detectkit/config/metric_config.py +467 -0
  20. detectkit/config/profile.py +285 -0
  21. detectkit/config/project_config.py +164 -0
  22. detectkit/core/__init__.py +6 -0
  23. detectkit/core/interval.py +132 -0
  24. detectkit/core/models.py +106 -0
  25. detectkit/database/__init__.py +27 -0
  26. detectkit/database/clickhouse_manager.py +385 -0
  27. detectkit/database/internal_tables.py +581 -0
  28. detectkit/database/manager.py +324 -0
  29. detectkit/database/tables.py +134 -0
  30. detectkit/detectors/__init__.py +6 -0
  31. detectkit/detectors/base.py +222 -0
  32. detectkit/detectors/factory.py +138 -0
  33. detectkit/detectors/statistical/__init__.py +8 -0
  34. detectkit/detectors/statistical/iqr.py +230 -0
  35. detectkit/detectors/statistical/mad.py +423 -0
  36. detectkit/detectors/statistical/manual_bounds.py +177 -0
  37. detectkit/detectors/statistical/zscore.py +225 -0
  38. detectkit/loaders/__init__.py +6 -0
  39. detectkit/loaders/metric_loader.py +470 -0
  40. detectkit/loaders/query_template.py +164 -0
  41. detectkit/orchestration/__init__.py +9 -0
  42. detectkit/orchestration/task_manager.py +698 -0
  43. detectkit/utils/__init__.py +1 -0
  44. detectkit-0.1.0.dist-info/METADATA +231 -0
  45. detectkit-0.1.0.dist-info/RECORD +49 -0
  46. detectkit-0.1.0.dist-info/WHEEL +5 -0
  47. detectkit-0.1.0.dist-info/entry_points.txt +2 -0
  48. detectkit-0.1.0.dist-info/licenses/LICENSE +21 -0
  49. detectkit-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,285 @@
1
+ """
2
+ Profile configuration for detectkit.
3
+
4
+ Manages database connections and locations (similar to dbt profiles).
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Any, Dict, Optional
9
+
10
+ import yaml
11
+ from pydantic import BaseModel, Field, field_validator
12
+
13
+ from detectkit.database.clickhouse_manager import ClickHouseDatabaseManager
14
+ from detectkit.database.manager import BaseDatabaseManager
15
+
16
+
17
class ProfileConfig(BaseModel):
    """
    Single profile configuration.

    Defines connection parameters and database locations for a specific
    environment (dev, prod, etc.).

    The ``type`` validator accepts "clickhouse", "postgres" and "mysql",
    but only "clickhouse" can currently be turned into a manager by
    ``create_manager()``; the other two raise NotImplementedError.

    Attributes:
        type: Database type ("clickhouse", "postgres", "mysql")
        host: Database host
        port: Database port (1-65535)
        user: Database user
        password: Database password (excluded from the model repr)
        internal_database: Database for internal tables (ClickHouse/MySQL)
        internal_schema: Schema for internal tables (PostgreSQL only)
        data_database: Database for user data tables (ClickHouse/MySQL)
        data_schema: Schema for user data (PostgreSQL only)
        settings: Additional database-specific settings
    """

    type: str = Field(..., description="Database type")
    host: str = Field(default="localhost", description="Database host")
    port: int = Field(..., description="Database port")
    user: str = Field(default="default", description="Database user")
    # repr=False keeps the credential out of repr()/str() output so it does
    # not end up in logs or tracebacks; the attribute value is unchanged.
    password: str = Field(default="", repr=False, description="Database password")

    # Internal location for _dtk_* tables
    internal_database: Optional[str] = Field(
        default=None,
        description="Database for internal tables (ClickHouse/MySQL)"
    )
    internal_schema: Optional[str] = Field(
        default=None,
        description="Schema for internal tables (PostgreSQL)"
    )

    # Data location for user tables
    data_database: Optional[str] = Field(
        default=None,
        description="Database for user data tables (ClickHouse/MySQL)"
    )
    data_schema: Optional[str] = Field(
        default=None,
        description="Schema for user data (PostgreSQL)"
    )

    settings: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional database settings"
    )

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """
        Validate database type.

        Args:
            v: Candidate database type

        Returns:
            The unchanged value when it is one of the allowed types

        Raises:
            ValueError: If the type is not "clickhouse", "postgres" or "mysql"
        """
        allowed_types = {"clickhouse", "postgres", "mysql"}
        if v not in allowed_types:
            # Sort so the message is identical on every run (set iteration
            # order is not stable across interpreter invocations).
            raise ValueError(
                f"Invalid database type: {v}. "
                f"Allowed types: {', '.join(sorted(allowed_types))}"
            )
        return v

    @field_validator("port")
    @classmethod
    def validate_port(cls, v: int) -> int:
        """
        Validate port number.

        Args:
            v: Candidate port

        Returns:
            The unchanged port when it lies in 1..65535

        Raises:
            ValueError: If the port is outside 1..65535
        """
        if not (1 <= v <= 65535):
            raise ValueError(f"Port must be between 1 and 65535, got {v}")
        return v

    def _resolve_location(
        self,
        database: Optional[str],
        schema: Optional[str],
        database_field: str,
        schema_field: str,
    ) -> str:
        """
        Pick the database or schema value that applies to this profile type.

        Args:
            database: Value used for ClickHouse and MySQL profiles
            schema: Value used for PostgreSQL profiles
            database_field: Field name reported when ``database`` is unset
            schema_field: Field name reported when ``schema`` is unset

        Returns:
            The configured database/schema name

        Raises:
            ValueError: If the relevant value is empty/None or the profile
                type is not supported
        """
        if self.type == "clickhouse":
            value, field_name, label = database, database_field, "ClickHouse"
        elif self.type == "postgres":
            value, field_name, label = schema, schema_field, "PostgreSQL"
        elif self.type == "mysql":
            value, field_name, label = database, database_field, "MySQL"
        else:
            raise ValueError(f"Unsupported database type: {self.type}")

        if not value:
            raise ValueError(f"{field_name} must be set for {label}")
        return value

    def get_internal_location(self) -> str:
        """
        Get internal location (database or schema).

        Returns:
            Internal database/schema name

        Raises:
            ValueError: If location not configured
        """
        return self._resolve_location(
            self.internal_database,
            self.internal_schema,
            "internal_database",
            "internal_schema",
        )

    def get_data_location(self) -> str:
        """
        Get data location (database or schema).

        Returns:
            Data database/schema name

        Raises:
            ValueError: If location not configured
        """
        return self._resolve_location(
            self.data_database,
            self.data_schema,
            "data_database",
            "data_schema",
        )

    def create_manager(self) -> BaseDatabaseManager:
        """
        Create database manager from profile configuration.

        Returns:
            Database manager instance

        Raises:
            NotImplementedError: If database type not yet implemented
            ValueError: If a required location is not configured or the
                type is unsupported
        """
        if self.type == "clickhouse":
            return ClickHouseDatabaseManager(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                internal_database=self.get_internal_location(),
                data_database=self.get_data_location(),
                settings=self.settings,
            )
        elif self.type == "postgres":
            raise NotImplementedError("PostgreSQL support coming soon")
        elif self.type == "mysql":
            raise NotImplementedError("MySQL support coming soon")
        else:
            raise ValueError(f"Unsupported database type: {self.type}")
164
+
165
+
166
class ProfilesConfig(BaseModel):
    """
    Container for multiple profile configurations.

    Loaded from profiles.yml file.

    Attributes:
        profiles: Dictionary mapping profile names to configurations
        default_profile: Name of default profile to use
        alert_channels: Dictionary mapping channel names to configurations
    """

    profiles: Dict[str, ProfileConfig]
    default_profile: Optional[str] = None
    alert_channels: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict, description="Alert channel configurations"
    )

    @field_validator("default_profile")
    @classmethod
    def validate_default_profile(cls, v: Optional[str], info) -> Optional[str]:
        """
        Validate default profile exists.

        Args:
            v: Configured default profile name, or None
            info: Validation info; ``info.data`` holds the fields validated
                so far

        Returns:
            The unchanged value

        Raises:
            ValueError: If a name is given that is not a key of ``profiles``
        """
        if v is not None:
            # "profiles" is absent from info.data when it failed its own
            # validation, hence the empty-dict fallback.
            profiles = info.data.get("profiles", {})
            if v not in profiles:
                raise ValueError(
                    f"default_profile '{v}' not found in profiles. "
                    f"Available profiles: {', '.join(profiles.keys())}"
                )
        return v

    @classmethod
    def from_yaml(cls, path: Path) -> "ProfilesConfig":
        """
        Load profiles from YAML file.

        The file is always decoded as UTF-8 so that the result does not
        depend on the platform's locale encoding.

        Args:
            path: Path to profiles.yml

        Returns:
            ProfilesConfig instance

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If the file is empty or fails model validation
        """
        if not path.exists():
            raise FileNotFoundError(f"Profiles file not found: {path}")

        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # safe_load yields None for an empty document
        if not data:
            raise ValueError("Profiles file is empty")

        return cls.model_validate(data)

    def get_profile(self, name: Optional[str] = None) -> ProfileConfig:
        """
        Get profile configuration by name.

        Args:
            name: Profile name (if None, use default_profile)

        Returns:
            ProfileConfig instance

        Raises:
            ValueError: If profile not found or no default set
        """
        if name is None:
            if self.default_profile is None:
                raise ValueError(
                    "No profile name specified and no default_profile set. "
                    f"Available profiles: {', '.join(self.profiles.keys())}"
                )
            name = self.default_profile

        if name not in self.profiles:
            raise ValueError(
                f"Profile '{name}' not found. "
                f"Available profiles: {', '.join(self.profiles.keys())}"
            )

        return self.profiles[name]

    def create_manager(self, profile_name: Optional[str] = None) -> BaseDatabaseManager:
        """
        Create database manager for a profile.

        Args:
            profile_name: Profile name (if None, use default)

        Returns:
            Database manager instance

        Raises:
            ValueError: If the profile cannot be resolved
        """
        profile = self.get_profile(profile_name)
        return profile.create_manager()

    def get_alert_channel_config(self, channel_name: str) -> Dict[str, Any]:
        """
        Get alert channel configuration by name.

        Args:
            channel_name: Channel name

        Returns:
            Channel configuration dictionary

        Raises:
            ValueError: If channel not found
        """
        if channel_name not in self.alert_channels:
            available = ", ".join(sorted(self.alert_channels.keys()))
            raise ValueError(
                f"Alert channel '{channel_name}' not found. "
                f"Available channels: {available}"
            )

        return self.alert_channels[channel_name]
@@ -0,0 +1,164 @@
1
+ """
2
+ Project configuration models.
3
+
4
+ Defines configuration structure for detectkit_project.yml.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Dict, Optional
9
+
10
+ from pydantic import BaseModel, Field, field_validator
11
+
12
+
13
class ProjectPathsConfig(BaseModel):
    """
    Directory names used by a project.

    Attributes:
        metrics: Directory with metric YAML files (defaults to "metrics")
        sql: Directory with SQL query files (defaults to "sql")
        templates: Directory with alert templates (defaults to "templates")
    """

    metrics: str = Field(
        description="Metrics directory",
        default="metrics",
    )
    sql: str = Field(
        description="SQL files directory",
        default="sql",
    )
    templates: str = Field(
        description="Templates directory",
        default="templates",
    )
26
+
27
+
28
class ProjectTablesConfig(BaseModel):
    """
    Names of the internal tables a project uses by default.

    Attributes:
        datapoints: Datapoints table name (defaults to "_dtk_datapoints")
        detections: Detections table name (defaults to "_dtk_detections")
        tasks: Tasks table name (defaults to "_dtk_tasks")
    """

    datapoints: str = Field(
        description="Default datapoints table",
        default="_dtk_datapoints",
    )
    detections: str = Field(
        description="Default detections table",
        default="_dtk_detections",
    )
    tasks: str = Field(
        description="Default tasks table",
        default="_dtk_tasks",
    )
45
+
46
+
47
class ProjectTimeoutsConfig(BaseModel):
    """
    Per-operation timeout defaults, expressed in seconds.

    Every value must lie between 1 and 86400 (24 hours) inclusive.

    Attributes:
        load: Timeout for data loading operations (defaults to 3600)
        detect: Timeout for detection operations (defaults to 7200)
        alert: Timeout for alerting operations (defaults to 300)
    """

    load: int = Field(description="Load timeout (seconds)", default=3600)
    detect: int = Field(description="Detect timeout (seconds)", default=7200)
    alert: int = Field(description="Alert timeout (seconds)", default=300)

    @field_validator("load", "detect", "alert")
    @classmethod
    def validate_timeout(cls, v: int) -> int:
        """
        Ensure a timeout falls inside the accepted range.

        Args:
            v: Timeout in seconds

        Returns:
            The unchanged timeout

        Raises:
            ValueError: If the timeout is below 1 or above 86400 seconds
        """
        # Happy path first: anything within one second .. 24 hours is fine.
        if 1 <= v <= 86400:
            return v
        if v < 1:
            raise ValueError("Timeout must be at least 1 second")
        raise ValueError("Timeout cannot exceed 24 hours (86400 seconds)")
70
+
71
+
72
class ProjectConfig(BaseModel):
    """
    Project configuration loaded from detectkit_project.yml.

    Attributes:
        name: Project name (alphanumeric characters, underscores, dashes
            and spaces only)
        version: Project version
        paths: Directory paths configuration
        tables: Default table names
        timeouts: Operation timeouts
        default_profile: Default database profile to use

    Example YAML:
        ```yaml
        name: "my_analytics_project"
        version: "1.0"

        paths:
          metrics: "metrics"
          sql: "sql"
          templates: "templates"

        tables:
          datapoints: "_dtk_datapoints"
          detections: "_dtk_detections"
          tasks: "_dtk_tasks"

        timeouts:
          load: 3600
          detect: 7200
          alert: 300

        default_profile: "clickhouse_prod"
        ```
    """

    name: str = Field(..., description="Project name")
    version: str = Field(default="1.0", description="Project version")
    paths: ProjectPathsConfig = Field(
        default_factory=ProjectPathsConfig, description="Directory paths"
    )
    tables: ProjectTablesConfig = Field(
        default_factory=ProjectTablesConfig, description="Default table names"
    )
    timeouts: ProjectTimeoutsConfig = Field(
        default_factory=ProjectTimeoutsConfig, description="Operation timeouts"
    )
    default_profile: str = Field(..., description="Default database profile")

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """
        Validate project name.

        Args:
            v: Candidate project name

        Returns:
            The unchanged name

        Raises:
            ValueError: If the name is empty or contains a character other
                than alphanumerics, underscore, dash or space
        """
        if not v:
            raise ValueError("Project name cannot be empty")
        # Allow alphanumeric, underscore, dash, space
        if not all(c.isalnum() or c in ("_", "-", " ") for c in v):
            raise ValueError(
                "Project name can only contain alphanumeric characters, "
                "underscores, dashes, and spaces"
            )
        return v

    @classmethod
    def from_yaml_file(cls, path: Path) -> "ProjectConfig":
        """
        Load project configuration from YAML file.

        The file is always decoded as UTF-8 so that the result does not
        depend on the platform's locale encoding.

        Args:
            path: Path to detectkit_project.yml

        Returns:
            ProjectConfig instance

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If the file is empty or fails model validation

        Example:
            >>> config = ProjectConfig.from_yaml_file(Path("detectkit_project.yml"))
        """
        import yaml

        if not path.exists():
            raise FileNotFoundError(f"Project config file not found: {path}")

        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # safe_load yields None for an empty document
        if not data:
            raise ValueError(f"Empty project config file: {path}")

        return cls.model_validate(data)
@@ -0,0 +1,6 @@
1
+ """Core functionality for detectkit."""
2
+
3
+ from detectkit.core.interval import Interval
4
+ from detectkit.core.models import ColumnDefinition, TableModel
5
+
6
+ __all__ = ["Interval", "ColumnDefinition", "TableModel"]
@@ -0,0 +1,132 @@
1
+ """
2
+ Interval parsing and handling.
3
+
4
+ Supports:
5
+ - Integer seconds: 600
6
+ - String format: "10min", "1h", "1d"
7
+ """
8
+
9
+ import re
10
+ from typing import Union
11
+
12
+
13
class Interval:
    """
    Represents a time interval in seconds.

    Supports parsing from:
    - Integer (seconds): 600
    - String: "10min", "1h", "1d", "30s"

    Instances compare equal and hash by their length in seconds, so
    ``Interval("60s") == Interval("1m")``.

    Examples:
        >>> interval = Interval("10min")
        >>> interval.seconds
        600
        >>> interval = Interval(3600)
        >>> interval.seconds
        3600
    """

    # Accepted unit suffixes mapped to their length in seconds.
    UNITS = {
        's': 1,
        'sec': 1,
        'second': 1,
        'seconds': 1,
        'm': 60,
        'min': 60,
        'minute': 60,
        'minutes': 60,
        'h': 3600,
        'hour': 3600,
        'hours': 3600,
        'd': 86400,
        'day': 86400,
        'days': 86400,
    }

    def __init__(self, value: Union[int, str]):
        """
        Initialize interval from integer or string.

        Args:
            value: Interval as integer (seconds) or string ("10min")

        Raises:
            ValueError: If the value is not positive or the string format
                is invalid
            TypeError: If value is neither int nor str (bool is rejected)
        """
        # bool is a subclass of int; without this guard Interval(True)
        # would silently become a one-second interval.
        if isinstance(value, bool):
            raise TypeError(f"Interval must be int or str, got {type(value)}")

        if isinstance(value, int):
            if value <= 0:
                raise ValueError(f"Interval must be positive, got {value}")
            self._seconds = value
        elif isinstance(value, str):
            self._seconds = self._parse_string(value)
        else:
            raise TypeError(f"Interval must be int or str, got {type(value)}")

    def _parse_string(self, s: str) -> int:
        """
        Parse interval string.

        Surrounding whitespace is ignored and matching is case-insensitive.

        Args:
            s: String like "10min", "1h", "30s"

        Returns:
            Interval in seconds

        Raises:
            ValueError: If format is invalid, the number is not positive,
                or the unit is unknown
        """
        s = s.strip().lower()

        # Match pattern: digits followed by unit
        match = re.match(r'^(\d+)([a-z]+)$', s)
        if not match:
            raise ValueError(
                f"Invalid interval format: '{s}'. "
                f"Expected format: <number><unit> (e.g., '10min', '1h')"
            )

        value_str, unit = match.groups()
        value = int(value_str)

        if value <= 0:
            raise ValueError(f"Interval value must be positive, got {value}")

        if unit not in self.UNITS:
            raise ValueError(
                f"Unknown time unit: '{unit}'. "
                f"Supported units: {', '.join(sorted(self.UNITS))}"
            )

        return value * self.UNITS[unit]

    @property
    def seconds(self) -> int:
        """Get interval in seconds."""
        return self._seconds

    def __eq__(self, other) -> bool:
        """Check equality based on seconds."""
        if isinstance(other, Interval):
            return self._seconds == other._seconds
        # Let Python try the reflected comparison; the final result for
        # unrelated types is still False.
        return NotImplemented

    def __hash__(self) -> int:
        """Hash based on seconds."""
        return hash(self._seconds)

    def __repr__(self) -> str:
        """String representation."""
        return f"Interval({self._seconds})"

    def __str__(self) -> str:
        """User-friendly string representation."""
        # Use the largest unit that divides the interval evenly
        if self._seconds % 86400 == 0:
            return f"{self._seconds // 86400}d"
        elif self._seconds % 3600 == 0:
            return f"{self._seconds // 3600}h"
        elif self._seconds % 60 == 0:
            return f"{self._seconds // 60}min"
        else:
            return f"{self._seconds}s"
@@ -0,0 +1,106 @@
1
+ """
2
+ Core data models for detectkit.
3
+
4
+ Defines table schemas and column definitions for database abstraction.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, List, Optional
9
+
10
+
11
@dataclass
class ColumnDefinition:
    """
    Describes one column of a table.

    Attributes:
        name: Column name (must be non-empty)
        type: SQL type, database-specific (must be non-empty)
        nullable: Whether the column can be NULL
        default: Default value for the column
    """

    name: str
    type: str
    nullable: bool = False
    default: Optional[Any] = None

    def __post_init__(self):
        """
        Reject definitions with an empty name or type.

        Raises:
            ValueError: If ``name`` or ``type`` is empty (name is checked
                first)
        """
        required = (
            (self.name, "Column name cannot be empty"),
            (self.type, "Column type cannot be empty"),
        )
        for value, message in required:
            if not value:
                raise ValueError(message)
34
+
35
+
36
@dataclass
class TableModel:
    """
    Schema description of a table.

    This is used by BaseDatabaseManager.create_table() to create tables
    in a database-agnostic way.

    Attributes:
        columns: List of column definitions (at least one)
        primary_key: Names of the columns forming the primary key
            (at least one, each must exist in ``columns``)
        engine: Database engine (ClickHouse-specific, e.g., "MergeTree")
        order_by: Columns for ORDER BY clause (ClickHouse-specific); each
            must exist in ``columns`` when given
        indexes: Additional indexes to create

    Example:
        >>> model = TableModel(
        ...     columns=[
        ...         ColumnDefinition("id", "Int32"),
        ...         ColumnDefinition("name", "String"),
        ...     ],
        ...     primary_key=["id"],
        ...     engine="MergeTree",
        ...     order_by=["id"]
        ... )
    """

    columns: List[ColumnDefinition]
    primary_key: List[str]
    engine: Optional[str] = None
    order_by: Optional[List[str]] = None
    indexes: List[str] = field(default_factory=list)

    def __post_init__(self):
        """
        Check the schema for internal consistency.

        Raises:
            ValueError: If there are no columns, no primary key, or a
                primary-key / ORDER BY entry names an unknown column
        """
        if not self.columns:
            raise ValueError("Table must have at least one column")

        if not self.primary_key:
            raise ValueError("Table must have a primary key")

        known = {column.name for column in self.columns}
        # Sentinel distinguishes "nothing missing" from any real entry.
        nothing = object()

        # Report the first primary-key entry that is not a known column
        bad_pk = next(
            (entry for entry in self.primary_key if entry not in known),
            nothing,
        )
        if bad_pk is not nothing:
            raise ValueError(
                f"Primary key column '{bad_pk}' not found in table columns"
            )

        # Same check for ORDER BY; a missing/empty list means nothing to do
        bad_order = next(
            (entry for entry in (self.order_by or ()) if entry not in known),
            nothing,
        )
        if bad_order is not nothing:
            raise ValueError(
                f"ORDER BY column '{bad_order}' not found in table columns"
            )

    def get_column(self, name: str) -> Optional[ColumnDefinition]:
        """
        Look up a column definition by name.

        Args:
            name: Column name

        Returns:
            The first ColumnDefinition with that name, or None if not found
        """
        return next(
            (column for column in self.columns if column.name == name),
            None,
        )