databao-context-engine 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +18 -6
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +84 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
- databao_context_engine/cli/add_datasource_config.py +41 -15
- databao_context_engine/cli/commands.py +12 -43
- databao_context_engine/cli/info.py +2 -2
- databao_context_engine/databao_context_engine.py +137 -0
- databao_context_engine/databao_context_project_manager.py +96 -6
- databao_context_engine/datasources/add_config.py +34 -0
- databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
- databao_context_engine/datasources/datasource_context.py +93 -0
- databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
- databao_context_engine/{project → datasources}/types.py +64 -15
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +59 -30
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/install.py +13 -20
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/all_results_tool.py +2 -2
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +1 -4
- databao_context_engine/mcp/retrieve_tool.py +3 -11
- databao_context_engine/plugin_loader.py +111 -0
- databao_context_engine/pluginlib/build_plugin.py +25 -9
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/databases/athena_introspector.py +85 -22
- databao_context_engine/plugins/databases/base_introspector.py +5 -3
- databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
- databao_context_engine/plugins/databases/duckdb_introspector.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
- databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
- databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
- databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
- databao_context_engine/plugins/plugin_loader.py +43 -42
- databao_context_engine/plugins/resources/parquet_introspector.py +2 -3
- databao_context_engine/project/info.py +31 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +3 -3
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
- databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +2 -4
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.3.dist-info/METADATA +75 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/RECORD +68 -77
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.2.dist-info/METADATA +0 -187
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/WHEEL +0 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/entry_points.txt +0 -0
|
@@ -5,9 +5,7 @@ from typing import Any, Protocol, runtime_checkable
|
|
|
5
5
|
|
|
6
6
|
@dataclass
|
|
7
7
|
class EmbeddableChunk:
|
|
8
|
-
"""
|
|
9
|
-
A chunk that will be embedded as a vector and used when searching context from a given AI prompt
|
|
10
|
-
"""
|
|
8
|
+
"""A chunk that will be embedded as a vector and used when searching context from a given AI prompt."""
|
|
11
9
|
|
|
12
10
|
embeddable_text: str
|
|
13
11
|
"""
|
|
@@ -48,19 +46,19 @@ class BuildDatasourcePlugin[T](BaseBuildPlugin, Protocol):
|
|
|
48
46
|
"""
|
|
49
47
|
|
|
50
48
|
def check_connection(self, full_type: str, datasource_name: str, file_config: T) -> None:
|
|
51
|
-
"""
|
|
52
|
-
Checks whether the configuration to the datasource is working.
|
|
49
|
+
"""Check whether the configuration to the datasource is working.
|
|
53
50
|
|
|
54
51
|
The function is expected to succeed without a result if the connection is working.
|
|
55
52
|
If something is wrong with the connection, the function should raise an Exception
|
|
53
|
+
|
|
54
|
+
Raises:
|
|
55
|
+
NotSupportedError: If the plugin doesn't support this method.
|
|
56
56
|
"""
|
|
57
57
|
raise NotSupportedError("This method is not implemented for this plugin")
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
class DefaultBuildDatasourcePlugin(BuildDatasourcePlugin[dict[str, Any]], Protocol):
|
|
61
|
-
"""
|
|
62
|
-
Use this as a base class for plugins that don't need a specific config file type.
|
|
63
|
-
"""
|
|
61
|
+
"""Use this as a base class for plugins that don't need a specific config file type."""
|
|
64
62
|
|
|
65
63
|
config_file_type: type[dict[str, Any]] = dict[str, Any]
|
|
66
64
|
|
|
@@ -75,7 +73,7 @@ class BuildFilePlugin(BaseBuildPlugin, Protocol):
|
|
|
75
73
|
|
|
76
74
|
|
|
77
75
|
class NotSupportedError(RuntimeError):
|
|
78
|
-
"""Exception raised by methods not supported by a plugin"""
|
|
76
|
+
"""Exception raised by methods not supported by a plugin."""
|
|
79
77
|
|
|
80
78
|
|
|
81
79
|
BuildPlugin = BuildDatasourcePlugin | BuildFilePlugin
|
|
@@ -83,6 +81,12 @@ BuildPlugin = BuildDatasourcePlugin | BuildFilePlugin
|
|
|
83
81
|
|
|
84
82
|
@dataclass(kw_only=True, frozen=True)
|
|
85
83
|
class DatasourceType:
|
|
84
|
+
"""The type of a Datasource.
|
|
85
|
+
|
|
86
|
+
Attributes:
|
|
87
|
+
full_type: The full type of the datasource, in the format `<main_type>/<subtype>`.
|
|
88
|
+
"""
|
|
89
|
+
|
|
86
90
|
full_type: str
|
|
87
91
|
|
|
88
92
|
def __post_init__(self):
|
|
@@ -92,16 +96,28 @@ class DatasourceType:
|
|
|
92
96
|
|
|
93
97
|
@property
|
|
94
98
|
def main_type(self) -> str:
|
|
99
|
+
"""The main type of the datasource, aka the folder in which the config or raw file is located."""
|
|
95
100
|
return self.full_type.split("/")[0]
|
|
96
101
|
|
|
97
102
|
@property
|
|
98
103
|
def config_folder(self) -> str:
|
|
104
|
+
"""The folder in which the config or raw file is located. This is equivalent to `main_type`."""
|
|
99
105
|
return self.main_type
|
|
100
106
|
|
|
101
107
|
@property
|
|
102
108
|
def subtype(self) -> str:
|
|
109
|
+
"""The subtype of the datasource. This is the actual type declared in the config file or the raw file's extension."""
|
|
103
110
|
return self.full_type.split("/")[1]
|
|
104
111
|
|
|
105
112
|
@staticmethod
|
|
106
113
|
def from_main_and_subtypes(main_type: str, subtype: str) -> "DatasourceType":
|
|
114
|
+
"""Create a DatasourceType from its main type and subtype.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
main_type: The main type (aka config folder) of the datasource.
|
|
118
|
+
subtype: The subtype of the datasource.
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
A DatasourceType instance with the specified main type and subtype.
|
|
122
|
+
"""
|
|
107
123
|
return DatasourceType(full_type=f"{main_type}/{subtype}")
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from dataclasses import dataclass
|
|
2
4
|
from typing import Any, Protocol, runtime_checkable
|
|
3
5
|
|
|
@@ -17,12 +19,23 @@ class DuckDBSecret(BaseModel):
|
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
@dataclass(kw_only=True)
|
|
20
|
-
class
|
|
22
|
+
class ConfigUnionPropertyDefinition:
|
|
23
|
+
property_key: str
|
|
24
|
+
types: tuple[type, ...]
|
|
25
|
+
type_properties: dict[type, list[ConfigPropertyDefinition]]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(kw_only=True)
|
|
29
|
+
class ConfigSinglePropertyDefinition:
|
|
21
30
|
property_key: str
|
|
22
31
|
required: bool
|
|
23
32
|
property_type: type | None = str
|
|
24
33
|
default_value: str | None = None
|
|
25
|
-
nested_properties: list[
|
|
34
|
+
nested_properties: list[ConfigPropertyDefinition] | None = None
|
|
35
|
+
secret: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
ConfigPropertyDefinition = ConfigSinglePropertyDefinition | ConfigUnionPropertyDefinition
|
|
26
39
|
|
|
27
40
|
|
|
28
41
|
@dataclass(kw_only=True)
|
|
@@ -30,6 +43,7 @@ class ConfigPropertyAnnotation:
|
|
|
30
43
|
required: bool = False
|
|
31
44
|
default_value: str | None = None
|
|
32
45
|
ignored_for_config_wizard: bool = False
|
|
46
|
+
secret: bool = False
|
|
33
47
|
|
|
34
48
|
|
|
35
49
|
@runtime_checkable
|
|
@@ -1,22 +1,64 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Annotated
|
|
4
4
|
|
|
5
5
|
from pyathena import connect
|
|
6
6
|
from pyathena.cursor import DictCursor
|
|
7
|
-
from pydantic import Field
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
9
10
|
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
10
11
|
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
11
12
|
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema
|
|
12
13
|
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
class AwsProfileAuth(BaseModel):
|
|
17
|
+
profile_name: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AwsIamAuth(BaseModel):
|
|
21
|
+
aws_access_key_id: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
22
|
+
aws_secret_access_key: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
23
|
+
session_token: str | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AwsAssumeRoleAuth(BaseModel):
|
|
27
|
+
role_arn: str | None = None
|
|
28
|
+
role_session_name: str | None = None
|
|
29
|
+
source_profile: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AwsDefaultAuth(BaseModel):
|
|
33
|
+
# Uses environment variables, instance profile, ECS task role
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AthenaConnectionProperties(BaseModel):
|
|
38
|
+
region_name: str
|
|
39
|
+
schema_name: str = "default"
|
|
40
|
+
catalog: str | None = "awsdatacatalog"
|
|
41
|
+
work_group: str | None = None
|
|
42
|
+
s3_staging_dir: str | None = None
|
|
43
|
+
auth: AwsIamAuth | AwsProfileAuth | AwsDefaultAuth | AwsAssumeRoleAuth
|
|
44
|
+
additional_properties: dict[str, Any] = {}
|
|
45
|
+
|
|
46
|
+
def to_athena_kwargs(self) -> dict[str, Any]:
|
|
47
|
+
kwargs = self.model_dump(
|
|
48
|
+
exclude={
|
|
49
|
+
"additional_properties": True,
|
|
50
|
+
},
|
|
51
|
+
exclude_none=True,
|
|
52
|
+
)
|
|
53
|
+
auth_fields = kwargs.pop("auth", {})
|
|
54
|
+
kwargs.update(auth_fields)
|
|
55
|
+
kwargs.update(self.additional_properties)
|
|
56
|
+
return kwargs
|
|
57
|
+
|
|
58
|
+
|
|
15
59
|
class AthenaConfigFile(BaseDatabaseConfigFile):
|
|
16
|
-
type: str = Field(default="
|
|
17
|
-
connection:
|
|
18
|
-
description="Connection parameters for the Athena database. It can contain any of the keys supported by the Athena connection library"
|
|
19
|
-
)
|
|
60
|
+
type: str = Field(default="athena")
|
|
61
|
+
connection: AthenaConnectionProperties
|
|
20
62
|
|
|
21
63
|
|
|
22
64
|
class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
@@ -26,11 +68,7 @@ class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
|
26
68
|
supports_catalogs = True
|
|
27
69
|
|
|
28
70
|
def _connect(self, file_config: AthenaConfigFile):
|
|
29
|
-
|
|
30
|
-
if not isinstance(connection, Mapping):
|
|
31
|
-
raise ValueError("Invalid YAML config: 'connection' must be a mapping of connection parameters")
|
|
32
|
-
|
|
33
|
-
return connect(**connection, cursor_class=DictCursor)
|
|
71
|
+
return connect(**file_config.connection.to_athena_kwargs(), cursor_class=DictCursor)
|
|
34
72
|
|
|
35
73
|
def _fetchall_dicts(self, connection, sql: str, params) -> list[dict]:
|
|
36
74
|
with connection.cursor() as cur:
|
|
@@ -38,29 +76,28 @@ class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
|
38
76
|
return cur.fetchall()
|
|
39
77
|
|
|
40
78
|
def _get_catalogs(self, connection, file_config: AthenaConfigFile) -> list[str]:
|
|
41
|
-
catalog = file_config.connection.
|
|
79
|
+
catalog = file_config.connection.catalog or self._resolve_pseudo_catalog_name(file_config)
|
|
42
80
|
return [catalog]
|
|
43
81
|
|
|
44
82
|
def _connect_to_catalog(self, file_config: AthenaConfigFile, catalog: str):
|
|
45
|
-
self._connect(file_config)
|
|
83
|
+
return self._connect(file_config)
|
|
46
84
|
|
|
47
85
|
def _sql_list_schemas(self, catalogs: list[str] | None) -> SQLQuery:
|
|
48
86
|
if not catalogs:
|
|
49
87
|
return SQLQuery("SELECT schema_name, catalog_name FROM information_schema.schemata", None)
|
|
50
88
|
catalog = catalogs[0]
|
|
51
|
-
sql = "SELECT schema_name, catalog_name FROM information_schema.schemata
|
|
52
|
-
return SQLQuery(sql,
|
|
89
|
+
sql = f"SELECT schema_name, catalog_name FROM {catalog}.information_schema.schemata"
|
|
90
|
+
return SQLQuery(sql, None)
|
|
53
91
|
|
|
54
|
-
# TODO: Incomplete plugin. Awaiting permission access to AWS to properly develop
|
|
55
92
|
def collect_catalog_model(self, connection, catalog: str, schemas: list[str]) -> list[DatabaseSchema] | None:
|
|
56
93
|
if not schemas:
|
|
57
94
|
return []
|
|
58
95
|
|
|
59
|
-
comps =
|
|
96
|
+
comps = self._component_queries(catalog, schemas)
|
|
60
97
|
results: dict[str, list[dict]] = {}
|
|
61
98
|
|
|
62
99
|
for name, q in comps.items():
|
|
63
|
-
results[name] = self._fetchall_dicts(connection, q
|
|
100
|
+
results[name] = self._fetchall_dicts(connection, q, None)
|
|
64
101
|
|
|
65
102
|
return IntrospectionModelBuilder.build_schemas_from_components(
|
|
66
103
|
schemas=schemas,
|
|
@@ -73,8 +110,32 @@ class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
|
73
110
|
idx_cols=[],
|
|
74
111
|
)
|
|
75
112
|
|
|
76
|
-
def
|
|
77
|
-
|
|
113
|
+
def _component_queries(self, catalog: str, schemas: list[str]) -> dict[str, str]:
|
|
114
|
+
schemas_in = ", ".join(self._quote_literal(s) for s in schemas)
|
|
115
|
+
return {
|
|
116
|
+
"relations": self._sql_relations(catalog, schemas_in),
|
|
117
|
+
"columns": self._sql_columns(catalog, schemas_in),
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
def _sql_relations(self, catalog: str, schemas_in: str) -> str:
|
|
121
|
+
return f"""
|
|
122
|
+
SELECT
|
|
123
|
+
table_schema AS schema_name,
|
|
124
|
+
table_name,
|
|
125
|
+
CASE table_type
|
|
126
|
+
WHEN 'BASE TABLE' THEN 'table'
|
|
127
|
+
WHEN 'VIEW' THEN 'view'
|
|
128
|
+
ELSE LOWER(table_type)
|
|
129
|
+
END AS kind,
|
|
130
|
+
NULL AS description
|
|
131
|
+
FROM
|
|
132
|
+
{catalog}.information_schema.tables
|
|
133
|
+
WHERE
|
|
134
|
+
table_schema IN ({schemas_in})
|
|
135
|
+
"""
|
|
136
|
+
|
|
137
|
+
def _sql_columns(self, catalog: str, schemas_in: str) -> str:
|
|
138
|
+
return f"""
|
|
78
139
|
SELECT
|
|
79
140
|
table_schema AS schema_name,
|
|
80
141
|
table_name,
|
|
@@ -85,13 +146,12 @@ class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
|
85
146
|
FROM
|
|
86
147
|
{catalog}.information_schema.columns
|
|
87
148
|
WHERE
|
|
88
|
-
table_schema IN ({
|
|
149
|
+
table_schema IN ({schemas_in})
|
|
89
150
|
ORDER BY
|
|
90
151
|
table_schema,
|
|
91
152
|
table_name,
|
|
92
153
|
ordinal_position
|
|
93
154
|
"""
|
|
94
|
-
return SQLQuery(sql, {"schema": schemas})
|
|
95
155
|
|
|
96
156
|
def _resolve_pseudo_catalog_name(self, file_config: AthenaConfigFile) -> str:
|
|
97
157
|
return "awsdatacatalog"
|
|
@@ -99,3 +159,6 @@ class AthenaIntrospector(BaseIntrospector[AthenaConfigFile]):
|
|
|
99
159
|
def _sql_sample_rows(self, catalog: str, schema: str, table: str, limit: int) -> SQLQuery:
|
|
100
160
|
sql = f'SELECT * FROM "{schema}"."{table}" LIMIT %(limit)s'
|
|
101
161
|
return SQLQuery(sql, {"limit": limit})
|
|
162
|
+
|
|
163
|
+
def _quote_literal(self, value: str) -> str:
|
|
164
|
+
return "'" + str(value).replace("'", "''") + "'"
|
|
@@ -121,9 +121,11 @@ class BaseIntrospector[T: SupportsIntrospectionScope](ABC):
|
|
|
121
121
|
|
|
122
122
|
@abstractmethod
|
|
123
123
|
def _connect_to_catalog(self, file_config: T, catalog: str):
|
|
124
|
-
"""Return a connection scoped to `catalog`.
|
|
125
|
-
|
|
126
|
-
|
|
124
|
+
"""Return a connection scoped to `catalog`.
|
|
125
|
+
|
|
126
|
+
For engines that don’t need a new connection, return a connection with the
|
|
127
|
+
session set/USE’d to that catalog.
|
|
128
|
+
"""
|
|
127
129
|
|
|
128
130
|
def _sql_sample_rows(self, catalog: str, schema: str, table: str, limit: int) -> SQLQuery:
|
|
129
131
|
raise NotImplementedError
|
|
@@ -1,21 +1,34 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Annotated
|
|
4
4
|
|
|
5
5
|
import clickhouse_connect
|
|
6
|
-
from pydantic import Field
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
7
|
|
|
8
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
8
9
|
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
9
10
|
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
10
11
|
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema
|
|
11
12
|
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
12
13
|
|
|
13
14
|
|
|
15
|
+
class ClickhouseConnectionProperties(BaseModel):
|
|
16
|
+
host: Annotated[str, ConfigPropertyAnnotation(default_value="localhost", required=True)]
|
|
17
|
+
port: int | None = None
|
|
18
|
+
database: str | None = None
|
|
19
|
+
username: str | None = None
|
|
20
|
+
password: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
21
|
+
additional_properties: dict[str, Any] = {}
|
|
22
|
+
|
|
23
|
+
def to_clickhouse_kwargs(self) -> dict[str, Any]:
|
|
24
|
+
kwargs = self.model_dump(exclude={"additional_properties"}, exclude_none=True)
|
|
25
|
+
kwargs.update(self.additional_properties)
|
|
26
|
+
return kwargs
|
|
27
|
+
|
|
28
|
+
|
|
14
29
|
class ClickhouseConfigFile(BaseDatabaseConfigFile):
|
|
15
|
-
type: str = Field(default="
|
|
16
|
-
connection:
|
|
17
|
-
description="Connection parameters for the Clickhouse database. It can contain any of the keys supported by the Clickhouse connection library (see https://clickhouse.com/docs/integrations/language-clients/python/driver-api#connection-arguments)"
|
|
18
|
-
)
|
|
30
|
+
type: str = Field(default="clickhouse")
|
|
31
|
+
connection: ClickhouseConnectionProperties
|
|
19
32
|
|
|
20
33
|
|
|
21
34
|
class ClickhouseIntrospector(BaseIntrospector[ClickhouseConfigFile]):
|
|
@@ -24,11 +37,9 @@ class ClickhouseIntrospector(BaseIntrospector[ClickhouseConfigFile]):
|
|
|
24
37
|
supports_catalogs = True
|
|
25
38
|
|
|
26
39
|
def _connect(self, file_config: ClickhouseConfigFile):
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return clickhouse_connect.get_client(**connection)
|
|
40
|
+
return clickhouse_connect.get_client(
|
|
41
|
+
**file_config.connection.to_clickhouse_kwargs(),
|
|
42
|
+
)
|
|
32
43
|
|
|
33
44
|
def _connect_to_catalog(self, file_config: ClickhouseConfigFile, catalog: str):
|
|
34
45
|
return self._connect(file_config)
|
|
@@ -10,7 +10,7 @@ from databao_context_engine.plugins.databases.introspection_model_builder import
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class DuckDBConfigFile(BaseDatabaseConfigFile):
|
|
13
|
-
type: str = Field(default="
|
|
13
|
+
type: str = Field(default="duckdb")
|
|
14
14
|
connection: DuckDBConnectionConfig
|
|
15
15
|
|
|
16
16
|
|
|
@@ -6,10 +6,11 @@ from pydantic import BaseModel, ConfigDict, field_validator, model_validator
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ScopeIncludeRule(BaseModel):
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
"""Allowlist selector.
|
|
10
|
+
|
|
11
|
+
Attributes:
|
|
12
|
+
catalog: optional glob pattern
|
|
13
|
+
schemas: optional list of glob patterns (string also accepted and normalized to a list)
|
|
13
14
|
|
|
14
15
|
A rule must specify at least one of: catalog, schemas.
|
|
15
16
|
"""
|
|
@@ -36,11 +37,12 @@ class ScopeIncludeRule(BaseModel):
|
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class ScopeExcludeRule(BaseModel):
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
"""Denylist selector.
|
|
41
|
+
|
|
42
|
+
Attributes:
|
|
43
|
+
catalog: optional glob pattern
|
|
44
|
+
schemas: optional list of glob patterns (string also accepted)
|
|
45
|
+
except_schemas: optional list of glob patterns (string also accepted)
|
|
44
46
|
|
|
45
47
|
If a target matches the rule but also matches except_schemas, it is NOT excluded by this rule.
|
|
46
48
|
"""
|
|
@@ -10,17 +10,14 @@ from databao_context_engine.plugins.databases.introspection_scope import (
|
|
|
10
10
|
|
|
11
11
|
@dataclass(frozen=True)
|
|
12
12
|
class ScopeSelection:
|
|
13
|
-
"""
|
|
14
|
-
The final catalog+schema scope to introspect.
|
|
15
|
-
"""
|
|
13
|
+
"""The final catalog+schema scope to introspect."""
|
|
16
14
|
|
|
17
15
|
catalogs: list[str]
|
|
18
16
|
schemas_per_catalog: dict[str, list[str]]
|
|
19
17
|
|
|
20
18
|
|
|
21
19
|
class IntrospectionScopeMatcher:
|
|
22
|
-
"""
|
|
23
|
-
Applies include/exclude rules (glob matching, case-insensitive) to a discovered set of catalogs/schemas.
|
|
20
|
+
"""Applies include/exclude rules (glob matching, case-insensitive) to a discovered set of catalogs/schemas.
|
|
24
21
|
|
|
25
22
|
Semantics:
|
|
26
23
|
- If include is empty => start from "everything"
|
|
@@ -1,21 +1,36 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Mapping
|
|
3
|
+
from typing import Any, Mapping, Annotated
|
|
4
4
|
|
|
5
5
|
from mssql_python import connect # type: ignore[import-untyped]
|
|
6
|
-
from pydantic import Field
|
|
6
|
+
from pydantic import Field, BaseModel
|
|
7
7
|
|
|
8
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
8
9
|
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
9
10
|
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
10
11
|
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema, DatabaseTable
|
|
11
12
|
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
12
13
|
|
|
13
14
|
|
|
15
|
+
class MSSQLConnectionProperties(BaseModel):
|
|
16
|
+
host: Annotated[str, ConfigPropertyAnnotation(default_value="localhost", required=True)]
|
|
17
|
+
port: int | None = None
|
|
18
|
+
instance_name: str | None = None
|
|
19
|
+
database: str | None = None
|
|
20
|
+
user: str | None = None
|
|
21
|
+
password: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
22
|
+
encrypt: str | None = None
|
|
23
|
+
additional_properties: dict[str, Any] = {}
|
|
24
|
+
|
|
25
|
+
def to_mssql_kwargs(self) -> dict[str, Any]:
|
|
26
|
+
kwargs = self.model_dump(exclude={"additional_properties"}, exclude_none=True)
|
|
27
|
+
kwargs.update(self.additional_properties)
|
|
28
|
+
return kwargs
|
|
29
|
+
|
|
30
|
+
|
|
14
31
|
class MSSQLConfigFile(BaseDatabaseConfigFile):
|
|
15
|
-
type: str = Field(default="
|
|
16
|
-
connection:
|
|
17
|
-
description="Connection parameters for the Microsoft Server SQL database. It can contain any of the keys supported by the Microsoft Server connection library"
|
|
18
|
-
)
|
|
32
|
+
type: str = Field(default="mssql")
|
|
33
|
+
connection: MSSQLConnectionProperties
|
|
19
34
|
|
|
20
35
|
|
|
21
36
|
class MSSQLIntrospector(BaseIntrospector[MSSQLConfigFile]):
|
|
@@ -42,22 +57,16 @@ class MSSQLIntrospector(BaseIntrospector[MSSQLConfigFile]):
|
|
|
42
57
|
|
|
43
58
|
def _connect(self, file_config: MSSQLConfigFile):
|
|
44
59
|
connection = file_config.connection
|
|
45
|
-
|
|
46
|
-
raise ValueError("Invalid YAML config: 'connection' must be a mapping of connection parameters")
|
|
47
|
-
|
|
48
|
-
connection_string = self._create_connection_string_for_config(connection)
|
|
60
|
+
connection_string = self._create_connection_string_for_config(connection.to_mssql_kwargs())
|
|
49
61
|
return connect(connection_string)
|
|
50
62
|
|
|
51
63
|
def _connect_to_catalog(self, file_config: MSSQLConfigFile, catalog: str):
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
connection_string = self._create_connection_string_for_config(cfg_for_db)
|
|
57
|
-
return connect(connection_string)
|
|
64
|
+
cfg = file_config.model_copy(deep=True)
|
|
65
|
+
cfg.connection.database = catalog
|
|
66
|
+
return self._connect(cfg)
|
|
58
67
|
|
|
59
68
|
def _get_catalogs(self, connection, file_config: MSSQLConfigFile) -> list[str]:
|
|
60
|
-
database = file_config.connection.
|
|
69
|
+
database = file_config.connection.database
|
|
61
70
|
if isinstance(database, str) and database:
|
|
62
71
|
return [database]
|
|
63
72
|
|
|
@@ -1,18 +1,33 @@
|
|
|
1
|
-
from typing import Any,
|
|
1
|
+
from typing import Any, Annotated
|
|
2
2
|
|
|
3
3
|
import pymysql
|
|
4
|
-
from pydantic import Field
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
5
|
from pymysql.constants import CLIENT
|
|
6
6
|
|
|
7
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
7
8
|
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
8
9
|
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
9
10
|
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema, DatabaseTable
|
|
10
11
|
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
class MySQLConnectionProperties(BaseModel):
|
|
15
|
+
host: Annotated[str, ConfigPropertyAnnotation(default_value="localhost", required=True)]
|
|
16
|
+
port: int | None = None
|
|
17
|
+
database: str | None = None
|
|
18
|
+
user: str | None = None
|
|
19
|
+
password: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
20
|
+
additional_properties: dict[str, Any] = {}
|
|
21
|
+
|
|
22
|
+
def to_pymysql_kwargs(self) -> dict[str, Any]:
|
|
23
|
+
kwargs = self.model_dump(exclude={"additional_properties"}, exclude_none=True)
|
|
24
|
+
kwargs.update(self.additional_properties)
|
|
25
|
+
return kwargs
|
|
26
|
+
|
|
27
|
+
|
|
13
28
|
class MySQLConfigFile(BaseDatabaseConfigFile):
|
|
14
|
-
connection:
|
|
15
|
-
type: str = Field(default="
|
|
29
|
+
connection: MySQLConnectionProperties
|
|
30
|
+
type: str = Field(default="mysql")
|
|
16
31
|
|
|
17
32
|
|
|
18
33
|
class MySQLIntrospector(BaseIntrospector[MySQLConfigFile]):
|
|
@@ -21,20 +36,16 @@ class MySQLIntrospector(BaseIntrospector[MySQLConfigFile]):
|
|
|
21
36
|
supports_catalogs = True
|
|
22
37
|
|
|
23
38
|
def _connect(self, file_config: MySQLConfigFile):
|
|
24
|
-
connection = file_config.connection
|
|
25
|
-
if not isinstance(connection, Mapping):
|
|
26
|
-
raise ValueError("Invalid YAML config: 'connection' must be a mapping of connection parameters")
|
|
27
|
-
|
|
28
39
|
return pymysql.connect(
|
|
29
|
-
**connection,
|
|
40
|
+
**file_config.connection.to_pymysql_kwargs(),
|
|
30
41
|
cursorclass=pymysql.cursors.DictCursor,
|
|
31
42
|
client_flag=CLIENT.MULTI_STATEMENTS | CLIENT.MULTI_RESULTS,
|
|
32
43
|
)
|
|
33
44
|
|
|
34
45
|
def _connect_to_catalog(self, file_config: MySQLConfigFile, catalog: str):
|
|
35
|
-
cfg =
|
|
36
|
-
cfg
|
|
37
|
-
return self._connect(
|
|
46
|
+
cfg = file_config.model_copy(deep=True)
|
|
47
|
+
cfg.connection.database = catalog
|
|
48
|
+
return self._connect(cfg)
|
|
38
49
|
|
|
39
50
|
def _get_catalogs(self, connection, file_config: MySQLConfigFile) -> list[str]:
|
|
40
51
|
with connection.cursor() as cur:
|
|
@@ -16,12 +16,12 @@ class PostgresConnectionProperties(BaseModel):
|
|
|
16
16
|
port: int | None = None
|
|
17
17
|
database: str | None = None
|
|
18
18
|
user: str | None = None
|
|
19
|
-
password: str
|
|
19
|
+
password: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
20
20
|
additional_properties: dict[str, Any] = {}
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class PostgresConfigFile(BaseDatabaseConfigFile):
|
|
24
|
-
type: str = Field(default="
|
|
24
|
+
type: str = Field(default="postgres")
|
|
25
25
|
connection: PostgresConnectionProperties
|
|
26
26
|
|
|
27
27
|
|
|
@@ -1,22 +1,57 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Dict, List,
|
|
3
|
+
from typing import Any, Dict, List, Annotated
|
|
4
4
|
|
|
5
5
|
import snowflake.connector
|
|
6
|
-
from pydantic import Field
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
7
|
from snowflake.connector import DictCursor
|
|
8
8
|
|
|
9
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
9
10
|
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
10
11
|
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
11
12
|
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema
|
|
12
13
|
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
class SnowflakePasswordAuth(BaseModel):
|
|
17
|
+
password: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SnowflakeKeyPairAuth(BaseModel):
|
|
21
|
+
private_key_file: str | None = None
|
|
22
|
+
private_key_file_pwd: str | None = None
|
|
23
|
+
private_key: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SnowflakeSSOAuth(BaseModel):
|
|
27
|
+
authenticator: str = Field(description='e.g. "externalbrowser"')
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SnowflakeConnectionProperties(BaseModel):
|
|
31
|
+
account: Annotated[str, ConfigPropertyAnnotation(required=True)]
|
|
32
|
+
warehouse: str | None = None
|
|
33
|
+
database: str | None = None
|
|
34
|
+
user: str | None = None
|
|
35
|
+
role: str | None = None
|
|
36
|
+
auth: SnowflakePasswordAuth | SnowflakeKeyPairAuth | SnowflakeSSOAuth
|
|
37
|
+
additional_properties: dict[str, Any] = {}
|
|
38
|
+
|
|
39
|
+
def to_snowflake_kwargs(self) -> dict[str, Any]:
|
|
40
|
+
kwargs = self.model_dump(
|
|
41
|
+
exclude={
|
|
42
|
+
"additional_properties": True,
|
|
43
|
+
},
|
|
44
|
+
exclude_none=True,
|
|
45
|
+
)
|
|
46
|
+
auth_fields = kwargs.pop("auth", {})
|
|
47
|
+
kwargs.update(auth_fields)
|
|
48
|
+
kwargs.update(self.additional_properties)
|
|
49
|
+
return kwargs
|
|
50
|
+
|
|
51
|
+
|
|
15
52
|
class SnowflakeConfigFile(BaseDatabaseConfigFile):
|
|
16
|
-
type: str = Field(default="
|
|
17
|
-
connection:
|
|
18
|
-
description="Connection parameters for Snowflake. It can contain any of the keys supported by the Snowflake connection library"
|
|
19
|
-
)
|
|
53
|
+
type: str = Field(default="snowflake")
|
|
54
|
+
connection: SnowflakeConnectionProperties
|
|
20
55
|
|
|
21
56
|
|
|
22
57
|
class SnowflakeIntrospector(BaseIntrospector[SnowflakeConfigFile]):
|
|
@@ -28,11 +63,9 @@ class SnowflakeIntrospector(BaseIntrospector[SnowflakeConfigFile]):
|
|
|
28
63
|
|
|
29
64
|
def _connect(self, file_config: SnowflakeConfigFile):
|
|
30
65
|
connection = file_config.connection
|
|
31
|
-
if not isinstance(connection, Mapping):
|
|
32
|
-
raise ValueError("Invalid YAML config: 'connection' must be a mapping of connection parameters")
|
|
33
66
|
snowflake.connector.paramstyle = "qmark"
|
|
34
67
|
return snowflake.connector.connect(
|
|
35
|
-
**connection,
|
|
68
|
+
**connection.to_snowflake_kwargs(),
|
|
36
69
|
)
|
|
37
70
|
|
|
38
71
|
def _connect_to_catalog(self, file_config: SnowflakeConfigFile, catalog: str):
|
|
@@ -41,7 +74,7 @@ class SnowflakeIntrospector(BaseIntrospector[SnowflakeConfigFile]):
|
|
|
41
74
|
return snowflake.connector.connect(**cfg)
|
|
42
75
|
|
|
43
76
|
def _get_catalogs(self, connection, file_config: SnowflakeConfigFile) -> list[str]:
|
|
44
|
-
database = file_config.connection.
|
|
77
|
+
database = file_config.connection.database
|
|
45
78
|
if database:
|
|
46
79
|
return [database]
|
|
47
80
|
|