databao-context-engine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +35 -0
- databao_context_engine/build_sources/__init__.py +0 -0
- databao_context_engine/build_sources/internal/__init__.py +0 -0
- databao_context_engine/build_sources/internal/build_runner.py +111 -0
- databao_context_engine/build_sources/internal/build_service.py +77 -0
- databao_context_engine/build_sources/internal/build_wiring.py +52 -0
- databao_context_engine/build_sources/internal/export_results.py +43 -0
- databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
- databao_context_engine/build_sources/public/__init__.py +0 -0
- databao_context_engine/build_sources/public/api.py +4 -0
- databao_context_engine/cli/__init__.py +0 -0
- databao_context_engine/cli/add_datasource_config.py +130 -0
- databao_context_engine/cli/commands.py +256 -0
- databao_context_engine/cli/datasources.py +64 -0
- databao_context_engine/cli/info.py +32 -0
- databao_context_engine/config/__init__.py +0 -0
- databao_context_engine/config/log_config.yaml +16 -0
- databao_context_engine/config/logging.py +43 -0
- databao_context_engine/databao_context_project_manager.py +92 -0
- databao_context_engine/databao_engine.py +85 -0
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +50 -0
- databao_context_engine/datasource_config/check_config.py +131 -0
- databao_context_engine/datasource_config/datasource_context.py +60 -0
- databao_context_engine/event_journal/__init__.py +0 -0
- databao_context_engine/event_journal/writer.py +29 -0
- databao_context_engine/generate_configs_schemas.py +92 -0
- databao_context_engine/init_project.py +18 -0
- databao_context_engine/introspection/__init__.py +0 -0
- databao_context_engine/introspection/property_extract.py +202 -0
- databao_context_engine/llm/__init__.py +0 -0
- databao_context_engine/llm/config.py +20 -0
- databao_context_engine/llm/descriptions/__init__.py +0 -0
- databao_context_engine/llm/descriptions/ollama.py +21 -0
- databao_context_engine/llm/descriptions/provider.py +10 -0
- databao_context_engine/llm/embeddings/__init__.py +0 -0
- databao_context_engine/llm/embeddings/ollama.py +37 -0
- databao_context_engine/llm/embeddings/provider.py +13 -0
- databao_context_engine/llm/errors.py +16 -0
- databao_context_engine/llm/factory.py +61 -0
- databao_context_engine/llm/install.py +227 -0
- databao_context_engine/llm/runtime.py +73 -0
- databao_context_engine/llm/service.py +159 -0
- databao_context_engine/main.py +19 -0
- databao_context_engine/mcp/__init__.py +0 -0
- databao_context_engine/mcp/all_results_tool.py +5 -0
- databao_context_engine/mcp/mcp_runner.py +16 -0
- databao_context_engine/mcp/mcp_server.py +63 -0
- databao_context_engine/mcp/retrieve_tool.py +22 -0
- databao_context_engine/pluginlib/__init__.py +0 -0
- databao_context_engine/pluginlib/build_plugin.py +107 -0
- databao_context_engine/pluginlib/config.py +37 -0
- databao_context_engine/pluginlib/plugin_utils.py +68 -0
- databao_context_engine/plugins/__init__.py +0 -0
- databao_context_engine/plugins/athena_db_plugin.py +12 -0
- databao_context_engine/plugins/base_db_plugin.py +45 -0
- databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/__init__.py +0 -0
- databao_context_engine/plugins/databases/athena_introspector.py +101 -0
- databao_context_engine/plugins/databases/base_introspector.py +144 -0
- databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
- databao_context_engine/plugins/databases/database_chunker.py +69 -0
- databao_context_engine/plugins/databases/databases_types.py +114 -0
- databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
- databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
- databao_context_engine/plugins/databases/introspection_scope.py +74 -0
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
- databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
- databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
- databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
- databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
- databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/mssql_db_plugin.py +12 -0
- databao_context_engine/plugins/mysql_db_plugin.py +12 -0
- databao_context_engine/plugins/parquet_plugin.py +32 -0
- databao_context_engine/plugins/plugin_loader.py +110 -0
- databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
- databao_context_engine/plugins/resources/__init__.py +0 -0
- databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
- databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
- databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
- databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
- databao_context_engine/project/__init__.py +0 -0
- databao_context_engine/project/datasource_discovery.py +141 -0
- databao_context_engine/project/info.py +44 -0
- databao_context_engine/project/init_project.py +102 -0
- databao_context_engine/project/layout.py +127 -0
- databao_context_engine/project/project_config.py +32 -0
- databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
- databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
- databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
- databao_context_engine/project/runs.py +39 -0
- databao_context_engine/project/types.py +134 -0
- databao_context_engine/retrieve_embeddings/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +3 -0
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/serialisation/yaml.py +35 -0
- databao_context_engine/services/__init__.py +0 -0
- databao_context_engine/services/chunk_embedding_service.py +104 -0
- databao_context_engine/services/embedding_shard_resolver.py +64 -0
- databao_context_engine/services/factories.py +88 -0
- databao_context_engine/services/models.py +12 -0
- databao_context_engine/services/persistence_service.py +61 -0
- databao_context_engine/services/run_name_policy.py +8 -0
- databao_context_engine/services/table_name_policy.py +15 -0
- databao_context_engine/storage/__init__.py +0 -0
- databao_context_engine/storage/connection.py +32 -0
- databao_context_engine/storage/exceptions/__init__.py +0 -0
- databao_context_engine/storage/exceptions/exceptions.py +6 -0
- databao_context_engine/storage/migrate.py +127 -0
- databao_context_engine/storage/migrations/V01__init.sql +63 -0
- databao_context_engine/storage/models.py +51 -0
- databao_context_engine/storage/repositories/__init__.py +0 -0
- databao_context_engine/storage/repositories/chunk_repository.py +130 -0
- databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
- databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
- databao_context_engine/storage/repositories/embedding_repository.py +113 -0
- databao_context_engine/storage/repositories/factories.py +35 -0
- databao_context_engine/storage/repositories/run_repository.py +157 -0
- databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
- databao_context_engine/storage/transaction.py +14 -0
- databao_context_engine/system/__init__.py +0 -0
- databao_context_engine/system/properties.py +13 -0
- databao_context_engine/templating/__init__.py +0 -0
- databao_context_engine/templating/renderer.py +29 -0
- databao_context_engine-0.1.1.dist-info/METADATA +186 -0
- databao_context_engine-0.1.1.dist-info/RECORD +135 -0
- databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
- databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Annotated, Any, Sequence
|
|
3
|
+
|
|
4
|
+
import asyncpg
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation
|
|
8
|
+
from databao_context_engine.plugins.base_db_plugin import BaseDatabaseConfigFile
|
|
9
|
+
from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
|
|
10
|
+
from databao_context_engine.plugins.databases.databases_types import DatabaseSchema
|
|
11
|
+
from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Connection settings for a PostgreSQL datasource, as written in the
# datasource config file.
# (Kept as `#` comments rather than a class docstring so the pydantic model's
# generated schema description is left untouched.)
class PostgresConnectionProperties(BaseModel):
    # Server host name. Carries ConfigPropertyAnnotation(default_value="localhost",
    # required=True); how that annotation is interpreted is defined in
    # databao_context_engine.pluginlib.config, not in this file.
    host: Annotated[str, ConfigPropertyAnnotation(default_value="localhost", required=True)]
    # TCP port; when left unset, PostgresqlIntrospector falls back to 5432.
    port: int | None = None
    # Database (catalog) name; when left unset, PostgresqlIntrospector connects
    # to "postgres" and lists the non-template databases itself.
    database: str | None = None
    # Login role; only forwarded to the driver when non-empty.
    user: str | None = None
    # Password; only forwarded to the driver when non-empty.
    password: str | None = None
    # Extra connection keyword arguments. PostgresqlIntrospector merges them in
    # last, so they can override the values derived from the fields above.
    # NOTE(review): relies on pydantic copying mutable field defaults per
    # instance, so the shared `{}` literal is assumed safe here -- confirm.
    additional_properties: dict[str, Any] = {}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Datasource config file model for PostgreSQL; extends the shared
# BaseDatabaseConfigFile with PostgreSQL-specific connection properties.
# (Kept as `#` comments rather than a class docstring so the pydantic model's
# generated schema description is left untouched.)
class PostgresConfigFile(BaseDatabaseConfigFile):
    # Datasource type identifier; defaults to "databases/postgres".
    type: str = Field(default="databases/postgres")
    # Connection settings used to open the asyncpg connection.
    connection: PostgresConnectionProperties
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _SyncAsyncpgConnection:
    """Blocking facade over a single asyncpg connection.

    Intended to be used as a context manager: entering creates a private
    event loop and opens the connection on it; exiting closes the connection
    and then the loop. Each query is driven to completion on that loop.
    """

    def __init__(self, connect_kwargs: dict[str, Any]):
        """Remember the keyword arguments later handed to ``asyncpg.connect``."""
        self._connect_kwargs = connect_kwargs
        self._conn: asyncpg.Connection | None = None
        self._event_loop: asyncio.AbstractEventLoop | None = None

    def __enter__(self):
        """Create the private event loop, connect, and return ``self``.

        If connecting fails the loop is closed and discarded before the
        exception is re-raised.
        """
        loop = asyncio.new_event_loop()
        self._event_loop = loop
        try:
            self._conn = loop.run_until_complete(asyncpg.connect(**self._connect_kwargs))
        except Exception:
            loop.close()
            self._event_loop = None
            raise
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        """Close the connection (if any) and always release the event loop."""
        loop = self._event_loop
        connection = self._conn
        try:
            # The connection can only be closed gracefully on a usable loop.
            if connection is not None and loop is not None and not loop.is_closed():
                loop.run_until_complete(connection.close())
        finally:
            self._conn = None
            if loop is not None and not loop.is_closed():
                loop.close()
            self._event_loop = None

    @property
    def conn(self) -> asyncpg.Connection:
        """The open asyncpg connection.

        Raises:
            RuntimeError: if the connection has not been opened.
        """
        connection = self._conn
        if connection is None:
            raise RuntimeError("Connection is not open")
        return connection

    def _run_blocking(self, awaitable) -> Any:
        """Run ``awaitable`` to completion on the private loop and return its result.

        Raises:
            RuntimeError: if no event loop has been created yet.
        """
        loop = self._event_loop
        if loop is None:
            raise RuntimeError("Event loop is not initialized")
        return loop.run_until_complete(awaitable)

    def fetch_rows(self, sql: str, params: Sequence[Any] | None = None) -> list[dict]:
        """Execute ``sql`` with optional positional ``params``; return rows as dicts."""
        args = list(params) if params is not None else []
        fetched = self._run_blocking(self.conn.fetch(sql, *args))
        return list(map(dict, fetched))

    def fetch_scalar_values(self, sql: str) -> list[Any]:
        """Execute ``sql`` and return the first column of every row."""
        fetched = self._run_blocking(self.conn.fetch(sql))
        return [row[0] for row in fetched]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class PostgresqlIntrospector(BaseIntrospector[PostgresConfigFile]):
    """Introspector for PostgreSQL, built on a blocking asyncpg wrapper.

    Supplies the connection handling and the catalog queries (relations,
    columns, constraints, indexes, partitions) that are assembled into
    ``DatabaseSchema`` objects by ``IntrospectionModelBuilder``. All component
    queries take a single ``$1`` parameter: the list of schema names.
    """

    # NOTE(review): not referenced inside this class; presumably read by
    # BaseIntrospector to skip system schemas -- confirm against the base class.
    _IGNORED_SCHEMAS = {"information_schema", "pg_catalog", "pg_toast"}

    supports_catalogs = True

    def _sql_list_schemas(self, catalogs: list[str] | None) -> SQLQuery:
        """Build the query that lists schemas.

        With catalog support the result is filtered by ``catalog_name = ANY($1)``
        and ``catalogs`` is bound as the single parameter; otherwise every
        schema name is returned and no parameter is bound.
        """
        if self.supports_catalogs:
            sql = "SELECT catalog_name, schema_name FROM information_schema.schemata WHERE catalog_name = ANY($1)"
            return SQLQuery(sql, (catalogs,))
        else:
            sql = "SELECT schema_name FROM information_schema.schemata"
            return SQLQuery(sql, None)

    def _connect(self, file_config: PostgresConfigFile):
        """Return an unopened ``_SyncAsyncpgConnection`` for ``file_config``.

        The connection is only established when the returned object is
        entered as a context manager.
        """
        kwargs = self._create_connection_kwargs(file_config.connection)
        return _SyncAsyncpgConnection(kwargs)

    def _fetchall_dicts(self, connection: _SyncAsyncpgConnection, sql: str, params) -> list[dict]:
        """Run ``sql`` with ``params`` on ``connection`` and return rows as dicts."""
        return connection.fetch_rows(sql, params)

    def _get_catalogs(self, connection: _SyncAsyncpgConnection, file_config: PostgresConfigFile) -> list[str]:
        """Return the databases to introspect.

        When the config names a database only that one is returned; otherwise
        every non-template database known to the server is listed.
        """
        database = file_config.connection.database
        if database is not None:
            return [database]

        rows = connection.fetch_scalar_values("SELECT datname FROM pg_catalog.pg_database WHERE datistemplate = false")
        return rows

    def _connect_to_catalog(self, file_config: PostgresConfigFile, catalog: str):
        """Return a connection wrapper targeting ``catalog``.

        Works on a deep copy of the config so the caller's object is not mutated.
        """
        cfg = file_config.model_copy(deep=True)
        cfg.connection.database = catalog
        return self._connect(cfg)

    def collect_catalog_model(
        self, connection: _SyncAsyncpgConnection, catalog: str, schemas: list[str]
    ) -> list[DatabaseSchema] | None:
        """Run every component query for ``schemas`` and build the schema models.

        Args:
            connection: open connection to the catalog being introspected.
            catalog: catalog name; not used by the queries themselves because
                the connection is already bound to one database.
            schemas: schema names bound to ``$1`` in each component query.

        Returns:
            An empty list when ``schemas`` is empty, otherwise whatever
            ``IntrospectionModelBuilder.build_schemas_from_components`` returns.
        """
        if not schemas:
            return []

        # A falsy result (e.g. None) from the fetch is normalised to an empty list.
        results: dict[str, list[dict]] = {
            name: self._fetchall_dicts(connection, sql, (schemas,)) or []
            for name, sql in self._component_queries().items()
        }

        # `.get(..., [])` keeps the builder call valid even if an overriding
        # `_component_queries` omits some components.
        return IntrospectionModelBuilder.build_schemas_from_components(
            schemas=schemas,
            rels=results.get("relations", []),
            cols=results.get("columns", []),
            pk_cols=results.get("pk", []),
            uq_cols=results.get("uq", []),
            checks=results.get("checks", []),
            fk_cols=results.get("fks", []),
            idx_cols=results.get("idx", []),
            partitions=results.get("partitions", []),
        )

    def _component_queries(self) -> dict[str, str]:
        """Map each component name to the SQL text that fetches it."""
        return {
            "relations": self._sql_relations(),
            "columns": self._sql_columns(),
            "pk": self._sql_primary_keys(),
            "uq": self._sql_uniques(),
            "checks": self._sql_checks(),
            "fks": self._sql_foreign_keys(),
            "idx": self._sql_indexes(),
            "partitions": self._sql_partitions(),
        }

    def _sql_relations(self) -> str:
        """SQL listing tables, partitioned tables, views, materialized views and
        foreign tables in the given schemas; individual partitions are excluded."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                CASE c.relkind
                    WHEN 'v' THEN 'view'
                    WHEN 'm' THEN 'materialized_view'
                    WHEN 'f' THEN 'external_table'
                    ELSE 'table'
                END AS kind,
                obj_description(c.oid, 'pg_class') AS description
            FROM
                pg_class c
                JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE
                n.nspname = ANY($1)
                AND c.relkind IN ('r','p','v','m','f')
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname
        """

    def _sql_columns(self) -> str:
        """SQL listing live (non-dropped, user-visible) columns with type,
        nullability, default expression, generation kind and comment."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                a.attname AS column_name,
                a.attnum AS ordinal_position,
                format_type(a.atttypid, a.atttypmod) AS data_type,
                NOT a.attnotnull AS is_nullable,
                pg_get_expr(ad.adbin, ad.adrelid) AS default_expression,
                CASE
                    WHEN a.attidentity IN ('a','d') THEN 'identity'
                    WHEN a.attgenerated = 's' THEN 'computed'
                END AS generated,
                col_description(a.attrelid, a.attnum) AS description
            FROM
                pg_attribute a
                JOIN pg_class c ON c.oid = a.attrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
                LEFT JOIN pg_attrdef ad ON ad.adrelid = a.attrelid AND ad.adnum = a.attnum
            WHERE
                n.nspname = ANY($1)
                AND a.attnum > 0
                AND c.relkind IN ('r','p','v','m','f')
                AND NOT a.attisdropped
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname,
                a.attnum
        """

    def _sql_primary_keys(self) -> str:
        """SQL listing primary-key constraint columns, one row per column in key order."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                con.conname AS constraint_name,
                att.attname AS column_name,
                k.pos AS position
            FROM
                pg_constraint con
                JOIN pg_class c ON c.oid = con.conrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
                JOIN LATERAL unnest(con.conkey) WITH ORDINALITY AS k(attnum, pos) ON TRUE
                JOIN pg_attribute att ON att.attrelid = c.oid AND att.attnum = k.attnum
            WHERE
                n.nspname = ANY($1)
                AND con.contype = 'p'
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname,
                con.conname,
                k.pos
        """

    def _sql_uniques(self) -> str:
        """SQL listing unique-constraint columns, one row per column in key order."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                con.conname AS constraint_name,
                att.attname AS column_name,
                k.pos AS position
            FROM
                pg_constraint con
                JOIN pg_class c ON c.oid = con.conrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
                JOIN LATERAL unnest(con.conkey) WITH ORDINALITY AS k(attnum, pos) ON TRUE
                JOIN pg_attribute att ON att.attrelid = c.oid AND att.attnum = k.attnum
            WHERE
                n.nspname = ANY($1)
                AND con.contype = 'u'
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname,
                con.conname,
                k.pos
        """

    def _sql_checks(self) -> str:
        """SQL listing CHECK constraints with their expression and validation flag."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                con.conname AS constraint_name,
                pg_get_expr(con.conbin, con.conrelid) AS expression,
                con.convalidated AS validated
            FROM
                pg_constraint con
                JOIN pg_class c ON c.oid = con.conrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE
                n.nspname = ANY($1)
                AND con.contype = 'c'
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname,
                con.conname
        """

    def _sql_foreign_keys(self) -> str:
        """SQL listing foreign-key column pairs (source column -> referenced
        column, matched by ordinal position) with their update/delete actions."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                con.conname AS constraint_name,
                src.ord AS position,
                attc.attname AS from_column,
                nref.nspname AS ref_schema,
                cref.relname AS ref_table,
                attref.attname AS to_column,
                con.convalidated AS validated,
                CASE con.confupdtype
                    WHEN 'a' THEN 'no action' WHEN 'r' THEN 'restrict' WHEN 'c' THEN 'cascade'
                    WHEN 'n' THEN 'set null' WHEN 'd' THEN 'set default'
                END AS on_update,
                CASE con.confdeltype
                    WHEN 'a' THEN 'no action' WHEN 'r' THEN 'restrict' WHEN 'c' THEN 'cascade'
                    WHEN 'n' THEN 'set null' WHEN 'd' THEN 'set default'
                END AS on_delete
            FROM
                pg_constraint con
                JOIN pg_class c ON c.oid = con.conrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
                JOIN pg_class cref ON cref.oid = con.confrelid
                JOIN pg_namespace nref ON nref.oid = cref.relnamespace
                JOIN LATERAL unnest(con.conkey) WITH ORDINALITY AS src(src_attnum, ord) ON TRUE
                JOIN LATERAL unnest(con.confkey) WITH ORDINALITY AS ref(ref_attnum, ord2) ON ref.ord2 = src.ord
                JOIN pg_attribute attc ON attc.attrelid = c.oid AND attc.attnum = src.src_attnum
                JOIN pg_attribute attref ON attref.attrelid = cref.oid AND attref.attnum = ref.ref_attnum
            WHERE
                n.nspname = ANY($1)
                AND con.contype = 'f'
                AND NOT c.relispartition
            ORDER BY
                schema_name,
                c.relname,
                con.conname,
                src.ord
        """

    def _sql_indexes(self) -> str:
        """SQL listing key columns/expressions of indexes that are neither
        primary-key indexes nor backing a primary-key/unique constraint."""
        return """
            SELECT
                n.nspname AS schema_name,
                c.relname AS table_name,
                idx.relname AS index_name,
                k.pos AS position,
                pg_get_indexdef(i.indexrelid, k.pos, true) AS expr,
                i.indisunique AS is_unique,
                am.amname AS method,
                pg_get_expr(i.indpred, i.indrelid) AS predicate
            FROM
                pg_index i
                JOIN pg_class idx ON idx.oid = i.indexrelid
                JOIN pg_class c ON c.oid = i.indrelid
                JOIN pg_namespace n ON n.oid = c.relnamespace
                JOIN pg_am am ON am.oid = idx.relam
                CROSS JOIN LATERAL generate_series(1, i.indnkeyatts::int) AS k(pos)
            WHERE
                n.nspname = ANY($1)
                AND i.indisprimary = false
                AND NOT EXISTS (
                    SELECT
                        1
                    FROM
                        pg_constraint cc
                    WHERE
                        cc.conindid = i.indexrelid
                        AND cc.contype IN ('p','u')
                )
                AND NOT c.relispartition
            ORDER BY
                n.nspname,
                c.relname,
                idx.relname,
                k.pos
        """

    def _sql_partitions(self) -> str:
        """SQL listing partitioned tables with their strategy, partition-key
        columns and the names of their child tables.

        Only partitioned tables that have at least one child appear, because
        the child list is inner-joined.
        """
        return """
            WITH partitions AS (
                SELECT
                    parentrel.oid,
                    array_agg(childrel.relname) as partition_tables
                FROM
                    pg_catalog.pg_class parentrel
                    JOIN pg_catalog.pg_inherits inh ON inh.inhparent = parentrel.oid
                    JOIN pg_catalog.pg_class childrel ON inh.inhrelid = childrel.oid
                GROUP BY
                    parentrel.oid
            )
            SELECT
                nsp.nspname AS schema_name,
                rel.relname AS table_name,
                CASE part.partstrat
                    WHEN 'h' THEN 'hash partitioned'
                    WHEN 'l' THEN 'list partitioned'
                    WHEN 'r' THEN 'range partitioned'
                END AS partitioning_strategy,
                array_agg(att.attname) AS columns_in_partition_key,
                partitions.partition_tables
            FROM
                pg_catalog.pg_partitioned_table part
                JOIN pg_catalog.pg_class rel ON part.partrelid = rel.oid
                JOIN pg_catalog.pg_namespace nsp ON rel.relnamespace = nsp.oid
                JOIN pg_catalog.pg_attribute att ON att.attrelid = rel.oid AND att.attnum = ANY (part.partattrs)
                JOIN partitions ON partitions.oid = rel.oid
            WHERE
                nsp.nspname = ANY($1)
            GROUP BY
                schema_name,
                rel.relname,
                part.partstrat,
                partitions.partition_tables
        """

    def _sql_sample_rows(self, catalog: str, schema: str, table: str, limit: int) -> SQLQuery:
        """Build the query fetching up to ``limit`` rows from ``schema.table``.

        ``catalog`` is not part of the statement. Identifiers cannot be bound
        as parameters, so they are emitted as quoted identifiers with any
        embedded double quote doubled; without that, a relation name
        containing ``"`` would produce broken (or injected) SQL.
        """

        def _quote_identifier(name: str) -> str:
            # PostgreSQL quoted identifier: wrap in double quotes, double embedded ones.
            return '"' + name.replace('"', '""') + '"'

        sql = f"SELECT * FROM {_quote_identifier(schema)}.{_quote_identifier(table)} LIMIT $1"
        return SQLQuery(sql, (limit,))

    def _create_connection_string_for_config(self, connection_config: PostgresConnectionProperties) -> str:
        """Render the connection settings as a ``key='value'`` string.

        Values are single-quoted with backslashes and single quotes
        backslash-escaped; entries whose value is ``None`` are omitted, and
        ``additional_properties`` override the standard keys.

        Raises:
            ValueError: if no host is configured.
        """

        def _escape_pg_value(value: str) -> str:
            escaped = value.replace("\\", "\\\\").replace("'", "\\'")
            return f"'{escaped}'"

        host = connection_config.host
        if host is None:
            raise ValueError("A host must be provided to connect to the PostgreSQL database.")

        connection_parts = {
            "host": host,
            "port": connection_config.port or 5432,
            "dbname": connection_config.database,
            "user": connection_config.user,
            "password": connection_config.password,
        }
        connection_parts.update(connection_config.additional_properties)

        connection_string = " ".join(
            f"{k}={_escape_pg_value(str(v))}" for k, v in connection_parts.items() if v is not None
        )
        return connection_string

    def _create_connection_kwargs(self, connection_config: PostgresConnectionProperties) -> dict[str, Any]:
        """Build the keyword arguments passed to ``asyncpg.connect``.

        Port defaults to 5432 and database to ``"postgres"``; user and password
        are included only when non-empty. ``additional_properties`` are merged
        last and therefore take precedence over the derived values.
        """
        kwargs: dict[str, Any] = {
            "host": connection_config.host,
            "port": connection_config.port or 5432,
            "database": connection_config.database or "postgres",
        }

        if connection_config.user:
            kwargs["user"] = connection_config.user
        if connection_config.password:
            kwargs["password"] = connection_config.password
        kwargs.update(connection_config.additional_properties or {})
        return kwargs
|