databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. databao_context_engine/__init__.py +32 -7
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +82 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
  8. databao_context_engine/cli/add_datasource_config.py +49 -44
  9. databao_context_engine/cli/commands.py +40 -55
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +127 -0
  12. databao_context_engine/databao_context_project_manager.py +147 -30
  13. databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
  14. databao_context_engine/datasources/datasource_context.py +90 -0
  15. databao_context_engine/datasources/datasource_discovery.py +143 -0
  16. databao_context_engine/datasources/types.py +194 -0
  17. databao_context_engine/generate_configs_schemas.py +4 -5
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +76 -57
  20. databao_context_engine/llm/__init__.py +10 -0
  21. databao_context_engine/llm/api.py +57 -0
  22. databao_context_engine/llm/descriptions/ollama.py +1 -3
  23. databao_context_engine/llm/errors.py +2 -8
  24. databao_context_engine/llm/factory.py +5 -2
  25. databao_context_engine/llm/install.py +26 -30
  26. databao_context_engine/llm/runtime.py +3 -5
  27. databao_context_engine/llm/service.py +1 -3
  28. databao_context_engine/mcp/mcp_runner.py +4 -2
  29. databao_context_engine/mcp/mcp_server.py +9 -11
  30. databao_context_engine/plugin_loader.py +110 -0
  31. databao_context_engine/pluginlib/build_plugin.py +12 -29
  32. databao_context_engine/pluginlib/config.py +16 -2
  33. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  34. databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
  35. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
  36. databao_context_engine/plugins/databases/base_introspector.py +11 -12
  37. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  38. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
  39. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  40. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
  41. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  42. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  43. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  44. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  45. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
  46. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  47. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
  48. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  49. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  50. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
  51. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  52. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  53. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
  54. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  55. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  56. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  57. databao_context_engine/plugins/duckdb_tools.py +18 -0
  58. databao_context_engine/plugins/files/__init__.py +0 -0
  59. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  60. databao_context_engine/plugins/plugin_loader.py +58 -52
  61. databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
  62. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  63. databao_context_engine/project/info.py +34 -2
  64. databao_context_engine/project/init_project.py +16 -7
  65. databao_context_engine/project/layout.py +14 -15
  66. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  67. databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
  68. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
  69. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
  70. databao_context_engine/serialization/__init__.py +0 -0
  71. databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
  72. databao_context_engine/services/chunk_embedding_service.py +23 -11
  73. databao_context_engine/services/factories.py +1 -46
  74. databao_context_engine/services/persistence_service.py +11 -11
  75. databao_context_engine/storage/connection.py +11 -7
  76. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  77. databao_context_engine/storage/migrate.py +3 -5
  78. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  79. databao_context_engine/storage/models.py +2 -23
  80. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  81. databao_context_engine/storage/repositories/factories.py +1 -12
  82. databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
  83. databao_context_engine/system/properties.py +4 -2
  84. databao_context_engine-0.1.5.dist-info/METADATA +228 -0
  85. databao_context_engine-0.1.5.dist-info/RECORD +135 -0
  86. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
  87. databao_context_engine/build_sources/internal/build_service.py +0 -77
  88. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  89. databao_context_engine/build_sources/internal/export_results.py +0 -43
  90. databao_context_engine/build_sources/public/api.py +0 -4
  91. databao_context_engine/databao_engine.py +0 -85
  92. databao_context_engine/datasource_config/add_config.py +0 -50
  93. databao_context_engine/datasource_config/datasource_context.py +0 -60
  94. databao_context_engine/mcp/all_results_tool.py +0 -5
  95. databao_context_engine/mcp/retrieve_tool.py +0 -22
  96. databao_context_engine/plugins/databases/athena_introspector.py +0 -101
  97. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  98. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  99. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  100. databao_context_engine/project/datasource_discovery.py +0 -141
  101. databao_context_engine/project/runs.py +0 -39
  102. databao_context_engine/project/types.py +0 -134
  103. databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
  104. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
  105. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  106. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  107. databao_context_engine/services/run_name_policy.py +0 -8
  108. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  109. databao_context_engine/storage/repositories/run_repository.py +0 -157
  110. databao_context_engine-0.1.1.dist-info/METADATA +0 -186
  111. databao_context_engine-0.1.1.dist-info/RECORD +0 -135
  112. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  113. /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
  114. /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
  115. /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
  116. /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
  117. /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
  118. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py
@@ -0,0 +1,241 @@
+import sqlite3
+
+from pydantic import BaseModel, Field
+
+from databao_context_engine.plugins.databases.base_db_plugin import BaseDatabaseConfigFile
+from databao_context_engine.plugins.databases.base_introspector import BaseIntrospector, SQLQuery
+from databao_context_engine.plugins.databases.databases_types import DatabaseSchema
+from databao_context_engine.plugins.databases.introspection_model_builder import IntrospectionModelBuilder
+
+
+class SQLiteConnectionConfig(BaseModel):
+    database_path: str = Field(description="Path to the SQLite database file")
+
+
+class SQLiteConfigFile(BaseDatabaseConfigFile):
+    type: str = Field(default="sqlite")
+    connection: SQLiteConnectionConfig
+
+
+class SQLiteIntrospector(BaseIntrospector[SQLiteConfigFile]):
+    _IGNORED_SCHEMAS = {"temp", "information_schema"}
+    _PSEUDO_SCHEMA = "main"
+    supports_catalogs = False
+
+    def _connect(self, file_config: SQLiteConfigFile, *, catalog: str | None = None):
+        database_path = str(file_config.connection.database_path)
+        conn = sqlite3.connect(database_path)
+        conn.text_factory = str
+        return conn
+
+    def _get_catalogs(self, connection, file_config: SQLiteConfigFile) -> list[str]:
+        return [self._resolve_pseudo_catalog_name(file_config)]
+
+    def _fetchall_dicts(self, connection, sql: str, params) -> list[dict]:
+        cur = connection.cursor()
+        if params is None:
+            cur.execute(sql)
+        else:
+            cur.execute(sql, params)
+
+        if cur.description is None:
+            return []
+
+        rows = cur.fetchall()
+        out: list[dict] = []
+        for r in rows:
+            if isinstance(r, sqlite3.Row):
+                out.append({k.lower(): r[k] for k in r.keys()})
+            else:
+                cols = [d[0].lower() for d in cur.description]
+                out.append(dict(zip(cols, r)))
+        return out
+
+    def _list_schemas_for_catalog(self, connection, catalog: str) -> list[str]:
+        return [self._PSEUDO_SCHEMA]
+
+    def collect_catalog_model(self, connection, catalog: str, schemas: list[str]) -> list[DatabaseSchema] | None:
+        if not schemas:
+            return []
+
+        comps = self._component_queries()
+        results: dict[str, list[dict]] = {name: [] for name in comps}
+
+        for name, sql in comps.items():
+            results[name] = self._fetchall_dicts(connection, sql, None) or []
+
+        return IntrospectionModelBuilder.build_schemas_from_components(
+            schemas=[self._PSEUDO_SCHEMA],
+            rels=results.get("relations", []),
+            cols=results.get("columns", []),
+            pk_cols=results.get("pk", []),
+            uq_cols=results.get("uq", []),
+            checks=[],
+            fk_cols=results.get("fks", []),
+            idx_cols=results.get("idx", []),
+            partitions=[],
+        )
+
+    def _component_queries(self) -> dict[str, str]:
+        return {
+            "relations": self._sql_relations(),
+            "columns": self._sql_columns(),
+            "pk": self._sql_primary_keys(),
+            "uq": self._sql_unique(),
+            "fks": self._sql_foreign_keys(),
+            "idx": self._sql_indexes(),
+        }
+
+    def _sql_relations(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                CASE m.type
+                    WHEN 'view' THEN 'view'
+                    ELSE 'table'
+                END AS kind,
+                NULL AS description
+            FROM
+                sqlite_master m
+            WHERE
+                m.type IN ('table', 'view')
+                AND m.name NOT LIKE 'sqlite_%'
+            ORDER BY
+                m.name;
+        """
+
+    def _sql_columns(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                c.name AS column_name,
+                (c.cid + 1) AS ordinal_position,
+                COALESCE(c.type,'') AS data_type,
+                CASE
+                    WHEN c.pk > 0 THEN 0
+                    WHEN c."notnull" = 0 THEN 1
+                    ELSE 0
+                END AS is_nullable,
+                c.dflt_value AS default_expression,
+                CASE
+                    WHEN c.hidden IN (2,3) THEN 'computed'
+                END AS generated,
+                NULL AS description
+            FROM
+                sqlite_master m
+                JOIN pragma_table_xinfo(m.name) c
+            WHERE
+                m.type IN ('table','view')
+                AND m.name NOT LIKE 'sqlite_%'
+            ORDER BY
+                m.name,
+                c.cid;
+        """
+
+    def _sql_primary_keys(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                ('pk_' || m.name) AS constraint_name,
+                c.pk AS position,
+                c.name AS column_name
+            FROM
+                sqlite_master m
+                JOIN pragma_table_info(m.name) c
+            WHERE
+                m.type = 'table'
+                AND m.name NOT LIKE 'sqlite_%'
+                AND c.pk > 0
+            ORDER BY
+                m.name,
+                c.pk;
+        """
+
+    def _sql_unique(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                il.name AS constraint_name,
+                (ii.seqno + 1) AS position,
+                ii.name AS column_name
+            FROM
+                sqlite_master m
+                JOIN pragma_index_list(m.name) il
+                JOIN pragma_index_info(il.name) ii
+            WHERE
+                m.type = 'table'
+                AND m.name NOT LIKE 'sqlite_%'
+                AND il."unique" = 1
+                AND il.origin = 'u'
+            ORDER BY
+                m.name,
+                il.name,
+                ii.seqno;
+        """
+
+    def _sql_foreign_keys(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                ('fk_' || m.name || '_' || fk.id) AS constraint_name,
+                (fk.seq + 1) AS position,
+                fk."from" AS from_column,
+                'main' AS ref_schema,
+                fk."table" AS ref_table,
+                fk."to" AS to_column,
+                lower(fk.on_update) AS on_update,
+                lower(fk.on_delete) AS on_delete,
+                1 AS enforced,
+                1 AS validated
+            FROM sqlite_master m
+                JOIN pragma_foreign_key_list(m.name) fk
+            WHERE
+                m.type = 'table'
+                AND m.name NOT LIKE 'sqlite_%'
+            ORDER BY
+                m.name,
+                fk.id,
+                fk.seq;
+        """
+
+    def _sql_indexes(self) -> str:
+        return f"""
+            SELECT
+                '{self._PSEUDO_SCHEMA}' AS schema_name,
+                m.name AS table_name,
+                il.name AS index_name,
+                (ix.seqno + 1) AS position,
+                ix.name AS expr,
+                il."unique" AS is_unique,
+                NULL AS method,
+                CASE
+                    WHEN il.partial = 1 AND sm.sql IS NOT NULL AND instr(upper(sm.sql), 'WHERE') > 0
+                    THEN trim(substr(sm.sql, instr(upper(sm.sql), 'WHERE') + length('WHERE')))
+                END AS predicate
+            FROM
+                sqlite_master m
+                JOIN pragma_index_list(m.name) il
+                JOIN pragma_index_xinfo(il.name) ix
+                LEFT JOIN sqlite_master sm ON sm.type = 'index' AND sm.name = il.name
+            WHERE
+                m.type='table'
+                AND m.name NOT LIKE 'sqlite_%'
+                AND lower(il.origin) = 'c'
+                AND ix.key = 1
+            ORDER BY
+                m.name,
+                il.name,
+                ix.seqno;
+        """
+
+    def _sql_sample_rows(self, catalog: str, schema: str, table: str, limit: int) -> SQLQuery:
+        sql = f"SELECT * FROM {self._quote_ident(table)} LIMIT ?"
+        return SQLQuery(sql, (limit,))
+
+    def _quote_ident(self, ident: str) -> str:
+        return '"' + str(ident).replace('"', '""') + '"'
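A minimal sketch, not taken from the package, of how the new SQLite config model above could be filled in. The class and field names come from the diff; the database path is invented, and it assumes BaseDatabaseConfigFile adds no further required fields.

    from databao_context_engine.plugins.databases.sqlite.sqlite_introspector import (
        SQLiteConfigFile,
        SQLiteConnectionConfig,
    )

    # Hypothetical database path; `type` defaults to "sqlite" per the model above.
    config = SQLiteConfigFile(connection=SQLiteConnectionConfig(database_path="./analytics.db"))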
databao_context_engine/plugins/duckdb_tools.py
@@ -0,0 +1,18 @@
+from typing import Any
+
+from databao_context_engine.pluginlib.config import DuckDBSecret
+
+
+def generate_create_secret_sql(secret_name, duckdb_secret: DuckDBSecret) -> str:
+    parameters = [("type", duckdb_secret.type)] + list(duckdb_secret.properties.items())
+    return f"""CREATE SECRET {secret_name} (
+        {", ".join([f"{k} '{v}'" for (k, v) in parameters])}
+    );
+    """
+
+
+def fetchall_dicts(cur, sql: str, params=None) -> list[dict[str, Any]]:
+    cur.execute(sql, params or [])
+    columns = [desc[0].lower() for desc in cur.description] if cur.description else []
+    rows = cur.fetchall()
+    return [dict(zip(columns, row)) for row in rows]
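A quick sketch of what the shared fetchall_dicts helper returns, assuming an in-memory DuckDB connection; only the helper name and module path come from the diff, the query is illustrative.

    import duckdb

    from databao_context_engine.plugins.duckdb_tools import fetchall_dicts

    conn = duckdb.connect()  # in-memory database, illustrative only
    rows = fetchall_dicts(conn.cursor(), "SELECT 42 AS answer")
    print(rows)  # expected: [{'answer': 42}]
    conn.close()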
databao_context_engine/plugins/files/unstructured_files_plugin.py
@@ -24,7 +24,7 @@ class InternalUnstructuredFilesPlugin(BuildFilePlugin):
         self.tokens_overlap = tokens_overlap or self._DEFAULT_TOKENS_OVERLAP
 
     def supported_types(self) -> set[str]:
-        return {f"files/{extension}" for extension in self._SUPPORTED_FILES_EXTENSIONS}
+        return self._SUPPORTED_FILES_EXTENSIONS
 
     def build_file_context(self, full_type: str, file_name: str, file_buffer: BufferedReader) -> Any:
         file_content = self._read_file(file_buffer)
databao_context_engine/plugins/plugin_loader.py
@@ -14,31 +14,12 @@ class DuplicatePluginTypeError(RuntimeError):
     """Raised when two plugins register the same <main>/<sub> plugin key."""
 
 
-PluginList = dict[DatasourceType, BuildPlugin]
-
-
-def get_all_available_plugin_types(exclude_file_plugins: bool = False) -> set[DatasourceType]:
-    return set(load_plugins(exclude_file_plugins=exclude_file_plugins).keys())
-
-
-def get_plugin_for_type(datasource_type: DatasourceType) -> BuildPlugin:
-    all_plugins = load_plugins()
-
-    if datasource_type not in all_plugins:
-        raise ValueError(f"No plugin found for type '{datasource_type.full_type}'")
-
-    return load_plugins()[datasource_type]
-
-
-def load_plugins(exclude_file_plugins: bool = False) -> PluginList:
-    """
-    Loads both builtin and external plugins and merges them into one list
-    """
+def load_plugins(exclude_file_plugins: bool = False) -> dict[DatasourceType, BuildPlugin]:
+    """Load both builtin and external plugins and merges them into one list."""
     builtin_plugins = _load_builtin_plugins(exclude_file_plugins)
     external_plugins = _load_external_plugins(exclude_file_plugins)
-    plugins = merge_plugins(builtin_plugins, external_plugins)
 
-    return plugins
+    return _merge_plugins(builtin_plugins, external_plugins)
 
 
 def _load_builtin_plugins(exclude_file_plugins: bool = False) -> list[BuildPlugin]:
@@ -53,7 +34,7 @@ def _load_builtin_plugins(exclude_file_plugins: bool = False) -> list[BuildPlugin]:
 
 
 def _load_builtin_file_plugins() -> list[BuildFilePlugin]:
-    from databao_context_engine.plugins.unstructured_files_plugin import InternalUnstructuredFilesPlugin
+    from databao_context_engine.plugins.files.unstructured_files_plugin import InternalUnstructuredFilesPlugin
 
     return [
         InternalUnstructuredFilesPlugin(),
@@ -61,43 +42,68 @@ def _load_builtin_file_plugins() -> list[BuildFilePlugin]:
 
 
 def _load_builtin_datasource_plugins() -> list[BuildDatasourcePlugin]:
-    """
-    Statically register built-in plugins
-    """
-    from databao_context_engine.plugins.athena_db_plugin import AthenaDbPlugin
-    from databao_context_engine.plugins.clickhouse_db_plugin import ClickhouseDbPlugin
-    from databao_context_engine.plugins.duckdb_db_plugin import DuckDbPlugin
-    from databao_context_engine.plugins.mssql_db_plugin import MSSQLDbPlugin
-    from databao_context_engine.plugins.mysql_db_plugin import MySQLDbPlugin
-    from databao_context_engine.plugins.parquet_plugin import ParquetPlugin
-    from databao_context_engine.plugins.postgresql_db_plugin import PostgresqlDbPlugin
-    from databao_context_engine.plugins.snowflake_db_plugin import SnowflakeDbPlugin
+    """Statically register built-in plugins."""
+    from databao_context_engine.plugins.databases.duckdb.duckdb_db_plugin import DuckDbPlugin
+    from databao_context_engine.plugins.databases.sqlite.sqlite_db_plugin import SQLiteDbPlugin
+    from databao_context_engine.plugins.resources.parquet_plugin import ParquetPlugin
 
-    return [
-        AthenaDbPlugin(),
-        ClickhouseDbPlugin(),
-        DuckDbPlugin(),
-        MSSQLDbPlugin(),
-        MySQLDbPlugin(),
-        PostgresqlDbPlugin(),
-        SnowflakeDbPlugin(),
-        ParquetPlugin(),
-    ]
+    # optional plugins are added to the python environment via extras
+    optional_plugins: list[BuildDatasourcePlugin] = []
+    try:
+        from databao_context_engine.plugins.databases.mssql.mssql_db_plugin import MSSQLDbPlugin
+
+        optional_plugins = [MSSQLDbPlugin()]
+    except ImportError:
+        pass
+
+    try:
+        from databao_context_engine.plugins.databases.clickhouse.clickhouse_db_plugin import ClickhouseDbPlugin
+
+        optional_plugins.append(ClickhouseDbPlugin())
+    except ImportError:
+        pass
+
+    try:
+        from databao_context_engine.plugins.databases.athena.athena_db_plugin import AthenaDbPlugin
+
+        optional_plugins.append(AthenaDbPlugin())
+    except ImportError:
+        pass
+
+    try:
+        from databao_context_engine.plugins.databases.snowflake.snowflake_db_plugin import SnowflakeDbPlugin
+
+        optional_plugins.append(SnowflakeDbPlugin())
+    except ImportError:
+        pass
+
+    try:
+        from databao_context_engine.plugins.databases.mysql.mysql_db_plugin import MySQLDbPlugin
+
+        optional_plugins.append(MySQLDbPlugin())
+    except ImportError:
+        pass
+
+    try:
+        from databao_context_engine.plugins.databases.postgresql.postgresql_db_plugin import PostgresqlDbPlugin
+
+        optional_plugins.append(PostgresqlDbPlugin())
+    except ImportError:
+        pass
+
+    required_plugins: list[BuildDatasourcePlugin] = [DuckDbPlugin(), ParquetPlugin(), SQLiteDbPlugin()]
+    return required_plugins + optional_plugins
 
 
 def _load_external_plugins(exclude_file_plugins: bool = False) -> list[BuildPlugin]:
-    """
-    Discover external plugins via entry points
-    """
+    """Discover external plugins via entry points."""
     # TODO: implement external plugin loading
     return []
 
 
-def merge_plugins(*plugin_lists: list[BuildPlugin]) -> PluginList:
-    """
-    Merge multiple plugin maps
-    """
-    registry: PluginList = {}
+def _merge_plugins(*plugin_lists: list[BuildPlugin]) -> dict[DatasourceType, BuildPlugin]:
+    """Merge multiple plugin maps."""
+    registry: dict[DatasourceType, BuildPlugin] = {}
     for plugins in plugin_lists:
         for plugin in plugins:
            for full_type in plugin.supported_types():
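A minimal sketch of how the reworked loader might be consumed; load_plugins and its return type come from the diff, the printing is illustrative. Optional database plugins whose extras are not installed simply drop out of the result instead of breaking the import.

    from databao_context_engine.plugins.plugin_loader import load_plugins

    plugins = load_plugins()  # dict[DatasourceType, BuildPlugin]
    for datasource_type, plugin in plugins.items():
        print(datasource_type, type(plugin).__name__)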
databao_context_engine/plugins/resources/parquet_introspector.py
@@ -6,21 +6,21 @@ from dataclasses import dataclass, replace
 from urllib.parse import urlparse
 
 import duckdb
-from _duckdb import DuckDBPyConnection
+from duckdb import DuckDBPyConnection
 from pydantic import BaseModel, Field
 
 from databao_context_engine.pluginlib.config import DuckDBSecret
+from databao_context_engine.plugins.duckdb_tools import fetchall_dicts, generate_create_secret_sql
 
-parquet_type = "resources/parquet"
+parquet_type = "parquet"
 
 logger = logging.getLogger(__name__)
 
 
 class ParquetConfigFile(BaseModel):
     name: str | None = Field(default=None)
-    type: str = Field(default=parquet_type)
+    type: str = Field(default="parquet")
     url: str = Field(
-        default=type,
         description="Parquet resource location. Should be a valid URL or a path to a local file. "
         "Examples: s3://your_bucket/file.parquet, s3://your-bucket/*.parquet, https://some.url/some_file.parquet, ~/path_to/file.parquet",
     )
@@ -50,14 +50,6 @@ class ParquetIntrospectionResult:
     files: list[ParquetFile]
 
 
-def generate_create_secret_sql(secret_name, duckdb_secret: DuckDBSecret) -> str:
-    parameters = [("type", duckdb_secret.type)] + list(duckdb_secret.properties.items())
-    return f"""CREATE SECRET {secret_name} (
-    {", ".join([f"{k} {v}" for (k, v) in parameters])}
-    );
-    """
-
-
 @contextlib.contextmanager
 def _create_secret(conn: DuckDBPyConnection, duckdb_secret: DuckDBSecret):
     secret_name = duckdb_secret.name or "gen_secret_" + str(uuid.uuid4()).replace("-", "_")
@@ -98,10 +90,9 @@ class ParquetIntrospector:
         with self._connect(file_config) as conn:
             with conn.cursor() as cur:
                 resolved_url = _resolve_url(file_config)
-                cur.execute(f"SELECT * FROM parquet_file_metadata('{resolved_url}') LIMIT 1")
-                columns = [desc[0].lower() for desc in cur.description] if cur.description else []
-                rows = cur.fetchall()
-                parquet_file_metadata = [dict(zip(columns, row)) for row in rows]
+                parquet_file_metadata = fetchall_dicts(
+                    cur, f"SELECT * FROM parquet_file_metadata('{resolved_url}') LIMIT 1"
+                )
                 if not parquet_file_metadata:
                     raise ValueError(f"No parquet files found by url {resolved_url}")
                 if not parquet_file_metadata or not parquet_file_metadata[0]["file_name"]:
@@ -111,10 +102,7 @@ class ParquetIntrospector:
         with self._connect(file_config) as conn:
             with conn.cursor() as cur:
                 resolved_url = _resolve_url(file_config)
-                cur.execute(f"SELECT * from parquet_metadata('{resolved_url}')")
-                cols = [desc[0].lower() for desc in cur.description] if cur.description else []
-                rows = cur.fetchall()
-                file_metas = [dict(zip(cols, row)) for row in rows]
+                file_metas = fetchall_dicts(cur, f"SELECT * from parquet_metadata('{resolved_url}')")
 
                 columns_per_file: dict[str, dict[int, ParquetColumn]] = defaultdict(defaultdict)
                 for file_meta in file_metas:
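A short sketch of the tightened ParquetConfigFile; the field names come from the diff, the URL is a placeholder. `type` now defaults to plain "parquet" rather than "resources/parquet", and `url` no longer carries the accidental `default=type`, so it must be supplied.

    from databao_context_engine.plugins.resources.parquet_introspector import ParquetConfigFile

    config = ParquetConfigFile(url="s3://your-bucket/*.parquet")  # placeholder URL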
databao_context_engine/plugins/resources/parquet_plugin.py
@@ -24,9 +24,7 @@ class ParquetPlugin(BuildDatasourcePlugin[ParquetConfigFile]):
         return build_parquet_chunks(context)
 
     def build_context(self, full_type: str, datasource_name: str, file_config: ParquetConfigFile) -> Any:
-        introspection_result = self._introspector.introspect(file_config)
-
-        return introspection_result
+        return self._introspector.introspect(file_config)
 
     def check_connection(self, full_type: str, datasource_name: str, file_config: ParquetConfigFile) -> None:
         self._introspector.check_connection(file_config)
databao_context_engine/project/info.py
@@ -3,29 +3,56 @@ from importlib.metadata import version
 from pathlib import Path
 from uuid import UUID
 
+from databao_context_engine.plugins.plugin_loader import load_plugins
 from databao_context_engine.project.layout import validate_project_dir
 from databao_context_engine.system.properties import get_dce_path
 
 
 @dataclass(kw_only=True, frozen=True)
 class DceProjectInfo:
+    """Information about a Databao Context Engine project.
+
+    Attributes:
+        project_path: The root directory of the Databao Context Engine project.
+        is_initialized: Whether the project has been initialized.
+        project_id: The UUID of the project, or None if the project has not been initialized.
+    """
+
     project_path: Path
-    is_initialised: bool
+    is_initialized: bool
     project_id: UUID | None
 
 
 @dataclass(kw_only=True, frozen=True)
 class DceInfo:
+    """Information about the current Databao Context Engine installation and project.
+
+    Attributes:
+        version: The version of the databao_context_engine package installed on the system.
+        dce_path: The path where databao_context_engine stores its global data.
+        project_info: Information about the Databao Context Engine project.
+    """
+
     version: str
     dce_path: Path
+    plugin_ids: list[str]
 
     project_info: DceProjectInfo
 
 
 def get_databao_context_engine_info(project_dir: Path) -> DceInfo:
+    """Return information about the current Databao Context Engine installation and project.
+
+    Args:
+        project_dir: The root directory of the Databao Context Project.
+
+    Returns:
+        A DceInfo instance containing information about the Databao Context Engine installation and project.
+    """
     return DceInfo(
         version=get_dce_version(),
         dce_path=get_dce_path(),
+        plugin_ids=[plugin.id for plugin in load_plugins().values()],
         project_info=_get_project_info(project_dir),
     )
 
@@ -35,10 +62,15 @@ def _get_project_info(project_dir: Path) -> DceProjectInfo:
 
     return DceProjectInfo(
         project_path=project_dir,
-        is_initialised=project_layout is not None,
+        is_initialized=project_layout is not None,
         project_id=project_layout.read_config_file().project_id if project_layout is not None else None,
     )
 
 
 def get_dce_version() -> str:
+    """Return the installed version of the databao_context_engine package.
+
+    Returns:
+        The installed version of the databao_context_engine package.
+    """
     return version("databao_context_engine")
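A minimal sketch of reading the expanded DceInfo; the function, attribute names, and module path come from the diff and the file list, the project directory is illustrative.

    from pathlib import Path

    from databao_context_engine.project.info import get_databao_context_engine_info

    info = get_databao_context_engine_info(Path("."))
    print(info.version, info.dce_path)
    print(info.plugin_ids)                    # new in 0.1.5
    print(info.project_info.is_initialized)   # renamed from is_initialised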
databao_context_engine/project/init_project.py
@@ -13,12 +13,21 @@ from databao_context_engine.project.project_config import ProjectConfig
 
 
 class InitErrorReason(Enum):
+    """Reasons for which project initialization can fail."""
+
     PROJECT_DIR_DOESNT_EXIST = "PROJECT_DIR_DOESNT_EXIST"
     PROJECT_DIR_NOT_DIRECTORY = "PROJECT_DIR_NOT_DIRECTORY"
-    PROJECT_DIR_ALREADY_INITIALISED = "PROJECT_DIR_ALREADY_INITIALISED"
+    PROJECT_DIR_ALREADY_INITIALIZED = "PROJECT_DIR_ALREADY_INITIALIZED"
 
 
 class InitProjectError(Exception):
+    """Raised when a project can't be initialized.
+
+    Attributes:
+        message: The error message.
+        reason: The reason for the initialization failure.
+    """
+
     reason: InitErrorReason
 
     def __init__(self, reason: InitErrorReason, message: str | None):
@@ -65,20 +74,20 @@ class _ProjectCreator:
 
         if self.config_file.is_file() or self.deprecated_config_file.is_file():
             raise InitProjectError(
-                message=f"Can't initialise a Databao Context Engine project in a folder that already contains a config file. [project_dir: {self.project_dir.resolve()}]",
-                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALISED,
+                message=f"Can't initialize a Databao Context Engine project in a folder that already contains a config file. [project_dir: {self.project_dir.resolve()}]",
+                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALIZED,
             )
 
         if self.src_dir.is_dir():
             raise InitProjectError(
-                message=f"Can't initialise a Databao Context Engine project in a folder that already contains a src directory. [project_dir: {self.project_dir.resolve()}]",
-                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALISED,
+                message=f"Can't initialize a Databao Context Engine project in a folder that already contains a src directory. [project_dir: {self.project_dir.resolve()}]",
+                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALIZED,
             )
 
         if self.examples_dir.is_file():
             raise InitProjectError(
-                message=f"Can't initialise a Databao Context Engine project in a folder that already contains an examples dir. [project_dir: {self.project_dir.resolve()}]",
-                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALISED,
+                message=f"Can't initialize a Databao Context Engine project in a folder that already contains an examples dir. [project_dir: {self.project_dir.resolve()}]",
+                reason=InitErrorReason.PROJECT_DIR_ALREADY_INITIALIZED,
             )
 
         return True
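Because the enum member was renamed, a sketch of how downstream error handling might track the change; handle_init_error is a hypothetical helper, and only the exception class, its reason attribute, and the enum value come from the diff.

    from databao_context_engine.project.init_project import InitErrorReason, InitProjectError

    def handle_init_error(error: InitProjectError) -> None:  # hypothetical helper
        # PROJECT_DIR_ALREADY_INITIALISED became PROJECT_DIR_ALREADY_INITIALIZED in 0.1.5.
        if error.reason is InitErrorReason.PROJECT_DIR_ALREADY_INITIALIZED:
            print("Project already initialized; nothing to do.")
        else:
            raise error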