databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +32 -7
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +82 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
- databao_context_engine/cli/add_datasource_config.py +49 -44
- databao_context_engine/cli/commands.py +40 -55
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +127 -0
- databao_context_engine/databao_context_project_manager.py +147 -30
- databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
- databao_context_engine/datasources/datasource_context.py +90 -0
- databao_context_engine/datasources/datasource_discovery.py +143 -0
- databao_context_engine/datasources/types.py +194 -0
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +76 -57
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +26 -30
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +9 -11
- databao_context_engine/plugin_loader.py +110 -0
- databao_context_engine/pluginlib/build_plugin.py +12 -29
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
- databao_context_engine/plugins/databases/base_introspector.py +11 -12
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +58 -52
- databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +14 -15
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
- databao_context_engine/serialization/__init__.py +0 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +3 -5
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.5.dist-info/METADATA +228 -0
- databao_context_engine-0.1.5.dist-info/RECORD +135 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/project/datasource_discovery.py +0 -141
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/project/types.py +0 -134
- databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- databao_context_engine-0.1.1.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
- /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
- /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import sys
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Literal
|
|
@@ -13,13 +14,12 @@ from databao_context_engine import (
|
|
|
13
14
|
InitErrorReason,
|
|
14
15
|
InitProjectError,
|
|
15
16
|
init_dce_project,
|
|
17
|
+
install_ollama_if_needed,
|
|
16
18
|
)
|
|
17
19
|
from databao_context_engine.cli.datasources import add_datasource_config_cli, check_datasource_connection_cli
|
|
18
20
|
from databao_context_engine.cli.info import echo_info
|
|
19
21
|
from databao_context_engine.config.logging import configure_logging
|
|
20
|
-
from databao_context_engine.llm.install import resolve_ollama_bin
|
|
21
22
|
from databao_context_engine.mcp.mcp_runner import McpTransport, run_mcp_server
|
|
22
|
-
from databao_context_engine.storage.migrate import migrate
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
@click.group()
|
|
@@ -34,7 +34,7 @@ from databao_context_engine.storage.migrate import migrate
|
|
|
34
34
|
@click.pass_context
|
|
35
35
|
def dce(ctx: Context, verbose: bool, quiet: bool, project_dir: str | None) -> None:
|
|
36
36
|
if verbose and quiet:
|
|
37
|
-
|
|
37
|
+
click.echo("Arguments --quiet and --verbose can not be used together", file=sys.stderr)
|
|
38
38
|
exit(1)
|
|
39
39
|
|
|
40
40
|
if project_dir is None:
|
|
@@ -49,25 +49,18 @@ def dce(ctx: Context, verbose: bool, quiet: bool, project_dir: str | None) -> No
|
|
|
49
49
|
ctx.obj["quiet"] = quiet
|
|
50
50
|
ctx.obj["project_dir"] = project_path
|
|
51
51
|
|
|
52
|
-
migrate()
|
|
53
|
-
|
|
54
52
|
|
|
55
53
|
@dce.command()
|
|
56
54
|
@click.pass_context
|
|
57
55
|
def info(ctx: Context) -> None:
|
|
58
|
-
"""
|
|
59
|
-
Display system-wide information
|
|
60
|
-
"""
|
|
61
|
-
|
|
56
|
+
"""Display system-wide information."""
|
|
62
57
|
echo_info(ctx.obj["project_dir"])
|
|
63
58
|
|
|
64
59
|
|
|
65
60
|
@dce.command()
|
|
66
61
|
@click.pass_context
|
|
67
62
|
def init(ctx: Context) -> None:
|
|
68
|
-
"""
|
|
69
|
-
Create an empty Databao Context Engine project
|
|
70
|
-
"""
|
|
63
|
+
"""Create an empty Databao Context Engine project."""
|
|
71
64
|
project_dir = ctx.obj["project_dir"]
|
|
72
65
|
try:
|
|
73
66
|
init_dce_project(project_dir=project_dir)
|
|
@@ -87,7 +80,7 @@ def init(ctx: Context) -> None:
|
|
|
87
80
|
click.echo(f"Project initialized successfully at {project_dir.resolve()}")
|
|
88
81
|
|
|
89
82
|
try:
|
|
90
|
-
|
|
83
|
+
install_ollama_if_needed()
|
|
91
84
|
except RuntimeError as e:
|
|
92
85
|
click.echo(str(e), err=True)
|
|
93
86
|
|
|
@@ -97,17 +90,14 @@ def init(ctx: Context) -> None:
|
|
|
97
90
|
|
|
98
91
|
@dce.group()
|
|
99
92
|
def datasource() -> None:
|
|
100
|
-
"""
|
|
101
|
-
Manage datasource configurations
|
|
102
|
-
"""
|
|
93
|
+
"""Manage datasource configurations."""
|
|
103
94
|
pass
|
|
104
95
|
|
|
105
96
|
|
|
106
97
|
@datasource.command(name="add")
|
|
107
98
|
@click.pass_context
|
|
108
99
|
def add_datasource_config(ctx: Context) -> None:
|
|
109
|
-
"""
|
|
110
|
-
Add a new datasource configuration.
|
|
100
|
+
"""Add a new datasource configuration.
|
|
111
101
|
|
|
112
102
|
The command will ask all relevant information for that datasource and save it in your Databao Context Engine project.
|
|
113
103
|
"""
|
|
@@ -122,15 +112,13 @@ def add_datasource_config(ctx: Context) -> None:
|
|
|
122
112
|
)
|
|
123
113
|
@click.pass_context
|
|
124
114
|
def check_datasource_config(ctx: Context, datasources_config_files: list[str] | None) -> None:
|
|
125
|
-
"""
|
|
126
|
-
Check whether a datasource configuration is valid.
|
|
115
|
+
"""Check whether a datasource configuration is valid.
|
|
127
116
|
|
|
128
117
|
The configuration is considered as valid if a connection with the datasource can be established.
|
|
129
118
|
|
|
130
119
|
By default, all datasources declared in the project will be checked.
|
|
131
120
|
You can explicitely list which datasources to validate by using the [DATASOURCES_CONFIG_FILES] argument. Each argument must be the path to the file within the src folder (e.g: my-folder/my-config.yaml)
|
|
132
121
|
"""
|
|
133
|
-
|
|
134
122
|
datasource_ids = (
|
|
135
123
|
[DatasourceId.from_string_repr(datasource_config_file) for datasource_config_file in datasources_config_files]
|
|
136
124
|
if datasources_config_files is not None
|
|
@@ -157,10 +145,9 @@ def build(
|
|
|
157
145
|
"embeddable_text_only", "generated_description_only", "embeddable_text_and_generated_description"
|
|
158
146
|
],
|
|
159
147
|
) -> None:
|
|
160
|
-
"""
|
|
161
|
-
Build context for all datasources
|
|
148
|
+
"""Build context for all datasources.
|
|
162
149
|
|
|
163
|
-
The output of the build command will be saved in
|
|
150
|
+
The output of the build command will be saved in the output directory.
|
|
164
151
|
|
|
165
152
|
Internally, this indexes the context to be used by the MCP server and the "retrieve" command.
|
|
166
153
|
"""
|
|
@@ -177,12 +164,6 @@ def build(
|
|
|
177
164
|
nargs=-1,
|
|
178
165
|
required=True,
|
|
179
166
|
)
|
|
180
|
-
@click.option(
|
|
181
|
-
"-r",
|
|
182
|
-
"--run-name",
|
|
183
|
-
type=click.STRING,
|
|
184
|
-
help="Build run to use (the run folder name). Defaults to the latest run in the project.",
|
|
185
|
-
)
|
|
186
167
|
@click.option(
|
|
187
168
|
"-l",
|
|
188
169
|
"--limit",
|
|
@@ -191,42 +172,48 @@ def build(
|
|
|
191
172
|
)
|
|
192
173
|
@click.option(
|
|
193
174
|
"-o",
|
|
194
|
-
"--output-
|
|
195
|
-
type=click.
|
|
196
|
-
|
|
197
|
-
|
|
175
|
+
"--output-file",
|
|
176
|
+
type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
|
|
177
|
+
help="If provided, the results are written as YAML into the file given as a path, rather than printed to the console.",
|
|
178
|
+
)
|
|
179
|
+
@click.option(
|
|
180
|
+
"-d",
|
|
181
|
+
"--datasource-id",
|
|
182
|
+
"datasource_ids_as_str",
|
|
183
|
+
type=click.STRING,
|
|
184
|
+
multiple=True,
|
|
185
|
+
help="A datasourceId to restrict the search to. If not provided, search across all datasources. This option can be provided multiple times",
|
|
198
186
|
)
|
|
199
187
|
@click.pass_context
|
|
200
188
|
def retrieve(
|
|
201
189
|
ctx: Context,
|
|
202
190
|
retrieve_text: tuple[str, ...],
|
|
203
|
-
run_name: str | None,
|
|
204
191
|
limit: int | None,
|
|
205
|
-
|
|
192
|
+
output_file: str | None,
|
|
193
|
+
datasource_ids_as_str: tuple[str, ...] | None,
|
|
206
194
|
) -> None:
|
|
207
|
-
"""
|
|
208
|
-
Search the project's built context for the most relevant chunks.
|
|
209
|
-
"""
|
|
195
|
+
"""Search the project's built context for the most relevant chunks."""
|
|
210
196
|
text = " ".join(retrieve_text)
|
|
211
197
|
|
|
198
|
+
datasource_ids = (
|
|
199
|
+
[DatasourceId.from_string_repr(datasource_id) for datasource_id in datasource_ids_as_str]
|
|
200
|
+
if datasource_ids_as_str is not None
|
|
201
|
+
else None
|
|
202
|
+
)
|
|
203
|
+
|
|
212
204
|
databao_engine = DatabaoContextEngine(project_dir=ctx.obj["project_dir"])
|
|
213
205
|
|
|
214
|
-
retrieve_results = databao_engine.search_context(
|
|
215
|
-
retrieve_text=text, run_name=run_name, limit=limit, export_to_file=output_format == "file"
|
|
216
|
-
)
|
|
206
|
+
retrieve_results = databao_engine.search_context(retrieve_text=text, limit=limit, datasource_ids=datasource_ids)
|
|
217
207
|
|
|
218
|
-
|
|
219
|
-
|
|
208
|
+
display_texts = [context_search_result.context_result for context_search_result in retrieve_results]
|
|
209
|
+
if output_file is not None:
|
|
210
|
+
Path(output_file).expanduser().write_text(f"---{os.linesep}".join(display_texts))
|
|
211
|
+
click.echo(f"Found {len(retrieve_results)} results, written to {output_file}")
|
|
212
|
+
else:
|
|
220
213
|
click.echo("\n".join(display_texts))
|
|
221
214
|
|
|
222
215
|
|
|
223
216
|
@dce.command()
|
|
224
|
-
@click.option(
|
|
225
|
-
"-r",
|
|
226
|
-
"--run-name",
|
|
227
|
-
type=click.STRING,
|
|
228
|
-
help="Name of the build run you want to use (aka. the name of the run folder in your project's output). Defaults to the latest one in the project.",
|
|
229
|
-
)
|
|
230
217
|
@click.option(
|
|
231
218
|
"-H",
|
|
232
219
|
"--host",
|
|
@@ -247,10 +234,8 @@ def retrieve(
|
|
|
247
234
|
help="Transport to use. Defaults to stdio",
|
|
248
235
|
)
|
|
249
236
|
@click.pass_context
|
|
250
|
-
def mcp(ctx: Context,
|
|
251
|
-
"""
|
|
252
|
-
Run Databao Context Engine's MCP server
|
|
253
|
-
"""
|
|
237
|
+
def mcp(ctx: Context, host: str | None, port: int | None, transport: McpTransport) -> None:
|
|
238
|
+
"""Run Databao Context Engine's MCP server."""
|
|
254
239
|
if transport == "stdio":
|
|
255
240
|
configure_logging(verbose=False, quiet=True, project_dir=ctx.obj["project_dir"])
|
|
256
|
-
run_mcp_server(project_dir=ctx.obj["project_dir"],
|
|
241
|
+
run_mcp_server(project_dir=ctx.obj["project_dir"], transport=transport, host=host, port=port)
|
|
@@ -15,6 +15,7 @@ def _generate_info_string(command_info: DceInfo) -> str:
|
|
|
15
15
|
info_lines = []
|
|
16
16
|
info_lines.append(f"Databao context engine version: {command_info.version}")
|
|
17
17
|
info_lines.append(f"Databao context engine storage dir: {command_info.dce_path}")
|
|
18
|
+
info_lines.append(f"Databao context engine plugins: {command_info.plugin_ids}")
|
|
18
19
|
|
|
19
20
|
info_lines.append("")
|
|
20
21
|
|
|
@@ -23,10 +24,10 @@ def _generate_info_string(command_info: DceInfo) -> str:
|
|
|
23
24
|
|
|
24
25
|
info_lines.append("")
|
|
25
26
|
|
|
26
|
-
if command_info.project_info.
|
|
27
|
+
if command_info.project_info.is_initialized:
|
|
27
28
|
info_lines.append(f"Project dir: {command_info.project_info.project_path.resolve()}")
|
|
28
29
|
info_lines.append(f"Project ID: {str(command_info.project_info.project_id)}")
|
|
29
30
|
else:
|
|
30
|
-
info_lines.append(f"Project not
|
|
31
|
+
info_lines.append(f"Project not initialized at {command_info.project_info.project_path.resolve()}")
|
|
31
32
|
|
|
32
33
|
return os.linesep.join(info_lines)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.datasources.datasource_context import (
|
|
7
|
+
DatasourceContext,
|
|
8
|
+
get_all_contexts,
|
|
9
|
+
get_context_header_for_datasource,
|
|
10
|
+
get_datasource_context,
|
|
11
|
+
get_introspected_datasource_list,
|
|
12
|
+
)
|
|
13
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
14
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
15
|
+
from databao_context_engine.project.layout import ProjectLayout, ensure_project_dir
|
|
16
|
+
from databao_context_engine.retrieve_embeddings import retrieve_embeddings
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class ContextSearchResult:
|
|
21
|
+
"""The result of a search in the project's contexts.
|
|
22
|
+
|
|
23
|
+
Attributes:
|
|
24
|
+
datasource_id: The ID of the datasource that generated the result.
|
|
25
|
+
datasource_type: The type of the datasource that generated the result.
|
|
26
|
+
distance: The distance between the search text and the result.
|
|
27
|
+
context_result: The actual content of the result that was found as a YAML string.
|
|
28
|
+
This content will be a subpart of the full context of the datasource.
|
|
29
|
+
In some cases, its content won't contain the exact same attributes as what can be
|
|
30
|
+
found directly in the full context.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
datasource_id: DatasourceId
|
|
34
|
+
datasource_type: DatasourceType
|
|
35
|
+
distance: float
|
|
36
|
+
context_result: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DatabaoContextEngine:
|
|
40
|
+
"""Engine for reading and using the contexts generated in a Databao Context Project.
|
|
41
|
+
|
|
42
|
+
The Databao Context Project should already have datasources configured and built (see DatabaoContextProjectManager), so that they can be used in the Engine.
|
|
43
|
+
|
|
44
|
+
Attributes:
|
|
45
|
+
project_dir: The root directory of the Databao Context Project.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
project_dir: Path
|
|
49
|
+
_project_layout: ProjectLayout
|
|
50
|
+
|
|
51
|
+
def __init__(self, project_dir: Path) -> None:
|
|
52
|
+
"""Initialize the DatabaoContextEngine.
|
|
53
|
+
|
|
54
|
+
Args:
|
|
55
|
+
project_dir: The root directory of the Databao Context Project.
|
|
56
|
+
There must be a valid DatabaoContextProject in this directory.
|
|
57
|
+
"""
|
|
58
|
+
self._project_layout = ensure_project_dir(project_dir=project_dir)
|
|
59
|
+
self.project_dir = project_dir
|
|
60
|
+
|
|
61
|
+
def get_introspected_datasource_list(self) -> list[Datasource]:
|
|
62
|
+
"""Return the list of datasources for which a context is available.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
A list of the datasources for which a context is available.
|
|
66
|
+
"""
|
|
67
|
+
return get_introspected_datasource_list(self._project_layout)
|
|
68
|
+
|
|
69
|
+
def get_datasource_context(self, datasource_id: DatasourceId) -> DatasourceContext:
|
|
70
|
+
"""Return the context available for a given datasource.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
datasource_id: The ID of the datasource.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
The context for this datasource.
|
|
77
|
+
"""
|
|
78
|
+
return get_datasource_context(project_layout=self._project_layout, datasource_id=datasource_id)
|
|
79
|
+
|
|
80
|
+
def get_all_contexts(self) -> list[DatasourceContext]:
|
|
81
|
+
"""Return all contexts generated in the project.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
A list of all contexts generated in the project.
|
|
85
|
+
"""
|
|
86
|
+
return get_all_contexts(project_layout=self._project_layout)
|
|
87
|
+
|
|
88
|
+
def get_all_contexts_formatted(self) -> str:
|
|
89
|
+
all_contexts = self.get_all_contexts()
|
|
90
|
+
|
|
91
|
+
return os.linesep.join(
|
|
92
|
+
[f"{get_context_header_for_datasource(context.datasource_id)}{context.context}" for context in all_contexts]
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
def search_context(
|
|
96
|
+
self,
|
|
97
|
+
retrieve_text: str,
|
|
98
|
+
limit: int | None = None,
|
|
99
|
+
datasource_ids: list[DatasourceId] | None = None,
|
|
100
|
+
) -> list[ContextSearchResult]:
|
|
101
|
+
"""Search in the avaialable context for the closest matches to the given text.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
retrieve_text: The text to search for in the contexts.
|
|
105
|
+
limit: The maximum number of results to return. If None is provided, a default limit of 10 will be used.
|
|
106
|
+
datasource_ids: If provided, the search results will only come from the datasources with these IDs.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
A list of the results found for the search, sorted by distance.
|
|
110
|
+
"""
|
|
111
|
+
results = retrieve_embeddings(
|
|
112
|
+
project_layout=self._project_layout, retrieve_text=retrieve_text, limit=limit, datasource_ids=datasource_ids
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return [
|
|
116
|
+
ContextSearchResult(
|
|
117
|
+
datasource_id=result.datasource_id,
|
|
118
|
+
datasource_type=result.datasource_type,
|
|
119
|
+
distance=result.cosine_distance,
|
|
120
|
+
context_result=result.display_text,
|
|
121
|
+
)
|
|
122
|
+
for result in results
|
|
123
|
+
]
|
|
124
|
+
|
|
125
|
+
def run_sql(self, datasource_id: DatasourceId, sql: str, params: list[str]) -> dict[str, Any]:
|
|
126
|
+
"""Not Implemented yet. This will allow to run a SQL query against a datasource (if the datasource supports it)."""
|
|
127
|
+
raise NotImplementedError("Running SQL is not supported yet")
|
|
@@ -2,45 +2,108 @@ from dataclasses import dataclass
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any, overload
|
|
4
4
|
|
|
5
|
-
from databao_context_engine.build_sources
|
|
6
|
-
from databao_context_engine.
|
|
7
|
-
|
|
8
|
-
get_datasource_id_for_config_file,
|
|
9
|
-
)
|
|
10
|
-
from databao_context_engine.datasource_config.check_config import (
|
|
5
|
+
from databao_context_engine.build_sources import BuildContextResult, build_all_datasources
|
|
6
|
+
from databao_context_engine.databao_context_engine import DatabaoContextEngine
|
|
7
|
+
from databao_context_engine.datasources.check_config import (
|
|
11
8
|
CheckDatasourceConnectionResult,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.datasources.check_config import (
|
|
12
11
|
check_datasource_connection as check_datasource_connection_internal,
|
|
13
12
|
)
|
|
13
|
+
from databao_context_engine.datasources.datasource_discovery import get_datasource_list
|
|
14
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
14
15
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
15
|
-
from databao_context_engine.project.
|
|
16
|
-
|
|
16
|
+
from databao_context_engine.project.layout import (
|
|
17
|
+
ProjectLayout,
|
|
18
|
+
ensure_project_dir,
|
|
19
|
+
)
|
|
20
|
+
from databao_context_engine.project.layout import (
|
|
21
|
+
create_datasource_config_file as create_datasource_config_file_internal,
|
|
22
|
+
)
|
|
23
|
+
from databao_context_engine.serialization.yaml import to_yaml_string
|
|
17
24
|
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
18
25
|
|
|
19
26
|
|
|
20
27
|
@dataclass
|
|
21
28
|
class DatasourceConfigFile:
|
|
29
|
+
"""A datasource config file that was created by the DatabaoContextProjectManager.
|
|
30
|
+
|
|
31
|
+
Attributes:
|
|
32
|
+
datasource_id: The unique identifier for the datasource.
|
|
33
|
+
config_file_path: The path to the datasource configuration file.
|
|
34
|
+
"""
|
|
35
|
+
|
|
22
36
|
datasource_id: DatasourceId
|
|
23
37
|
config_file_path: Path
|
|
24
38
|
|
|
25
39
|
|
|
26
40
|
class DatabaoContextProjectManager:
|
|
41
|
+
"""Project Manager for Databao Context Projects.
|
|
42
|
+
|
|
43
|
+
This project manager is responsible for configuring and building a Databao Context Project.
|
|
44
|
+
The project_dir should already have been initialized before a Project manager can be used.
|
|
45
|
+
|
|
46
|
+
Attributes:
|
|
47
|
+
project_dir: The root directory of the Databao Context Project.
|
|
48
|
+
"""
|
|
49
|
+
|
|
27
50
|
project_dir: Path
|
|
51
|
+
_project_layout: ProjectLayout
|
|
28
52
|
|
|
29
53
|
def __init__(self, project_dir: Path) -> None:
|
|
30
|
-
|
|
54
|
+
"""Initialize the DatabaoContextProjectManager.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
project_dir: The root directory of the Databao Context Project.
|
|
58
|
+
"""
|
|
59
|
+
self._project_layout = ensure_project_dir(project_dir=project_dir)
|
|
31
60
|
self.project_dir = project_dir
|
|
32
61
|
|
|
62
|
+
def get_configured_datasource_list(self) -> list[Datasource]:
|
|
63
|
+
"""Return the list of datasources configured in the project.
|
|
64
|
+
|
|
65
|
+
This method returns all datasources configured in the src folder of the project,
|
|
66
|
+
no matter whether the datasource configuration is valid or not.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
The list of datasources configured in the project.
|
|
70
|
+
"""
|
|
71
|
+
return get_datasource_list(self._project_layout)
|
|
72
|
+
|
|
33
73
|
def build_context(
|
|
34
|
-
self,
|
|
74
|
+
self,
|
|
75
|
+
datasource_ids: list[DatasourceId] | None = None,
|
|
76
|
+
chunk_embedding_mode: ChunkEmbeddingMode = ChunkEmbeddingMode.EMBEDDABLE_TEXT_ONLY,
|
|
35
77
|
) -> list[BuildContextResult]:
|
|
78
|
+
"""Build the context for datasources in the project.
|
|
79
|
+
|
|
80
|
+
Any datasource with an invalid configuration will be skipped.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
datasource_ids: The list of datasource ids to build. If None, all datasources will be built.
|
|
84
|
+
chunk_embedding_mode: The mode to use for chunk embedding.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The list of all built results.
|
|
88
|
+
"""
|
|
36
89
|
# TODO: Filter which datasources to build by datasource_ids
|
|
37
|
-
return build_all_datasources(
|
|
90
|
+
return build_all_datasources(project_layout=self._project_layout, chunk_embedding_mode=chunk_embedding_mode)
|
|
38
91
|
|
|
39
92
|
def check_datasource_connection(
|
|
40
|
-
self, datasource_ids: list[DatasourceId] | None
|
|
93
|
+
self, datasource_ids: list[DatasourceId] | None = None
|
|
41
94
|
) -> list[CheckDatasourceConnectionResult]:
|
|
95
|
+
"""Check the connection for datasources in the project.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
datasource_ids: The list of datasource ids to check. If None, all datasources will be checked.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
The list of all connection check results, sorted by datasource id.
|
|
102
|
+
"""
|
|
42
103
|
return sorted(
|
|
43
|
-
check_datasource_connection_internal(
|
|
104
|
+
check_datasource_connection_internal(
|
|
105
|
+
project_layout=self._project_layout, datasource_ids=datasource_ids
|
|
106
|
+
).values(),
|
|
44
107
|
key=lambda result: str(result.datasource_id),
|
|
45
108
|
)
|
|
46
109
|
|
|
@@ -51,9 +114,21 @@ class DatabaoContextProjectManager:
|
|
|
51
114
|
config_content: dict[str, Any],
|
|
52
115
|
overwrite_existing: bool = False,
|
|
53
116
|
) -> DatasourceConfigFile:
|
|
117
|
+
"""Create a new datasource configuration file in the project.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
datasource_type: The type of the datasource to create.
|
|
121
|
+
datasource_name: The name of the datasource to create.
|
|
122
|
+
config_content: The content of the datasource configuration. This is a dictionary that will be written as-is in a yaml file.
|
|
123
|
+
The actual content of the configuration is not checked and might not be valid for the requested type..
|
|
124
|
+
overwrite_existing: Whether to overwrite an existing datasource configuration file if it already exists.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
The path to the created datasource configuration file.
|
|
128
|
+
"""
|
|
54
129
|
# TODO: Before creating the datasource, validate the config content based on which plugin will be used
|
|
55
|
-
|
|
56
|
-
|
|
130
|
+
config_file = _create_datasource_config_file(
|
|
131
|
+
project_layout=self._project_layout,
|
|
57
132
|
datasource_type=datasource_type,
|
|
58
133
|
datasource_name=datasource_name,
|
|
59
134
|
config_content=config_content,
|
|
@@ -61,32 +136,74 @@ class DatabaoContextProjectManager:
|
|
|
61
136
|
)
|
|
62
137
|
|
|
63
138
|
return DatasourceConfigFile(
|
|
64
|
-
datasource_id=DatasourceId.from_datasource_config_file_path(
|
|
65
|
-
config_file_path=
|
|
139
|
+
datasource_id=DatasourceId.from_datasource_config_file_path(self._project_layout, config_file),
|
|
140
|
+
config_file_path=config_file,
|
|
66
141
|
)
|
|
67
142
|
|
|
68
143
|
@overload
|
|
69
|
-
def datasource_config_exists(self, *,
|
|
144
|
+
def datasource_config_exists(self, *, datasource_name: str) -> DatasourceId | None: ...
|
|
70
145
|
@overload
|
|
71
|
-
def datasource_config_exists(self, *, datasource_id: DatasourceId) ->
|
|
146
|
+
def datasource_config_exists(self, *, datasource_id: DatasourceId) -> DatasourceId | None: ...
|
|
72
147
|
|
|
73
148
|
def datasource_config_exists(
|
|
74
149
|
self,
|
|
75
150
|
*,
|
|
76
|
-
datasource_type: DatasourceType | None = None,
|
|
77
151
|
datasource_name: str | None = None,
|
|
78
152
|
datasource_id: DatasourceId | None = None,
|
|
79
|
-
) ->
|
|
80
|
-
if
|
|
81
|
-
|
|
82
|
-
|
|
153
|
+
) -> DatasourceId | None:
|
|
154
|
+
"""Check if a datasource configuration file already exists in the project.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
datasource_name: The name of the datasource.
|
|
158
|
+
datasource_id: The id of the datasource. If provided, datasource_type and datasource_name will be ignored.
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
datasource_id if there is already a datasource configuration file for this datasource, None otherwise.
|
|
162
|
+
|
|
163
|
+
Raises:
|
|
164
|
+
ValueError: If the wrong set of arguments is provided.
|
|
165
|
+
"""
|
|
166
|
+
if datasource_name is not None:
|
|
167
|
+
datasource_id = DatasourceId.from_string_repr(f"{datasource_name}.yaml")
|
|
168
|
+
config_file = self._project_layout.src_dir / datasource_id.relative_path_to_config_file()
|
|
169
|
+
if config_file.is_file():
|
|
170
|
+
return datasource_id
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
if datasource_id is None:
|
|
83
174
|
raise ValueError("Either datasource_id or both datasource_type and datasource_name must be provided")
|
|
84
175
|
|
|
85
176
|
try:
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
datasource_id
|
|
89
|
-
|
|
90
|
-
return
|
|
177
|
+
config_file = self._project_layout.src_dir.joinpath(datasource_id.relative_path_to_config_file())
|
|
178
|
+
if config_file.is_file():
|
|
179
|
+
raise ValueError(f"A config file already exists for {str(datasource_id)}")
|
|
180
|
+
|
|
181
|
+
return datasource_id
|
|
91
182
|
except ValueError:
|
|
92
|
-
return
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
def get_engine_for_project(self) -> DatabaoContextEngine:
|
|
186
|
+
"""Instantiate a DatabaoContextEngine for the project.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
A DatabaoContextEngine instance for the project.
|
|
190
|
+
"""
|
|
191
|
+
return DatabaoContextEngine(project_dir=self.project_dir)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _create_datasource_config_file(
|
|
195
|
+
project_layout: ProjectLayout,
|
|
196
|
+
datasource_type: DatasourceType,
|
|
197
|
+
datasource_name: str,
|
|
198
|
+
config_content: dict[str, Any],
|
|
199
|
+
overwrite_existing: bool,
|
|
200
|
+
) -> Path:
|
|
201
|
+
last_datasource_name = datasource_name.split("/")[-1]
|
|
202
|
+
basic_config = {"type": datasource_type.full_type, "name": last_datasource_name}
|
|
203
|
+
|
|
204
|
+
return create_datasource_config_file_internal(
|
|
205
|
+
project_layout,
|
|
206
|
+
f"{datasource_name}.yaml",
|
|
207
|
+
to_yaml_string(basic_config | config_content),
|
|
208
|
+
overwrite_existing=overwrite_existing,
|
|
209
|
+
)
|