databao-context-engine 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +18 -6
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +84 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
- databao_context_engine/cli/add_datasource_config.py +41 -15
- databao_context_engine/cli/commands.py +12 -43
- databao_context_engine/cli/info.py +2 -2
- databao_context_engine/databao_context_engine.py +137 -0
- databao_context_engine/databao_context_project_manager.py +96 -6
- databao_context_engine/datasources/add_config.py +34 -0
- databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
- databao_context_engine/datasources/datasource_context.py +93 -0
- databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
- databao_context_engine/{project → datasources}/types.py +64 -15
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +59 -30
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/install.py +13 -20
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/all_results_tool.py +2 -2
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +1 -4
- databao_context_engine/mcp/retrieve_tool.py +3 -11
- databao_context_engine/plugin_loader.py +111 -0
- databao_context_engine/pluginlib/build_plugin.py +25 -9
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/databases/athena_introspector.py +85 -22
- databao_context_engine/plugins/databases/base_introspector.py +5 -3
- databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
- databao_context_engine/plugins/databases/duckdb_introspector.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
- databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
- databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
- databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
- databao_context_engine/plugins/plugin_loader.py +43 -42
- databao_context_engine/plugins/resources/parquet_introspector.py +2 -3
- databao_context_engine/project/info.py +31 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +3 -3
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
- databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +2 -4
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.3.dist-info/METADATA +75 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/RECORD +68 -77
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.2.dist-info/METADATA +0 -187
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/WHEEL +0 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,16 +1,23 @@
|
|
|
1
|
-
from databao_context_engine.build_sources
|
|
1
|
+
from databao_context_engine.build_sources import BuildContextResult
|
|
2
|
+
from databao_context_engine.databao_context_engine import ContextSearchResult, DatabaoContextEngine
|
|
2
3
|
from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
|
|
3
|
-
from databao_context_engine.
|
|
4
|
-
from databao_context_engine.datasource_config.check_config import (
|
|
4
|
+
from databao_context_engine.datasources.check_config import (
|
|
5
5
|
CheckDatasourceConnectionResult,
|
|
6
6
|
DatasourceConnectionStatus,
|
|
7
7
|
)
|
|
8
|
-
from databao_context_engine.
|
|
8
|
+
from databao_context_engine.datasources.datasource_context import DatasourceContext
|
|
9
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
9
10
|
from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
|
|
10
|
-
from databao_context_engine.
|
|
11
|
+
from databao_context_engine.plugin_loader import DatabaoContextPluginLoader
|
|
12
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
13
|
+
BuildDatasourcePlugin,
|
|
14
|
+
BuildFilePlugin,
|
|
15
|
+
BuildPlugin,
|
|
16
|
+
DatasourceType,
|
|
17
|
+
)
|
|
18
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
|
|
11
19
|
from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
|
|
12
20
|
from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
|
|
13
|
-
from databao_context_engine.project.types import Datasource, DatasourceId
|
|
14
21
|
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
15
22
|
|
|
16
23
|
__all__ = [
|
|
@@ -32,4 +39,9 @@ __all__ = [
|
|
|
32
39
|
"init_or_get_dce_project",
|
|
33
40
|
"InitErrorReason",
|
|
34
41
|
"InitProjectError",
|
|
42
|
+
"DatabaoContextPluginLoader",
|
|
43
|
+
"ConfigPropertyDefinition",
|
|
44
|
+
"BuildPlugin",
|
|
45
|
+
"BuildDatasourcePlugin",
|
|
46
|
+
"BuildFilePlugin",
|
|
35
47
|
]
|
|
@@ -3,22 +3,32 @@ from dataclasses import dataclass
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
from databao_context_engine.build_sources.
|
|
7
|
-
from databao_context_engine.build_sources.
|
|
6
|
+
from databao_context_engine.build_sources.build_service import BuildService
|
|
7
|
+
from databao_context_engine.build_sources.export_results import (
|
|
8
8
|
append_result_to_all_results,
|
|
9
|
-
create_run_dir,
|
|
10
9
|
export_build_result,
|
|
10
|
+
reset_all_results,
|
|
11
11
|
)
|
|
12
|
+
from databao_context_engine.datasources.datasource_discovery import discover_datasources, prepare_source
|
|
13
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
12
14
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
13
15
|
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
14
|
-
from databao_context_engine.project.
|
|
15
|
-
from databao_context_engine.project.types import DatasourceId
|
|
16
|
+
from databao_context_engine.project.layout import get_output_dir
|
|
16
17
|
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
@dataclass
|
|
21
22
|
class BuildContextResult:
|
|
23
|
+
"""Result of a single datasource context build.
|
|
24
|
+
|
|
25
|
+
Attributes:
|
|
26
|
+
datasource_id: The id of the datasource.
|
|
27
|
+
datasource_type: The type of the datasource.
|
|
28
|
+
context_built_at: The timestamp when the context was built.
|
|
29
|
+
context_file_path: The path to the generated context file.
|
|
30
|
+
"""
|
|
31
|
+
|
|
22
32
|
datasource_id: DatasourceId
|
|
23
33
|
datasource_type: DatasourceType
|
|
24
34
|
context_built_at: datetime
|
|
@@ -29,16 +39,19 @@ def build(
|
|
|
29
39
|
project_dir: Path,
|
|
30
40
|
*,
|
|
31
41
|
build_service: BuildService,
|
|
32
|
-
project_id: str,
|
|
33
|
-
dce_version: str,
|
|
34
42
|
) -> list[BuildContextResult]:
|
|
35
|
-
"""
|
|
36
|
-
|
|
43
|
+
"""Build the context for all datasources in the project.
|
|
44
|
+
|
|
45
|
+
Unless you already have access to BuildService, this should not be called directly.
|
|
46
|
+
Instead, internal callers should go through the build_wiring module or directly use DatabaoContextProjectManager.build_context().
|
|
37
47
|
|
|
38
48
|
1) Load available plugins
|
|
39
49
|
2) Discover sources
|
|
40
50
|
3) Create a run
|
|
41
51
|
4) For each source, call process_source
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
A list of all the contexts built.
|
|
42
55
|
"""
|
|
43
56
|
plugins = load_plugins()
|
|
44
57
|
|
|
@@ -48,11 +61,9 @@ def build(
|
|
|
48
61
|
logger.info("No sources discovered under %s", project_dir)
|
|
49
62
|
return []
|
|
50
63
|
|
|
51
|
-
run = None
|
|
52
|
-
run_dir = None
|
|
53
|
-
|
|
54
64
|
number_of_failed_builds = 0
|
|
55
65
|
build_result = []
|
|
66
|
+
reset_all_results(get_output_dir(project_dir))
|
|
56
67
|
for discovered_datasource in datasources:
|
|
57
68
|
try:
|
|
58
69
|
prepared_source = prepare_source(discovered_datasource)
|
|
@@ -71,19 +82,15 @@ def build(
|
|
|
71
82
|
number_of_failed_builds += 1
|
|
72
83
|
continue
|
|
73
84
|
|
|
74
|
-
if run is None or run_dir is None:
|
|
75
|
-
# Initialiase the run as soon as we found a source
|
|
76
|
-
run = build_service.start_run(project_id=project_id, dce_version=dce_version)
|
|
77
|
-
run_dir = create_run_dir(project_dir, run.run_name)
|
|
78
|
-
|
|
79
85
|
result = build_service.process_prepared_source(
|
|
80
|
-
run_id=run.run_id,
|
|
81
86
|
prepared_source=prepared_source,
|
|
82
87
|
plugin=plugin,
|
|
83
88
|
)
|
|
84
89
|
|
|
85
|
-
|
|
86
|
-
|
|
90
|
+
output_dir = get_output_dir(project_dir)
|
|
91
|
+
|
|
92
|
+
context_file_path = export_build_result(output_dir, result)
|
|
93
|
+
append_result_to_all_results(output_dir, result)
|
|
87
94
|
|
|
88
95
|
build_result.append(
|
|
89
96
|
BuildContextResult(
|
|
@@ -99,9 +106,6 @@ def build(
|
|
|
99
106
|
|
|
100
107
|
number_of_failed_builds += 1
|
|
101
108
|
|
|
102
|
-
if run is not None:
|
|
103
|
-
build_service.finalize_run(run_id=run.run_id)
|
|
104
|
-
|
|
105
109
|
logger.debug(
|
|
106
110
|
"Successfully built %d datasources. %s",
|
|
107
111
|
len(build_result),
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext, execute
|
|
6
|
+
from databao_context_engine.datasources.types import PreparedDatasource
|
|
7
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
8
|
+
BuildPlugin,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.serialization.yaml import to_yaml_string
|
|
11
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BuildService:
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
*,
|
|
20
|
+
chunk_embedding_service: ChunkEmbeddingService,
|
|
21
|
+
) -> None:
|
|
22
|
+
self._chunk_embedding_service = chunk_embedding_service
|
|
23
|
+
|
|
24
|
+
def process_prepared_source(
|
|
25
|
+
self,
|
|
26
|
+
*,
|
|
27
|
+
prepared_source: PreparedDatasource,
|
|
28
|
+
plugin: BuildPlugin,
|
|
29
|
+
) -> BuiltDatasourceContext:
|
|
30
|
+
"""Process a single source to build its context.
|
|
31
|
+
|
|
32
|
+
1) Execute the plugin
|
|
33
|
+
2) Divide the results into chunks
|
|
34
|
+
3) Embed and persist the chunks
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
The built context.
|
|
38
|
+
"""
|
|
39
|
+
result = execute(prepared_source, plugin)
|
|
40
|
+
|
|
41
|
+
chunks = plugin.divide_context_into_chunks(result.context)
|
|
42
|
+
if not chunks:
|
|
43
|
+
logger.info("No chunks for %s — skipping.", prepared_source.path.name)
|
|
44
|
+
return result
|
|
45
|
+
|
|
46
|
+
self._chunk_embedding_service.embed_chunks(
|
|
47
|
+
chunks=chunks,
|
|
48
|
+
result=to_yaml_string(result.context),
|
|
49
|
+
full_type=prepared_source.datasource_type.full_type,
|
|
50
|
+
datasource_id=result.datasource_id,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
return result
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from duckdb import DuckDBPyConnection
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.build_sources.build_runner import BuildContextResult, build
|
|
7
|
+
from databao_context_engine.build_sources.build_service import BuildService
|
|
8
|
+
from databao_context_engine.llm.descriptions.provider import DescriptionProvider
|
|
9
|
+
from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
|
|
10
|
+
from databao_context_engine.llm.factory import (
|
|
11
|
+
create_ollama_description_provider,
|
|
12
|
+
create_ollama_embedding_provider,
|
|
13
|
+
create_ollama_service,
|
|
14
|
+
)
|
|
15
|
+
from databao_context_engine.project.layout import ensure_project_dir
|
|
16
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
17
|
+
from databao_context_engine.services.factories import create_chunk_embedding_service
|
|
18
|
+
from databao_context_engine.storage.connection import open_duckdb_connection
|
|
19
|
+
from databao_context_engine.storage.migrate import migrate
|
|
20
|
+
from databao_context_engine.system.properties import get_db_path
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
|
|
26
|
+
"""Build the context for all datasources in the project.
|
|
27
|
+
|
|
28
|
+
- Instantiates the build service
|
|
29
|
+
- Delegates the actual build logic to the build runner
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
A list of all the contexts built.
|
|
33
|
+
"""
|
|
34
|
+
ensure_project_dir(project_dir)
|
|
35
|
+
|
|
36
|
+
logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")
|
|
37
|
+
|
|
38
|
+
# Think about alternative solutions. This solution will mirror the current behaviour
|
|
39
|
+
# The current behaviour only builds what is currently in the /src folder
|
|
40
|
+
# This will need to change in the future when we can pick which datasources to build
|
|
41
|
+
db_path = get_db_path(project_dir)
|
|
42
|
+
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
43
|
+
if db_path.exists():
|
|
44
|
+
db_path.unlink()
|
|
45
|
+
|
|
46
|
+
with open_duckdb_connection(db_path) as conn:
|
|
47
|
+
migrate(db_path)
|
|
48
|
+
|
|
49
|
+
ollama_service = create_ollama_service()
|
|
50
|
+
embedding_provider = create_ollama_embedding_provider(ollama_service)
|
|
51
|
+
description_provider = (
|
|
52
|
+
create_ollama_description_provider(ollama_service)
|
|
53
|
+
if chunk_embedding_mode.should_generate_description()
|
|
54
|
+
else None
|
|
55
|
+
)
|
|
56
|
+
build_service = _create_build_service(
|
|
57
|
+
conn,
|
|
58
|
+
embedding_provider=embedding_provider,
|
|
59
|
+
description_provider=description_provider,
|
|
60
|
+
chunk_embedding_mode=chunk_embedding_mode,
|
|
61
|
+
)
|
|
62
|
+
return build(
|
|
63
|
+
project_dir=project_dir,
|
|
64
|
+
build_service=build_service,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _create_build_service(
|
|
69
|
+
conn: DuckDBPyConnection,
|
|
70
|
+
*,
|
|
71
|
+
embedding_provider: EmbeddingProvider,
|
|
72
|
+
description_provider: DescriptionProvider | None,
|
|
73
|
+
chunk_embedding_mode: ChunkEmbeddingMode,
|
|
74
|
+
) -> BuildService:
|
|
75
|
+
chunk_embedding_service = create_chunk_embedding_service(
|
|
76
|
+
conn,
|
|
77
|
+
embedding_provider=embedding_provider,
|
|
78
|
+
description_provider=description_provider,
|
|
79
|
+
chunk_embedding_mode=chunk_embedding_mode,
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
return BuildService(
|
|
83
|
+
chunk_embedding_service=chunk_embedding_service,
|
|
84
|
+
)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
|
|
5
|
+
from databao_context_engine.datasources.datasource_context import get_context_header_for_datasource
|
|
6
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
7
|
+
from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME
|
|
8
|
+
from databao_context_engine.serialization.yaml import write_yaml_to_stream
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def export_build_result(output_dir: Path, result: BuiltDatasourceContext) -> Path:
|
|
14
|
+
datasource_id = DatasourceId.from_string_repr(result.datasource_id)
|
|
15
|
+
export_file_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
|
|
16
|
+
|
|
17
|
+
# Make sure the parent folder exists
|
|
18
|
+
export_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
19
|
+
|
|
20
|
+
with export_file_path.open("w") as export_file:
|
|
21
|
+
write_yaml_to_stream(data=result, file_stream=export_file)
|
|
22
|
+
|
|
23
|
+
logger.info(f"Exported result to {export_file_path.resolve()}")
|
|
24
|
+
|
|
25
|
+
return export_file_path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def append_result_to_all_results(output_dir: Path, result: BuiltDatasourceContext):
|
|
29
|
+
path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
|
|
30
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
31
|
+
with path.open("a", encoding="utf-8") as export_file:
|
|
32
|
+
export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
|
|
33
|
+
write_yaml_to_stream(data=result, file_stream=export_file)
|
|
34
|
+
export_file.write("\n")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Here temporarily because it needs to be reset between runs.
|
|
38
|
+
# A subsequent PR will remove the existence of the all_results file
|
|
39
|
+
def reset_all_results(output_dir: Path):
|
|
40
|
+
path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
|
|
41
|
+
path.unlink(missing_ok=True)
|
|
@@ -8,14 +8,12 @@ from databao_context_engine.pluginlib.build_plugin import (
|
|
|
8
8
|
BuildPlugin,
|
|
9
9
|
)
|
|
10
10
|
from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
|
|
11
|
-
from databao_context_engine.
|
|
11
|
+
from databao_context_engine.datasources.types import PreparedConfig, PreparedDatasource, DatasourceId
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@dataclass()
|
|
15
15
|
class BuiltDatasourceContext:
|
|
16
|
-
"""
|
|
17
|
-
Dataclass defining the result of building a datasource's context.
|
|
18
|
-
"""
|
|
16
|
+
"""Dataclass defining the result of building a datasource's context."""
|
|
19
17
|
|
|
20
18
|
datasource_id: str
|
|
21
19
|
"""
|
|
@@ -53,9 +51,7 @@ def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Bui
|
|
|
53
51
|
|
|
54
52
|
|
|
55
53
|
def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
|
|
56
|
-
"""
|
|
57
|
-
Run a prepared source through the plugin
|
|
58
|
-
"""
|
|
54
|
+
"""Run a prepared source through the plugin."""
|
|
59
55
|
if isinstance(prepared_datasource, PreparedConfig):
|
|
60
56
|
ds_plugin = cast(BuildDatasourcePlugin, plugin)
|
|
61
57
|
|
|
@@ -5,22 +5,27 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
import click
|
|
7
7
|
|
|
8
|
-
from databao_context_engine import
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
from databao_context_engine import (
|
|
9
|
+
DatabaoContextPluginLoader,
|
|
10
|
+
DatabaoContextProjectManager,
|
|
11
|
+
DatasourceId,
|
|
12
|
+
DatasourceType,
|
|
13
|
+
ConfigPropertyDefinition,
|
|
11
14
|
)
|
|
12
|
-
from databao_context_engine.pluginlib.config import
|
|
13
|
-
from databao_context_engine.plugins.plugin_loader import get_all_available_plugin_types
|
|
15
|
+
from databao_context_engine.pluginlib.config import ConfigUnionPropertyDefinition
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
|
|
17
19
|
project_manager = DatabaoContextProjectManager(project_dir=project_dir)
|
|
20
|
+
plugin_loader = DatabaoContextPluginLoader()
|
|
18
21
|
|
|
19
22
|
click.echo(
|
|
20
23
|
f"We will guide you to add a new datasource in your Databao Context Engine project, at {project_dir.resolve()}"
|
|
21
24
|
)
|
|
22
25
|
|
|
23
|
-
datasource_type = _ask_for_datasource_type(
|
|
26
|
+
datasource_type = _ask_for_datasource_type(
|
|
27
|
+
plugin_loader.get_all_supported_datasource_types(exclude_file_plugins=True)
|
|
28
|
+
)
|
|
24
29
|
datasource_name = click.prompt("Datasource name?", type=str)
|
|
25
30
|
|
|
26
31
|
is_datasource_existing = project_manager.datasource_config_exists(
|
|
@@ -33,7 +38,9 @@ def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
|
|
|
33
38
|
default=False,
|
|
34
39
|
)
|
|
35
40
|
|
|
36
|
-
config_content = _ask_for_config_details(
|
|
41
|
+
config_content = _ask_for_config_details(
|
|
42
|
+
plugin_loader.get_config_file_structure_for_datasource_type(datasource_type)
|
|
43
|
+
)
|
|
37
44
|
|
|
38
45
|
config_file = project_manager.create_datasource_config(
|
|
39
46
|
datasource_type, datasource_name, config_content, overwrite_existing=True
|
|
@@ -44,10 +51,8 @@ def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
|
|
|
44
51
|
return config_file.datasource_id
|
|
45
52
|
|
|
46
53
|
|
|
47
|
-
def _ask_for_datasource_type() -> DatasourceType:
|
|
48
|
-
supported_types_by_folder = _group_supported_types_by_folder(
|
|
49
|
-
get_all_available_plugin_types(exclude_file_plugins=True)
|
|
50
|
-
)
|
|
54
|
+
def _ask_for_datasource_type(supported_datasource_types: set[DatasourceType]) -> DatasourceType:
|
|
55
|
+
supported_types_by_folder = _group_supported_types_by_folder(supported_datasource_types)
|
|
51
56
|
|
|
52
57
|
all_config_folders = sorted(supported_types_by_folder.keys())
|
|
53
58
|
config_folder = click.prompt(
|
|
@@ -68,9 +73,7 @@ def _ask_for_datasource_type() -> DatasourceType:
|
|
|
68
73
|
return DatasourceType.from_main_and_subtypes(config_folder, config_type)
|
|
69
74
|
|
|
70
75
|
|
|
71
|
-
def _ask_for_config_details(
|
|
72
|
-
config_file_structure = get_config_file_structure_for_datasource_type(datasource_type)
|
|
73
|
-
|
|
76
|
+
def _ask_for_config_details(config_file_structure: list[ConfigPropertyDefinition]) -> dict[str, Any]:
|
|
74
77
|
# Adds a new line before asking for the config values specific to the plugin
|
|
75
78
|
click.echo("")
|
|
76
79
|
|
|
@@ -84,13 +87,35 @@ def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
|
|
|
84
87
|
|
|
85
88
|
|
|
86
89
|
def _build_config_content_from_properties(
|
|
87
|
-
properties: list[ConfigPropertyDefinition], properties_prefix: str
|
|
90
|
+
properties: list[ConfigPropertyDefinition], properties_prefix: str, in_union: bool = False
|
|
88
91
|
) -> dict[str, Any]:
|
|
89
92
|
config_content: dict[str, Any] = {}
|
|
90
93
|
for config_file_property in properties:
|
|
91
94
|
if config_file_property.property_key in ["type", "name"] and len(properties_prefix) == 0:
|
|
92
95
|
# We ignore type and name properties as they've already been filled
|
|
93
96
|
continue
|
|
97
|
+
if in_union and config_file_property.property_key == "type":
|
|
98
|
+
continue
|
|
99
|
+
|
|
100
|
+
if isinstance(config_file_property, ConfigUnionPropertyDefinition):
|
|
101
|
+
choices = {t.__name__: t for t in config_file_property.types}
|
|
102
|
+
|
|
103
|
+
chosen = click.prompt(
|
|
104
|
+
f"{properties_prefix}{config_file_property.property_key}.type?",
|
|
105
|
+
type=click.Choice(sorted(choices.keys())),
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
chosen_type = choices[chosen]
|
|
109
|
+
|
|
110
|
+
nested_props = config_file_property.type_properties[chosen_type]
|
|
111
|
+
nested_content = _build_config_content_from_properties(
|
|
112
|
+
nested_props, f"{properties_prefix}{config_file_property.property_key}.", in_union=True
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
config_content[config_file_property.property_key] = {
|
|
116
|
+
**nested_content,
|
|
117
|
+
}
|
|
118
|
+
continue
|
|
94
119
|
|
|
95
120
|
if config_file_property.nested_properties is not None and len(config_file_property.nested_properties) > 0:
|
|
96
121
|
nested_content = _build_config_content_from_properties(
|
|
@@ -114,6 +139,7 @@ def _build_config_content_from_properties(
|
|
|
114
139
|
type=str,
|
|
115
140
|
default=default_value,
|
|
116
141
|
show_default=default_value is not None and default_value != "",
|
|
142
|
+
hide_input=config_file_property.secret,
|
|
117
143
|
)
|
|
118
144
|
|
|
119
145
|
if property_value.strip():
|
|
@@ -19,7 +19,6 @@ from databao_context_engine.cli.info import echo_info
|
|
|
19
19
|
from databao_context_engine.config.logging import configure_logging
|
|
20
20
|
from databao_context_engine.llm.install import resolve_ollama_bin
|
|
21
21
|
from databao_context_engine.mcp.mcp_runner import McpTransport, run_mcp_server
|
|
22
|
-
from databao_context_engine.storage.migrate import migrate
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
@click.group()
|
|
@@ -49,25 +48,18 @@ def dce(ctx: Context, verbose: bool, quiet: bool, project_dir: str | None) -> No
|
|
|
49
48
|
ctx.obj["quiet"] = quiet
|
|
50
49
|
ctx.obj["project_dir"] = project_path
|
|
51
50
|
|
|
52
|
-
migrate()
|
|
53
|
-
|
|
54
51
|
|
|
55
52
|
@dce.command()
|
|
56
53
|
@click.pass_context
|
|
57
54
|
def info(ctx: Context) -> None:
|
|
58
|
-
"""
|
|
59
|
-
Display system-wide information
|
|
60
|
-
"""
|
|
61
|
-
|
|
55
|
+
"""Display system-wide information."""
|
|
62
56
|
echo_info(ctx.obj["project_dir"])
|
|
63
57
|
|
|
64
58
|
|
|
65
59
|
@dce.command()
|
|
66
60
|
@click.pass_context
|
|
67
61
|
def init(ctx: Context) -> None:
|
|
68
|
-
"""
|
|
69
|
-
Create an empty Databao Context Engine project
|
|
70
|
-
"""
|
|
62
|
+
"""Create an empty Databao Context Engine project."""
|
|
71
63
|
project_dir = ctx.obj["project_dir"]
|
|
72
64
|
try:
|
|
73
65
|
init_dce_project(project_dir=project_dir)
|
|
@@ -97,17 +89,14 @@ def init(ctx: Context) -> None:
|
|
|
97
89
|
|
|
98
90
|
@dce.group()
|
|
99
91
|
def datasource() -> None:
|
|
100
|
-
"""
|
|
101
|
-
Manage datasource configurations
|
|
102
|
-
"""
|
|
92
|
+
"""Manage datasource configurations."""
|
|
103
93
|
pass
|
|
104
94
|
|
|
105
95
|
|
|
106
96
|
@datasource.command(name="add")
|
|
107
97
|
@click.pass_context
|
|
108
98
|
def add_datasource_config(ctx: Context) -> None:
|
|
109
|
-
"""
|
|
110
|
-
Add a new datasource configuration.
|
|
99
|
+
"""Add a new datasource configuration.
|
|
111
100
|
|
|
112
101
|
The command will ask all relevant information for that datasource and save it in your Databao Context Engine project.
|
|
113
102
|
"""
|
|
@@ -122,15 +111,13 @@ def add_datasource_config(ctx: Context) -> None:
|
|
|
122
111
|
)
|
|
123
112
|
@click.pass_context
|
|
124
113
|
def check_datasource_config(ctx: Context, datasources_config_files: list[str] | None) -> None:
|
|
125
|
-
"""
|
|
126
|
-
Check whether a datasource configuration is valid.
|
|
114
|
+
"""Check whether a datasource configuration is valid.
|
|
127
115
|
|
|
128
116
|
The configuration is considered as valid if a connection with the datasource can be established.
|
|
129
117
|
|
|
130
118
|
By default, all datasources declared in the project will be checked.
|
|
131
119
|
You can explicitely list which datasources to validate by using the [DATASOURCES_CONFIG_FILES] argument. Each argument must be the path to the file within the src folder (e.g: my-folder/my-config.yaml)
|
|
132
120
|
"""
|
|
133
|
-
|
|
134
121
|
datasource_ids = (
|
|
135
122
|
[DatasourceId.from_string_repr(datasource_config_file) for datasource_config_file in datasources_config_files]
|
|
136
123
|
if datasources_config_files is not None
|
|
@@ -157,10 +144,9 @@ def build(
|
|
|
157
144
|
"embeddable_text_only", "generated_description_only", "embeddable_text_and_generated_description"
|
|
158
145
|
],
|
|
159
146
|
) -> None:
|
|
160
|
-
"""
|
|
161
|
-
Build context for all datasources
|
|
147
|
+
"""Build context for all datasources.
|
|
162
148
|
|
|
163
|
-
The output of the build command will be saved in
|
|
149
|
+
The output of the build command will be saved in the output directory.
|
|
164
150
|
|
|
165
151
|
Internally, this indexes the context to be used by the MCP server and the "retrieve" command.
|
|
166
152
|
"""
|
|
@@ -177,12 +163,6 @@ def build(
|
|
|
177
163
|
nargs=-1,
|
|
178
164
|
required=True,
|
|
179
165
|
)
|
|
180
|
-
@click.option(
|
|
181
|
-
"-r",
|
|
182
|
-
"--run-name",
|
|
183
|
-
type=click.STRING,
|
|
184
|
-
help="Build run to use (the run folder name). Defaults to the latest run in the project.",
|
|
185
|
-
)
|
|
186
166
|
@click.option(
|
|
187
167
|
"-l",
|
|
188
168
|
"--limit",
|
|
@@ -200,19 +180,16 @@ def build(
|
|
|
200
180
|
def retrieve(
|
|
201
181
|
ctx: Context,
|
|
202
182
|
retrieve_text: tuple[str, ...],
|
|
203
|
-
run_name: str | None,
|
|
204
183
|
limit: int | None,
|
|
205
184
|
output_format: Literal["file", "streamed"],
|
|
206
185
|
) -> None:
|
|
207
|
-
"""
|
|
208
|
-
Search the project's built context for the most relevant chunks.
|
|
209
|
-
"""
|
|
186
|
+
"""Search the project's built context for the most relevant chunks."""
|
|
210
187
|
text = " ".join(retrieve_text)
|
|
211
188
|
|
|
212
189
|
databao_engine = DatabaoContextEngine(project_dir=ctx.obj["project_dir"])
|
|
213
190
|
|
|
214
191
|
retrieve_results = databao_engine.search_context(
|
|
215
|
-
retrieve_text=text,
|
|
192
|
+
retrieve_text=text, limit=limit, export_to_file=output_format == "file"
|
|
216
193
|
)
|
|
217
194
|
|
|
218
195
|
if output_format == "streamed":
|
|
@@ -221,12 +198,6 @@ def retrieve(
|
|
|
221
198
|
|
|
222
199
|
|
|
223
200
|
@dce.command()
|
|
224
|
-
@click.option(
|
|
225
|
-
"-r",
|
|
226
|
-
"--run-name",
|
|
227
|
-
type=click.STRING,
|
|
228
|
-
help="Name of the build run you want to use (aka. the name of the run folder in your project's output). Defaults to the latest one in the project.",
|
|
229
|
-
)
|
|
230
201
|
@click.option(
|
|
231
202
|
"-H",
|
|
232
203
|
"--host",
|
|
@@ -247,10 +218,8 @@ def retrieve(
|
|
|
247
218
|
help="Transport to use. Defaults to stdio",
|
|
248
219
|
)
|
|
249
220
|
@click.pass_context
|
|
250
|
-
def mcp(ctx: Context,
|
|
251
|
-
"""
|
|
252
|
-
Run Databao Context Engine's MCP server
|
|
253
|
-
"""
|
|
221
|
+
def mcp(ctx: Context, host: str | None, port: int | None, transport: McpTransport) -> None:
|
|
222
|
+
"""Run Databao Context Engine's MCP server."""
|
|
254
223
|
if transport == "stdio":
|
|
255
224
|
configure_logging(verbose=False, quiet=True, project_dir=ctx.obj["project_dir"])
|
|
256
|
-
run_mcp_server(project_dir=ctx.obj["project_dir"],
|
|
225
|
+
run_mcp_server(project_dir=ctx.obj["project_dir"], transport=transport, host=host, port=port)
|
|
@@ -23,10 +23,10 @@ def _generate_info_string(command_info: DceInfo) -> str:
|
|
|
23
23
|
|
|
24
24
|
info_lines.append("")
|
|
25
25
|
|
|
26
|
-
if command_info.project_info.
|
|
26
|
+
if command_info.project_info.is_initialized:
|
|
27
27
|
info_lines.append(f"Project dir: {command_info.project_info.project_path.resolve()}")
|
|
28
28
|
info_lines.append(f"Project ID: {str(command_info.project_info.project_id)}")
|
|
29
29
|
else:
|
|
30
|
-
info_lines.append(f"Project not
|
|
30
|
+
info_lines.append(f"Project not initialized at {command_info.project_info.project_path.resolve()}")
|
|
31
31
|
|
|
32
32
|
return os.linesep.join(info_lines)
|