databao-context-engine 0.1.2__py3-none-any.whl → 0.1.4.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. databao_context_engine/__init__.py +18 -6
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +84 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
  8. databao_context_engine/cli/add_datasource_config.py +41 -15
  9. databao_context_engine/cli/commands.py +12 -43
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +137 -0
  12. databao_context_engine/databao_context_project_manager.py +96 -6
  13. databao_context_engine/datasources/add_config.py +34 -0
  14. databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
  15. databao_context_engine/datasources/datasource_context.py +93 -0
  16. databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
  17. databao_context_engine/{project → datasources}/types.py +64 -15
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +67 -53
  20. databao_context_engine/llm/errors.py +2 -8
  21. databao_context_engine/llm/install.py +13 -20
  22. databao_context_engine/llm/service.py +1 -3
  23. databao_context_engine/mcp/mcp_runner.py +4 -2
  24. databao_context_engine/mcp/mcp_server.py +10 -10
  25. databao_context_engine/plugin_loader.py +111 -0
  26. databao_context_engine/pluginlib/build_plugin.py +25 -9
  27. databao_context_engine/pluginlib/config.py +16 -2
  28. databao_context_engine/plugins/base_db_plugin.py +5 -2
  29. databao_context_engine/plugins/databases/athena_introspector.py +85 -22
  30. databao_context_engine/plugins/databases/base_introspector.py +5 -3
  31. databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
  32. databao_context_engine/plugins/databases/duckdb_introspector.py +3 -5
  33. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  34. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  35. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  36. databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
  37. databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
  38. databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
  39. databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
  40. databao_context_engine/plugins/duckdb_tools.py +18 -0
  41. databao_context_engine/plugins/plugin_loader.py +43 -42
  42. databao_context_engine/plugins/resources/parquet_introspector.py +7 -19
  43. databao_context_engine/project/info.py +34 -2
  44. databao_context_engine/project/init_project.py +16 -7
  45. databao_context_engine/project/layout.py +3 -3
  46. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  47. databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
  48. databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
  49. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
  50. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
  51. databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
  52. databao_context_engine/services/chunk_embedding_service.py +23 -11
  53. databao_context_engine/services/factories.py +1 -46
  54. databao_context_engine/services/persistence_service.py +11 -11
  55. databao_context_engine/storage/connection.py +11 -7
  56. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  57. databao_context_engine/storage/migrate.py +2 -4
  58. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  59. databao_context_engine/storage/models.py +2 -23
  60. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  61. databao_context_engine/storage/repositories/factories.py +1 -12
  62. databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
  63. databao_context_engine/system/properties.py +4 -2
  64. databao_context_engine-0.1.4.dev1.dist-info/METADATA +75 -0
  65. databao_context_engine-0.1.4.dev1.dist-info/RECORD +125 -0
  66. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/WHEEL +1 -1
  67. databao_context_engine/build_sources/internal/build_service.py +0 -77
  68. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  69. databao_context_engine/build_sources/internal/export_results.py +0 -43
  70. databao_context_engine/build_sources/public/api.py +0 -4
  71. databao_context_engine/databao_engine.py +0 -85
  72. databao_context_engine/datasource_config/__init__.py +0 -0
  73. databao_context_engine/datasource_config/add_config.py +0 -50
  74. databao_context_engine/datasource_config/datasource_context.py +0 -60
  75. databao_context_engine/mcp/all_results_tool.py +0 -5
  76. databao_context_engine/mcp/retrieve_tool.py +0 -22
  77. databao_context_engine/project/runs.py +0 -39
  78. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  79. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  80. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  81. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  82. databao_context_engine/serialisation/__init__.py +0 -0
  83. databao_context_engine/services/run_name_policy.py +0 -8
  84. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  85. databao_context_engine/storage/repositories/run_repository.py +0 -157
  86. databao_context_engine-0.1.2.dist-info/METADATA +0 -187
  87. databao_context_engine-0.1.2.dist-info/RECORD +0 -135
  88. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  89. /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
  90. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/entry_points.txt +0 -0
@@ -1,16 +1,23 @@
1
- from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
1
+ from databao_context_engine.build_sources import BuildContextResult
2
+ from databao_context_engine.databao_context_engine import ContextSearchResult, DatabaoContextEngine
2
3
  from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
3
- from databao_context_engine.databao_engine import ContextSearchResult, DatabaoContextEngine
4
- from databao_context_engine.datasource_config.check_config import (
4
+ from databao_context_engine.datasources.check_config import (
5
5
  CheckDatasourceConnectionResult,
6
6
  DatasourceConnectionStatus,
7
7
  )
8
- from databao_context_engine.datasource_config.datasource_context import DatasourceContext
8
+ from databao_context_engine.datasources.datasource_context import DatasourceContext
9
+ from databao_context_engine.datasources.types import Datasource, DatasourceId
9
10
  from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
10
- from databao_context_engine.pluginlib.build_plugin import DatasourceType
11
+ from databao_context_engine.plugin_loader import DatabaoContextPluginLoader
12
+ from databao_context_engine.pluginlib.build_plugin import (
13
+ BuildDatasourcePlugin,
14
+ BuildFilePlugin,
15
+ BuildPlugin,
16
+ DatasourceType,
17
+ )
18
+ from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
11
19
  from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
12
20
  from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
13
- from databao_context_engine.project.types import Datasource, DatasourceId
14
21
  from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
15
22
 
16
23
  __all__ = [
@@ -32,4 +39,9 @@ __all__ = [
32
39
  "init_or_get_dce_project",
33
40
  "InitErrorReason",
34
41
  "InitProjectError",
42
+ "DatabaoContextPluginLoader",
43
+ "ConfigPropertyDefinition",
44
+ "BuildPlugin",
45
+ "BuildDatasourcePlugin",
46
+ "BuildFilePlugin",
35
47
  ]
@@ -0,0 +1,4 @@
1
+ from databao_context_engine.build_sources.build_runner import BuildContextResult
2
+ from databao_context_engine.build_sources.build_wiring import build_all_datasources
3
+
4
+ __all__ = ["build_all_datasources", "BuildContextResult"]
@@ -3,22 +3,32 @@ from dataclasses import dataclass
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
5
 
6
- from databao_context_engine.build_sources.internal.build_service import BuildService
7
- from databao_context_engine.build_sources.internal.export_results import (
6
+ from databao_context_engine.build_sources.build_service import BuildService
7
+ from databao_context_engine.build_sources.export_results import (
8
8
  append_result_to_all_results,
9
- create_run_dir,
10
9
  export_build_result,
10
+ reset_all_results,
11
11
  )
12
+ from databao_context_engine.datasources.datasource_discovery import discover_datasources, prepare_source
13
+ from databao_context_engine.datasources.types import DatasourceId
12
14
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
15
  from databao_context_engine.plugins.plugin_loader import load_plugins
14
- from databao_context_engine.project.datasource_discovery import discover_datasources, prepare_source
15
- from databao_context_engine.project.types import DatasourceId
16
+ from databao_context_engine.project.layout import get_output_dir
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
20
 
20
21
  @dataclass
21
22
  class BuildContextResult:
23
+ """Result of a single datasource context build.
24
+
25
+ Attributes:
26
+ datasource_id: The id of the datasource.
27
+ datasource_type: The type of the datasource.
28
+ context_built_at: The timestamp when the context was built.
29
+ context_file_path: The path to the generated context file.
30
+ """
31
+
22
32
  datasource_id: DatasourceId
23
33
  datasource_type: DatasourceType
24
34
  context_built_at: datetime
@@ -29,16 +39,19 @@ def build(
29
39
  project_dir: Path,
30
40
  *,
31
41
  build_service: BuildService,
32
- project_id: str,
33
- dce_version: str,
34
42
  ) -> list[BuildContextResult]:
35
- """
36
- Build entrypoint.
43
+ """Build the context for all datasources in the project.
44
+
45
+ Unless you already have access to BuildService, this should not be called directly.
46
+ Instead, internal callers should go through the build_wiring module or directly use DatabaoContextProjectManager.build_context().
37
47
 
38
48
  1) Load available plugins
39
49
  2) Discover sources
40
50
  3) Create a run
41
51
  4) For each source, call process_source
52
+
53
+ Returns:
54
+ A list of all the contexts built.
42
55
  """
43
56
  plugins = load_plugins()
44
57
 
@@ -48,11 +61,9 @@ def build(
48
61
  logger.info("No sources discovered under %s", project_dir)
49
62
  return []
50
63
 
51
- run = None
52
- run_dir = None
53
-
54
64
  number_of_failed_builds = 0
55
65
  build_result = []
66
+ reset_all_results(get_output_dir(project_dir))
56
67
  for discovered_datasource in datasources:
57
68
  try:
58
69
  prepared_source = prepare_source(discovered_datasource)
@@ -71,19 +82,15 @@ def build(
71
82
  number_of_failed_builds += 1
72
83
  continue
73
84
 
74
- if run is None or run_dir is None:
75
- # Initialiase the run as soon as we found a source
76
- run = build_service.start_run(project_id=project_id, dce_version=dce_version)
77
- run_dir = create_run_dir(project_dir, run.run_name)
78
-
79
85
  result = build_service.process_prepared_source(
80
- run_id=run.run_id,
81
86
  prepared_source=prepared_source,
82
87
  plugin=plugin,
83
88
  )
84
89
 
85
- context_file_path = export_build_result(run_dir, result)
86
- append_result_to_all_results(run_dir, result)
90
+ output_dir = get_output_dir(project_dir)
91
+
92
+ context_file_path = export_build_result(output_dir, result)
93
+ append_result_to_all_results(output_dir, result)
87
94
 
88
95
  build_result.append(
89
96
  BuildContextResult(
@@ -99,9 +106,6 @@ def build(
99
106
 
100
107
  number_of_failed_builds += 1
101
108
 
102
- if run is not None:
103
- build_service.finalize_run(run_id=run.run_id)
104
-
105
109
  logger.debug(
106
110
  "Successfully built %d datasources. %s",
107
111
  len(build_result),
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext, execute
6
+ from databao_context_engine.datasources.types import PreparedDatasource
7
+ from databao_context_engine.pluginlib.build_plugin import (
8
+ BuildPlugin,
9
+ )
10
+ from databao_context_engine.serialization.yaml import to_yaml_string
11
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class BuildService:
17
+ def __init__(
18
+ self,
19
+ *,
20
+ chunk_embedding_service: ChunkEmbeddingService,
21
+ ) -> None:
22
+ self._chunk_embedding_service = chunk_embedding_service
23
+
24
+ def process_prepared_source(
25
+ self,
26
+ *,
27
+ prepared_source: PreparedDatasource,
28
+ plugin: BuildPlugin,
29
+ ) -> BuiltDatasourceContext:
30
+ """Process a single source to build its context.
31
+
32
+ 1) Execute the plugin
33
+ 2) Divide the results into chunks
34
+ 3) Embed and persist the chunks
35
+
36
+ Returns:
37
+ The built context.
38
+ """
39
+ result = execute(prepared_source, plugin)
40
+
41
+ chunks = plugin.divide_context_into_chunks(result.context)
42
+ if not chunks:
43
+ logger.info("No chunks for %s — skipping.", prepared_source.path.name)
44
+ return result
45
+
46
+ self._chunk_embedding_service.embed_chunks(
47
+ chunks=chunks,
48
+ result=to_yaml_string(result.context),
49
+ full_type=prepared_source.datasource_type.full_type,
50
+ datasource_id=result.datasource_id,
51
+ )
52
+
53
+ return result
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from duckdb import DuckDBPyConnection
5
+
6
+ from databao_context_engine.build_sources.build_runner import BuildContextResult, build
7
+ from databao_context_engine.build_sources.build_service import BuildService
8
+ from databao_context_engine.llm.descriptions.provider import DescriptionProvider
9
+ from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
10
+ from databao_context_engine.llm.factory import (
11
+ create_ollama_description_provider,
12
+ create_ollama_embedding_provider,
13
+ create_ollama_service,
14
+ )
15
+ from databao_context_engine.project.layout import ensure_project_dir
16
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
17
+ from databao_context_engine.services.factories import create_chunk_embedding_service
18
+ from databao_context_engine.storage.connection import open_duckdb_connection
19
+ from databao_context_engine.storage.migrate import migrate
20
+ from databao_context_engine.system.properties import get_db_path
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
26
+ """Build the context for all datasources in the project.
27
+
28
+ - Instantiates the build service
29
+ - Delegates the actual build logic to the build runner
30
+
31
+ Returns:
32
+ A list of all the contexts built.
33
+ """
34
+ ensure_project_dir(project_dir)
35
+
36
+ logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")
37
+
38
+ # Think about alternative solutions. This solution will mirror the current behaviour
39
+ # The current behaviour only builds what is currently in the /src folder
40
+ # This will need to change in the future when we can pick which datasources to build
41
+ db_path = get_db_path(project_dir)
42
+ db_path.parent.mkdir(parents=True, exist_ok=True)
43
+ if db_path.exists():
44
+ db_path.unlink()
45
+
46
+ with open_duckdb_connection(db_path) as conn:
47
+ migrate(db_path)
48
+
49
+ ollama_service = create_ollama_service()
50
+ embedding_provider = create_ollama_embedding_provider(ollama_service)
51
+ description_provider = (
52
+ create_ollama_description_provider(ollama_service)
53
+ if chunk_embedding_mode.should_generate_description()
54
+ else None
55
+ )
56
+ build_service = _create_build_service(
57
+ conn,
58
+ embedding_provider=embedding_provider,
59
+ description_provider=description_provider,
60
+ chunk_embedding_mode=chunk_embedding_mode,
61
+ )
62
+ return build(
63
+ project_dir=project_dir,
64
+ build_service=build_service,
65
+ )
66
+
67
+
68
+ def _create_build_service(
69
+ conn: DuckDBPyConnection,
70
+ *,
71
+ embedding_provider: EmbeddingProvider,
72
+ description_provider: DescriptionProvider | None,
73
+ chunk_embedding_mode: ChunkEmbeddingMode,
74
+ ) -> BuildService:
75
+ chunk_embedding_service = create_chunk_embedding_service(
76
+ conn,
77
+ embedding_provider=embedding_provider,
78
+ description_provider=description_provider,
79
+ chunk_embedding_mode=chunk_embedding_mode,
80
+ )
81
+
82
+ return BuildService(
83
+ chunk_embedding_service=chunk_embedding_service,
84
+ )
@@ -0,0 +1,41 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
5
+ from databao_context_engine.datasources.datasource_context import get_context_header_for_datasource
6
+ from databao_context_engine.datasources.types import DatasourceId
7
+ from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME
8
+ from databao_context_engine.serialization.yaml import write_yaml_to_stream
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ def export_build_result(output_dir: Path, result: BuiltDatasourceContext) -> Path:
14
+ datasource_id = DatasourceId.from_string_repr(result.datasource_id)
15
+ export_file_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
16
+
17
+ # Make sure the parent folder exists
18
+ export_file_path.parent.mkdir(parents=True, exist_ok=True)
19
+
20
+ with export_file_path.open("w") as export_file:
21
+ write_yaml_to_stream(data=result, file_stream=export_file)
22
+
23
+ logger.info(f"Exported result to {export_file_path.resolve()}")
24
+
25
+ return export_file_path
26
+
27
+
28
+ def append_result_to_all_results(output_dir: Path, result: BuiltDatasourceContext):
29
+ path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
30
+ path.parent.mkdir(parents=True, exist_ok=True)
31
+ with path.open("a", encoding="utf-8") as export_file:
32
+ export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
33
+ write_yaml_to_stream(data=result, file_stream=export_file)
34
+ export_file.write("\n")
35
+
36
+
37
+ # Here temporarily because it needs to be reset between runs.
38
+ # A subsequent PR will remove the existence of the all_results file
39
+ def reset_all_results(output_dir: Path):
40
+ path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
41
+ path.unlink(missing_ok=True)
@@ -2,20 +2,18 @@ from dataclasses import dataclass
2
2
  from datetime import datetime
3
3
  from typing import Any, cast
4
4
 
5
+ from databao_context_engine.datasources.types import DatasourceId, PreparedConfig, PreparedDatasource
5
6
  from databao_context_engine.pluginlib.build_plugin import (
6
7
  BuildDatasourcePlugin,
7
8
  BuildFilePlugin,
8
9
  BuildPlugin,
9
10
  )
10
11
  from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
11
- from databao_context_engine.project.types import PreparedConfig, PreparedDatasource, DatasourceId
12
12
 
13
13
 
14
14
  @dataclass()
15
15
  class BuiltDatasourceContext:
16
- """
17
- Dataclass defining the result of building a datasource's context.
18
- """
16
+ """Dataclass defining the result of building a datasource's context."""
19
17
 
20
18
  datasource_id: str
21
19
  """
@@ -53,9 +51,7 @@ def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Bui
53
51
 
54
52
 
55
53
  def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
56
- """
57
- Run a prepared source through the plugin
58
- """
54
+ """Run a prepared source through the plugin."""
59
55
  if isinstance(prepared_datasource, PreparedConfig):
60
56
  ds_plugin = cast(BuildDatasourcePlugin, plugin)
61
57
 
@@ -5,22 +5,27 @@ from typing import Any
5
5
 
6
6
  import click
7
7
 
8
- from databao_context_engine import DatabaoContextProjectManager, DatasourceId, DatasourceType
9
- from databao_context_engine.datasource_config.add_config import (
10
- get_config_file_structure_for_datasource_type,
8
+ from databao_context_engine import (
9
+ ConfigPropertyDefinition,
10
+ DatabaoContextPluginLoader,
11
+ DatabaoContextProjectManager,
12
+ DatasourceId,
13
+ DatasourceType,
11
14
  )
12
- from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
13
- from databao_context_engine.plugins.plugin_loader import get_all_available_plugin_types
15
+ from databao_context_engine.pluginlib.config import ConfigUnionPropertyDefinition
14
16
 
15
17
 
16
18
  def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
17
19
  project_manager = DatabaoContextProjectManager(project_dir=project_dir)
20
+ plugin_loader = DatabaoContextPluginLoader()
18
21
 
19
22
  click.echo(
20
23
  f"We will guide you to add a new datasource in your Databao Context Engine project, at {project_dir.resolve()}"
21
24
  )
22
25
 
23
- datasource_type = _ask_for_datasource_type()
26
+ datasource_type = _ask_for_datasource_type(
27
+ plugin_loader.get_all_supported_datasource_types(exclude_file_plugins=True)
28
+ )
24
29
  datasource_name = click.prompt("Datasource name?", type=str)
25
30
 
26
31
  is_datasource_existing = project_manager.datasource_config_exists(
@@ -33,7 +38,9 @@ def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
33
38
  default=False,
34
39
  )
35
40
 
36
- config_content = _ask_for_config_details(datasource_type)
41
+ config_content = _ask_for_config_details(
42
+ plugin_loader.get_config_file_structure_for_datasource_type(datasource_type)
43
+ )
37
44
 
38
45
  config_file = project_manager.create_datasource_config(
39
46
  datasource_type, datasource_name, config_content, overwrite_existing=True
@@ -44,10 +51,8 @@ def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
44
51
  return config_file.datasource_id
45
52
 
46
53
 
47
- def _ask_for_datasource_type() -> DatasourceType:
48
- supported_types_by_folder = _group_supported_types_by_folder(
49
- get_all_available_plugin_types(exclude_file_plugins=True)
50
- )
54
+ def _ask_for_datasource_type(supported_datasource_types: set[DatasourceType]) -> DatasourceType:
55
+ supported_types_by_folder = _group_supported_types_by_folder(supported_datasource_types)
51
56
 
52
57
  all_config_folders = sorted(supported_types_by_folder.keys())
53
58
  config_folder = click.prompt(
@@ -68,9 +73,7 @@ def _ask_for_datasource_type() -> DatasourceType:
68
73
  return DatasourceType.from_main_and_subtypes(config_folder, config_type)
69
74
 
70
75
 
71
- def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
72
- config_file_structure = get_config_file_structure_for_datasource_type(datasource_type)
73
-
76
+ def _ask_for_config_details(config_file_structure: list[ConfigPropertyDefinition]) -> dict[str, Any]:
74
77
  # Adds a new line before asking for the config values specific to the plugin
75
78
  click.echo("")
76
79
 
@@ -84,13 +87,35 @@ def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
84
87
 
85
88
 
86
89
  def _build_config_content_from_properties(
87
- properties: list[ConfigPropertyDefinition], properties_prefix: str
90
+ properties: list[ConfigPropertyDefinition], properties_prefix: str, in_union: bool = False
88
91
  ) -> dict[str, Any]:
89
92
  config_content: dict[str, Any] = {}
90
93
  for config_file_property in properties:
91
94
  if config_file_property.property_key in ["type", "name"] and len(properties_prefix) == 0:
92
95
  # We ignore type and name properties as they've already been filled
93
96
  continue
97
+ if in_union and config_file_property.property_key == "type":
98
+ continue
99
+
100
+ if isinstance(config_file_property, ConfigUnionPropertyDefinition):
101
+ choices = {t.__name__: t for t in config_file_property.types}
102
+
103
+ chosen = click.prompt(
104
+ f"{properties_prefix}{config_file_property.property_key}.type?",
105
+ type=click.Choice(sorted(choices.keys())),
106
+ )
107
+
108
+ chosen_type = choices[chosen]
109
+
110
+ nested_props = config_file_property.type_properties[chosen_type]
111
+ nested_content = _build_config_content_from_properties(
112
+ nested_props, f"{properties_prefix}{config_file_property.property_key}.", in_union=True
113
+ )
114
+
115
+ config_content[config_file_property.property_key] = {
116
+ **nested_content,
117
+ }
118
+ continue
94
119
 
95
120
  if config_file_property.nested_properties is not None and len(config_file_property.nested_properties) > 0:
96
121
  nested_content = _build_config_content_from_properties(
@@ -114,6 +139,7 @@ def _build_config_content_from_properties(
114
139
  type=str,
115
140
  default=default_value,
116
141
  show_default=default_value is not None and default_value != "",
142
+ hide_input=config_file_property.secret,
117
143
  )
118
144
 
119
145
  if property_value.strip():
@@ -19,7 +19,6 @@ from databao_context_engine.cli.info import echo_info
19
19
  from databao_context_engine.config.logging import configure_logging
20
20
  from databao_context_engine.llm.install import resolve_ollama_bin
21
21
  from databao_context_engine.mcp.mcp_runner import McpTransport, run_mcp_server
22
- from databao_context_engine.storage.migrate import migrate
23
22
 
24
23
 
25
24
  @click.group()
@@ -49,25 +48,18 @@ def dce(ctx: Context, verbose: bool, quiet: bool, project_dir: str | None) -> No
49
48
  ctx.obj["quiet"] = quiet
50
49
  ctx.obj["project_dir"] = project_path
51
50
 
52
- migrate()
53
-
54
51
 
55
52
  @dce.command()
56
53
  @click.pass_context
57
54
  def info(ctx: Context) -> None:
58
- """
59
- Display system-wide information
60
- """
61
-
55
+ """Display system-wide information."""
62
56
  echo_info(ctx.obj["project_dir"])
63
57
 
64
58
 
65
59
  @dce.command()
66
60
  @click.pass_context
67
61
  def init(ctx: Context) -> None:
68
- """
69
- Create an empty Databao Context Engine project
70
- """
62
+ """Create an empty Databao Context Engine project."""
71
63
  project_dir = ctx.obj["project_dir"]
72
64
  try:
73
65
  init_dce_project(project_dir=project_dir)
@@ -97,17 +89,14 @@ def init(ctx: Context) -> None:
97
89
 
98
90
  @dce.group()
99
91
  def datasource() -> None:
100
- """
101
- Manage datasource configurations
102
- """
92
+ """Manage datasource configurations."""
103
93
  pass
104
94
 
105
95
 
106
96
  @datasource.command(name="add")
107
97
  @click.pass_context
108
98
  def add_datasource_config(ctx: Context) -> None:
109
- """
110
- Add a new datasource configuration.
99
+ """Add a new datasource configuration.
111
100
 
112
101
  The command will ask all relevant information for that datasource and save it in your Databao Context Engine project.
113
102
  """
@@ -122,15 +111,13 @@ def add_datasource_config(ctx: Context) -> None:
122
111
  )
123
112
  @click.pass_context
124
113
  def check_datasource_config(ctx: Context, datasources_config_files: list[str] | None) -> None:
125
- """
126
- Check whether a datasource configuration is valid.
114
+ """Check whether a datasource configuration is valid.
127
115
 
128
116
  The configuration is considered as valid if a connection with the datasource can be established.
129
117
 
130
118
  By default, all datasources declared in the project will be checked.
131
119
  You can explicitely list which datasources to validate by using the [DATASOURCES_CONFIG_FILES] argument. Each argument must be the path to the file within the src folder (e.g: my-folder/my-config.yaml)
132
120
  """
133
-
134
121
  datasource_ids = (
135
122
  [DatasourceId.from_string_repr(datasource_config_file) for datasource_config_file in datasources_config_files]
136
123
  if datasources_config_files is not None
@@ -157,10 +144,9 @@ def build(
157
144
  "embeddable_text_only", "generated_description_only", "embeddable_text_and_generated_description"
158
145
  ],
159
146
  ) -> None:
160
- """
161
- Build context for all datasources
147
+ """Build context for all datasources.
162
148
 
163
- The output of the build command will be saved in a "run" folder in the output directory.
149
+ The output of the build command will be saved in the output directory.
164
150
 
165
151
  Internally, this indexes the context to be used by the MCP server and the "retrieve" command.
166
152
  """
@@ -177,12 +163,6 @@ def build(
177
163
  nargs=-1,
178
164
  required=True,
179
165
  )
180
- @click.option(
181
- "-r",
182
- "--run-name",
183
- type=click.STRING,
184
- help="Build run to use (the run folder name). Defaults to the latest run in the project.",
185
- )
186
166
  @click.option(
187
167
  "-l",
188
168
  "--limit",
@@ -200,19 +180,16 @@ def build(
200
180
  def retrieve(
201
181
  ctx: Context,
202
182
  retrieve_text: tuple[str, ...],
203
- run_name: str | None,
204
183
  limit: int | None,
205
184
  output_format: Literal["file", "streamed"],
206
185
  ) -> None:
207
- """
208
- Search the project's built context for the most relevant chunks.
209
- """
186
+ """Search the project's built context for the most relevant chunks."""
210
187
  text = " ".join(retrieve_text)
211
188
 
212
189
  databao_engine = DatabaoContextEngine(project_dir=ctx.obj["project_dir"])
213
190
 
214
191
  retrieve_results = databao_engine.search_context(
215
- retrieve_text=text, run_name=run_name, limit=limit, export_to_file=output_format == "file"
192
+ retrieve_text=text, limit=limit, export_to_file=output_format == "file"
216
193
  )
217
194
 
218
195
  if output_format == "streamed":
@@ -221,12 +198,6 @@ def retrieve(
221
198
 
222
199
 
223
200
  @dce.command()
224
- @click.option(
225
- "-r",
226
- "--run-name",
227
- type=click.STRING,
228
- help="Name of the build run you want to use (aka. the name of the run folder in your project's output). Defaults to the latest one in the project.",
229
- )
230
201
  @click.option(
231
202
  "-H",
232
203
  "--host",
@@ -247,10 +218,8 @@ def retrieve(
247
218
  help="Transport to use. Defaults to stdio",
248
219
  )
249
220
  @click.pass_context
250
- def mcp(ctx: Context, run_name: str | None, host: str | None, port: int | None, transport: McpTransport) -> None:
251
- """
252
- Run Databao Context Engine's MCP server
253
- """
221
+ def mcp(ctx: Context, host: str | None, port: int | None, transport: McpTransport) -> None:
222
+ """Run Databao Context Engine's MCP server."""
254
223
  if transport == "stdio":
255
224
  configure_logging(verbose=False, quiet=True, project_dir=ctx.obj["project_dir"])
256
- run_mcp_server(project_dir=ctx.obj["project_dir"], run_name=run_name, transport=transport, host=host, port=port)
225
+ run_mcp_server(project_dir=ctx.obj["project_dir"], transport=transport, host=host, port=port)
@@ -15,6 +15,7 @@ def _generate_info_string(command_info: DceInfo) -> str:
15
15
  info_lines = []
16
16
  info_lines.append(f"Databao context engine version: {command_info.version}")
17
17
  info_lines.append(f"Databao context engine storage dir: {command_info.dce_path}")
18
+ info_lines.append(f"Databao context engine plugins: {command_info.plugin_ids}")
18
19
 
19
20
  info_lines.append("")
20
21
 
@@ -23,10 +24,10 @@ def _generate_info_string(command_info: DceInfo) -> str:
23
24
 
24
25
  info_lines.append("")
25
26
 
26
- if command_info.project_info.is_initialised:
27
+ if command_info.project_info.is_initialized:
27
28
  info_lines.append(f"Project dir: {command_info.project_info.project_path.resolve()}")
28
29
  info_lines.append(f"Project ID: {str(command_info.project_info.project_id)}")
29
30
  else:
30
- info_lines.append(f"Project not initialised at {command_info.project_info.project_path.resolve()}")
31
+ info_lines.append(f"Project not initialized at {command_info.project_info.project_path.resolve()}")
31
32
 
32
33
  return os.linesep.join(info_lines)