databao-context-engine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. databao_context_engine/__init__.py +35 -0
  2. databao_context_engine/build_sources/__init__.py +0 -0
  3. databao_context_engine/build_sources/internal/__init__.py +0 -0
  4. databao_context_engine/build_sources/internal/build_runner.py +111 -0
  5. databao_context_engine/build_sources/internal/build_service.py +77 -0
  6. databao_context_engine/build_sources/internal/build_wiring.py +52 -0
  7. databao_context_engine/build_sources/internal/export_results.py +43 -0
  8. databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
  9. databao_context_engine/build_sources/public/__init__.py +0 -0
  10. databao_context_engine/build_sources/public/api.py +4 -0
  11. databao_context_engine/cli/__init__.py +0 -0
  12. databao_context_engine/cli/add_datasource_config.py +130 -0
  13. databao_context_engine/cli/commands.py +256 -0
  14. databao_context_engine/cli/datasources.py +64 -0
  15. databao_context_engine/cli/info.py +32 -0
  16. databao_context_engine/config/__init__.py +0 -0
  17. databao_context_engine/config/log_config.yaml +16 -0
  18. databao_context_engine/config/logging.py +43 -0
  19. databao_context_engine/databao_context_project_manager.py +92 -0
  20. databao_context_engine/databao_engine.py +85 -0
  21. databao_context_engine/datasource_config/__init__.py +0 -0
  22. databao_context_engine/datasource_config/add_config.py +50 -0
  23. databao_context_engine/datasource_config/check_config.py +131 -0
  24. databao_context_engine/datasource_config/datasource_context.py +60 -0
  25. databao_context_engine/event_journal/__init__.py +0 -0
  26. databao_context_engine/event_journal/writer.py +29 -0
  27. databao_context_engine/generate_configs_schemas.py +92 -0
  28. databao_context_engine/init_project.py +18 -0
  29. databao_context_engine/introspection/__init__.py +0 -0
  30. databao_context_engine/introspection/property_extract.py +202 -0
  31. databao_context_engine/llm/__init__.py +0 -0
  32. databao_context_engine/llm/config.py +20 -0
  33. databao_context_engine/llm/descriptions/__init__.py +0 -0
  34. databao_context_engine/llm/descriptions/ollama.py +21 -0
  35. databao_context_engine/llm/descriptions/provider.py +10 -0
  36. databao_context_engine/llm/embeddings/__init__.py +0 -0
  37. databao_context_engine/llm/embeddings/ollama.py +37 -0
  38. databao_context_engine/llm/embeddings/provider.py +13 -0
  39. databao_context_engine/llm/errors.py +16 -0
  40. databao_context_engine/llm/factory.py +61 -0
  41. databao_context_engine/llm/install.py +227 -0
  42. databao_context_engine/llm/runtime.py +73 -0
  43. databao_context_engine/llm/service.py +159 -0
  44. databao_context_engine/main.py +19 -0
  45. databao_context_engine/mcp/__init__.py +0 -0
  46. databao_context_engine/mcp/all_results_tool.py +5 -0
  47. databao_context_engine/mcp/mcp_runner.py +16 -0
  48. databao_context_engine/mcp/mcp_server.py +63 -0
  49. databao_context_engine/mcp/retrieve_tool.py +22 -0
  50. databao_context_engine/pluginlib/__init__.py +0 -0
  51. databao_context_engine/pluginlib/build_plugin.py +107 -0
  52. databao_context_engine/pluginlib/config.py +37 -0
  53. databao_context_engine/pluginlib/plugin_utils.py +68 -0
  54. databao_context_engine/plugins/__init__.py +0 -0
  55. databao_context_engine/plugins/athena_db_plugin.py +12 -0
  56. databao_context_engine/plugins/base_db_plugin.py +45 -0
  57. databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
  58. databao_context_engine/plugins/databases/__init__.py +0 -0
  59. databao_context_engine/plugins/databases/athena_introspector.py +101 -0
  60. databao_context_engine/plugins/databases/base_introspector.py +144 -0
  61. databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
  62. databao_context_engine/plugins/databases/database_chunker.py +69 -0
  63. databao_context_engine/plugins/databases/databases_types.py +114 -0
  64. databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
  65. databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
  66. databao_context_engine/plugins/databases/introspection_scope.py +74 -0
  67. databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
  68. databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
  69. databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
  70. databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
  71. databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
  72. databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
  73. databao_context_engine/plugins/mssql_db_plugin.py +12 -0
  74. databao_context_engine/plugins/mysql_db_plugin.py +12 -0
  75. databao_context_engine/plugins/parquet_plugin.py +32 -0
  76. databao_context_engine/plugins/plugin_loader.py +110 -0
  77. databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
  78. databao_context_engine/plugins/resources/__init__.py +0 -0
  79. databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
  80. databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
  81. databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
  82. databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
  83. databao_context_engine/project/__init__.py +0 -0
  84. databao_context_engine/project/datasource_discovery.py +141 -0
  85. databao_context_engine/project/info.py +44 -0
  86. databao_context_engine/project/init_project.py +102 -0
  87. databao_context_engine/project/layout.py +127 -0
  88. databao_context_engine/project/project_config.py +32 -0
  89. databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
  90. databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
  91. databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
  92. databao_context_engine/project/runs.py +39 -0
  93. databao_context_engine/project/types.py +134 -0
  94. databao_context_engine/retrieve_embeddings/__init__.py +0 -0
  95. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  96. databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
  97. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
  98. databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
  99. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
  100. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  101. databao_context_engine/retrieve_embeddings/public/api.py +3 -0
  102. databao_context_engine/serialisation/__init__.py +0 -0
  103. databao_context_engine/serialisation/yaml.py +35 -0
  104. databao_context_engine/services/__init__.py +0 -0
  105. databao_context_engine/services/chunk_embedding_service.py +104 -0
  106. databao_context_engine/services/embedding_shard_resolver.py +64 -0
  107. databao_context_engine/services/factories.py +88 -0
  108. databao_context_engine/services/models.py +12 -0
  109. databao_context_engine/services/persistence_service.py +61 -0
  110. databao_context_engine/services/run_name_policy.py +8 -0
  111. databao_context_engine/services/table_name_policy.py +15 -0
  112. databao_context_engine/storage/__init__.py +0 -0
  113. databao_context_engine/storage/connection.py +32 -0
  114. databao_context_engine/storage/exceptions/__init__.py +0 -0
  115. databao_context_engine/storage/exceptions/exceptions.py +6 -0
  116. databao_context_engine/storage/migrate.py +127 -0
  117. databao_context_engine/storage/migrations/V01__init.sql +63 -0
  118. databao_context_engine/storage/models.py +51 -0
  119. databao_context_engine/storage/repositories/__init__.py +0 -0
  120. databao_context_engine/storage/repositories/chunk_repository.py +130 -0
  121. databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
  122. databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
  123. databao_context_engine/storage/repositories/embedding_repository.py +113 -0
  124. databao_context_engine/storage/repositories/factories.py +35 -0
  125. databao_context_engine/storage/repositories/run_repository.py +157 -0
  126. databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
  127. databao_context_engine/storage/transaction.py +14 -0
  128. databao_context_engine/system/__init__.py +0 -0
  129. databao_context_engine/system/properties.py +13 -0
  130. databao_context_engine/templating/__init__.py +0 -0
  131. databao_context_engine/templating/renderer.py +29 -0
  132. databao_context_engine-0.1.1.dist-info/METADATA +186 -0
  133. databao_context_engine-0.1.1.dist-info/RECORD +135 -0
  134. databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
  135. databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,35 @@
1
+ from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
2
+ from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
3
+ from databao_context_engine.databao_engine import ContextSearchResult, DatabaoContextEngine
4
+ from databao_context_engine.datasource_config.check_config import (
5
+ CheckDatasourceConnectionResult,
6
+ DatasourceConnectionStatus,
7
+ )
8
+ from databao_context_engine.datasource_config.datasource_context import DatasourceContext
9
+ from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
10
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
11
+ from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
12
+ from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
13
+ from databao_context_engine.project.types import Datasource, DatasourceId
14
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
15
+
16
+ __all__ = [
17
+ "DatabaoContextEngine",
18
+ "Datasource",
19
+ "DatasourceId",
20
+ "DatasourceContext",
21
+ "ContextSearchResult",
22
+ "DatabaoContextProjectManager",
23
+ "ChunkEmbeddingMode",
24
+ "BuildContextResult",
25
+ "CheckDatasourceConnectionResult",
26
+ "DatasourceConnectionStatus",
27
+ "DatasourceConfigFile",
28
+ "DatasourceType",
29
+ "get_databao_context_engine_info",
30
+ "DceInfo",
31
+ "init_dce_project",
32
+ "init_or_get_dce_project",
33
+ "InitErrorReason",
34
+ "InitProjectError",
35
+ ]
File without changes
@@ -0,0 +1,111 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from databao_context_engine.build_sources.internal.build_service import BuildService
7
+ from databao_context_engine.build_sources.internal.export_results import (
8
+ append_result_to_all_results,
9
+ create_run_dir,
10
+ export_build_result,
11
+ )
12
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
+ from databao_context_engine.plugins.plugin_loader import load_plugins
14
+ from databao_context_engine.project.datasource_discovery import discover_datasources, prepare_source
15
+ from databao_context_engine.project.types import DatasourceId
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass()
class BuildContextResult:
    """
    Summary of one datasource whose context was built.

    Attributes:
        datasource_id: Identifier of the datasource that was built.
        datasource_type: Type of the datasource that was built.
        context_built_at: Time at which the context was built.
        context_file_path: Path of the file the context was exported to.
    """

    datasource_id: DatasourceId
    datasource_type: DatasourceType
    context_built_at: datetime
    context_file_path: Path
26
+
27
+
28
def build(
    project_dir: Path,
    *,
    build_service: BuildService,
    project_id: str,
    dce_version: str,
) -> list[BuildContextResult]:
    """
    Build the context of every datasource discovered in a project.

    Steps:
    1) Load the available plugins
    2) Discover the datasources under ``project_dir``
    3) Start a run (lazily, once a datasource with a matching plugin is found)
    4) Process and export each datasource, then finalize the run

    A datasource that has no plugin, or that raises while being processed, is
    logged and counted as failed; the remaining datasources are still built.

    Returns one ``BuildContextResult`` per datasource that was built, or an
    empty list when no datasource was discovered.
    """
    available_plugins = load_plugins()
    discovered = discover_datasources(project_dir)

    if not discovered:
        logger.info("No sources discovered under %s", project_dir)
        return []

    current_run = None
    current_run_dir = None
    failure_count = 0
    built_contexts = []

    for datasource in discovered:
        try:
            source = prepare_source(datasource)

            logger.info(
                f'Found datasource of type "{source.datasource_type.full_type}" with name {source.path.stem}'
            )

            matching_plugin = available_plugins.get(source.datasource_type)
            if matching_plugin is None:
                logger.warning(
                    "No plugin for '%s' (datasource=%s) — skipping.",
                    source.datasource_type.full_type,
                    source.path,
                )
                failure_count += 1
                continue

            if not (current_run is not None and current_run_dir is not None):
                # Initialise the run as soon as we have found a source that can be built
                current_run = build_service.start_run(project_id=project_id, dce_version=dce_version)
                current_run_dir = create_run_dir(project_dir, current_run.run_name)

            built = build_service.process_prepared_source(
                run_id=current_run.run_id,
                prepared_source=source,
                plugin=matching_plugin,
            )

            exported_path = export_build_result(current_run_dir, built)
            append_result_to_all_results(current_run_dir, built)

            summary = BuildContextResult(
                datasource_id=DatasourceId.from_string_repr(built.datasource_id),
                datasource_type=DatasourceType(full_type=built.datasource_type),
                context_built_at=built.context_built_at,
                context_file_path=exported_path,
            )
            built_contexts.append(summary)
        except Exception as error:
            # A failing datasource must not prevent the other ones from being built
            logger.debug(str(error), exc_info=True, stack_info=True)
            logger.info(f"Failed to build source at ({datasource.path}): {str(error)}")

            failure_count += 1

    if current_run is not None:
        build_service.finalize_run(run_id=current_run.run_id)

    failure_note = f"Failed to build {failure_count}." if failure_count > 0 else ""
    logger.debug("Successfully built %d datasources. %s", len(built_contexts), failure_note)

    return built_contexts
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime
5
+
6
+ from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
7
+ from databao_context_engine.pluginlib.build_plugin import (
8
+ BuildPlugin,
9
+ )
10
+ from databao_context_engine.project.types import PreparedDatasource
11
+ from databao_context_engine.serialisation.yaml import to_yaml_string
12
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
13
+ from databao_context_engine.storage.models import RunDTO
14
+ from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
15
+ from databao_context_engine.storage.repositories.run_repository import RunRepository
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class BuildService:
    """
    Service handling the bookkeeping of a build run and the processing of each datasource.
    """

    def __init__(
        self,
        *,
        run_repo: RunRepository,
        datasource_run_repo: DatasourceRunRepository,
        chunk_embedding_service: ChunkEmbeddingService,
    ) -> None:
        self._chunk_embedding_service = chunk_embedding_service
        self._datasource_run_repo = datasource_run_repo
        self._run_repo = run_repo

    def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
        """
        Create a new run through the run repository and return the created run.
        """
        created_run = self._run_repo.create(project_id=project_id, dce_version=dce_version)
        return created_run

    def finalize_run(self, *, run_id: int):
        """
        Mark the run as complete by setting its ended_at to the current time.
        """
        ended_at = datetime.now()
        self._run_repo.update(run_id=run_id, ended_at=ended_at)

    def process_prepared_source(
        self,
        *,
        run_id: int,
        prepared_source: PreparedDatasource,
        plugin: BuildPlugin,
    ) -> BuiltDatasourceContext:
        """
        Process a single source.

        1) Execute the plugin
        2) Ask the plugin to divide the resulting context into chunks
        3) Register a datasource run, then embed the chunks

        When the plugin produces no chunks, nothing is registered or embedded
        and the built context is returned as is.
        """
        built = execute(prepared_source, plugin)

        context_chunks = plugin.divide_context_into_chunks(built.context)
        if context_chunks:
            registered_run = self._datasource_run_repo.create(
                run_id=run_id,
                plugin=plugin.name,
                full_type=prepared_source.datasource_type.full_type,
                source_id=built.datasource_id,
                storage_directory=str(prepared_source.path.parent),
            )

            self._chunk_embedding_service.embed_chunks(
                datasource_run_id=registered_run.datasource_run_id,
                chunks=context_chunks,
                result=to_yaml_string(built.context),
            )
        else:
            logger.info("No chunks for %s — skipping.", prepared_source.path.name)

        return built
@@ -0,0 +1,52 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
5
+ from databao_context_engine.llm.factory import (
6
+ create_ollama_description_provider,
7
+ create_ollama_embedding_provider,
8
+ create_ollama_service,
9
+ )
10
+ from databao_context_engine.project.info import get_dce_version
11
+ from databao_context_engine.project.layout import ensure_project_dir
12
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
13
+ from databao_context_engine.services.factories import (
14
+ create_build_service,
15
+ )
16
+ from databao_context_engine.storage.connection import open_duckdb_connection
17
+ from databao_context_engine.system.properties import get_db_path
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
    """
    Public build entrypoint
    - Checks the project directory and opens the DuckDB connection
    - Instantiates the Ollama providers and the build service
    - Delegates the actual build logic to the build runner

    The description provider is only created when the chunk embedding mode
    asks for descriptions to be generated.
    """
    layout = ensure_project_dir(project_dir)

    logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")

    with open_duckdb_connection(get_db_path()) as connection:
        ollama = create_ollama_service()
        embeddings = create_ollama_embedding_provider(ollama)

        if chunk_embedding_mode.should_generate_description():
            descriptions = create_ollama_description_provider(ollama)
        else:
            descriptions = None

        service = create_build_service(
            connection,
            embedding_provider=embeddings,
            description_provider=descriptions,
            chunk_embedding_mode=chunk_embedding_mode,
        )
        project_config = layout.read_config_file()

        return build(
            project_dir=project_dir,
            build_service=service,
            project_id=str(project_config.project_id),
            dce_version=get_dce_version(),
        )
@@ -0,0 +1,43 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext
5
+ from databao_context_engine.datasource_config.datasource_context import get_context_header_for_datasource
6
+ from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME, get_output_dir
7
+ from databao_context_engine.project.types import DatasourceId
8
+ from databao_context_engine.serialisation.yaml import write_yaml_to_stream
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def create_run_dir(project_dir: Path, run_name: str) -> Path:
    """
    Create the directory of a run inside the project's output directory and return its path.

    Missing parent directories are created. Since ``exist_ok`` is False, the
    call fails when a directory for this run name already exists.
    """
    new_run_dir = get_output_dir(project_dir).joinpath(run_name)
    new_run_dir.mkdir(exist_ok=False, parents=True)
    return new_run_dir
20
+
21
+
22
def export_build_result(run_dir: Path, result: BuiltDatasourceContext) -> Path:
    """
    Write the built context of a datasource as YAML inside the run directory.

    The target file is derived from the datasource id
    (``DatasourceId.relative_path_to_context_file()``), relative to ``run_dir``.

    Returns the path of the file that was written.
    """
    datasource_id = DatasourceId.from_string_repr(result.datasource_id)
    export_file_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())

    # Make sure the parent folder exists, including any intermediate folder,
    # the same way append_result_to_all_results does
    export_file_path.parent.mkdir(parents=True, exist_ok=True)

    # Always write UTF-8: relying on the platform default encoding makes the export
    # differ (or fail on non-ASCII content) between systems, and the all-results
    # file is already written as UTF-8
    with export_file_path.open("w", encoding="utf-8") as export_file:
        write_yaml_to_stream(data=result, file_stream=export_file)

    logger.info(f"Exported result to {export_file_path.resolve()}")

    return export_file_path
35
+
36
+
37
def append_result_to_all_results(run_dir: Path, result: BuiltDatasourceContext):
    """
    Append the built context of a datasource to the run's all-results file.

    Each entry is made of the context header of the datasource, the result
    serialised as YAML and a trailing new line. The file is opened in append
    mode, as UTF-8.
    """
    all_results_path = run_dir.joinpath(ALL_RESULTS_FILE_NAME)
    all_results_path.parent.mkdir(exist_ok=True, parents=True)

    with all_results_path.open("a", encoding="utf-8") as all_results_file:
        header = get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id))
        all_results_file.write(header)
        write_yaml_to_stream(data=result, file_stream=all_results_file)
        all_results_file.write("\n")
@@ -0,0 +1,74 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Any, cast
4
+
5
+ from databao_context_engine.pluginlib.build_plugin import (
6
+ BuildDatasourcePlugin,
7
+ BuildFilePlugin,
8
+ BuildPlugin,
9
+ )
10
+ from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
11
+ from databao_context_engine.project.types import PreparedConfig, PreparedDatasource, DatasourceId
12
+
13
+
14
@dataclass
class BuiltDatasourceContext:
    """
    Dataclass defining the result of building a datasource's context.

    Attributes:
        datasource_id: The ID of the built data source.
        datasource_type: The type of the built data source.
        context_built_at: The time at which the context was built.
        context: The actual context generated for the data source.
            It should be serializable in YAML format.
    """

    datasource_id: str
    datasource_type: str
    context_built_at: datetime
    context: Any
40
+
41
+
42
def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> BuiltDatasourceContext:
    """
    Run a prepared datasource through its plugin and wrap the output.

    The returned ``BuiltDatasourceContext`` carries the datasource id derived
    from the datasource's path, its full type, the current time and the
    context produced by the plugin.
    """
    plugin_output = _execute(prepared_datasource, plugin)
    source_id = DatasourceId.from_datasource_config_file_path(prepared_datasource.path)

    return BuiltDatasourceContext(
        datasource_id=str(source_id),
        datasource_type=prepared_datasource.datasource_type.full_type,
        context_built_at=datetime.now(),
        context=plugin_output,
    )
53
+
54
+
55
def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
    """
    Run a prepared source through the plugin

    A ``PreparedConfig`` is executed as a datasource plugin, using its config
    and datasource name; anything else is executed as a file plugin, using
    its path. The plugin is only cast for the type checker, not verified.
    """
    if not isinstance(prepared_datasource, PreparedConfig):
        return execute_file_plugin(
            plugin=cast(BuildFilePlugin, plugin),
            datasource_type=prepared_datasource.datasource_type,
            file_path=prepared_datasource.path,
        )

    return execute_datasource_plugin(
        plugin=cast(BuildDatasourcePlugin, plugin),
        datasource_type=prepared_datasource.datasource_type,
        config=prepared_datasource.config,
        datasource_name=prepared_datasource.datasource_name,
    )
@@ -0,0 +1,4 @@
1
+ from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
2
+ from databao_context_engine.build_sources.internal.build_wiring import build_all_datasources
3
+
4
+ __all__ = ["build_all_datasources", "BuildContextResult"]
File without changes
@@ -0,0 +1,130 @@
1
+ import os
2
+ from collections import defaultdict
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import click
7
+
8
+ from databao_context_engine import DatabaoContextProjectManager, DatasourceId, DatasourceType
9
+ from databao_context_engine.datasource_config.add_config import (
10
+ get_config_file_structure_for_datasource_type,
11
+ )
12
+ from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
13
+ from databao_context_engine.plugins.plugin_loader import get_all_available_plugin_types
14
+
15
+
16
def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
    """
    Interactively create the config file of a new datasource in a project.

    The user is asked for the datasource type and name, then for each config
    value. When a config already exists for that type and name, the user must
    confirm the overwrite (declining aborts, since ``abort=True``).

    Returns the id of the datasource whose config file was created.
    """
    manager = DatabaoContextProjectManager(project_dir=project_dir)

    click.echo(
        f"We will guide you to add a new datasource in your Databao Context Engine project, at {project_dir.resolve()}"
    )

    chosen_type = _ask_for_datasource_type()
    chosen_name = click.prompt("Datasource name?", type=str)

    if manager.datasource_config_exists(datasource_type=chosen_type, datasource_name=chosen_name):
        click.confirm(
            f"A config file already exists for this datasource ({chosen_type.config_folder}/{chosen_name}). Do you want to overwrite it?",
            abort=True,
            default=False,
        )

    answers = _ask_for_config_details(chosen_type)

    # Overwriting is safe at this point: either nothing existed, or the user confirmed
    created_file = manager.create_datasource_config(chosen_type, chosen_name, answers, overwrite_existing=True)

    click.echo(f"{os.linesep}We've created a new config file for your datasource at: {created_file.config_file_path}")

    return created_file.datasource_id
45
+
46
+
47
def _ask_for_datasource_type() -> DatasourceType:
    """
    Ask the user for the main type, then the subtype, of the datasource to add.

    Only the types of non-file plugins are offered. At each step, the single
    available choice (if there is exactly one) is used as the default.
    """

    def choose(question: str, options: list[str]) -> str:
        # Prompt for one of the options, defaulting to it when it is the only one
        return click.prompt(
            question,
            type=click.Choice(options),
            default=options[0] if len(options) == 1 else None,
        )

    subtypes_by_folder = _group_supported_types_by_folder(get_all_available_plugin_types(exclude_file_plugins=True))

    chosen_folder = choose("What type of datasource do you want to add?", sorted(subtypes_by_folder.keys()))
    click.echo(f"Selected type: {chosen_folder}")

    chosen_subtype = choose("What is the subtype of this datasource?", sorted(subtypes_by_folder[chosen_folder]))
    click.echo(f"Selected subtype: {chosen_subtype}")

    return DatasourceType.from_main_and_subtypes(chosen_folder, chosen_subtype)
69
+
70
+
71
def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
    """
    Ask the user for the config values declared for the given datasource type.

    Returns the collected config content, or an empty dictionary when no
    config structure is declared for this datasource type.
    """
    declared_properties = get_config_file_structure_for_datasource_type(datasource_type)

    # Adds a new line before asking for the config values specific to the plugin
    click.echo("")

    if len(declared_properties) > 0:
        return _build_config_content_from_properties(declared_properties, properties_prefix="")

    click.echo(
        "The plugin for this datasource doesn't declare its configuration. Please check the documentation and fill the config file manually."
    )
    return {}
84
+
85
+
86
def _build_config_content_from_properties(
    properties: list[ConfigPropertyDefinition], properties_prefix: str
) -> dict[str, Any]:
    """
    Prompt the user for each property and build the matching config content.

    Args:
        properties: The property definitions to ask values for.
        properties_prefix: The dotted path of the parent properties, shown in
            front of each prompt. It is either empty (top level) or ends with
            a dot (e.g. ``"connection."``).

    Returns:
        A dictionary keyed by property key. Properties with nested properties
        yield a nested dictionary; properties answered with a blank value and
        nested groups that end up empty are left out.
    """
    config_content: dict[str, Any] = {}
    is_top_level = len(properties_prefix) == 0

    for config_file_property in properties:
        property_key = config_file_property.property_key

        if is_top_level and property_key in ["type", "name"]:
            # We ignore type and name properties as they've already been filled
            continue

        if config_file_property.nested_properties:
            # The prefix already ends with a dot (or is empty), so the key is appended
            # directly: adding another "." here produced prompts such as "a..b.c?"
            nested_content = _build_config_content_from_properties(
                config_file_property.nested_properties,
                properties_prefix=f"{properties_prefix}{property_key}.",
            )
            if nested_content:
                config_content[property_key] = nested_content
            continue

        default_value: str | None
        if config_file_property.default_value:
            default_value = config_file_property.default_value
        else:
            # We need to add an empty string default value for non-required fields,
            # so that the prompt accepts an empty answer for them
            default_value = None if config_file_property.required else ""

        property_value = click.prompt(
            f"{properties_prefix}{property_key}? {'(Optional)' if not config_file_property.required else ''}",
            type=str,
            default=default_value,
            # Only show a default that carries an actual value
            show_default=default_value is not None and default_value != "",
        )

        if property_value.strip():
            config_content[property_key] = property_value

    return config_content
123
+
124
+
125
def _group_supported_types_by_folder(all_plugin_types: set[DatasourceType]) -> dict[str, list[str]]:
    """
    Group the subtypes of the given datasource types by their main type.

    Returns a ``defaultdict(list)`` mapping each main type to the list of its
    subtypes, in the iteration order of ``all_plugin_types``.
    """
    subtypes_by_main_type = defaultdict(list)
    for plugin_type in all_plugin_types:
        main_type, subtype = plugin_type.main_type, plugin_type.subtype
        subtypes_by_main_type[main_type].append(subtype)

    return subtypes_by_main_type