databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118):
  1. databao_context_engine/__init__.py +32 -7
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +82 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
  8. databao_context_engine/cli/add_datasource_config.py +49 -44
  9. databao_context_engine/cli/commands.py +40 -55
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +127 -0
  12. databao_context_engine/databao_context_project_manager.py +147 -30
  13. databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
  14. databao_context_engine/datasources/datasource_context.py +90 -0
  15. databao_context_engine/datasources/datasource_discovery.py +143 -0
  16. databao_context_engine/datasources/types.py +194 -0
  17. databao_context_engine/generate_configs_schemas.py +4 -5
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +76 -57
  20. databao_context_engine/llm/__init__.py +10 -0
  21. databao_context_engine/llm/api.py +57 -0
  22. databao_context_engine/llm/descriptions/ollama.py +1 -3
  23. databao_context_engine/llm/errors.py +2 -8
  24. databao_context_engine/llm/factory.py +5 -2
  25. databao_context_engine/llm/install.py +26 -30
  26. databao_context_engine/llm/runtime.py +3 -5
  27. databao_context_engine/llm/service.py +1 -3
  28. databao_context_engine/mcp/mcp_runner.py +4 -2
  29. databao_context_engine/mcp/mcp_server.py +9 -11
  30. databao_context_engine/plugin_loader.py +110 -0
  31. databao_context_engine/pluginlib/build_plugin.py +12 -29
  32. databao_context_engine/pluginlib/config.py +16 -2
  33. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  34. databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
  35. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
  36. databao_context_engine/plugins/databases/base_introspector.py +11 -12
  37. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  38. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
  39. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  40. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
  41. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  42. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  43. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  44. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  45. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
  46. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  47. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
  48. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  49. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  50. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
  51. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  52. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  53. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
  54. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  55. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  56. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  57. databao_context_engine/plugins/duckdb_tools.py +18 -0
  58. databao_context_engine/plugins/files/__init__.py +0 -0
  59. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  60. databao_context_engine/plugins/plugin_loader.py +58 -52
  61. databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
  62. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  63. databao_context_engine/project/info.py +34 -2
  64. databao_context_engine/project/init_project.py +16 -7
  65. databao_context_engine/project/layout.py +14 -15
  66. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  67. databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
  68. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
  69. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
  70. databao_context_engine/serialization/__init__.py +0 -0
  71. databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
  72. databao_context_engine/services/chunk_embedding_service.py +23 -11
  73. databao_context_engine/services/factories.py +1 -46
  74. databao_context_engine/services/persistence_service.py +11 -11
  75. databao_context_engine/storage/connection.py +11 -7
  76. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  77. databao_context_engine/storage/migrate.py +3 -5
  78. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  79. databao_context_engine/storage/models.py +2 -23
  80. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  81. databao_context_engine/storage/repositories/factories.py +1 -12
  82. databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
  83. databao_context_engine/system/properties.py +4 -2
  84. databao_context_engine-0.1.5.dist-info/METADATA +228 -0
  85. databao_context_engine-0.1.5.dist-info/RECORD +135 -0
  86. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
  87. databao_context_engine/build_sources/internal/build_service.py +0 -77
  88. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  89. databao_context_engine/build_sources/internal/export_results.py +0 -43
  90. databao_context_engine/build_sources/public/api.py +0 -4
  91. databao_context_engine/databao_engine.py +0 -85
  92. databao_context_engine/datasource_config/add_config.py +0 -50
  93. databao_context_engine/datasource_config/datasource_context.py +0 -60
  94. databao_context_engine/mcp/all_results_tool.py +0 -5
  95. databao_context_engine/mcp/retrieve_tool.py +0 -22
  96. databao_context_engine/plugins/databases/athena_introspector.py +0 -101
  97. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  98. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  99. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  100. databao_context_engine/project/datasource_discovery.py +0 -141
  101. databao_context_engine/project/runs.py +0 -39
  102. databao_context_engine/project/types.py +0 -134
  103. databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
  104. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
  105. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  106. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  107. databao_context_engine/services/run_name_policy.py +0 -8
  108. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  109. databao_context_engine/storage/repositories/run_repository.py +0 -157
  110. databao_context_engine-0.1.1.dist-info/METADATA +0 -186
  111. databao_context_engine-0.1.1.dist-info/RECORD +0 -135
  112. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  113. /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
  114. /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
  115. /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
  116. /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
  117. /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
  118. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
@@ -1,16 +1,30 @@
1
- from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
1
+ from databao_context_engine.build_sources import BuildContextResult
2
+ from databao_context_engine.databao_context_engine import ContextSearchResult, DatabaoContextEngine
2
3
  from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
3
- from databao_context_engine.databao_engine import ContextSearchResult, DatabaoContextEngine
4
- from databao_context_engine.datasource_config.check_config import (
4
+ from databao_context_engine.datasources.check_config import (
5
5
  CheckDatasourceConnectionResult,
6
6
  DatasourceConnectionStatus,
7
7
  )
8
- from databao_context_engine.datasource_config.datasource_context import DatasourceContext
8
+ from databao_context_engine.datasources.datasource_context import DatasourceContext
9
+ from databao_context_engine.datasources.types import Datasource, DatasourceId
9
10
  from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
10
- from databao_context_engine.pluginlib.build_plugin import DatasourceType
11
- from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
11
+ from databao_context_engine.llm import (
12
+ OllamaError,
13
+ OllamaPermanentError,
14
+ OllamaTransientError,
15
+ download_ollama_models_if_needed,
16
+ install_ollama_if_needed,
17
+ )
18
+ from databao_context_engine.plugin_loader import DatabaoContextPluginLoader
19
+ from databao_context_engine.pluginlib.build_plugin import (
20
+ BuildDatasourcePlugin,
21
+ BuildFilePlugin,
22
+ BuildPlugin,
23
+ DatasourceType,
24
+ )
25
+ from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
26
+ from databao_context_engine.project.info import DceInfo, DceProjectInfo, get_databao_context_engine_info
12
27
  from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
13
- from databao_context_engine.project.types import Datasource, DatasourceId
14
28
  from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
15
29
 
16
30
  __all__ = [
@@ -28,8 +42,19 @@ __all__ = [
28
42
  "DatasourceType",
29
43
  "get_databao_context_engine_info",
30
44
  "DceInfo",
45
+ "DceProjectInfo",
31
46
  "init_dce_project",
32
47
  "init_or_get_dce_project",
33
48
  "InitErrorReason",
34
49
  "InitProjectError",
50
+ "DatabaoContextPluginLoader",
51
+ "ConfigPropertyDefinition",
52
+ "BuildPlugin",
53
+ "BuildDatasourcePlugin",
54
+ "BuildFilePlugin",
55
+ "install_ollama_if_needed",
56
+ "download_ollama_models_if_needed",
57
+ "OllamaError",
58
+ "OllamaTransientError",
59
+ "OllamaPermanentError",
35
60
  ]
@@ -0,0 +1,4 @@
1
+ from databao_context_engine.build_sources.build_runner import BuildContextResult
2
+ from databao_context_engine.build_sources.build_wiring import build_all_datasources
3
+
4
+ __all__ = ["build_all_datasources", "BuildContextResult"]
@@ -3,22 +3,32 @@ from dataclasses import dataclass
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
5
 
6
- from databao_context_engine.build_sources.internal.build_service import BuildService
7
- from databao_context_engine.build_sources.internal.export_results import (
6
+ from databao_context_engine.build_sources.build_service import BuildService
7
+ from databao_context_engine.build_sources.export_results import (
8
8
  append_result_to_all_results,
9
- create_run_dir,
10
9
  export_build_result,
10
+ reset_all_results,
11
11
  )
12
+ from databao_context_engine.datasources.datasource_discovery import discover_datasources, prepare_source
13
+ from databao_context_engine.datasources.types import DatasourceId
12
14
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
15
  from databao_context_engine.plugins.plugin_loader import load_plugins
14
- from databao_context_engine.project.datasource_discovery import discover_datasources, prepare_source
15
- from databao_context_engine.project.types import DatasourceId
16
+ from databao_context_engine.project.layout import ProjectLayout
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
20
 
20
21
  @dataclass
21
22
  class BuildContextResult:
23
+ """Result of a single datasource context build.
24
+
25
+ Attributes:
26
+ datasource_id: The id of the datasource.
27
+ datasource_type: The type of the datasource.
28
+ context_built_at: The timestamp when the context was built.
29
+ context_file_path: The path to the generated context file.
30
+ """
31
+
22
32
  datasource_id: DatasourceId
23
33
  datasource_type: DatasourceType
24
34
  context_built_at: datetime
@@ -26,33 +36,34 @@ class BuildContextResult:
26
36
 
27
37
 
28
38
  def build(
29
- project_dir: Path,
39
+ project_layout: ProjectLayout,
30
40
  *,
31
41
  build_service: BuildService,
32
- project_id: str,
33
- dce_version: str,
34
42
  ) -> list[BuildContextResult]:
35
- """
36
- Build entrypoint.
43
+ """Build the context for all datasources in the project.
44
+
45
+ Unless you already have access to BuildService, this should not be called directly.
46
+ Instead, internal callers should go through the build_wiring module or directly use DatabaoContextProjectManager.build_context().
37
47
 
38
48
  1) Load available plugins
39
49
  2) Discover sources
40
50
  3) Create a run
41
51
  4) For each source, call process_source
52
+
53
+ Returns:
54
+ A list of all the contexts built.
42
55
  """
43
56
  plugins = load_plugins()
44
57
 
45
- datasources = discover_datasources(project_dir)
58
+ datasources = discover_datasources(project_layout)
46
59
 
47
60
  if not datasources:
48
- logger.info("No sources discovered under %s", project_dir)
61
+ logger.info("No sources discovered under %s", project_layout.src_dir)
49
62
  return []
50
63
 
51
- run = None
52
- run_dir = None
53
-
54
64
  number_of_failed_builds = 0
55
65
  build_result = []
66
+ reset_all_results(project_layout.output_dir)
56
67
  for discovered_datasource in datasources:
57
68
  try:
58
69
  prepared_source = prepare_source(discovered_datasource)
@@ -71,23 +82,19 @@ def build(
71
82
  number_of_failed_builds += 1
72
83
  continue
73
84
 
74
- if run is None or run_dir is None:
75
- # Initialiase the run as soon as we found a source
76
- run = build_service.start_run(project_id=project_id, dce_version=dce_version)
77
- run_dir = create_run_dir(project_dir, run.run_name)
78
-
79
85
  result = build_service.process_prepared_source(
80
- run_id=run.run_id,
81
86
  prepared_source=prepared_source,
82
87
  plugin=plugin,
83
88
  )
84
89
 
85
- context_file_path = export_build_result(run_dir, result)
86
- append_result_to_all_results(run_dir, result)
90
+ output_dir = project_layout.output_dir
91
+
92
+ context_file_path = export_build_result(output_dir, result)
93
+ append_result_to_all_results(output_dir, result)
87
94
 
88
95
  build_result.append(
89
96
  BuildContextResult(
90
- datasource_id=DatasourceId.from_string_repr(result.datasource_id),
97
+ datasource_id=discovered_datasource.datasource_id,
91
98
  datasource_type=DatasourceType(full_type=result.datasource_type),
92
99
  context_built_at=result.context_built_at,
93
100
  context_file_path=context_file_path,
@@ -99,9 +106,6 @@ def build(
99
106
 
100
107
  number_of_failed_builds += 1
101
108
 
102
- if run is not None:
103
- build_service.finalize_run(run_id=run.run_id)
104
-
105
109
  logger.debug(
106
110
  "Successfully built %d datasources. %s",
107
111
  len(build_result),
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext, execute
6
+ from databao_context_engine.datasources.types import PreparedDatasource
7
+ from databao_context_engine.pluginlib.build_plugin import (
8
+ BuildPlugin,
9
+ )
10
+ from databao_context_engine.serialization.yaml import to_yaml_string
11
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class BuildService:
17
+ def __init__(
18
+ self,
19
+ *,
20
+ chunk_embedding_service: ChunkEmbeddingService,
21
+ ) -> None:
22
+ self._chunk_embedding_service = chunk_embedding_service
23
+
24
+ def process_prepared_source(
25
+ self,
26
+ *,
27
+ prepared_source: PreparedDatasource,
28
+ plugin: BuildPlugin,
29
+ ) -> BuiltDatasourceContext:
30
+ """Process a single source to build its context.
31
+
32
+ 1) Execute the plugin
33
+ 2) Divide the results into chunks
34
+ 3) Embed and persist the chunks
35
+
36
+ Returns:
37
+ The built context.
38
+ """
39
+ result = execute(prepared_source, plugin)
40
+
41
+ chunks = plugin.divide_context_into_chunks(result.context)
42
+ if not chunks:
43
+ logger.info("No chunks for %s — skipping.", prepared_source.path.name)
44
+ return result
45
+
46
+ self._chunk_embedding_service.embed_chunks(
47
+ chunks=chunks,
48
+ result=to_yaml_string(result.context),
49
+ full_type=prepared_source.datasource_type.full_type,
50
+ datasource_id=result.datasource_id,
51
+ )
52
+
53
+ return result
@@ -0,0 +1,82 @@
1
+ import logging
2
+
3
+ from duckdb import DuckDBPyConnection
4
+
5
+ from databao_context_engine.build_sources.build_runner import BuildContextResult, build
6
+ from databao_context_engine.build_sources.build_service import BuildService
7
+ from databao_context_engine.llm.descriptions.provider import DescriptionProvider
8
+ from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
9
+ from databao_context_engine.llm.factory import (
10
+ create_ollama_description_provider,
11
+ create_ollama_embedding_provider,
12
+ create_ollama_service,
13
+ )
14
+ from databao_context_engine.project.layout import ProjectLayout
15
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
16
+ from databao_context_engine.services.factories import create_chunk_embedding_service
17
+ from databao_context_engine.storage.connection import open_duckdb_connection
18
+ from databao_context_engine.storage.migrate import migrate
19
+ from databao_context_engine.system.properties import get_db_path
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ def build_all_datasources(
25
+ project_layout: ProjectLayout, chunk_embedding_mode: ChunkEmbeddingMode
26
+ ) -> list[BuildContextResult]:
27
+ """Build the context for all datasources in the project.
28
+
29
+ - Instantiates the build service
30
+ - Delegates the actual build logic to the build runner
31
+
32
+ Returns:
33
+ A list of all the contexts built.
34
+ """
35
+ logger.debug(f"Starting to build datasources in project {project_layout.project_dir.resolve()}")
36
+
37
+ # Think about alternative solutions. This solution will mirror the current behaviour
38
+ # The current behaviour only builds what is currently in the /src folder
39
+ # This will need to change in the future when we can pick which datasources to build
40
+ db_path = get_db_path(project_layout.project_dir)
41
+ db_path.parent.mkdir(parents=True, exist_ok=True)
42
+ if db_path.exists():
43
+ db_path.unlink()
44
+
45
+ migrate(db_path)
46
+ with open_duckdb_connection(db_path) as conn:
47
+ ollama_service = create_ollama_service()
48
+ embedding_provider = create_ollama_embedding_provider(ollama_service)
49
+ description_provider = (
50
+ create_ollama_description_provider(ollama_service)
51
+ if chunk_embedding_mode.should_generate_description()
52
+ else None
53
+ )
54
+ build_service = _create_build_service(
55
+ conn,
56
+ embedding_provider=embedding_provider,
57
+ description_provider=description_provider,
58
+ chunk_embedding_mode=chunk_embedding_mode,
59
+ )
60
+ return build(
61
+ project_layout=project_layout,
62
+ build_service=build_service,
63
+ )
64
+
65
+
66
+ def _create_build_service(
67
+ conn: DuckDBPyConnection,
68
+ *,
69
+ embedding_provider: EmbeddingProvider,
70
+ description_provider: DescriptionProvider | None,
71
+ chunk_embedding_mode: ChunkEmbeddingMode,
72
+ ) -> BuildService:
73
+ chunk_embedding_service = create_chunk_embedding_service(
74
+ conn,
75
+ embedding_provider=embedding_provider,
76
+ description_provider=description_provider,
77
+ chunk_embedding_mode=chunk_embedding_mode,
78
+ )
79
+
80
+ return BuildService(
81
+ chunk_embedding_service=chunk_embedding_service,
82
+ )
@@ -0,0 +1,41 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
5
+ from databao_context_engine.datasources.datasource_context import get_context_header_for_datasource
6
+ from databao_context_engine.datasources.types import DatasourceId
7
+ from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME
8
+ from databao_context_engine.serialization.yaml import write_yaml_to_stream
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ def export_build_result(output_dir: Path, result: BuiltDatasourceContext) -> Path:
14
+ datasource_id = DatasourceId.from_string_repr(result.datasource_id)
15
+ export_file_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
16
+
17
+ # Make sure the parent folder exists
18
+ export_file_path.parent.mkdir(parents=True, exist_ok=True)
19
+
20
+ with export_file_path.open("w") as export_file:
21
+ write_yaml_to_stream(data=result, file_stream=export_file)
22
+
23
+ logger.info(f"Exported result to {export_file_path.resolve()}")
24
+
25
+ return export_file_path
26
+
27
+
28
+ def append_result_to_all_results(output_dir: Path, result: BuiltDatasourceContext):
29
+ path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
30
+ path.parent.mkdir(parents=True, exist_ok=True)
31
+ with path.open("a", encoding="utf-8") as export_file:
32
+ export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
33
+ write_yaml_to_stream(data=result, file_stream=export_file)
34
+ export_file.write("\n")
35
+
36
+
37
+ # Here temporarily because it needs to be reset between runs.
38
+ # A subsequent PR will remove the existence of the all_results file
39
+ def reset_all_results(output_dir: Path):
40
+ path = output_dir.joinpath(ALL_RESULTS_FILE_NAME)
41
+ path.unlink(missing_ok=True)
@@ -2,20 +2,18 @@ from dataclasses import dataclass
2
2
  from datetime import datetime
3
3
  from typing import Any, cast
4
4
 
5
+ from databao_context_engine.datasources.types import PreparedConfig, PreparedDatasource
5
6
  from databao_context_engine.pluginlib.build_plugin import (
6
7
  BuildDatasourcePlugin,
7
8
  BuildFilePlugin,
8
9
  BuildPlugin,
9
10
  )
10
11
  from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
11
- from databao_context_engine.project.types import PreparedConfig, PreparedDatasource, DatasourceId
12
12
 
13
13
 
14
14
  @dataclass()
15
15
  class BuiltDatasourceContext:
16
- """
17
- Dataclass defining the result of building a datasource's context.
18
- """
16
+ """Dataclass defining the result of building a datasource's context."""
19
17
 
20
18
  datasource_id: str
21
19
  """
@@ -41,11 +39,8 @@ class BuiltDatasourceContext:
41
39
 
42
40
  def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> BuiltDatasourceContext:
43
41
  built_context = _execute(prepared_datasource, plugin)
44
-
45
- datasource_id = DatasourceId.from_datasource_config_file_path(prepared_datasource.path)
46
-
47
42
  return BuiltDatasourceContext(
48
- datasource_id=str(datasource_id),
43
+ datasource_id=str(prepared_datasource.datasource_id),
49
44
  datasource_type=prepared_datasource.datasource_type.full_type,
50
45
  context_built_at=datetime.now(),
51
46
  context=built_context,
@@ -53,9 +48,7 @@ def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Bui
53
48
 
54
49
 
55
50
  def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
56
- """
57
- Run a prepared source through the plugin
58
- """
51
+ """Run a prepared source through the plugin."""
59
52
  if isinstance(prepared_datasource, PreparedConfig):
60
53
  ds_plugin = cast(BuildDatasourcePlugin, plugin)
61
54
 
@@ -65,10 +58,10 @@ def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> An
65
58
  config=prepared_datasource.config,
66
59
  datasource_name=prepared_datasource.datasource_name,
67
60
  )
68
- else:
69
- file_plugin = cast(BuildFilePlugin, plugin)
70
- return execute_file_plugin(
71
- plugin=file_plugin,
72
- datasource_type=prepared_datasource.datasource_type,
73
- file_path=prepared_datasource.path,
74
- )
61
+
62
+ file_plugin = cast(BuildFilePlugin, plugin)
63
+ return execute_file_plugin(
64
+ plugin=file_plugin,
65
+ datasource_type=prepared_datasource.datasource_type,
66
+ file_path=prepared_datasource.path,
67
+ )
@@ -1,39 +1,43 @@
1
1
  import os
2
- from collections import defaultdict
3
2
  from pathlib import Path
4
3
  from typing import Any
5
4
 
6
5
  import click
7
6
 
8
- from databao_context_engine import DatabaoContextProjectManager, DatasourceId, DatasourceType
9
- from databao_context_engine.datasource_config.add_config import (
10
- get_config_file_structure_for_datasource_type,
7
+ from databao_context_engine import (
8
+ ConfigPropertyDefinition,
9
+ DatabaoContextPluginLoader,
10
+ DatabaoContextProjectManager,
11
+ DatasourceId,
12
+ DatasourceType,
11
13
  )
12
- from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
13
- from databao_context_engine.plugins.plugin_loader import get_all_available_plugin_types
14
+ from databao_context_engine.pluginlib.config import ConfigUnionPropertyDefinition
14
15
 
15
16
 
16
17
  def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
17
18
  project_manager = DatabaoContextProjectManager(project_dir=project_dir)
19
+ plugin_loader = DatabaoContextPluginLoader()
18
20
 
19
21
  click.echo(
20
22
  f"We will guide you to add a new datasource in your Databao Context Engine project, at {project_dir.resolve()}"
21
23
  )
22
24
 
23
- datasource_type = _ask_for_datasource_type()
25
+ datasource_type = _ask_for_datasource_type(
26
+ plugin_loader.get_all_supported_datasource_types(exclude_file_plugins=True)
27
+ )
24
28
  datasource_name = click.prompt("Datasource name?", type=str)
25
29
 
26
- is_datasource_existing = project_manager.datasource_config_exists(
27
- datasource_type=datasource_type, datasource_name=datasource_name
28
- )
29
- if is_datasource_existing:
30
+ datasource_id = project_manager.datasource_config_exists(datasource_name=datasource_name)
31
+ if datasource_id is not None:
30
32
  click.confirm(
31
- f"A config file already exists for this datasource ({datasource_type.config_folder}/{datasource_name}). Do you want to overwrite it?",
33
+ f"A config file already exists for this datasource {datasource_id.relative_path_to_config_file()}. Do you want to overwrite it?",
32
34
  abort=True,
33
35
  default=False,
34
36
  )
35
37
 
36
- config_content = _ask_for_config_details(datasource_type)
38
+ config_content = _ask_for_config_details(
39
+ plugin_loader.get_config_file_structure_for_datasource_type(datasource_type)
40
+ )
37
41
 
38
42
  config_file = project_manager.create_datasource_config(
39
43
  datasource_type, datasource_name, config_content, overwrite_existing=True
@@ -44,33 +48,19 @@ def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
44
48
  return config_file.datasource_id
45
49
 
46
50
 
47
- def _ask_for_datasource_type() -> DatasourceType:
48
- supported_types_by_folder = _group_supported_types_by_folder(
49
- get_all_available_plugin_types(exclude_file_plugins=True)
50
- )
51
-
52
- all_config_folders = sorted(supported_types_by_folder.keys())
53
- config_folder = click.prompt(
54
- "What type of datasource do you want to add?",
55
- type=click.Choice(all_config_folders),
56
- default=all_config_folders[0] if len(all_config_folders) == 1 else None,
57
- )
58
- click.echo(f"Selected type: {config_folder}")
59
-
60
- all_subtypes_for_folder = sorted(supported_types_by_folder[config_folder])
51
+ def _ask_for_datasource_type(supported_datasource_types: set[DatasourceType]) -> DatasourceType:
52
+ all_datasource_types = sorted([ds_type.full_type for ds_type in supported_datasource_types])
61
53
  config_type = click.prompt(
62
- "What is the subtype of this datasource?",
63
- type=click.Choice(all_subtypes_for_folder),
64
- default=all_subtypes_for_folder[0] if len(all_subtypes_for_folder) == 1 else None,
54
+ "What type of datasource do you want to add?",
55
+ type=click.Choice(all_datasource_types),
56
+ default=all_datasource_types[0] if len(all_datasource_types) == 1 else None,
65
57
  )
66
- click.echo(f"Selected subtype: {config_type}")
58
+ click.echo(f"Selected type: {config_type}")
67
59
 
68
- return DatasourceType.from_main_and_subtypes(config_folder, config_type)
60
+ return DatasourceType(full_type=config_type)
69
61
 
70
62
 
71
- def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
72
- config_file_structure = get_config_file_structure_for_datasource_type(datasource_type)
73
-
63
+ def _ask_for_config_details(config_file_structure: list[ConfigPropertyDefinition]) -> dict[str, Any]:
74
64
  # Adds a new line before asking for the config values specific to the plugin
75
65
  click.echo("")
76
66
 
@@ -84,13 +74,35 @@ def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
84
74
 
85
75
 
86
76
  def _build_config_content_from_properties(
87
- properties: list[ConfigPropertyDefinition], properties_prefix: str
77
+ properties: list[ConfigPropertyDefinition], properties_prefix: str, in_union: bool = False
88
78
  ) -> dict[str, Any]:
89
79
  config_content: dict[str, Any] = {}
90
80
  for config_file_property in properties:
91
81
  if config_file_property.property_key in ["type", "name"] and len(properties_prefix) == 0:
92
82
  # We ignore type and name properties as they've already been filled
93
83
  continue
84
+ if in_union and config_file_property.property_key == "type":
85
+ continue
86
+
87
+ if isinstance(config_file_property, ConfigUnionPropertyDefinition):
88
+ choices = {t.__name__: t for t in config_file_property.types}
89
+
90
+ chosen = click.prompt(
91
+ f"{properties_prefix}{config_file_property.property_key}.type?",
92
+ type=click.Choice(sorted(choices.keys())),
93
+ )
94
+
95
+ chosen_type = choices[chosen]
96
+
97
+ nested_props = config_file_property.type_properties[chosen_type]
98
+ nested_content = _build_config_content_from_properties(
99
+ nested_props, f"{properties_prefix}{config_file_property.property_key}.", in_union=True
100
+ )
101
+
102
+ config_content[config_file_property.property_key] = {
103
+ **nested_content,
104
+ }
105
+ continue
94
106
 
95
107
  if config_file_property.nested_properties is not None and len(config_file_property.nested_properties) > 0:
96
108
  nested_content = _build_config_content_from_properties(
@@ -114,17 +126,10 @@ def _build_config_content_from_properties(
114
126
  type=str,
115
127
  default=default_value,
116
128
  show_default=default_value is not None and default_value != "",
129
+ hide_input=config_file_property.secret,
117
130
  )
118
131
 
119
132
  if property_value.strip():
120
133
  config_content[config_file_property.property_key] = property_value
121
134
 
122
135
  return config_content
123
-
124
-
125
- def _group_supported_types_by_folder(all_plugin_types: set[DatasourceType]) -> dict[str, list[str]]:
126
- main_to_subtypes = defaultdict(list)
127
- for datasource_type in all_plugin_types:
128
- main_to_subtypes[datasource_type.main_type].append(datasource_type.subtype)
129
-
130
- return main_to_subtypes