databao-context-engine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +35 -0
- databao_context_engine/build_sources/__init__.py +0 -0
- databao_context_engine/build_sources/internal/__init__.py +0 -0
- databao_context_engine/build_sources/internal/build_runner.py +111 -0
- databao_context_engine/build_sources/internal/build_service.py +77 -0
- databao_context_engine/build_sources/internal/build_wiring.py +52 -0
- databao_context_engine/build_sources/internal/export_results.py +43 -0
- databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
- databao_context_engine/build_sources/public/__init__.py +0 -0
- databao_context_engine/build_sources/public/api.py +4 -0
- databao_context_engine/cli/__init__.py +0 -0
- databao_context_engine/cli/add_datasource_config.py +130 -0
- databao_context_engine/cli/commands.py +256 -0
- databao_context_engine/cli/datasources.py +64 -0
- databao_context_engine/cli/info.py +32 -0
- databao_context_engine/config/__init__.py +0 -0
- databao_context_engine/config/log_config.yaml +16 -0
- databao_context_engine/config/logging.py +43 -0
- databao_context_engine/databao_context_project_manager.py +92 -0
- databao_context_engine/databao_engine.py +85 -0
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +50 -0
- databao_context_engine/datasource_config/check_config.py +131 -0
- databao_context_engine/datasource_config/datasource_context.py +60 -0
- databao_context_engine/event_journal/__init__.py +0 -0
- databao_context_engine/event_journal/writer.py +29 -0
- databao_context_engine/generate_configs_schemas.py +92 -0
- databao_context_engine/init_project.py +18 -0
- databao_context_engine/introspection/__init__.py +0 -0
- databao_context_engine/introspection/property_extract.py +202 -0
- databao_context_engine/llm/__init__.py +0 -0
- databao_context_engine/llm/config.py +20 -0
- databao_context_engine/llm/descriptions/__init__.py +0 -0
- databao_context_engine/llm/descriptions/ollama.py +21 -0
- databao_context_engine/llm/descriptions/provider.py +10 -0
- databao_context_engine/llm/embeddings/__init__.py +0 -0
- databao_context_engine/llm/embeddings/ollama.py +37 -0
- databao_context_engine/llm/embeddings/provider.py +13 -0
- databao_context_engine/llm/errors.py +16 -0
- databao_context_engine/llm/factory.py +61 -0
- databao_context_engine/llm/install.py +227 -0
- databao_context_engine/llm/runtime.py +73 -0
- databao_context_engine/llm/service.py +159 -0
- databao_context_engine/main.py +19 -0
- databao_context_engine/mcp/__init__.py +0 -0
- databao_context_engine/mcp/all_results_tool.py +5 -0
- databao_context_engine/mcp/mcp_runner.py +16 -0
- databao_context_engine/mcp/mcp_server.py +63 -0
- databao_context_engine/mcp/retrieve_tool.py +22 -0
- databao_context_engine/pluginlib/__init__.py +0 -0
- databao_context_engine/pluginlib/build_plugin.py +107 -0
- databao_context_engine/pluginlib/config.py +37 -0
- databao_context_engine/pluginlib/plugin_utils.py +68 -0
- databao_context_engine/plugins/__init__.py +0 -0
- databao_context_engine/plugins/athena_db_plugin.py +12 -0
- databao_context_engine/plugins/base_db_plugin.py +45 -0
- databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/__init__.py +0 -0
- databao_context_engine/plugins/databases/athena_introspector.py +101 -0
- databao_context_engine/plugins/databases/base_introspector.py +144 -0
- databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
- databao_context_engine/plugins/databases/database_chunker.py +69 -0
- databao_context_engine/plugins/databases/databases_types.py +114 -0
- databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
- databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
- databao_context_engine/plugins/databases/introspection_scope.py +74 -0
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
- databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
- databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
- databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
- databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
- databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/mssql_db_plugin.py +12 -0
- databao_context_engine/plugins/mysql_db_plugin.py +12 -0
- databao_context_engine/plugins/parquet_plugin.py +32 -0
- databao_context_engine/plugins/plugin_loader.py +110 -0
- databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
- databao_context_engine/plugins/resources/__init__.py +0 -0
- databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
- databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
- databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
- databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
- databao_context_engine/project/__init__.py +0 -0
- databao_context_engine/project/datasource_discovery.py +141 -0
- databao_context_engine/project/info.py +44 -0
- databao_context_engine/project/init_project.py +102 -0
- databao_context_engine/project/layout.py +127 -0
- databao_context_engine/project/project_config.py +32 -0
- databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
- databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
- databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
- databao_context_engine/project/runs.py +39 -0
- databao_context_engine/project/types.py +134 -0
- databao_context_engine/retrieve_embeddings/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +3 -0
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/serialisation/yaml.py +35 -0
- databao_context_engine/services/__init__.py +0 -0
- databao_context_engine/services/chunk_embedding_service.py +104 -0
- databao_context_engine/services/embedding_shard_resolver.py +64 -0
- databao_context_engine/services/factories.py +88 -0
- databao_context_engine/services/models.py +12 -0
- databao_context_engine/services/persistence_service.py +61 -0
- databao_context_engine/services/run_name_policy.py +8 -0
- databao_context_engine/services/table_name_policy.py +15 -0
- databao_context_engine/storage/__init__.py +0 -0
- databao_context_engine/storage/connection.py +32 -0
- databao_context_engine/storage/exceptions/__init__.py +0 -0
- databao_context_engine/storage/exceptions/exceptions.py +6 -0
- databao_context_engine/storage/migrate.py +127 -0
- databao_context_engine/storage/migrations/V01__init.sql +63 -0
- databao_context_engine/storage/models.py +51 -0
- databao_context_engine/storage/repositories/__init__.py +0 -0
- databao_context_engine/storage/repositories/chunk_repository.py +130 -0
- databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
- databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
- databao_context_engine/storage/repositories/embedding_repository.py +113 -0
- databao_context_engine/storage/repositories/factories.py +35 -0
- databao_context_engine/storage/repositories/run_repository.py +157 -0
- databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
- databao_context_engine/storage/transaction.py +14 -0
- databao_context_engine/system/__init__.py +0 -0
- databao_context_engine/system/properties.py +13 -0
- databao_context_engine/templating/__init__.py +0 -0
- databao_context_engine/templating/renderer.py +29 -0
- databao_context_engine-0.1.1.dist-info/METADATA +186 -0
- databao_context_engine-0.1.1.dist-info/RECORD +135 -0
- databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
- databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
|
|
2
|
+
from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
|
|
3
|
+
from databao_context_engine.databao_engine import ContextSearchResult, DatabaoContextEngine
|
|
4
|
+
from databao_context_engine.datasource_config.check_config import (
|
|
5
|
+
CheckDatasourceConnectionResult,
|
|
6
|
+
DatasourceConnectionStatus,
|
|
7
|
+
)
|
|
8
|
+
from databao_context_engine.datasource_config.datasource_context import DatasourceContext
|
|
9
|
+
from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
|
|
10
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
11
|
+
from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
|
|
12
|
+
from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
|
|
13
|
+
from databao_context_engine.project.types import Datasource, DatasourceId
|
|
14
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"DatabaoContextEngine",
|
|
18
|
+
"Datasource",
|
|
19
|
+
"DatasourceId",
|
|
20
|
+
"DatasourceContext",
|
|
21
|
+
"ContextSearchResult",
|
|
22
|
+
"DatabaoContextProjectManager",
|
|
23
|
+
"ChunkEmbeddingMode",
|
|
24
|
+
"BuildContextResult",
|
|
25
|
+
"CheckDatasourceConnectionResult",
|
|
26
|
+
"DatasourceConnectionStatus",
|
|
27
|
+
"DatasourceConfigFile",
|
|
28
|
+
"DatasourceType",
|
|
29
|
+
"get_databao_context_engine_info",
|
|
30
|
+
"DceInfo",
|
|
31
|
+
"init_dce_project",
|
|
32
|
+
"init_or_get_dce_project",
|
|
33
|
+
"InitErrorReason",
|
|
34
|
+
"InitProjectError",
|
|
35
|
+
]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.build_sources.internal.build_service import BuildService
|
|
7
|
+
from databao_context_engine.build_sources.internal.export_results import (
|
|
8
|
+
append_result_to_all_results,
|
|
9
|
+
create_run_dir,
|
|
10
|
+
export_build_result,
|
|
11
|
+
)
|
|
12
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
13
|
+
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
14
|
+
from databao_context_engine.project.datasource_discovery import discover_datasources, prepare_source
|
|
15
|
+
from databao_context_engine.project.types import DatasourceId
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class BuildContextResult:
    """
    Outcome of building the context of a single datasource.

    Attributes:
        datasource_id: Identifier of the datasource whose context was built.
        datasource_type: Type of the datasource whose context was built.
        context_built_at: Time at which the context was built.
        context_file_path: Path of the file the built context was exported to.
    """

    datasource_id: DatasourceId
    datasource_type: DatasourceType
    context_built_at: datetime
    context_file_path: Path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build(
    project_dir: Path,
    *,
    build_service: BuildService,
    project_id: str,
    dce_version: str,
) -> list[BuildContextResult]:
    """
    Build entrypoint.

    1) Load available plugins
    2) Discover sources under ``project_dir``
    3) Create a run (and its output directory) once the first source with a matching plugin is found
    4) For each source, call ``build_service.process_prepared_source`` and export the result

    A source without a matching plugin is skipped, and a source raising an exception is logged;
    both are counted as failed builds and neither stops the remaining sources from being processed.

    Returns one ``BuildContextResult`` per source that was built and exported
    (an empty list when no source is discovered).
    """
    available_plugins = load_plugins()
    discovered = discover_datasources(project_dir)

    if not discovered:
        logger.info("No sources discovered under %s", project_dir)
        return []

    current_run = None
    current_run_dir = None

    failed_count = 0
    built_contexts = []
    for candidate in discovered:
        try:
            source = prepare_source(candidate)

            logger.info(
                f'Found datasource of type "{source.datasource_type.full_type}" with name {source.path.stem}'
            )

            matching_plugin = available_plugins.get(source.datasource_type)
            if matching_plugin is None:
                logger.warning(
                    "No plugin for '%s' (datasource=%s) — skipping.",
                    source.datasource_type.full_type,
                    source.path,
                )
                failed_count += 1
                continue

            if current_run is None or current_run_dir is None:
                # Initialise the run as soon as we found a source
                current_run = build_service.start_run(project_id=project_id, dce_version=dce_version)
                current_run_dir = create_run_dir(project_dir, current_run.run_name)

            built = build_service.process_prepared_source(
                run_id=current_run.run_id,
                prepared_source=source,
                plugin=matching_plugin,
            )

            # The result is written twice: to its own file and to the run's aggregated results file
            exported_path = export_build_result(current_run_dir, built)
            append_result_to_all_results(current_run_dir, built)

            summary = BuildContextResult(
                datasource_id=DatasourceId.from_string_repr(built.datasource_id),
                datasource_type=DatasourceType(full_type=built.datasource_type),
                context_built_at=built.context_built_at,
                context_file_path=exported_path,
            )
            built_contexts.append(summary)
        except Exception as error:
            # Full traceback only at debug level; the info message stays short
            logger.debug(str(error), exc_info=True, stack_info=True)
            logger.info(f"Failed to build source at ({candidate.path}): {str(error)}")

            failed_count += 1

    if current_run is not None:
        build_service.finalize_run(run_id=current_run.run_id)

    failure_note = f"Failed to build {failed_count}." if failed_count > 0 else ""
    logger.debug("Successfully built %d datasources. %s", len(built_contexts), failure_note)

    return built_contexts
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
|
|
7
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
8
|
+
BuildPlugin,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.project.types import PreparedDatasource
|
|
11
|
+
from databao_context_engine.serialisation.yaml import to_yaml_string
|
|
12
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
|
|
13
|
+
from databao_context_engine.storage.models import RunDTO
|
|
14
|
+
from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
|
|
15
|
+
from databao_context_engine.storage.repositories.run_repository import RunRepository
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BuildService:
    """
    Service used by the build runner to track runs and to process prepared sources
    (plugin execution, chunking, embedding).
    """

    def __init__(
        self,
        *,
        run_repo: RunRepository,
        datasource_run_repo: DatasourceRunRepository,
        chunk_embedding_service: ChunkEmbeddingService,
    ) -> None:
        self._chunk_embedding_service = chunk_embedding_service
        self._datasource_run_repo = datasource_run_repo
        self._run_repo = run_repo

    def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
        """
        Create a new run row and return it as a ``RunDTO``.
        """
        created_run = self._run_repo.create(project_id=project_id, dce_version=dce_version)
        return created_run

    def finalize_run(self, *, run_id: int):
        """
        Mark the run as complete (sets ended_at to the current time).
        """
        finished_at = datetime.now()
        self._run_repo.update(run_id=run_id, ended_at=finished_at)

    def process_prepared_source(
        self,
        *,
        run_id: int,
        prepared_source: PreparedDatasource,
        plugin: BuildPlugin,
    ) -> BuiltDatasourceContext:
        """
        Process a single source.

        1) Execute the plugin
        2) Divide the results into chunks
        3) Embed and persist the chunks

        When the plugin produces no chunks, no datasource run is recorded and nothing is embedded.
        The built context is returned in both cases.
        """
        built = execute(prepared_source, plugin)
        chunks = plugin.divide_context_into_chunks(built.context)

        if chunks:
            datasource_run = self._datasource_run_repo.create(
                run_id=run_id,
                plugin=plugin.name,
                full_type=prepared_source.datasource_type.full_type,
                source_id=built.datasource_id,
                storage_directory=str(prepared_source.path.parent),
            )

            self._chunk_embedding_service.embed_chunks(
                datasource_run_id=datasource_run.datasource_run_id,
                chunks=chunks,
                result=to_yaml_string(built.context),
            )
        else:
            logger.info("No chunks for %s — skipping.", prepared_source.path.name)

        return built
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
|
|
5
|
+
from databao_context_engine.llm.factory import (
|
|
6
|
+
create_ollama_description_provider,
|
|
7
|
+
create_ollama_embedding_provider,
|
|
8
|
+
create_ollama_service,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.project.info import get_dce_version
|
|
11
|
+
from databao_context_engine.project.layout import ensure_project_dir
|
|
12
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
13
|
+
from databao_context_engine.services.factories import (
|
|
14
|
+
create_build_service,
|
|
15
|
+
)
|
|
16
|
+
from databao_context_engine.storage.connection import open_duckdb_connection
|
|
17
|
+
from databao_context_engine.system.properties import get_db_path
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
    """
    Public build entrypoint

    - Resolves the project layout from ``project_dir``
    - Instantiates the build service on top of a DuckDB connection and the Ollama providers
    - Delegates the actual build logic to the build runner

    The DuckDB connection is only open for the duration of the build.
    """
    layout = ensure_project_dir(project_dir)

    logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")

    with open_duckdb_connection(get_db_path()) as conn:
        ollama = create_ollama_service()
        embeddings = create_ollama_embedding_provider(ollama)

        # A description provider is only created when the embedding mode asks for generated descriptions
        descriptions = None
        if chunk_embedding_mode.should_generate_description():
            descriptions = create_ollama_description_provider(ollama)

        service = create_build_service(
            conn,
            embedding_provider=embeddings,
            description_provider=descriptions,
            chunk_embedding_mode=chunk_embedding_mode,
        )
        project_config = layout.read_config_file()

        return build(
            project_dir=project_dir,
            build_service=service,
            project_id=str(project_config.project_id),
            dce_version=get_dce_version(),
        )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext
|
|
5
|
+
from databao_context_engine.datasource_config.datasource_context import get_context_header_for_datasource
|
|
6
|
+
from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME, get_output_dir
|
|
7
|
+
from databao_context_engine.project.types import DatasourceId
|
|
8
|
+
from databao_context_engine.serialisation.yaml import write_yaml_to_stream
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_run_dir(project_dir: Path, run_name: str) -> Path:
    """
    Create the directory of a run inside the project's output directory and return its path.

    Missing parent directories are created. The run directory itself must not exist yet:
    a ``FileExistsError`` is raised otherwise.
    """
    run_dir = get_output_dir(project_dir).joinpath(run_name)
    run_dir.mkdir(parents=True, exist_ok=False)
    return run_dir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def export_build_result(run_dir: Path, result: BuiltDatasourceContext) -> Path:
    """
    Write the build result of a datasource as YAML into the run directory.

    The target file is ``run_dir`` joined with the datasource's relative context file path.

    Args:
        run_dir: Directory of the current run.
        result: The built context to export.

    Returns:
        The path of the file the result was written to.
    """
    datasource_id = DatasourceId.from_string_repr(result.datasource_id)
    export_file_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())

    # Make sure the parent folder exists, including any intermediate folder of a nested relative path
    export_file_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8, like the aggregated results file, so the output doesn't depend on the platform locale
    with export_file_path.open("w", encoding="utf-8") as export_file:
        write_yaml_to_stream(data=result, file_stream=export_file)

    logger.info(f"Exported result to {export_file_path.resolve()}")

    return export_file_path
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def append_result_to_all_results(run_dir: Path, result: BuiltDatasourceContext):
    """
    Append the build result of a datasource to the run's aggregated results file.

    Each appended entry is made of the context header of the datasource, the result
    serialised as YAML and a trailing new line. The file is written as UTF-8.
    """
    all_results_path = run_dir.joinpath(ALL_RESULTS_FILE_NAME)
    all_results_path.parent.mkdir(parents=True, exist_ok=True)

    with all_results_path.open("a", encoding="utf-8") as stream:
        datasource_id = DatasourceId.from_string_repr(result.datasource_id)
        stream.write(get_context_header_for_datasource(datasource_id))
        write_yaml_to_stream(data=result, file_stream=stream)
        stream.write("\n")
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
6
|
+
BuildDatasourcePlugin,
|
|
7
|
+
BuildFilePlugin,
|
|
8
|
+
BuildPlugin,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
|
|
11
|
+
from databao_context_engine.project.types import PreparedConfig, PreparedDatasource, DatasourceId
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class BuiltDatasourceContext:
    """
    Dataclass defining the result of building a datasource's context.

    Attributes:
        datasource_id: The ID of the built data source.
        datasource_type: The type of the built data source.
        context_built_at: The time at which the context was built.
        context: A dictionary containing the actual context generated for the data source.
            This dictionary should be serializable in YAML format.
    """

    datasource_id: str
    datasource_type: str
    context_built_at: datetime
    context: Any
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> BuiltDatasourceContext:
    """
    Run a prepared source through the plugin and wrap the generated context with its metadata.

    The datasource id is derived from the path of the prepared source, and the build time
    is taken once the plugin has finished running.
    """
    generated_context = _execute(prepared_datasource, plugin)
    source_id = DatasourceId.from_datasource_config_file_path(prepared_datasource.path)

    return BuiltDatasourceContext(
        datasource_id=str(source_id),
        datasource_type=prepared_datasource.datasource_type.full_type,
        context_built_at=datetime.now(),
        context=generated_context,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
    """
    Run a prepared source through the plugin

    A ``PreparedConfig`` is executed as a datasource plugin, with its config and datasource name.
    Any other prepared source is executed as a file plugin, with its path.
    """
    if not isinstance(prepared_datasource, PreparedConfig):
        return execute_file_plugin(
            plugin=cast(BuildFilePlugin, plugin),
            datasource_type=prepared_datasource.datasource_type,
            file_path=prepared_datasource.path,
        )

    return execute_datasource_plugin(
        plugin=cast(BuildDatasourcePlugin, plugin),
        datasource_type=prepared_datasource.datasource_type,
        config=prepared_datasource.config,
        datasource_name=prepared_datasource.datasource_name,
    )
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from databao_context_engine import DatabaoContextProjectManager, DatasourceId, DatasourceType
|
|
9
|
+
from databao_context_engine.datasource_config.add_config import (
|
|
10
|
+
get_config_file_structure_for_datasource_type,
|
|
11
|
+
)
|
|
12
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
|
|
13
|
+
from databao_context_engine.plugins.plugin_loader import get_all_available_plugin_types
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def add_datasource_config_interactive(project_dir: Path) -> DatasourceId:
    """
    Interactively guide the user through the creation of a datasource config file.

    The user is asked for the datasource type, its name and then the config values.
    If a config file already exists for that datasource, the user must confirm the overwrite:
    declining aborts (``click.confirm`` is called with ``abort=True``).

    Returns the id of the datasource whose config file was created.
    """
    manager = DatabaoContextProjectManager(project_dir=project_dir)

    intro = f"We will guide you to add a new datasource in your Databao Context Engine project, at {project_dir.resolve()}"
    click.echo(intro)

    chosen_type = _ask_for_datasource_type()
    chosen_name = click.prompt("Datasource name?", type=str)

    already_exists = manager.datasource_config_exists(datasource_type=chosen_type, datasource_name=chosen_name)
    if already_exists:
        overwrite_question = f"A config file already exists for this datasource ({chosen_type.config_folder}/{chosen_name}). Do you want to overwrite it?"
        click.confirm(overwrite_question, abort=True, default=False)

    content = _ask_for_config_details(chosen_type)

    # Reaching this point means either no config existed or the user confirmed the overwrite
    created = manager.create_datasource_config(chosen_type, chosen_name, content, overwrite_existing=True)

    click.echo(f"{os.linesep}We've created a new config file for your datasource at: {created.config_file_path}")

    return created.datasource_id
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _ask_for_datasource_type() -> DatasourceType:
    """
    Ask the user which type of datasource to add.

    The user first picks a main type (the config folder) among the available non-file plugin types,
    then a subtype among the ones available for that main type.
    """

    def _prompt_for_choice(question: str, choices: list[str]) -> str:
        # A default is only offered when there is a single possible answer
        only_choice = choices[0] if len(choices) == 1 else None
        return click.prompt(question, type=click.Choice(choices), default=only_choice)

    subtypes_by_folder = _group_supported_types_by_folder(get_all_available_plugin_types(exclude_file_plugins=True))

    folder_choices = sorted(subtypes_by_folder.keys())
    config_folder = _prompt_for_choice("What type of datasource do you want to add?", folder_choices)
    click.echo(f"Selected type: {config_folder}")

    subtype_choices = sorted(subtypes_by_folder[config_folder])
    config_type = _prompt_for_choice("What is the subtype of this datasource?", subtype_choices)
    click.echo(f"Selected subtype: {config_type}")

    return DatasourceType.from_main_and_subtypes(config_folder, config_type)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _ask_for_config_details(datasource_type: DatasourceType) -> dict[str, Any]:
    """
    Ask the user for the config values declared for the given datasource type.

    Returns the config content built from the answers, or an empty dictionary when
    no config property is declared for this datasource type.
    """
    declared_properties = get_config_file_structure_for_datasource_type(datasource_type)

    # Adds a new line before asking for the config values specific to the plugin
    click.echo("")

    if len(declared_properties) > 0:
        return _build_config_content_from_properties(declared_properties, properties_prefix="")

    click.echo(
        "The plugin for this datasource doesn't declare its configuration. Please check the documentation and fill the config file manually."
    )
    return {}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _build_config_content_from_properties(
    properties: list[ConfigPropertyDefinition], properties_prefix: str
) -> dict[str, Any]:
    """
    Prompt the user for the value of each property and build the config content from the answers.

    Properties with nested properties are handled recursively and end up as nested dictionaries.
    Blank answers and nested dictionaries left empty are not added to the result.

    Args:
        properties: The property definitions to ask values for.
        properties_prefix: Dotted path of the parent properties, displayed in front of each
            property key in the prompts. It is either empty (top level) or ends with a ".".

    Returns:
        The config content, keyed by property key.
    """
    config_content: dict[str, Any] = {}
    for config_file_property in properties:
        property_key = config_file_property.property_key

        if property_key in ["type", "name"] and len(properties_prefix) == 0:
            # We ignore type and name properties as they've already been filled
            continue

        nested_properties = config_file_property.nested_properties
        if nested_properties is not None and len(nested_properties) > 0:
            # The prefix already ends with a "." when it isn't empty: adding another separator
            # in front of the key would display deeper levels as "parent..child."
            nested_content = _build_config_content_from_properties(
                nested_properties,
                properties_prefix=f"{properties_prefix}{property_key}.",
            )
            if len(nested_content.keys()) > 0:
                config_content[property_key] = nested_content
            continue

        default_value: str | None
        if config_file_property.default_value:
            default_value = config_file_property.default_value
        else:
            # We need to add an empty string default value for non-required fields
            # (a None default makes the prompt insist until a value is entered)
            default_value = None if config_file_property.required else ""

        property_value = click.prompt(
            f"{properties_prefix}{property_key}? {'(Optional)' if not config_file_property.required else ''}",
            type=str,
            default=default_value,
            show_default=default_value is not None and default_value != "",
        )

        if property_value.strip():
            config_content[property_key] = property_value

    return config_content
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _group_supported_types_by_folder(all_plugin_types: set[DatasourceType]) -> dict[str, list[str]]:
    """
    Group the subtypes of the given datasource types by main type.

    Returns a mapping from each main type to the list of its subtypes,
    in the iteration order of ``all_plugin_types``.
    """
    subtypes_by_main_type = defaultdict(list)
    for plugin_type in all_plugin_types:
        subtypes = subtypes_by_main_type[plugin_type.main_type]
        subtypes.append(plugin_type.subtype)

    return subtypes_by_main_type
|