databao-context-engine 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +18 -6
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +84 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
- databao_context_engine/cli/add_datasource_config.py +41 -15
- databao_context_engine/cli/commands.py +12 -43
- databao_context_engine/cli/info.py +2 -2
- databao_context_engine/databao_context_engine.py +137 -0
- databao_context_engine/databao_context_project_manager.py +96 -6
- databao_context_engine/datasources/add_config.py +34 -0
- databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
- databao_context_engine/datasources/datasource_context.py +93 -0
- databao_context_engine/{project → datasources}/datasource_discovery.py +18 -17
- databao_context_engine/{project → datasources}/types.py +64 -15
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +59 -30
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/install.py +13 -20
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/all_results_tool.py +2 -2
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +1 -4
- databao_context_engine/mcp/retrieve_tool.py +3 -11
- databao_context_engine/plugin_loader.py +111 -0
- databao_context_engine/pluginlib/build_plugin.py +25 -9
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/databases/athena_introspector.py +85 -22
- databao_context_engine/plugins/databases/base_introspector.py +5 -3
- databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
- databao_context_engine/plugins/databases/duckdb_introspector.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
- databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
- databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
- databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
- databao_context_engine/plugins/plugin_loader.py +54 -45
- databao_context_engine/plugins/resources/parquet_introspector.py +2 -3
- databao_context_engine/project/info.py +31 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +3 -3
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
- databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +2 -4
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.3.dist-info/METADATA +75 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.3.dist-info}/RECORD +68 -77
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.3.dist-info}/WHEEL +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.3.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.datasources.datasource_context import (
|
|
7
|
+
DatasourceContext,
|
|
8
|
+
get_all_contexts,
|
|
9
|
+
get_context_header_for_datasource,
|
|
10
|
+
get_datasource_context,
|
|
11
|
+
get_introspected_datasource_list,
|
|
12
|
+
)
|
|
13
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
14
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
15
|
+
from databao_context_engine.project.layout import ProjectLayout, ensure_project_dir
|
|
16
|
+
from databao_context_engine.retrieve_embeddings import retrieve_embeddings
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ContextSearchResult:
    """A single match returned by a search over the project's contexts.

    Attributes:
        datasource_id: ID of the datasource the match originates from.
        datasource_type: Type of the datasource the match originates from.
        distance: Distance between the searched text and this match.
        context_result: Content of the match, as a YAML string.
            It is a sub-part of the datasource's full context and, in some cases,
            does not contain exactly the same attributes as the full context.
    """

    datasource_id: DatasourceId
    datasource_type: DatasourceType
    distance: float
    context_result: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DatabaoContextEngine:
    """Read-side entry point for the contexts generated in a Databao Context Project.

    The project is expected to already have its datasources configured and built
    (see DatabaoContextProjectManager) before the engine is used.

    Attributes:
        project_dir: The root directory of the Databao Context Project.
    """

    project_dir: Path
    _project_layout: ProjectLayout

    def __init__(self, project_dir: Path) -> None:
        """Create an engine bound to an existing project directory.

        Args:
            project_dir: The root directory of the Databao Context Project.
                There must be a valid DatabaoContextProject in this directory.
        """
        # The layout is resolved before any attribute is assigned on the instance.
        layout = ensure_project_dir(project_dir=project_dir)
        self._project_layout = layout
        self.project_dir = project_dir

    def get_introspected_datasource_list(self) -> list[Datasource]:
        """List the datasources that have a context available.

        Returns:
            The datasources for which a context is available.
        """
        layout = self._project_layout
        return get_introspected_datasource_list(layout)

    def get_datasource_context(self, datasource_id: DatasourceId) -> DatasourceContext:
        """Fetch the context available for one datasource.

        Args:
            datasource_id: The ID of the datasource.

        Returns:
            The context for this datasource.
        """
        layout = self._project_layout
        return get_datasource_context(project_layout=layout, datasource_id=datasource_id)

    def get_all_contexts(self) -> list[DatasourceContext]:
        """Fetch every context generated in the project.

        Returns:
            A list of all contexts generated in the project.
        """
        layout = self._project_layout
        return get_all_contexts(project_layout=layout)

    def get_all_contexts_formatted(self) -> str:
        """Return every generated context concatenated into a single string.

        Each context is prefixed with the header built by get_context_header_for_datasource,
        and the resulting entries are joined with os.linesep.

        Returns:
            The concatenation of all headers and contexts of the project.
        """
        formatted_entries = []
        for datasource_context in self.get_all_contexts():
            header = get_context_header_for_datasource(datasource_context.datasource_id)
            formatted_entries.append(f"{header}{datasource_context.context}")

        return os.linesep.join(formatted_entries)

    def search_context(
        self,
        retrieve_text: str,
        limit: int | None = None,
        export_to_file: bool = False,
        datasource_ids: list[DatasourceId] | None = None,
    ) -> list[ContextSearchResult]:
        """Search the available contexts for the closest matches to the given text.

        Args:
            retrieve_text: The text to search for in the contexts.
            limit: The maximum number of results to return. If None is provided, a default limit of 10 will be used.
            export_to_file: Whether the results should be exported to a file as a side-effect.
                If True, the results will be exported in a file in the run directory.
            datasource_ids: Not implemented yet: this argument is accepted but is not used by the search.

        Returns:
            A list of the results found for the search, sorted by distance.
        """
        # TODO: Filter with datasource_ids
        # TODO: When no run_name is required, we can extract the "export_to_file" side-effect and let the caller (the CLI) do it themselves

        matches = retrieve_embeddings(
            project_layout=self._project_layout,
            retrieve_text=retrieve_text,
            limit=limit,
            export_to_file=export_to_file,
        )

        # Map each retrieval result onto the public result type, keeping the retrieval order.
        search_results: list[ContextSearchResult] = []
        for match in matches:
            search_results.append(
                ContextSearchResult(
                    datasource_id=match.datasource_id,
                    datasource_type=match.datasource_type,
                    distance=match.cosine_distance,
                    context_result=match.display_text,
                )
            )

        return search_results

    def run_sql(self, datasource_id: DatasourceId, sql: str, params: list[str]) -> dict[str, Any]:
        """Not implemented yet. This will allow running a SQL query against a datasource (if the datasource supports it).

        Raises:
            NotImplementedError: Always, since running SQL is not supported yet.
        """
        raise NotImplementedError("Running SQL is not supported yet")
|
|
@@ -2,43 +2,100 @@ from dataclasses import dataclass
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any, overload
|
|
4
4
|
|
|
5
|
-
from databao_context_engine.build_sources
|
|
6
|
-
from databao_context_engine.
|
|
5
|
+
from databao_context_engine.build_sources import BuildContextResult, build_all_datasources
|
|
6
|
+
from databao_context_engine.databao_context_engine import DatabaoContextEngine
|
|
7
|
+
from databao_context_engine.datasources.add_config import (
|
|
7
8
|
create_datasource_config_file,
|
|
8
9
|
get_datasource_id_for_config_file,
|
|
9
10
|
)
|
|
10
|
-
from databao_context_engine.
|
|
11
|
+
from databao_context_engine.datasources.check_config import (
|
|
11
12
|
CheckDatasourceConnectionResult,
|
|
13
|
+
)
|
|
14
|
+
from databao_context_engine.datasources.check_config import (
|
|
12
15
|
check_datasource_connection as check_datasource_connection_internal,
|
|
13
16
|
)
|
|
17
|
+
from databao_context_engine.datasources.datasource_discovery import get_datasource_list
|
|
18
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
14
19
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
15
|
-
from databao_context_engine.project.datasource_discovery import DatasourceId
|
|
16
20
|
from databao_context_engine.project.layout import ensure_datasource_config_file_doesnt_exist, ensure_project_dir
|
|
17
21
|
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
@dataclass
class DatasourceConfigFile:
    """Description of a datasource config file created by the DatabaoContextProjectManager.

    Attributes:
        datasource_id: The unique identifier of the datasource.
        config_file_path: The path to the datasource configuration file.
    """

    datasource_id: DatasourceId
    config_file_path: Path
|
|
24
35
|
|
|
25
36
|
|
|
26
37
|
class DatabaoContextProjectManager:
|
|
38
|
+
"""Project Manager for Databao Context Projects.
|
|
39
|
+
|
|
40
|
+
This project manager is responsible for configuring and building a Databao Context Project.
|
|
41
|
+
The project_dir should already have been initialized before a Project manager can be used.
|
|
42
|
+
|
|
43
|
+
Attributes:
|
|
44
|
+
project_dir: The root directory of the Databao Context Project.
|
|
45
|
+
"""
|
|
46
|
+
|
|
27
47
|
project_dir: Path
|
|
28
48
|
|
|
29
49
|
def __init__(self, project_dir: Path) -> None:
|
|
50
|
+
"""Initialize the DatabaoContextProjectManager.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
project_dir: The root directory of the Databao Context Project.
|
|
54
|
+
"""
|
|
30
55
|
ensure_project_dir(project_dir=project_dir)
|
|
31
56
|
self.project_dir = project_dir
|
|
32
57
|
|
|
58
|
+
def get_configured_datasource_list(self) -> list[Datasource]:
|
|
59
|
+
"""Return the list of datasources configured in the project.
|
|
60
|
+
|
|
61
|
+
This method returns all datasources configured in the src folder of the project,
|
|
62
|
+
no matter whether the datasource configuration is valid or not.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
The list of datasources configured in the project.
|
|
66
|
+
"""
|
|
67
|
+
return get_datasource_list(self.project_dir)
|
|
68
|
+
|
|
33
69
|
def build_context(
|
|
34
|
-
self,
|
|
70
|
+
self,
|
|
71
|
+
datasource_ids: list[DatasourceId] | None = None,
|
|
72
|
+
chunk_embedding_mode: ChunkEmbeddingMode = ChunkEmbeddingMode.EMBEDDABLE_TEXT_ONLY,
|
|
35
73
|
) -> list[BuildContextResult]:
|
|
74
|
+
"""Build the context for datasources in the project.
|
|
75
|
+
|
|
76
|
+
Any datasource with an invalid configuration will be skipped.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
datasource_ids: The list of datasource ids to build. If None, all datasources will be built.
|
|
80
|
+
chunk_embedding_mode: The mode to use for chunk embedding.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
The list of all built results.
|
|
84
|
+
"""
|
|
36
85
|
# TODO: Filter which datasources to build by datasource_ids
|
|
37
86
|
return build_all_datasources(project_dir=self.project_dir, chunk_embedding_mode=chunk_embedding_mode)
|
|
38
87
|
|
|
39
88
|
def check_datasource_connection(
|
|
40
|
-
self, datasource_ids: list[DatasourceId] | None
|
|
89
|
+
self, datasource_ids: list[DatasourceId] | None = None
|
|
41
90
|
) -> list[CheckDatasourceConnectionResult]:
|
|
91
|
+
"""Check the connection for datasources in the project.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
datasource_ids: The list of datasource ids to check. If None, all datasources will be checked.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
The list of all connection check results, sorted by datasource id.
|
|
98
|
+
"""
|
|
42
99
|
return sorted(
|
|
43
100
|
check_datasource_connection_internal(project_dir=self.project_dir, datasource_ids=datasource_ids).values(),
|
|
44
101
|
key=lambda result: str(result.datasource_id),
|
|
@@ -51,6 +108,18 @@ class DatabaoContextProjectManager:
|
|
|
51
108
|
config_content: dict[str, Any],
|
|
52
109
|
overwrite_existing: bool = False,
|
|
53
110
|
) -> DatasourceConfigFile:
|
|
111
|
+
"""Create a new datasource configuration file in the project.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
datasource_type: The type of the datasource to create.
|
|
115
|
+
datasource_name: The name of the datasource to create.
|
|
116
|
+
config_content: The content of the datasource configuration. This is a dictionary that will be written as-is in a yaml file.
|
|
117
|
+
The actual content of the configuration is not checked and might not be valid for the requested type..
|
|
118
|
+
overwrite_existing: Whether to overwrite an existing datasource configuration file if it already exists.
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
The path to the created datasource configuration file.
|
|
122
|
+
"""
|
|
54
123
|
# TODO: Before creating the datasource, validate the config content based on which plugin will be used
|
|
55
124
|
config_file_path = create_datasource_config_file(
|
|
56
125
|
project_dir=self.project_dir,
|
|
@@ -77,6 +146,19 @@ class DatabaoContextProjectManager:
|
|
|
77
146
|
datasource_name: str | None = None,
|
|
78
147
|
datasource_id: DatasourceId | None = None,
|
|
79
148
|
) -> bool:
|
|
149
|
+
"""Check if a datasource configuration file already exists in the project.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
datasource_type: The type of the datasource. This must be used in conjunction with datasource_name.
|
|
153
|
+
datasource_name: The name of the datasource. This must be used in conjunction with datasource_type.
|
|
154
|
+
datasource_id: The id of the datasource. If provided, datasource_type and datasource_name will be ignored.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
True if there is already a datasource configuration file for this datasource, False otherwise.
|
|
158
|
+
|
|
159
|
+
Raises:
|
|
160
|
+
ValueError: If the wrong set of arguments is provided.
|
|
161
|
+
"""
|
|
80
162
|
if datasource_type is not None and datasource_name is not None:
|
|
81
163
|
datasource_id = get_datasource_id_for_config_file(datasource_type, datasource_name)
|
|
82
164
|
elif datasource_id is None:
|
|
@@ -90,3 +172,11 @@ class DatabaoContextProjectManager:
|
|
|
90
172
|
return False
|
|
91
173
|
except ValueError:
|
|
92
174
|
return True
|
|
175
|
+
|
|
176
|
+
def get_engine_for_project(self) -> DatabaoContextEngine:
|
|
177
|
+
"""Instantiate a DatabaoContextEngine for the project.
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
A DatabaoContextEngine instance for the project.
|
|
181
|
+
"""
|
|
182
|
+
return DatabaoContextEngine(project_dir=self.project_dir)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
5
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
6
|
+
from databao_context_engine.project.layout import (
|
|
7
|
+
create_datasource_config_file as create_datasource_config_file_internal,
|
|
8
|
+
)
|
|
9
|
+
from databao_context_engine.serialization.yaml import to_yaml_string
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_datasource_config_file(
    project_dir: Path,
    datasource_type: DatasourceType,
    datasource_name: str,
    config_content: dict[str, Any],
    overwrite_existing: bool,
) -> Path:
    """Serialize a datasource configuration to YAML and hand it to the project layout helper.

    The serialized mapping starts with the "type" and "name" entries derived from the
    arguments; the entries of config_content are merged on top of them, so a "type" or
    "name" key present in config_content takes precedence.

    Args:
        project_dir: The root directory of the project.
        datasource_type: The type of the datasource; its subtype is written as "type".
        datasource_name: The name of the datasource, written as "name".
        config_content: Additional configuration entries to write.
        overwrite_existing: Forwarded as-is to the project layout helper.

    Returns:
        The value returned by the project layout helper (annotated as a Path).
    """
    base_entries = {"type": datasource_type.subtype, "name": datasource_name}
    datasource_id = get_datasource_id_for_config_file(datasource_type, datasource_name)
    # Right-hand operand wins on key collisions.
    yaml_content = to_yaml_string(base_entries | config_content)

    return create_datasource_config_file_internal(
        project_dir,
        datasource_id,
        yaml_content,
        overwrite_existing=overwrite_existing,
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_datasource_id_for_config_file(datasource_type: DatasourceType, datasource_name: str) -> DatasourceId:
    """Build the DatasourceId of the ".yaml" config file for a datasource type and name.

    Args:
        datasource_type: The type of the datasource; its config_folder is used as the ID's folder.
        datasource_name: The name of the datasource.

    Returns:
        A DatasourceId whose config file suffix is ".yaml".
    """
    config_folder = datasource_type.config_folder
    yaml_suffix = ".yaml"
    return DatasourceId(
        datasource_config_folder=config_folder,
        datasource_name=datasource_name,
        config_file_suffix=yaml_suffix,
    )
|
|
@@ -6,21 +6,23 @@ from pathlib import Path
|
|
|
6
6
|
|
|
7
7
|
from pydantic import ValidationError
|
|
8
8
|
|
|
9
|
-
from databao_context_engine.
|
|
10
|
-
from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
|
|
11
|
-
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
12
|
-
from databao_context_engine.project.datasource_discovery import (
|
|
9
|
+
from databao_context_engine.datasources.datasource_discovery import (
|
|
13
10
|
discover_datasources,
|
|
14
11
|
get_datasource_descriptors,
|
|
15
12
|
prepare_source,
|
|
16
13
|
)
|
|
14
|
+
from databao_context_engine.datasources.types import DatasourceId, PreparedConfig
|
|
15
|
+
from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
|
|
16
|
+
from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
|
|
17
|
+
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
17
18
|
from databao_context_engine.project.layout import ensure_project_dir
|
|
18
|
-
from databao_context_engine.project.types import DatasourceId, PreparedConfig
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class DatasourceConnectionStatus(Enum):
|
|
24
|
+
"""Status of the connection to a datasource."""
|
|
25
|
+
|
|
24
26
|
VALID = "Valid"
|
|
25
27
|
INVALID = "Invalid"
|
|
26
28
|
UNKNOWN = "Unknown"
|
|
@@ -28,6 +30,15 @@ class DatasourceConnectionStatus(Enum):
|
|
|
28
30
|
|
|
29
31
|
@dataclass(kw_only=True)
|
|
30
32
|
class CheckDatasourceConnectionResult:
|
|
33
|
+
"""Result of checking the connection status of a datasource.
|
|
34
|
+
|
|
35
|
+
Attributes:
|
|
36
|
+
datasource_id: The id of the datasource.
|
|
37
|
+
connection_status: The connection status of the datasource.
|
|
38
|
+
summary: A summary of the connection status' error, or None if the connection is valid.
|
|
39
|
+
full_message: A detailed message about the connection status' error, or None if the connection is valid.
|
|
40
|
+
"""
|
|
41
|
+
|
|
31
42
|
datasource_id: DatasourceId
|
|
32
43
|
connection_status: DatasourceConnectionStatus
|
|
33
44
|
summary: str | None
|
|
@@ -103,12 +114,12 @@ def check_datasource_connection(
|
|
|
103
114
|
exc_info=True,
|
|
104
115
|
stack_info=True,
|
|
105
116
|
)
|
|
106
|
-
result[result_key] =
|
|
117
|
+
result[result_key] = _get_validation_result_from_error(result_key, e)
|
|
107
118
|
|
|
108
119
|
return result
|
|
109
120
|
|
|
110
121
|
|
|
111
|
-
def
|
|
122
|
+
def _get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
|
|
112
123
|
if isinstance(e, ValidationError):
|
|
113
124
|
return CheckDatasourceConnectionResult(
|
|
114
125
|
datasource_id=datasource_id,
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
|
|
9
|
+
from databao_context_engine.project.layout import ProjectLayout, get_output_dir
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(eq=True, frozen=True)
class DatasourceContext:
    """The context generated for one datasource (immutable, compared by value).

    Attributes:
        datasource_id: The id of the datasource.
        context: The context generated for the datasource.
    """

    datasource_id: DatasourceId
    # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
    context: str
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceType:
    """Extract the datasource type declared in a context file.

    The file is scanned line by line and the first line starting with
    "datasource_type: " is parsed as YAML to obtain the type value.

    Args:
        context_path: Path to the context file to read.

    Returns:
        A DatasourceType built from the value found in the file.

    Raises:
        ValueError: If no line of the file starts with "datasource_type: ".
    """
    type_key = "datasource_type"
    line_prefix = f"{type_key}: "

    with context_path.open("r") as context_file:
        # Stops reading at the first matching line; only that line is parsed.
        matching_line = next((line for line in context_file if line.startswith(line_prefix)), None)

    if matching_line is None:
        raise ValueError(f"Could not find type in context file {context_path}")

    return DatasourceType(full_type=yaml.safe_load(matching_line)[type_key])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
    """List the datasources that have a context file in the project's output directory.

    Sub-directories of the output directory are visited in case-insensitive name order,
    and within each one the ".yaml" / ".yml" files are visited in case-insensitive name
    order. A file for which a ValueError is raised while building its Datasource is
    logged and skipped.

    Args:
        project_layout: The layout of the project whose output directory is scanned.

    Returns:
        The datasources found, in visiting order.
    """

    def _lowercase_name(path: Path) -> str:
        return path.name.lower()

    output_dir = get_output_dir(project_dir=project_layout.project_dir)
    type_dirs = sorted([entry for entry in output_dir.iterdir() if entry.is_dir()], key=_lowercase_name)

    datasources = []
    for type_dir in type_dirs:
        context_files = sorted(
            [entry for entry in type_dir.iterdir() if entry.suffix in [".yaml", ".yml"]],
            key=_lowercase_name,
        )
        for context_path in context_files:
            try:
                # NOTE(review): the ID is derived with from_datasource_config_file_path although
                # context_path points at a generated context file - confirm this is the intended factory.
                datasource_id = DatasourceId.from_datasource_config_file_path(context_path)
                datasource_type = _read_datasource_type_from_context_file(context_path)
                datasources.append(Datasource(id=datasource_id, type=datasource_type))
            except ValueError as error:
                logger.debug(str(error), exc_info=True, stack_info=True)
                logger.warning(
                    f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_path.resolve()}"
                )

    return datasources
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
    """Read the context file of one datasource from the project's output directory.

    Args:
        project_layout: The layout of the project to read from.
        datasource_id: The ID of the datasource whose context is requested.

    Returns:
        A DatasourceContext holding the ID and the text content of the context file.

    Raises:
        ValueError: If the expected context file does not exist or is not a file.
    """
    context_path = get_output_dir(project_dir=project_layout.project_dir).joinpath(
        datasource_id.relative_path_to_context_file()
    )

    if context_path.is_file():
        file_content = context_path.read_text()
        return DatasourceContext(datasource_id=datasource_id, context=file_content)

    raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
    """Read every context file found in the project's output directory.

    Sub-directories of the output directory are visited in case-insensitive name order,
    and within each one the ".yaml" / ".yml" files are read in case-insensitive name order.

    Args:
        project_layout: The layout of the project whose output directory is scanned.

    Returns:
        One DatasourceContext per context file, in visiting order.
    """
    output_dir = get_output_dir(project_dir=project_layout.project_dir)
    type_dirs = sorted(
        (entry for entry in output_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name.lower(),
    )

    contexts = []
    for type_dir in type_dirs:
        context_files = sorted(
            (entry for entry in type_dir.iterdir() if entry.suffix in [".yaml", ".yml"]),
            key=lambda entry: entry.name.lower(),
        )
        contexts.extend(
            DatasourceContext(
                datasource_id=DatasourceId.from_datasource_context_file_path(context_file),
                context=context_file.read_text(),
            )
            for context_file in context_files
        )

    return contexts
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
    """Build the comment-style header line that introduces a datasource's context.

    Args:
        datasource_id: The ID of the datasource; its string representation is embedded in the header.

    Returns:
        The header "# ===== <id> =====" followed by os.linesep.
    """
    header_title = f"# ===== {str(datasource_id)} ====="
    return header_title + os.linesep
|
|
@@ -4,17 +4,17 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
import yaml
|
|
6
6
|
|
|
7
|
-
from databao_context_engine.
|
|
8
|
-
|
|
9
|
-
from databao_context_engine.project.types import (
|
|
7
|
+
from databao_context_engine.datasources.types import (
|
|
8
|
+
Datasource,
|
|
10
9
|
DatasourceDescriptor,
|
|
10
|
+
DatasourceId,
|
|
11
11
|
DatasourceKind,
|
|
12
12
|
PreparedConfig,
|
|
13
13
|
PreparedDatasource,
|
|
14
14
|
PreparedFile,
|
|
15
|
-
DatasourceId,
|
|
16
|
-
Datasource,
|
|
17
15
|
)
|
|
16
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
17
|
+
from databao_context_engine.project.layout import get_source_dir
|
|
18
18
|
from databao_context_engine.templating.renderer import render_template
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
@@ -41,13 +41,18 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
|
|
44
|
-
"""
|
|
45
|
-
Scan the project's src/ directory and return all discovered sources.
|
|
44
|
+
"""Scan the project's src/ directory and return all discovered sources.
|
|
46
45
|
|
|
47
46
|
Rules:
|
|
48
47
|
- Each first-level directory under src/ is treated as a main_type
|
|
49
48
|
- Unsupported or unreadable entries are skipped.
|
|
50
49
|
- The returned list is sorted by directory and then filename
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
A list of DatasourceDescriptor instances representing the discovered datasources.
|
|
53
|
+
|
|
54
|
+
Raises:
|
|
55
|
+
ValueError: If the src directory does not exist in the project directory.
|
|
51
56
|
"""
|
|
52
57
|
src = get_source_dir(project_dir)
|
|
53
58
|
if not src.exists() or not src.is_dir():
|
|
@@ -56,7 +61,7 @@ def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
|
|
|
56
61
|
datasources: list[DatasourceDescriptor] = []
|
|
57
62
|
for main_dir in sorted((p for p in src.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
|
|
58
63
|
for path in sorted((p for p in main_dir.iterdir() if _is_datasource_file(p)), key=lambda p: p.name.lower()):
|
|
59
|
-
datasource =
|
|
64
|
+
datasource = _load_datasource_descriptor(path)
|
|
60
65
|
if datasource is not None:
|
|
61
66
|
datasources.append(datasource)
|
|
62
67
|
|
|
@@ -74,22 +79,20 @@ def get_datasource_descriptors(project_dir: Path, datasource_ids: list[Datasourc
|
|
|
74
79
|
raise ValueError(f"src directory does not exist in {project_dir}")
|
|
75
80
|
|
|
76
81
|
datasources: list[DatasourceDescriptor] = []
|
|
77
|
-
for datasource_id in datasource_ids:
|
|
82
|
+
for datasource_id in sorted(datasource_ids, key=lambda id: str(id)):
|
|
78
83
|
config_file_path = src.joinpath(datasource_id.relative_path_to_config_file())
|
|
79
84
|
if not config_file_path.is_file():
|
|
80
85
|
raise ValueError(f"Datasource config file not found: {config_file_path}")
|
|
81
86
|
|
|
82
|
-
datasource =
|
|
87
|
+
datasource = _load_datasource_descriptor(config_file_path)
|
|
83
88
|
if datasource is not None:
|
|
84
89
|
datasources.append(datasource)
|
|
85
90
|
|
|
86
91
|
return datasources
|
|
87
92
|
|
|
88
93
|
|
|
89
|
-
def
|
|
90
|
-
"""
|
|
91
|
-
Load a single file with src/<parent_name>/ into a DatasourceDescriptor
|
|
92
|
-
"""
|
|
94
|
+
def _load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
|
|
95
|
+
"""Load a single file with src/<parent_name>/ into a DatasourceDescriptor."""
|
|
93
96
|
if not path.is_file():
|
|
94
97
|
return None
|
|
95
98
|
|
|
@@ -110,9 +113,7 @@ def load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
|
|
|
110
113
|
|
|
111
114
|
|
|
112
115
|
def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
|
|
113
|
-
"""
|
|
114
|
-
Convert a discovered datasource into a prepared datasource ready for plugin execution
|
|
115
|
-
"""
|
|
116
|
+
"""Convert a discovered datasource into a prepared datasource ready for plugin execution."""
|
|
116
117
|
if datasource.kind is DatasourceKind.FILE:
|
|
117
118
|
file_subtype = datasource.path.suffix.lower().lstrip(".")
|
|
118
119
|
return PreparedFile(
|