databao-context-engine 0.1.2__py3-none-any.whl → 0.1.4.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +18 -6
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +84 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
- databao_context_engine/cli/add_datasource_config.py +41 -15
- databao_context_engine/cli/commands.py +12 -43
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +137 -0
- databao_context_engine/databao_context_project_manager.py +96 -6
- databao_context_engine/datasources/add_config.py +34 -0
- databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
- databao_context_engine/datasources/datasource_context.py +93 -0
- databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
- databao_context_engine/{project → datasources}/types.py +64 -15
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +67 -53
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/install.py +13 -20
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +10 -10
- databao_context_engine/plugin_loader.py +111 -0
- databao_context_engine/pluginlib/build_plugin.py +25 -9
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/base_db_plugin.py +5 -2
- databao_context_engine/plugins/databases/athena_introspector.py +85 -22
- databao_context_engine/plugins/databases/base_introspector.py +5 -3
- databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
- databao_context_engine/plugins/databases/duckdb_introspector.py +3 -5
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
- databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
- databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
- databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/plugin_loader.py +43 -42
- databao_context_engine/plugins/resources/parquet_introspector.py +7 -19
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +3 -3
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
- databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +2 -4
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.4.dev1.dist-info/METADATA +75 -0
- databao_context_engine-0.1.4.dev1.dist-info/RECORD +125 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.2.dist-info/METADATA +0 -187
- databao_context_engine-0.1.2.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
- {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -38,12 +38,16 @@ PreparedDatasource = PreparedConfig | PreparedFile
|
|
|
38
38
|
|
|
39
39
|
@dataclass(kw_only=True, frozen=True, eq=True)
|
|
40
40
|
class DatasourceId:
|
|
41
|
-
"""
|
|
42
|
-
The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
|
|
41
|
+
"""The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
|
|
43
42
|
|
|
44
43
|
e.g: "databases/my_postgres_datasource.yaml"
|
|
45
44
|
|
|
46
45
|
Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
|
|
46
|
+
|
|
47
|
+
Attributes:
|
|
48
|
+
datasource_config_folder: The folder where the datasource's config file is located.
|
|
49
|
+
datasource_name: The name of the datasource.
|
|
50
|
+
config_file_suffix: The suffix of the config (or raw) file.
|
|
47
51
|
"""
|
|
48
52
|
|
|
49
53
|
datasource_config_folder: str
|
|
@@ -78,18 +82,22 @@ class DatasourceId:
|
|
|
78
82
|
return str(self.relative_path_to_config_file())
|
|
79
83
|
|
|
80
84
|
def relative_path_to_config_file(self) -> Path:
|
|
81
|
-
"""
|
|
82
|
-
Returns a path to the config file for this datasource.
|
|
85
|
+
"""Return a path to the config file for this datasource.
|
|
83
86
|
|
|
84
87
|
The returned path is relative to the src folder in the project.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
The path to the config file relative to the src folder in the project.
|
|
85
91
|
"""
|
|
86
92
|
return Path(self.datasource_config_folder).joinpath(self.datasource_name + self.config_file_suffix)
|
|
87
93
|
|
|
88
94
|
def relative_path_to_context_file(self) -> Path:
|
|
89
|
-
"""
|
|
90
|
-
Returns a path to the config file for this datasource.
|
|
95
|
+
"""Return a path to the context file for this datasource.
|
|
91
96
|
|
|
92
97
|
The returned path is relative to an output run folder in the project.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
The path to the context file relative to the output folder in the project.
|
|
93
101
|
"""
|
|
94
102
|
# Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
|
|
95
103
|
suffix = ".yaml" if self.config_file_suffix == ".yaml" else (self.config_file_suffix + ".yaml")
|
|
@@ -97,13 +105,19 @@ class DatasourceId:
|
|
|
97
105
|
return Path(self.datasource_config_folder).joinpath(self.datasource_name + suffix)
|
|
98
106
|
|
|
99
107
|
@classmethod
|
|
100
|
-
def from_string_repr(cls, datasource_id_as_string: str):
|
|
101
|
-
"""
|
|
102
|
-
|
|
108
|
+
def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
|
|
109
|
+
"""Create a DatasourceId from a string representation.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
datasource_id_as_string: The string representation of a DatasourceId.
|
|
113
|
+
This is the path to the datasource's config file relative to the src folder in the project.
|
|
114
|
+
(e.g. "databases/my_postgres_datasource.yaml")
|
|
103
115
|
|
|
104
|
-
|
|
116
|
+
Returns:
|
|
117
|
+
The DatasourceId instance created from the string representation.
|
|
105
118
|
|
|
106
|
-
|
|
119
|
+
Raises:
|
|
120
|
+
ValueError: If the string representation is invalid (e.g. too many parent folders).
|
|
107
121
|
"""
|
|
108
122
|
config_file_path = Path(datasource_id_as_string)
|
|
109
123
|
|
|
@@ -115,11 +129,15 @@ class DatasourceId:
|
|
|
115
129
|
return DatasourceId.from_datasource_config_file_path(config_file_path)
|
|
116
130
|
|
|
117
131
|
@classmethod
|
|
118
|
-
def from_datasource_config_file_path(cls, datasource_config_file: Path):
|
|
119
|
-
"""
|
|
120
|
-
Creates a DatasourceId from a config file path.
|
|
132
|
+
def from_datasource_config_file_path(cls, datasource_config_file: Path) -> "DatasourceId":
|
|
133
|
+
"""Create a DatasourceId from a config file path.
|
|
121
134
|
|
|
122
|
-
|
|
135
|
+
Args:
|
|
136
|
+
datasource_config_file: The path to the datasource config file.
|
|
137
|
+
This path can either be the config file path relative to the src folder or the full path to the config file.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
The DatasourceId instance created from the config file path.
|
|
123
141
|
"""
|
|
124
142
|
return DatasourceId(
|
|
125
143
|
datasource_config_folder=datasource_config_file.parent.name,
|
|
@@ -127,8 +145,39 @@ class DatasourceId:
|
|
|
127
145
|
config_file_suffix=datasource_config_file.suffix,
|
|
128
146
|
)
|
|
129
147
|
|
|
148
|
+
@classmethod
|
|
149
|
+
def from_datasource_context_file_path(cls, datasource_context_file: Path) -> "DatasourceId":
|
|
150
|
+
"""Create a DatasourceId from a context file path.
|
|
151
|
+
|
|
152
|
+
This factory handles the case where the context was generated from a raw file rather than from a config.
|
|
153
|
+
In that case, the context file name will look like "<my_datasource_name>.<raw_file_extension>.yaml"
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
datasource_context_file: The path to the datasource context file.
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
The DatasourceId instance created from the context file path.
|
|
160
|
+
"""
|
|
161
|
+
if len(datasource_context_file.suffixes) > 1 and datasource_context_file.suffix == ".yaml":
|
|
162
|
+
# If there is more than 1 suffix, we remove the last suffix (.yaml) to keep only the actual datasource file suffix
|
|
163
|
+
context_file_name_without_yaml_extension = datasource_context_file.name[: -len(".yaml")]
|
|
164
|
+
datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
|
|
165
|
+
|
|
166
|
+
return DatasourceId(
|
|
167
|
+
datasource_config_folder=datasource_context_file.parent.name,
|
|
168
|
+
datasource_name=datasource_context_file.stem,
|
|
169
|
+
config_file_suffix=datasource_context_file.suffix,
|
|
170
|
+
)
|
|
171
|
+
|
|
130
172
|
|
|
131
173
|
@dataclass
|
|
132
174
|
class Datasource:
|
|
175
|
+
"""A datasource contained in the project.
|
|
176
|
+
|
|
177
|
+
Attributes:
|
|
178
|
+
id: The unique identifier of the datasource.
|
|
179
|
+
type: The type of the datasource.
|
|
180
|
+
"""
|
|
181
|
+
|
|
133
182
|
id: DatasourceId
|
|
134
183
|
type: DatasourceType
|
|
@@ -1,17 +1,39 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
3
|
from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
|
|
4
|
-
from databao_context_engine.project.init_project import init_project_dir
|
|
4
|
+
from databao_context_engine.project.init_project import InitProjectError, init_project_dir
|
|
5
5
|
from databao_context_engine.project.layout import is_project_dir_valid
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
|
|
9
|
-
|
|
9
|
+
"""Initialize a Databao Context project in the given directory.
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
Args:
|
|
12
|
+
project_dir: The directory where the project should be initialized.
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
A DatabaoContextProjectManager for the initialized project.
|
|
16
|
+
|
|
17
|
+
Raises:
|
|
18
|
+
InitProjectError: If the project could not be initialized.
|
|
19
|
+
"""
|
|
20
|
+
try:
|
|
21
|
+
initialized_project_dir = init_project_dir(project_dir=project_dir)
|
|
22
|
+
|
|
23
|
+
return DatabaoContextProjectManager(project_dir=initialized_project_dir)
|
|
24
|
+
except InitProjectError as e:
|
|
25
|
+
raise e
|
|
12
26
|
|
|
13
27
|
|
|
14
28
|
def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
|
|
29
|
+
"""Initialize a Databao Context project in the given directory or get a DatabaoContextProjectManager for the project if it already exists.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
project_dir: The directory where the project should be initialized if it doesn't exist yet.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
A DatabaoContextProjectManager for the project in project_dir.
|
|
36
|
+
"""
|
|
15
37
|
if is_project_dir_valid(project_dir):
|
|
16
38
|
return DatabaoContextProjectManager(project_dir=project_dir)
|
|
17
39
|
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
import types
|
|
2
2
|
from dataclasses import MISSING, fields, is_dataclass
|
|
3
|
-
from typing import Annotated, Any, ForwardRef, Union, get_origin, get_type_hints
|
|
3
|
+
from typing import Annotated, Any, ForwardRef, Union, get_args, get_origin, get_type_hints
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel
|
|
5
|
+
from pydantic import BaseModel
|
|
6
6
|
from pydantic_core import PydanticUndefinedType
|
|
7
7
|
|
|
8
|
-
from databao_context_engine.pluginlib.config import
|
|
8
|
+
from databao_context_engine.pluginlib.config import (
|
|
9
|
+
ConfigPropertyAnnotation,
|
|
10
|
+
ConfigPropertyDefinition,
|
|
11
|
+
ConfigSinglePropertyDefinition,
|
|
12
|
+
ConfigUnionPropertyDefinition,
|
|
13
|
+
)
|
|
9
14
|
|
|
10
15
|
|
|
11
16
|
def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
|
|
@@ -54,18 +59,14 @@ def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyD
|
|
|
54
59
|
raise ValueError(f"{parent_type} is not a dataclass")
|
|
55
60
|
|
|
56
61
|
dataclass_fields = fields(parent_type)
|
|
62
|
+
type_hints = get_type_hints(parent_type, include_extras=True)
|
|
57
63
|
|
|
58
64
|
result = []
|
|
59
65
|
for field in dataclass_fields:
|
|
60
66
|
has_field_default = field.default is not None and field.default != MISSING
|
|
61
67
|
|
|
62
|
-
if
|
|
63
|
-
|
|
64
|
-
property_type = _evaluate_type_string(field.type)
|
|
65
|
-
except Exception:
|
|
66
|
-
continue
|
|
67
|
-
else:
|
|
68
|
-
property_type = field.type
|
|
68
|
+
# Use the type hints if the field type wasn't resolved (i.e. if it is a ForwardRef or a str)
|
|
69
|
+
property_type = type_hints[field.name] if isinstance(field.type, ForwardRef | str) else field.type
|
|
69
70
|
|
|
70
71
|
property_for_field = _create_property(
|
|
71
72
|
property_type=property_type,
|
|
@@ -84,6 +85,10 @@ def _get_property_list_from_pydantic_base_model(parent_type: type):
|
|
|
84
85
|
if not issubclass(parent_type, BaseModel):
|
|
85
86
|
raise ValueError(f"{parent_type} is not a Pydantic BaseModel")
|
|
86
87
|
|
|
88
|
+
if any(isinstance(field.annotation, ForwardRef) for field in parent_type.model_fields.values()):
|
|
89
|
+
# If any field's future type wasn't resolved yet, we rebuild the model to resolve them
|
|
90
|
+
parent_type.model_rebuild(force=True)
|
|
91
|
+
|
|
87
92
|
pydantic_fields = parent_type.model_fields
|
|
88
93
|
result = []
|
|
89
94
|
|
|
@@ -123,26 +128,55 @@ def _create_property(
|
|
|
123
128
|
if annotation is not None and annotation.ignored_for_config_wizard:
|
|
124
129
|
return None
|
|
125
130
|
|
|
126
|
-
|
|
131
|
+
actual_property_types = _read_actual_property_type(property_type)
|
|
127
132
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
except TypeError:
|
|
131
|
-
return None
|
|
133
|
+
required = annotation.required if annotation else is_property_required
|
|
134
|
+
secret = annotation.secret if annotation else False
|
|
132
135
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
property_default=property_default,
|
|
136
|
-
has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
|
|
137
|
-
)
|
|
136
|
+
if len(actual_property_types) > 1:
|
|
137
|
+
type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
|
|
138
138
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
139
|
+
for union_type in actual_property_types:
|
|
140
|
+
try:
|
|
141
|
+
nested_props = _get_property_list_from_type(
|
|
142
|
+
parent_type=union_type,
|
|
143
|
+
is_root_type=False,
|
|
144
|
+
)
|
|
145
|
+
except TypeError:
|
|
146
|
+
nested_props = []
|
|
147
|
+
|
|
148
|
+
type_properties[union_type] = nested_props
|
|
149
|
+
|
|
150
|
+
return ConfigUnionPropertyDefinition(
|
|
151
|
+
property_key=property_name,
|
|
152
|
+
types=actual_property_types,
|
|
153
|
+
type_properties=type_properties,
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
actual_property_type = actual_property_types[0]
|
|
157
|
+
try:
|
|
158
|
+
nested_properties = _get_property_list_from_type(
|
|
159
|
+
parent_type=actual_property_type,
|
|
160
|
+
is_root_type=False,
|
|
161
|
+
)
|
|
162
|
+
except TypeError:
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
resolved_type = actual_property_type if not nested_properties else None
|
|
166
|
+
default_value = compute_default_value(
|
|
167
|
+
annotation=annotation,
|
|
168
|
+
property_default=property_default,
|
|
169
|
+
has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
return ConfigSinglePropertyDefinition(
|
|
173
|
+
property_key=property_name,
|
|
174
|
+
property_type=resolved_type,
|
|
175
|
+
required=required,
|
|
176
|
+
default_value=default_value,
|
|
177
|
+
nested_properties=nested_properties or None,
|
|
178
|
+
secret=secret,
|
|
179
|
+
)
|
|
146
180
|
|
|
147
181
|
|
|
148
182
|
def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
|
|
@@ -155,21 +189,16 @@ def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation |
|
|
|
155
189
|
return None
|
|
156
190
|
|
|
157
191
|
|
|
158
|
-
def _read_actual_property_type(property_type: type) -> type:
|
|
192
|
+
def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
|
|
159
193
|
property_type_origin = get_origin(property_type)
|
|
160
194
|
|
|
161
195
|
if property_type_origin is Annotated:
|
|
162
|
-
return property_type.__origin__ # type: ignore[attr-defined]
|
|
163
|
-
elif property_type_origin
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
# Uses the actual type T when the Union is "T | None" (or "None | T")
|
|
167
|
-
return next(arg for arg in type_args if arg is not None)
|
|
168
|
-
else:
|
|
169
|
-
# Ignoring Union types when it is not used as type | None as we wouldn't which type to pick
|
|
170
|
-
return type(None)
|
|
196
|
+
return _read_actual_property_type(property_type.__origin__) # type: ignore[attr-defined]
|
|
197
|
+
elif property_type_origin in (Union, types.UnionType):
|
|
198
|
+
args = tuple(arg for arg in get_args(property_type) if arg is not type(None))
|
|
199
|
+
return args
|
|
171
200
|
|
|
172
|
-
return property_type
|
|
201
|
+
return (property_type,)
|
|
173
202
|
|
|
174
203
|
|
|
175
204
|
def compute_default_value(
|
|
@@ -185,18 +214,3 @@ def compute_default_value(
|
|
|
185
214
|
return str(property_default)
|
|
186
215
|
|
|
187
216
|
return None
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def _evaluate_type_string(property_type: str) -> type:
|
|
191
|
-
try:
|
|
192
|
-
# Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
|
|
193
|
-
return _internal._typing_extra.eval_type(property_type)
|
|
194
|
-
except Exception as initial_error:
|
|
195
|
-
try:
|
|
196
|
-
# Try to convert it ourselves if Pydantic didn't work
|
|
197
|
-
return ForwardRef(property_type)._evaluate( # type: ignore[return-value]
|
|
198
|
-
globalns=globals(), localns=locals(), recursive_guard=frozenset()
|
|
199
|
-
)
|
|
200
|
-
except Exception as e:
|
|
201
|
-
# Ignore if we didn't manage to convert the str to a type
|
|
202
|
-
raise e from initial_error
|
|
@@ -3,14 +3,8 @@ class OllamaError(Exception):
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class OllamaTransientError(OllamaError):
|
|
6
|
-
"""
|
|
7
|
-
Errors that are likely temporary (network issues, timeouts, 5xx, etc.).
|
|
8
|
-
Typically worth retrying.
|
|
9
|
-
"""
|
|
6
|
+
"""Errors that are likely temporary (network issues, timeouts, 5xx, etc.), typically worth retrying."""
|
|
10
7
|
|
|
11
8
|
|
|
12
9
|
class OllamaPermanentError(OllamaError):
|
|
13
|
-
"""
|
|
14
|
-
Errors that are unlikely to succeed on retry without changing inputs
|
|
15
|
-
or configuration (4xx, bad response schema, etc.).
|
|
16
|
-
"""
|
|
10
|
+
"""Errors that are unlikely to succeed on retry without changing inputs or configuration (4xx, bad response schema, etc.)."""
|
|
@@ -49,14 +49,15 @@ ARTIFACTS: dict[str, ArtifactInfo] = {
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def resolve_ollama_bin() -> str:
|
|
52
|
-
"""
|
|
53
|
-
Decide which `ollama` binary to use, in this order:
|
|
52
|
+
"""Decide which `ollama` binary to use.
|
|
54
53
|
|
|
54
|
+
Here is the priority order:
|
|
55
55
|
1. DCE_OLLAMA_BIN env var, if set and exists
|
|
56
56
|
2. `ollama` found on PATH
|
|
57
57
|
3. Managed installation under MANAGED_OLLAMA_BIN
|
|
58
58
|
|
|
59
|
-
Returns
|
|
59
|
+
Returns:
|
|
60
|
+
The full path to the binary
|
|
60
61
|
"""
|
|
61
62
|
override = os.environ.get("DCE_OLLAMA_BIN")
|
|
62
63
|
if override:
|
|
@@ -76,9 +77,7 @@ def resolve_ollama_bin() -> str:
|
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
def _detect_platform() -> str:
|
|
79
|
-
"""
|
|
80
|
-
Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'.
|
|
81
|
-
"""
|
|
80
|
+
"""Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'."""
|
|
82
81
|
os_name = sys.platform.lower()
|
|
83
82
|
arch = (os.uname().machine if hasattr(os, "uname") else "").lower()
|
|
84
83
|
|
|
@@ -97,9 +96,7 @@ def _detect_platform() -> str:
|
|
|
97
96
|
|
|
98
97
|
|
|
99
98
|
def _download_to_temp(url: str) -> Path:
|
|
100
|
-
"""
|
|
101
|
-
Download to a temporary file and return its path.
|
|
102
|
-
"""
|
|
99
|
+
"""Download to a temporary file and return its path."""
|
|
103
100
|
import urllib.request
|
|
104
101
|
|
|
105
102
|
tmp_dir = Path(tempfile.mkdtemp(prefix="ollama-download-"))
|
|
@@ -114,9 +111,7 @@ def _download_to_temp(url: str) -> Path:
|
|
|
114
111
|
|
|
115
112
|
|
|
116
113
|
def _verify_sha256(path: Path, expected_hex: str) -> None:
|
|
117
|
-
"""
|
|
118
|
-
Verify SHA-256 of path matches expected_hex
|
|
119
|
-
"""
|
|
114
|
+
"""Verify SHA-256 of path matches expected_hex."""
|
|
120
115
|
h = hashlib.sha256()
|
|
121
116
|
with path.open("rb") as f:
|
|
122
117
|
for chunk in iter(lambda: f.read(8192), b""):
|
|
@@ -127,9 +122,7 @@ def _verify_sha256(path: Path, expected_hex: str) -> None:
|
|
|
127
122
|
|
|
128
123
|
|
|
129
124
|
def _extract_archive(archive: Path, target_dir: Path) -> None:
|
|
130
|
-
"""
|
|
131
|
-
Extract archive into target_dir.
|
|
132
|
-
"""
|
|
125
|
+
"""Extract archive into target_dir."""
|
|
133
126
|
name = archive.name.lower()
|
|
134
127
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
135
128
|
|
|
@@ -144,9 +137,7 @@ def _extract_archive(archive: Path, target_dir: Path) -> None:
|
|
|
144
137
|
|
|
145
138
|
|
|
146
139
|
def _ensure_executable(path: Path) -> None:
|
|
147
|
-
"""
|
|
148
|
-
Mark path as executable
|
|
149
|
-
"""
|
|
140
|
+
"""Mark path as executable."""
|
|
150
141
|
try:
|
|
151
142
|
mode = path.stat().st_mode
|
|
152
143
|
path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
@@ -155,8 +146,7 @@ def _ensure_executable(path: Path) -> None:
|
|
|
155
146
|
|
|
156
147
|
|
|
157
148
|
def install_ollama_to(target: Path) -> None:
|
|
158
|
-
"""
|
|
159
|
-
Ensure an Ollama binary exists.
|
|
149
|
+
"""Ensure an Ollama binary exists.
|
|
160
150
|
|
|
161
151
|
If it doesn't exist, this will:
|
|
162
152
|
- detect OS
|
|
@@ -164,6 +154,9 @@ def install_ollama_to(target: Path) -> None:
|
|
|
164
154
|
- verify its SHA-256 checksum
|
|
165
155
|
- extract into the installation directory
|
|
166
156
|
- make the binary executable
|
|
157
|
+
|
|
158
|
+
Raises:
|
|
159
|
+
RuntimeError: If the user's platform is not supported
|
|
167
160
|
"""
|
|
168
161
|
target = target.expanduser()
|
|
169
162
|
if target.parent.name == "bin":
|
|
@@ -36,9 +36,7 @@ class OllamaService:
|
|
|
36
36
|
return [float(x) for x in vec]
|
|
37
37
|
|
|
38
38
|
def describe(self, *, model: str, text: str, context: str) -> str:
|
|
39
|
-
"""
|
|
40
|
-
Ask Ollama to generate a short description for `text`
|
|
41
|
-
"""
|
|
39
|
+
"""Ask Ollama to generate a short description for `text`."""
|
|
42
40
|
prompt = self._build_description_prompt(text=text, context=context)
|
|
43
41
|
|
|
44
42
|
payload: dict[str, Any] = {"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}}
|
|
@@ -2,15 +2,17 @@ import logging
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
from databao_context_engine.mcp.mcp_server import McpServer, McpTransport
|
|
5
|
+
from databao_context_engine.project.layout import ensure_project_dir
|
|
5
6
|
|
|
6
7
|
logger = logging.getLogger(__name__)
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def run_mcp_server(
|
|
10
11
|
project_dir: Path,
|
|
11
|
-
run_name: str | None,
|
|
12
12
|
transport: McpTransport,
|
|
13
13
|
host: str | None = None,
|
|
14
14
|
port: int | None = None,
|
|
15
15
|
) -> None:
|
|
16
|
-
|
|
16
|
+
ensure_project_dir(project_dir=project_dir)
|
|
17
|
+
|
|
18
|
+
McpServer(project_dir, host, port).run(transport)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from contextlib import asynccontextmanager
|
|
3
|
+
from datetime import date
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Literal
|
|
5
6
|
|
|
@@ -7,8 +8,6 @@ from mcp.server import FastMCP
|
|
|
7
8
|
from mcp.types import ToolAnnotations
|
|
8
9
|
|
|
9
10
|
from databao_context_engine import DatabaoContextEngine
|
|
10
|
-
from databao_context_engine.mcp.all_results_tool import run_all_results_tool
|
|
11
|
-
from databao_context_engine.mcp.retrieve_tool import run_retrieve_tool
|
|
12
11
|
|
|
13
12
|
logger = logging.getLogger(__name__)
|
|
14
13
|
|
|
@@ -26,12 +25,10 @@ class McpServer:
|
|
|
26
25
|
def __init__(
|
|
27
26
|
self,
|
|
28
27
|
project_dir: Path,
|
|
29
|
-
run_name: str | None,
|
|
30
28
|
host: str | None = None,
|
|
31
29
|
port: int | None = None,
|
|
32
30
|
):
|
|
33
31
|
self._databao_context_engine = DatabaoContextEngine(project_dir)
|
|
34
|
-
self._run_name = run_name
|
|
35
32
|
|
|
36
33
|
self._mcp_server = self._create_mcp_server(host, port)
|
|
37
34
|
|
|
@@ -43,20 +40,23 @@ class McpServer:
|
|
|
43
40
|
annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
|
|
44
41
|
)
|
|
45
42
|
def all_results_tool():
|
|
46
|
-
return
|
|
43
|
+
return self._databao_context_engine.get_all_contexts_formatted()
|
|
47
44
|
|
|
48
45
|
@mcp.tool(
|
|
49
46
|
description="Retrieve the context built from various resources, including databases, dbt tools, plain and structured files, to retrieve relevant information",
|
|
50
47
|
annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
|
|
51
48
|
)
|
|
52
49
|
def retrieve_tool(text: str, limit: int | None):
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
run_name=self._run_name,
|
|
56
|
-
text=text,
|
|
57
|
-
limit=limit or 50,
|
|
50
|
+
retrieve_results = self._databao_context_engine.search_context(
|
|
51
|
+
retrieve_text=text, limit=limit, export_to_file=False
|
|
58
52
|
)
|
|
59
53
|
|
|
54
|
+
display_results = [context_search_result.context_result for context_search_result in retrieve_results]
|
|
55
|
+
|
|
56
|
+
display_results.append(f"\nToday's date is {date.today()}")
|
|
57
|
+
|
|
58
|
+
return "\n".join(display_results)
|
|
59
|
+
|
|
60
60
|
return mcp
|
|
61
61
|
|
|
62
62
|
def run(self, transport: McpTransport):
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from databao_context_engine.introspection.property_extract import get_property_list_from_type
|
|
2
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
3
|
+
BuildDatasourcePlugin,
|
|
4
|
+
BuildFilePlugin,
|
|
5
|
+
BuildPlugin,
|
|
6
|
+
DatasourceType,
|
|
7
|
+
)
|
|
8
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyDefinition, CustomiseConfigProperties
|
|
9
|
+
from databao_context_engine.plugins.plugin_loader import (
|
|
10
|
+
load_plugins,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DatabaoContextPluginLoader:
|
|
15
|
+
"""Loader for plugins installed in the current environment."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, plugins_by_type: dict[DatasourceType, BuildPlugin] | None = None):
|
|
18
|
+
"""Initialize the DatabaoContextPluginLoader.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
plugins_by_type: Override the list of plugins loaded from the environment.
|
|
22
|
+
Typical usage should not provide this argument and leave it as None.
|
|
23
|
+
"""
|
|
24
|
+
self._all_plugins_by_type = load_plugins() if plugins_by_type is None else plugins_by_type
|
|
25
|
+
|
|
26
|
+
def get_all_supported_datasource_types(self, exclude_file_plugins: bool = False) -> set[DatasourceType]:
|
|
27
|
+
"""Return the set of all supported datasource types.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
exclude_file_plugins: If True, do not return datasource types from plugins that deal with raw files.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
A set of all DatasourceType supported in the current installation environment.
|
|
34
|
+
"""
|
|
35
|
+
if exclude_file_plugins:
|
|
36
|
+
return {
|
|
37
|
+
datasource_type
|
|
38
|
+
for (datasource_type, plugin) in self._all_plugins_by_type.items()
|
|
39
|
+
if not isinstance(plugin, BuildFilePlugin)
|
|
40
|
+
}
|
|
41
|
+
else:
|
|
42
|
+
return set(self._all_plugins_by_type.keys())
|
|
43
|
+
|
|
44
|
+
def get_plugin_for_datasource_type(self, datasource_type: DatasourceType) -> BuildPlugin:
|
|
45
|
+
"""Return the plugin able to build a context for the given datasource type.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
datasource_type: The type of datasource for which to retrieve the plugin.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
The plugin able to build a context for the given datasource type.
|
|
52
|
+
|
|
53
|
+
Raises:
|
|
54
|
+
ValueError: If no plugin is found for the given datasource type.
|
|
55
|
+
"""
|
|
56
|
+
if datasource_type not in self._all_plugins_by_type:
|
|
57
|
+
raise ValueError(f"No plugin found for type '{datasource_type.full_type}'")
|
|
58
|
+
|
|
59
|
+
return self._all_plugins_by_type[datasource_type]
|
|
60
|
+
|
|
61
|
+
def get_config_file_type_for_datasource_type(self, datasource_type: DatasourceType) -> type:
|
|
62
|
+
"""Return the type of the config file for the given datasource type.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
datasource_type: The type of datasource for which to retrieve the config file type.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
The type of the config file for the given datasource type.
|
|
69
|
+
|
|
70
|
+
Raises:
|
|
71
|
+
ValueError: If no plugin is found for the given datasource type.
|
|
72
|
+
ValueError: If the plugin does not support config files.
|
|
73
|
+
"""
|
|
74
|
+
plugin = self.get_plugin_for_datasource_type(datasource_type)
|
|
75
|
+
|
|
76
|
+
if isinstance(plugin, BuildDatasourcePlugin):
|
|
77
|
+
return plugin.config_file_type
|
|
78
|
+
|
|
79
|
+
raise ValueError(
|
|
80
|
+
f'Impossible to get a config file type for datasource type "{datasource_type.full_type}". The corresponding plugin is a {type(plugin).__name__} but should be a BuildDatasourcePlugin'
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
def get_config_file_structure_for_datasource_type(
|
|
84
|
+
self, datasource_type: DatasourceType
|
|
85
|
+
) -> list[ConfigPropertyDefinition]:
|
|
86
|
+
"""Return the property structure of the config file for the given datasource type.
|
|
87
|
+
|
|
88
|
+
This can be used to generate a form for the user to fill in the config file.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
datasource_type: The type of datasource for which to retrieve the config file structure.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
The structure of the config file for the given datasource type.
|
|
95
|
+
This structure is a list of ConfigPropertyDefinition objects.
|
|
96
|
+
Each object in the list describes a property of the config file and its potential nested properties.
|
|
97
|
+
|
|
98
|
+
Raises:
|
|
99
|
+
ValueError: If no plugin is found for the given datasource type.
|
|
100
|
+
ValueError: If the plugin does not support config files.
|
|
101
|
+
"""
|
|
102
|
+
plugin = self.get_plugin_for_datasource_type(datasource_type)
|
|
103
|
+
|
|
104
|
+
if isinstance(plugin, CustomiseConfigProperties):
|
|
105
|
+
return plugin.get_config_file_properties()
|
|
106
|
+
elif isinstance(plugin, BuildDatasourcePlugin):
|
|
107
|
+
return get_property_list_from_type(plugin.config_file_type)
|
|
108
|
+
else:
|
|
109
|
+
raise ValueError(
|
|
110
|
+
f'Impossible to create a config for datasource type "{datasource_type.full_type}". The corresponding plugin is a {type(plugin).__name__} but should be a BuildDatasourcePlugin or CustomiseConfigProperties'
|
|
111
|
+
)
|