databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +32 -7
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +82 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
- databao_context_engine/cli/add_datasource_config.py +49 -44
- databao_context_engine/cli/commands.py +40 -55
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +127 -0
- databao_context_engine/databao_context_project_manager.py +147 -30
- databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
- databao_context_engine/datasources/datasource_context.py +90 -0
- databao_context_engine/datasources/datasource_discovery.py +143 -0
- databao_context_engine/datasources/types.py +194 -0
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +76 -57
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +26 -30
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +9 -11
- databao_context_engine/plugin_loader.py +110 -0
- databao_context_engine/pluginlib/build_plugin.py +12 -29
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
- databao_context_engine/plugins/databases/base_introspector.py +11 -12
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +58 -52
- databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +14 -15
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
- databao_context_engine/serialization/__init__.py +0 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +3 -5
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.5.dist-info/METADATA +228 -0
- databao_context_engine-0.1.5.dist-info/RECORD +135 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/project/datasource_discovery.py +0 -141
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/project/types.py +0 -134
- databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- databao_context_engine-0.1.1.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
- /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
- /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
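The structural theme of the release is already visible in the list above: the `internal`/`public` split under `build_sources` and `retrieve_embeddings` is flattened away, run tracking (`project/runs.py`, `run_repository.py`, `datasource_run_repository.py`, `run_name_policy.py`) is removed, database plugins move into per-engine subpackages, `serialisation` is renamed to `serialization`, and the old `databao_engine.py` entry point gives way to `databao_context_engine.py`. A hedged sketch of the resulting top-level API; the class and method names are taken from the `mcp_server.py` hunks further down, and the project path is a placeholder:

```python
# Hedged sketch of the consolidated 0.1.5 entry point implied by the moves above.
# DatabaoContextEngine, get_all_contexts_formatted, and search_context appear in
# the mcp_server.py hunks below; "./my-dce-project" is a placeholder path.
from pathlib import Path

from databao_context_engine import DatabaoContextEngine

engine = DatabaoContextEngine(Path("./my-dce-project"))

# Dump every built context, formatted for display
print(engine.get_all_contexts_formatted())

# Semantic search over the built context; each result carries a .context_result string
for search_result in engine.search_context(retrieve_text="orders table", limit=5):
    print(search_result.context_result)
```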
--- a/databao_context_engine/introspection/property_extract.py
+++ b/databao_context_engine/introspection/property_extract.py
@@ -1,18 +1,33 @@
 import types
 from dataclasses import MISSING, fields, is_dataclass
-from typing import
-
-
+from typing import (
+    Annotated,
+    Any,
+    ForwardRef,
+    Iterable,
+    Mapping,
+    Union,
+    get_args,
+    get_origin,
+    get_type_hints,
+)
+
+from pydantic import BaseModel
 from pydantic_core import PydanticUndefinedType

-from databao_context_engine.pluginlib.config import
+from databao_context_engine.pluginlib.config import (
+    ConfigPropertyAnnotation,
+    ConfigPropertyDefinition,
+    ConfigSinglePropertyDefinition,
+    ConfigUnionPropertyDefinition,
+)


 def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
-    return _get_property_list_from_type(parent_type=root_type
+    return _get_property_list_from_type(parent_type=root_type)


-def _get_property_list_from_type(*, parent_type: type
+def _get_property_list_from_type(*, parent_type: type) -> list[ConfigPropertyDefinition]:
     if is_dataclass(parent_type):
         return _get_property_list_from_dataclass(parent_type=parent_type)

@@ -24,20 +39,15 @@ def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> li
         # issubclass is raising a TypeError: issubclass() arg 1 must be a class
         pass

-    return _get_property_list_from_type_hints(parent_type=parent_type
+    return _get_property_list_from_type_hints(parent_type=parent_type)


-def _get_property_list_from_type_hints(*, parent_type: type
+def _get_property_list_from_type_hints(*, parent_type: type) -> list[ConfigPropertyDefinition]:
     try:
         type_hints = get_type_hints(parent_type, include_extras=True)
-    except TypeError
-
-
-            return []
-        else:
-            # If we're evaluating a nested property, we want to propagate the exception
-            # to let the parent property know that this type should be ignored
-            raise e
+    except TypeError:
+        # Return an empty list of properties for any type that is not an object (e.g: primitives like str or containers like dict, list, tuple, etc.
+        return []

     result = []
     for property_key, property_type in type_hints.items():
@@ -54,18 +64,14 @@ def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyD
         raise ValueError(f"{parent_type} is not a dataclass")

     dataclass_fields = fields(parent_type)
+    type_hints = get_type_hints(parent_type, include_extras=True)

     result = []
     for field in dataclass_fields:
         has_field_default = field.default is not None and field.default != MISSING

-        if
-
-                property_type = _evaluate_type_string(field.type)
-            except Exception:
-                continue
-        else:
-            property_type = field.type
+        # Use the type hints if the field type wasn't resolved (aka. if it is a ForwardRef or a str)
+        property_type = type_hints[field.name] if isinstance(field.type, ForwardRef | str) else field.type

         property_for_field = _create_property(
             property_type=property_type,
@@ -84,6 +90,10 @@ def _get_property_list_from_pydantic_base_model(parent_type: type):
     if not issubclass(parent_type, BaseModel):
         raise ValueError(f"{parent_type} is not a Pydantic BaseModel")

+    if any(isinstance(field.annotation, ForwardRef) for field in parent_type.model_fields.values()):
+        # If any field's future type wasn't resolved yet, we rebuild the model to resolve them
+        parent_type.model_rebuild(force=True)
+
     pydantic_fields = parent_type.model_fields
     result = []

@@ -123,28 +133,58 @@ def _create_property(
     if annotation is not None and annotation.ignored_for_config_wizard:
         return None

-
+    actual_property_types = _read_actual_property_type(property_type)

-
-
-
+    required = annotation.required if annotation else is_property_required
+    secret = annotation.secret if annotation else False
+
+    if len(actual_property_types) > 1:
+        type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
+
+        for union_type in actual_property_types:
+            nested_props = _get_property_list_from_type(parent_type=union_type)
+
+            type_properties[union_type] = nested_props
+
+        return ConfigUnionPropertyDefinition(
+            property_key=property_name,
+            types=actual_property_types,
+            type_properties=type_properties,
+        )
+
+    actual_property_type = actual_property_types[0]
+    nested_properties = _get_property_list_from_type(parent_type=actual_property_type)
+
+    if len(nested_properties) == 0 and _is_mapping_or_iterable(actual_property_type):
+        # Ignore Iterables and Mappings for which we didn't resolve nested properties
+        # (TypedDict is a Mapping but since we manage to resolve nested properties, it won't be ignored)
         return None

+    resolved_type = actual_property_type if not nested_properties else None
     default_value = compute_default_value(
         annotation=annotation,
         property_default=property_default,
         has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
     )

-    return
+    return ConfigSinglePropertyDefinition(
         property_key=property_name,
-        property_type=
-        required=
+        property_type=resolved_type,
+        required=required,
         default_value=default_value,
-        nested_properties=nested_properties
+        nested_properties=nested_properties or None,
+        secret=secret,
     )


+def _is_mapping_or_iterable(property_type: type):
+    # For types like list[str], we need to get the origin (ie. list) to use in issubclass
+    origin = get_origin(property_type)
+
+    # We make sure to not return True for str, which is an Iterable
+    return property_type is not str and issubclass(origin if origin else property_type, (Mapping, Iterable))
+
+
 def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
     if get_origin(property_type) is Annotated:
         return next(
@@ -155,21 +195,15 @@ def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation |
     return None


-def _read_actual_property_type(property_type: type) -> type:
+def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
     property_type_origin = get_origin(property_type)

     if property_type_origin is Annotated:
-        return property_type.__origin__  # type: ignore[attr-defined]
-
-
-        if len(type_args) == 2 and type(None) in type_args:
-            # Uses the actual type T when the Union is "T | None" (or "None | T")
-            return next(arg for arg in type_args if arg is not None)
-        else:
-            # Ignoring Union types when it is not used as type | None as we wouldn't which type to pick
-            return type(None)
+        return _read_actual_property_type(property_type.__origin__)  # type: ignore[attr-defined]
+    if property_type_origin in (Union, types.UnionType):
+        return tuple(arg for arg in get_args(property_type) if arg is not type(None))

-    return property_type
+    return (property_type,)


 def compute_default_value(
@@ -185,18 +219,3 @@ def compute_default_value(
         return str(property_default)

     return None
-
-
-def _evaluate_type_string(property_type: str) -> type:
-    try:
-        # Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
-        return _internal._typing_extra.eval_type(property_type)
-    except Exception as initial_error:
-        try:
-            # Try to convert it ourselves if Pydantic didn't work
-            return ForwardRef(property_type)._evaluate(  # type: ignore[return-value]
-                globalns=globals(), localns=locals(), recursive_guard=frozenset()
-            )
-        except Exception as e:
-            # Ignore if we didn't manage to convert the str to a type
-            raise e from initial_error
|
|
|
1
|
+
from databao_context_engine.llm.api import download_ollama_models_if_needed, install_ollama_if_needed
|
|
2
|
+
from databao_context_engine.llm.errors import OllamaError, OllamaPermanentError, OllamaTransientError
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"install_ollama_if_needed",
|
|
6
|
+
"download_ollama_models_if_needed",
|
|
7
|
+
"OllamaError",
|
|
8
|
+
"OllamaTransientError",
|
|
9
|
+
"OllamaPermanentError",
|
|
10
|
+
]
|
|
--- /dev/null
+++ b/databao_context_engine/llm/api.py
@@ -0,0 +1,57 @@
+from pathlib import Path
+
+from databao_context_engine.llm.errors import OllamaError
+from databao_context_engine.llm.factory import (
+    DEFAULT_DESCRIPTION_GENERATOR_MODEL,
+    DEFAULT_EMBED_MODEL_ID,
+    create_ollama_service,
+)
+from databao_context_engine.llm.install import resolve_ollama_bin
+
+
+def install_ollama_if_needed() -> Path:
+    """Install the Ollama CLI locally if needed.
+
+    This will look for any existing installation of Ollama on the system. If none is found, it will install it locally.
+
+    Here is the priority order of how it looks for an installed Ollama CLI binary:
+    1. Look at the path defined in the DCE_OLLAMA_BIN env var, if it is set
+    2. Look for `ollama` in the PATH
+    3. Look for a DCE-managed installation in the global DCE path
+
+    If Ollama is not found, it will get installed as a DCE-managed installation in the global DCE path.
+
+    Returns:
+        The path to the Ollama CLI executable.
+    """
+    return Path(resolve_ollama_bin())
+
+
+def download_ollama_models_if_needed(
+    *, download_embed_model: bool = True, download_description_generator_model: bool = False
+) -> None:
+    """Download the Ollama models required to run DCE if needed.
+
+    If the models were already downloaded, this method will do nothing.
+
+    If no Ollama CLI is found on the system, this method will install one as a DCE-managed installation in the global DCE path.
+
+    Args:
+        download_embed_model: Whether to download the embedding model.
+        download_description_generator_model: Whether to download the description generator model.
+
+    Raises:
+        OllamaError: If there is an error downloading one of the models.
+    """
+    ollama_service = create_ollama_service()
+
+    if download_embed_model:
+        try:
+            ollama_service.pull_model_if_needed(model=DEFAULT_EMBED_MODEL_ID)
+        except OllamaError as e:
+            raise e
+    if download_description_generator_model:
+        try:
+            ollama_service.pull_model_if_needed(model=DEFAULT_DESCRIPTION_GENERATOR_MODEL)
+        except OllamaError as e:
+            raise e
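Together with the new `llm/__init__.py` above, this file defines the package's public LLM surface. A hedged usage sketch; it assumes databao-context-engine 0.1.5 is installed and that the model pulls can reach the network:

```python
# Hedged usage sketch of the public API from llm/__init__.py and llm/api.py above.
from databao_context_engine.llm import (
    OllamaError,
    download_ollama_models_if_needed,
    install_ollama_if_needed,
)

ollama_bin = install_ollama_if_needed()  # resolves or installs the Ollama CLI
print(f"Using Ollama binary at {ollama_bin}")

try:
    # The embedding model is pulled by default; the description model is opt-in
    download_ollama_models_if_needed(download_description_generator_model=True)
except OllamaError as exc:
    print(f"Model download failed: {exc}")
```

Note that the `except OllamaError as e: raise e` blocks in `download_ollama_models_if_needed` re-raise unchanged, so callers see the original `OllamaError` exactly as if the wrapper were absent.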
--- a/databao_context_engine/llm/descriptions/ollama.py
+++ b/databao_context_engine/llm/descriptions/ollama.py
@@ -16,6 +16,4 @@ class OllamaDescriptionProvider(DescriptionProvider):
         return self._model_id

     def describe(self, text: str, context: str) -> str:
-
-
-        return description
+        return self._service.describe(model=self._model_id, text=text, context=context)
--- a/databao_context_engine/llm/errors.py
+++ b/databao_context_engine/llm/errors.py
@@ -3,14 +3,8 @@ class OllamaError(Exception):


 class OllamaTransientError(OllamaError):
-    """
-    Errors that are likely temporary (network issues, timeouts, 5xx, etc.).
-    Typically worth retrying.
-    """
+    """Errors that are likely temporary (network issues, timeouts, 5xx, etc.), typically worth retrying."""


 class OllamaPermanentError(OllamaError):
-    """
-    Errors that are unlikely to succeed on retry without changing inputs
-    or configuration (4xx, bad response schema, etc.).
-    """
+    """Errors that are unlikely to succeed on retry without changing inputs or configuration (4xx, bad response schema, etc.)."""
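The flattened docstrings also restate the intended contract: transient errors are worth retrying, permanent ones are not without changed inputs or configuration. A hypothetical retry helper built on that split; the helper itself is illustrative and not part of the package:

```python
# Hypothetical retry helper layered on the transient/permanent split above;
# with_retries is illustrative and not part of databao-context-engine.
import time
from typing import Callable, TypeVar

from databao_context_engine.llm.errors import OllamaTransientError

T = TypeVar("T")


def with_retries(fn: Callable[[], T], attempts: int = 3, delay_s: float = 1.0) -> T:
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except OllamaTransientError:
            # Network issues, timeouts, 5xx: retrying may help
            if attempt == attempts:
                raise
            time.sleep(delay_s)
    raise AssertionError("unreachable")  # the loop always returns or raises


# OllamaPermanentError is deliberately not caught: retrying won't help,
# so it propagates to the caller on the first attempt.
```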
--- a/databao_context_engine/llm/factory.py
+++ b/databao_context_engine/llm/factory.py
@@ -5,6 +5,9 @@ from databao_context_engine.llm.install import resolve_ollama_bin
 from databao_context_engine.llm.runtime import OllamaRuntime
 from databao_context_engine.llm.service import OllamaService

+DEFAULT_EMBED_MODEL_ID = "nomic-embed-text:v1.5"
+DEFAULT_DESCRIPTION_GENERATOR_MODEL = "llama3.2:1b"
+

 def _create_ollama_service_common(
     *,
@@ -39,7 +42,7 @@ def create_ollama_service(
 def create_ollama_embedding_provider(
     service: OllamaService,
     *,
-    model_id: str =
+    model_id: str = DEFAULT_EMBED_MODEL_ID,
     dim: int = 768,
     pull_if_needed: bool = True,
 ) -> OllamaEmbeddingProvider:
@@ -52,7 +55,7 @@ def create_ollama_embedding_provider(
 def create_ollama_description_provider(
     service: OllamaService,
     *,
-    model_id: str =
+    model_id: str = DEFAULT_DESCRIPTION_GENERATOR_MODEL,
     pull_if_needed: bool = True,
 ):
     if pull_if_needed:
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def resolve_ollama_bin() -> str:
|
|
52
|
-
"""
|
|
53
|
-
Decide which `ollama` binary to use, in this order:
|
|
52
|
+
"""Decide which `ollama` binary to use.
|
|
54
53
|
|
|
54
|
+
Here is the priority order:
|
|
55
55
|
1. DCE_OLLAMA_BIN env var, if set and exists
|
|
56
56
|
2. `ollama` found on PATH
|
|
57
57
|
3. Managed installation under MANAGED_OLLAMA_BIN
|
|
58
58
|
|
|
59
|
-
Returns
|
|
59
|
+
Returns:
|
|
60
|
+
The full path to the binary
|
|
60
61
|
"""
|
|
61
62
|
override = os.environ.get("DCE_OLLAMA_BIN")
|
|
62
63
|
if override:
|
|
@@ -76,9 +77,7 @@ def resolve_ollama_bin() -> str:
|
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
def _detect_platform() -> str:
|
|
79
|
-
"""
|
|
80
|
-
Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'.
|
|
81
|
-
"""
|
|
80
|
+
"""Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'."""
|
|
82
81
|
os_name = sys.platform.lower()
|
|
83
82
|
arch = (os.uname().machine if hasattr(os, "uname") else "").lower()
|
|
84
83
|
|
|
@@ -96,27 +95,25 @@ def _detect_platform() -> str:
|
|
|
96
95
|
raise RuntimeError(f"Unsupported OS/arch: os={os_name!r} arch={arch!r}")
|
|
97
96
|
|
|
98
97
|
|
|
99
|
-
def
|
|
100
|
-
"""
|
|
101
|
-
Download to a temporary file and return its path.
|
|
102
|
-
"""
|
|
98
|
+
def _download_artifact_to_temp(artifact_version: str, artifact_name: str) -> Path:
|
|
99
|
+
"""Download to a temporary file and return its path."""
|
|
103
100
|
import urllib.request
|
|
104
101
|
|
|
102
|
+
artifact_url = f"https://github.com/ollama/ollama/releases/download/{artifact_version}/{artifact_name}"
|
|
103
|
+
|
|
105
104
|
tmp_dir = Path(tempfile.mkdtemp(prefix="ollama-download-"))
|
|
106
|
-
file_name =
|
|
105
|
+
file_name = artifact_url.rsplit("/", 1)[-1]
|
|
107
106
|
dest = tmp_dir / file_name
|
|
108
107
|
|
|
109
|
-
logger.info("Downloading %s to %s",
|
|
110
|
-
with urllib.request.urlopen(
|
|
108
|
+
logger.info("Downloading %s to %s", artifact_url, dest)
|
|
109
|
+
with urllib.request.urlopen(artifact_url) as resp, dest.open("wb") as out:
|
|
111
110
|
shutil.copyfileobj(resp, out)
|
|
112
111
|
|
|
113
112
|
return dest
|
|
114
113
|
|
|
115
114
|
|
|
116
115
|
def _verify_sha256(path: Path, expected_hex: str) -> None:
|
|
117
|
-
"""
|
|
118
|
-
Verify SHA-256 of path matches expected_hex
|
|
119
|
-
"""
|
|
116
|
+
"""Verify SHA-256 of path matches expected_hex."""
|
|
120
117
|
h = hashlib.sha256()
|
|
121
118
|
with path.open("rb") as f:
|
|
122
119
|
for chunk in iter(lambda: f.read(8192), b""):
|
|
@@ -127,36 +124,33 @@ def _verify_sha256(path: Path, expected_hex: str) -> None:
|
|
|
127
124
|
|
|
128
125
|
|
|
129
126
|
def _extract_archive(archive: Path, target_dir: Path) -> None:
|
|
130
|
-
"""
|
|
131
|
-
Extract archive into target_dir.
|
|
132
|
-
"""
|
|
127
|
+
"""Extract archive into target_dir."""
|
|
133
128
|
name = archive.name.lower()
|
|
134
129
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
135
130
|
|
|
136
131
|
if name.endswith(".zip"):
|
|
137
132
|
with ZipFile(archive, "r") as zf:
|
|
138
|
-
|
|
133
|
+
# There is no built-in protection against zip bombs in ZipFile.
|
|
134
|
+
# However, we previously checked the sha256 of the downloaded archive and we trust the origin (GitHub repo of Ollama)
|
|
135
|
+
zf.extractall(target_dir) # noqa: S202
|
|
139
136
|
elif name.endswith(".tgz") or name.endswith(".tar.gz"):
|
|
140
137
|
with tarfile.open(archive, "r:gz") as tf:
|
|
141
|
-
tf.extractall(target_dir)
|
|
138
|
+
tf.extractall(target_dir, filter="data")
|
|
142
139
|
else:
|
|
143
140
|
raise RuntimeError(f"Unsupported archive format: {archive}")
|
|
144
141
|
|
|
145
142
|
|
|
146
143
|
def _ensure_executable(path: Path) -> None:
|
|
147
|
-
"""
|
|
148
|
-
Mark path as executable
|
|
149
|
-
"""
|
|
144
|
+
"""Mark path as executable."""
|
|
150
145
|
try:
|
|
151
146
|
mode = path.stat().st_mode
|
|
152
147
|
path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
153
148
|
except Exception:
|
|
154
|
-
|
|
149
|
+
logger.debug("Failed to mark %s as executable", path, exc_info=True, stack_info=True)
|
|
155
150
|
|
|
156
151
|
|
|
157
152
|
def install_ollama_to(target: Path) -> None:
|
|
158
|
-
"""
|
|
159
|
-
Ensure an Ollama binary exists.
|
|
153
|
+
"""Ensure an Ollama binary exist.
|
|
160
154
|
|
|
161
155
|
If it doesn't exist, this will:
|
|
162
156
|
- detect OS
|
|
@@ -164,6 +158,9 @@ def install_ollama_to(target: Path) -> None:
|
|
|
164
158
|
- verify its SHA-256 checksum
|
|
165
159
|
- extract into the installation directory
|
|
166
160
|
- make the binary executable
|
|
161
|
+
|
|
162
|
+
Raises:
|
|
163
|
+
RuntimeError: If the user's platform is not supported
|
|
167
164
|
"""
|
|
168
165
|
target = target.expanduser()
|
|
169
166
|
if target.parent.name == "bin":
|
|
@@ -179,8 +176,7 @@ def install_ollama_to(target: Path) -> None:
|
|
|
179
176
|
except KeyError as e:
|
|
180
177
|
raise RuntimeError(f"Unsupported platform: {platform_key}") from e
|
|
181
178
|
|
|
182
|
-
|
|
183
|
-
archive_path = _download_to_temp(url)
|
|
179
|
+
archive_path = _download_artifact_to_temp(DEFAULT_VERSION, artifact.name)
|
|
184
180
|
|
|
185
181
|
try:
|
|
186
182
|
_verify_sha256(archive_path, artifact.sha256)
|
|
@@ -224,4 +220,4 @@ def install_ollama_to(target: Path) -> None:
|
|
|
224
220
|
try:
|
|
225
221
|
archive_path.unlink(missing_ok=True)
|
|
226
222
|
except Exception:
|
|
227
|
-
|
|
223
|
+
logger.debug("Failed to remove temporary archive %s", archive_path, exc_info=True, stack_info=True)
|
|
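Two hardening tweaks stand out above: the tar path now extracts with `filter="data"`, which rejects absolute paths, `..` traversal, links escaping the destination, and special files (and avoids the Python 3.12+ deprecation warning for filter-less `extractall`), while the zip path keeps a plain `extractall` behind the SHA-256 check, as the new comments explain. A minimal standalone illustration of what the data filter blocks:

```python
# Standalone illustration of tarfile's "data" filter (Python 3.12+, also
# backported to recent 3.8-3.11 point releases); not code from the package.
import io
import tarfile

# Build an in-memory archive containing a path-traversal member
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tf:
    evil = tarfile.TarInfo("../escape.txt")
    payload = b"oops"
    evil.size = len(payload)
    tf.addfile(evil, io.BytesIO(payload))

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r:gz") as tf:
    try:
        tf.extractall("demo-target", filter="data")
    except tarfile.FilterError as exc:
        print(f"blocked by the data filter: {exc}")  # member escapes the destination
```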
--- a/databao_context_engine/llm/runtime.py
+++ b/databao_context_engine/llm/runtime.py
@@ -26,7 +26,7 @@ class OllamaRuntime:

         stdout = subprocess.DEVNULL

-
+        return subprocess.Popen(  # noqa: S603 We're always running Ollama
             cmd,
             cwd=str(self._config.work_dir) if self._config.work_dir else None,
             env=env,
@@ -36,8 +36,6 @@ class OllamaRuntime:
             close_fds=os.name != "nt",
         )

-        return proc
-
     def start_and_await(
         self,
         *,
@@ -62,11 +60,11 @@ class OllamaRuntime:
             try:
                 proc.terminate()
             except Exception:
-
+                logger.debug("Failed to terminate Ollama server", exc_info=True, stack_info=True)
             try:
                 proc.kill()
             except Exception:
-
+                logger.debug("Failed to kill Ollama server", exc_info=True, stack_info=True)

             raise TimeoutError(
                 f"Timed out waiting for Ollama to become healthy at http://{self._config.host}:{self._config.port}"
--- a/databao_context_engine/llm/service.py
+++ b/databao_context_engine/llm/service.py
@@ -36,9 +36,7 @@ class OllamaService:
         return [float(x) for x in vec]

     def describe(self, *, model: str, text: str, context: str) -> str:
-        """
-        Ask Ollama to generate a short description for `text`
-        """
+        """Ask Ollama to generate a short description for `text`."""
         prompt = self._build_description_prompt(text=text, context=context)

         payload: dict[str, Any] = {"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}}
--- a/databao_context_engine/mcp/mcp_runner.py
+++ b/databao_context_engine/mcp/mcp_runner.py
@@ -2,15 +2,17 @@ import logging
 from pathlib import Path

 from databao_context_engine.mcp.mcp_server import McpServer, McpTransport
+from databao_context_engine.project.layout import ensure_project_dir

 logger = logging.getLogger(__name__)


 def run_mcp_server(
     project_dir: Path,
-    run_name: str | None,
     transport: McpTransport,
     host: str | None = None,
     port: int | None = None,
 ) -> None:
-
+    ensure_project_dir(project_dir=project_dir)
+
+    McpServer(project_dir, host, port).run(transport)
--- a/databao_context_engine/mcp/mcp_server.py
+++ b/databao_context_engine/mcp/mcp_server.py
@@ -1,5 +1,6 @@
 import logging
 from contextlib import asynccontextmanager
+from datetime import date
 from pathlib import Path
 from typing import Literal

@@ -7,8 +8,6 @@ from mcp.server import FastMCP
 from mcp.types import ToolAnnotations

 from databao_context_engine import DatabaoContextEngine
-from databao_context_engine.mcp.all_results_tool import run_all_results_tool
-from databao_context_engine.mcp.retrieve_tool import run_retrieve_tool

 logger = logging.getLogger(__name__)

@@ -26,12 +25,10 @@ class McpServer:
     def __init__(
         self,
         project_dir: Path,
-        run_name: str | None,
         host: str | None = None,
         port: int | None = None,
     ):
         self._databao_context_engine = DatabaoContextEngine(project_dir)
-        self._run_name = run_name

         self._mcp_server = self._create_mcp_server(host, port)

@@ -43,19 +40,20 @@ class McpServer:
             annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
         )
         def all_results_tool():
-            return
+            return self._databao_context_engine.get_all_contexts_formatted()

         @mcp.tool(
             description="Retrieve the context built from various resources, including databases, dbt tools, plain and structured files, to retrieve relevant information",
             annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
         )
         def retrieve_tool(text: str, limit: int | None):
-
-
-
-
-
-
+            retrieve_results = self._databao_context_engine.search_context(retrieve_text=text, limit=limit)
+
+            display_results = [context_search_result.context_result for context_search_result in retrieve_results]
+
+            display_results.append(f"\nToday's date is {date.today()}")
+
+            return "\n".join(display_results)

         return mcp

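With `run_name` gone from both `run_mcp_server` and `McpServer`, launching the server reduces to a project directory and a transport, validated up front by `ensure_project_dir`. A hedged launch sketch; the `"stdio"` transport value is an assumption about the `McpTransport` literal, and the project path is a placeholder:

```python
# Hedged sketch of starting the MCP server after this change. "stdio" is an
# assumed member of the McpTransport literal; the project path is a placeholder.
from pathlib import Path

from databao_context_engine.mcp.mcp_runner import run_mcp_server

run_mcp_server(
    project_dir=Path("./my-dce-project"),  # checked via ensure_project_dir first
    transport="stdio",
)
```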