databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. databao_context_engine/__init__.py +32 -7
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +82 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
  8. databao_context_engine/cli/add_datasource_config.py +49 -44
  9. databao_context_engine/cli/commands.py +40 -55
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +127 -0
  12. databao_context_engine/databao_context_project_manager.py +147 -30
  13. databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
  14. databao_context_engine/datasources/datasource_context.py +90 -0
  15. databao_context_engine/datasources/datasource_discovery.py +143 -0
  16. databao_context_engine/datasources/types.py +194 -0
  17. databao_context_engine/generate_configs_schemas.py +4 -5
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +76 -57
  20. databao_context_engine/llm/__init__.py +10 -0
  21. databao_context_engine/llm/api.py +57 -0
  22. databao_context_engine/llm/descriptions/ollama.py +1 -3
  23. databao_context_engine/llm/errors.py +2 -8
  24. databao_context_engine/llm/factory.py +5 -2
  25. databao_context_engine/llm/install.py +26 -30
  26. databao_context_engine/llm/runtime.py +3 -5
  27. databao_context_engine/llm/service.py +1 -3
  28. databao_context_engine/mcp/mcp_runner.py +4 -2
  29. databao_context_engine/mcp/mcp_server.py +9 -11
  30. databao_context_engine/plugin_loader.py +110 -0
  31. databao_context_engine/pluginlib/build_plugin.py +12 -29
  32. databao_context_engine/pluginlib/config.py +16 -2
  33. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  34. databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
  35. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
  36. databao_context_engine/plugins/databases/base_introspector.py +11 -12
  37. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  38. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
  39. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  40. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
  41. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  42. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  43. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  44. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  45. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
  46. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  47. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
  48. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  49. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  50. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
  51. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  52. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  53. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
  54. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  55. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  56. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  57. databao_context_engine/plugins/duckdb_tools.py +18 -0
  58. databao_context_engine/plugins/files/__init__.py +0 -0
  59. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  60. databao_context_engine/plugins/plugin_loader.py +58 -52
  61. databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
  62. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  63. databao_context_engine/project/info.py +34 -2
  64. databao_context_engine/project/init_project.py +16 -7
  65. databao_context_engine/project/layout.py +14 -15
  66. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  67. databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
  68. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
  69. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
  70. databao_context_engine/serialization/__init__.py +0 -0
  71. databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
  72. databao_context_engine/services/chunk_embedding_service.py +23 -11
  73. databao_context_engine/services/factories.py +1 -46
  74. databao_context_engine/services/persistence_service.py +11 -11
  75. databao_context_engine/storage/connection.py +11 -7
  76. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  77. databao_context_engine/storage/migrate.py +3 -5
  78. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  79. databao_context_engine/storage/models.py +2 -23
  80. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  81. databao_context_engine/storage/repositories/factories.py +1 -12
  82. databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
  83. databao_context_engine/system/properties.py +4 -2
  84. databao_context_engine-0.1.5.dist-info/METADATA +228 -0
  85. databao_context_engine-0.1.5.dist-info/RECORD +135 -0
  86. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
  87. databao_context_engine/build_sources/internal/build_service.py +0 -77
  88. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  89. databao_context_engine/build_sources/internal/export_results.py +0 -43
  90. databao_context_engine/build_sources/public/api.py +0 -4
  91. databao_context_engine/databao_engine.py +0 -85
  92. databao_context_engine/datasource_config/add_config.py +0 -50
  93. databao_context_engine/datasource_config/datasource_context.py +0 -60
  94. databao_context_engine/mcp/all_results_tool.py +0 -5
  95. databao_context_engine/mcp/retrieve_tool.py +0 -22
  96. databao_context_engine/plugins/databases/athena_introspector.py +0 -101
  97. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  98. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  99. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  100. databao_context_engine/project/datasource_discovery.py +0 -141
  101. databao_context_engine/project/runs.py +0 -39
  102. databao_context_engine/project/types.py +0 -134
  103. databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
  104. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
  105. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  106. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  107. databao_context_engine/services/run_name_policy.py +0 -8
  108. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  109. databao_context_engine/storage/repositories/run_repository.py +0 -157
  110. databao_context_engine-0.1.1.dist-info/METADATA +0 -186
  111. databao_context_engine-0.1.1.dist-info/RECORD +0 -135
  112. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  113. /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
  114. /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
  115. /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
  116. /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
  117. /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
  118. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,33 @@
1
1
  import types
2
2
  from dataclasses import MISSING, fields, is_dataclass
3
- from typing import Annotated, Any, ForwardRef, Union, get_origin, get_type_hints
4
-
5
- from pydantic import BaseModel, _internal
3
+ from typing import (
4
+ Annotated,
5
+ Any,
6
+ ForwardRef,
7
+ Iterable,
8
+ Mapping,
9
+ Union,
10
+ get_args,
11
+ get_origin,
12
+ get_type_hints,
13
+ )
14
+
15
+ from pydantic import BaseModel
6
16
  from pydantic_core import PydanticUndefinedType
7
17
 
8
- from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation, ConfigPropertyDefinition
18
+ from databao_context_engine.pluginlib.config import (
19
+ ConfigPropertyAnnotation,
20
+ ConfigPropertyDefinition,
21
+ ConfigSinglePropertyDefinition,
22
+ ConfigUnionPropertyDefinition,
23
+ )
9
24
 
10
25
 
11
26
  def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
12
- return _get_property_list_from_type(parent_type=root_type, is_root_type=True)
27
+ return _get_property_list_from_type(parent_type=root_type)
13
28
 
14
29
 
15
- def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
30
+ def _get_property_list_from_type(*, parent_type: type) -> list[ConfigPropertyDefinition]:
16
31
  if is_dataclass(parent_type):
17
32
  return _get_property_list_from_dataclass(parent_type=parent_type)
18
33
 
@@ -24,20 +39,15 @@ def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> li
24
39
  # issubclass is raising a TypeError: issubclass() arg 1 must be a class
25
40
  pass
26
41
 
27
- return _get_property_list_from_type_hints(parent_type=parent_type, is_root_type=is_root_type)
42
+ return _get_property_list_from_type_hints(parent_type=parent_type)
28
43
 
29
44
 
30
- def _get_property_list_from_type_hints(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
45
+ def _get_property_list_from_type_hints(*, parent_type: type) -> list[ConfigPropertyDefinition]:
31
46
  try:
32
47
  type_hints = get_type_hints(parent_type, include_extras=True)
33
- except TypeError as e:
34
- if is_root_type:
35
- # Ignore root types that don't have type hints like dict or list
36
- return []
37
- else:
38
- # If we're evaluating a nested property, we want to propagate the exception
39
- # to let the parent property know that this type should be ignored
40
- raise e
48
+ except TypeError:
49
+ # Return an empty list of properties for any type that is not an object (e.g: primitives like str or containers like dict, list, tuple, etc.
50
+ return []
41
51
 
42
52
  result = []
43
53
  for property_key, property_type in type_hints.items():
@@ -54,18 +64,14 @@ def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyD
54
64
  raise ValueError(f"{parent_type} is not a dataclass")
55
65
 
56
66
  dataclass_fields = fields(parent_type)
67
+ type_hints = get_type_hints(parent_type, include_extras=True)
57
68
 
58
69
  result = []
59
70
  for field in dataclass_fields:
60
71
  has_field_default = field.default is not None and field.default != MISSING
61
72
 
62
- if isinstance(field.type, str):
63
- try:
64
- property_type = _evaluate_type_string(field.type)
65
- except Exception:
66
- continue
67
- else:
68
- property_type = field.type
73
+ # Use the type hints if the field type wasn't resolved (aka. if it is a ForwardRef or a str)
74
+ property_type = type_hints[field.name] if isinstance(field.type, ForwardRef | str) else field.type
69
75
 
70
76
  property_for_field = _create_property(
71
77
  property_type=property_type,
@@ -84,6 +90,10 @@ def _get_property_list_from_pydantic_base_model(parent_type: type):
84
90
  if not issubclass(parent_type, BaseModel):
85
91
  raise ValueError(f"{parent_type} is not a Pydantic BaseModel")
86
92
 
93
+ if any(isinstance(field.annotation, ForwardRef) for field in parent_type.model_fields.values()):
94
+ # If any field's future type wasn't resolved yet, we rebuild the model to resolve them
95
+ parent_type.model_rebuild(force=True)
96
+
87
97
  pydantic_fields = parent_type.model_fields
88
98
  result = []
89
99
 
@@ -123,28 +133,58 @@ def _create_property(
123
133
  if annotation is not None and annotation.ignored_for_config_wizard:
124
134
  return None
125
135
 
126
- actual_property_type = _read_actual_property_type(property_type)
136
+ actual_property_types = _read_actual_property_type(property_type)
127
137
 
128
- try:
129
- nested_properties = _get_property_list_from_type(parent_type=actual_property_type, is_root_type=False)
130
- except TypeError:
138
+ required = annotation.required if annotation else is_property_required
139
+ secret = annotation.secret if annotation else False
140
+
141
+ if len(actual_property_types) > 1:
142
+ type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
143
+
144
+ for union_type in actual_property_types:
145
+ nested_props = _get_property_list_from_type(parent_type=union_type)
146
+
147
+ type_properties[union_type] = nested_props
148
+
149
+ return ConfigUnionPropertyDefinition(
150
+ property_key=property_name,
151
+ types=actual_property_types,
152
+ type_properties=type_properties,
153
+ )
154
+
155
+ actual_property_type = actual_property_types[0]
156
+ nested_properties = _get_property_list_from_type(parent_type=actual_property_type)
157
+
158
+ if len(nested_properties) == 0 and _is_mapping_or_iterable(actual_property_type):
159
+ # Ignore Iterables and Mappings for which we didn't resolve nested properties
160
+ # (TypedDict is a Mapping but since we manage to resolve nested properties, it won't be ignored)
131
161
  return None
132
162
 
163
+ resolved_type = actual_property_type if not nested_properties else None
133
164
  default_value = compute_default_value(
134
165
  annotation=annotation,
135
166
  property_default=property_default,
136
167
  has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
137
168
  )
138
169
 
139
- return ConfigPropertyDefinition(
170
+ return ConfigSinglePropertyDefinition(
140
171
  property_key=property_name,
141
- property_type=actual_property_type if not nested_properties else None,
142
- required=annotation.required if annotation else is_property_required,
172
+ property_type=resolved_type,
173
+ required=required,
143
174
  default_value=default_value,
144
- nested_properties=nested_properties if nested_properties else None,
175
+ nested_properties=nested_properties or None,
176
+ secret=secret,
145
177
  )
146
178
 
147
179
 
180
+ def _is_mapping_or_iterable(property_type: type):
181
+ # For types like list[str], we need to get the origin (ie. list) to use in issubclass
182
+ origin = get_origin(property_type)
183
+
184
+ # We make sure to not return True for str, which is an Iterable
185
+ return property_type is not str and issubclass(origin if origin else property_type, (Mapping, Iterable))
186
+
187
+
148
188
  def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
149
189
  if get_origin(property_type) is Annotated:
150
190
  return next(
@@ -155,21 +195,15 @@ def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation |
155
195
  return None
156
196
 
157
197
 
158
- def _read_actual_property_type(property_type: type) -> type:
198
+ def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
159
199
  property_type_origin = get_origin(property_type)
160
200
 
161
201
  if property_type_origin is Annotated:
162
- return property_type.__origin__ # type: ignore[attr-defined]
163
- elif property_type_origin is Union or property_type_origin is types.UnionType:
164
- type_args = property_type.__args__ # type: ignore[attr-defined]
165
- if len(type_args) == 2 and type(None) in type_args:
166
- # Uses the actual type T when the Union is "T | None" (or "None | T")
167
- return next(arg for arg in type_args if arg is not None)
168
- else:
169
- # Ignoring Union types when it is not used as type | None as we wouldn't which type to pick
170
- return type(None)
202
+ return _read_actual_property_type(property_type.__origin__) # type: ignore[attr-defined]
203
+ if property_type_origin in (Union, types.UnionType):
204
+ return tuple(arg for arg in get_args(property_type) if arg is not type(None))
171
205
 
172
- return property_type
206
+ return (property_type,)
173
207
 
174
208
 
175
209
  def compute_default_value(
@@ -185,18 +219,3 @@ def compute_default_value(
185
219
  return str(property_default)
186
220
 
187
221
  return None
188
-
189
-
190
- def _evaluate_type_string(property_type: str) -> type:
191
- try:
192
- # Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
193
- return _internal._typing_extra.eval_type(property_type)
194
- except Exception as initial_error:
195
- try:
196
- # Try to convert it ourselves if Pydantic didn't work
197
- return ForwardRef(property_type)._evaluate( # type: ignore[return-value]
198
- globalns=globals(), localns=locals(), recursive_guard=frozenset()
199
- )
200
- except Exception as e:
201
- # Ignore if we didn't manage to convert the str to a type
202
- raise e from initial_error
@@ -0,0 +1,10 @@
1
+ from databao_context_engine.llm.api import download_ollama_models_if_needed, install_ollama_if_needed
2
+ from databao_context_engine.llm.errors import OllamaError, OllamaPermanentError, OllamaTransientError
3
+
4
+ __all__ = [
5
+ "install_ollama_if_needed",
6
+ "download_ollama_models_if_needed",
7
+ "OllamaError",
8
+ "OllamaTransientError",
9
+ "OllamaPermanentError",
10
+ ]
@@ -0,0 +1,57 @@
1
+ from pathlib import Path
2
+
3
+ from databao_context_engine.llm.errors import OllamaError
4
+ from databao_context_engine.llm.factory import (
5
+ DEFAULT_DESCRIPTION_GENERATOR_MODEL,
6
+ DEFAULT_EMBED_MODEL_ID,
7
+ create_ollama_service,
8
+ )
9
+ from databao_context_engine.llm.install import resolve_ollama_bin
10
+
11
+
12
+ def install_ollama_if_needed() -> Path:
13
+ """Install the Ollama CLI locally if needed.
14
+
15
+ This will look for any existing installation of Ollama on the system. If none is found, it will install it locally.
16
+
17
+ Here is the priority order of how it looks for an installed Ollama CLI binary:
18
+ 1. Look at the path defined in the DCE_OLLAMA_BIN env var, if it is set
19
+ 2. Look for `ollama` in the PATH
20
+ 3. Look for a DCE-managed installation in the global DCE path
21
+
22
+ If Ollama is not found, it will get installed as a DCE-managed installation in the global DCE path.
23
+
24
+ Returns:
25
+ The path to the Ollama CLI executable.
26
+ """
27
+ return Path(resolve_ollama_bin())
28
+
29
+
30
+ def download_ollama_models_if_needed(
31
+ *, download_embed_model: bool = True, download_description_generator_model: bool = False
32
+ ) -> None:
33
+ """Download the Ollama models required to run DCE if needed.
34
+
35
+ If the models were already downloaded, this method will do nothing.
36
+
37
+ If no Ollama CLI is found on the system, this method will install one as a DCE-managed installation in the global DCE path.
38
+
39
+ Args:
40
+ download_embed_model: Whether to download the embedding model.
41
+ download_description_generator_model: Whether to download the description generator model.
42
+
43
+ Raises:
44
+ OllamaError: If there is an error downloading one of the models.
45
+ """
46
+ ollama_service = create_ollama_service()
47
+
48
+ if download_embed_model:
49
+ try:
50
+ ollama_service.pull_model_if_needed(model=DEFAULT_EMBED_MODEL_ID)
51
+ except OllamaError as e:
52
+ raise e
53
+ if download_description_generator_model:
54
+ try:
55
+ ollama_service.pull_model_if_needed(model=DEFAULT_DESCRIPTION_GENERATOR_MODEL)
56
+ except OllamaError as e:
57
+ raise e
@@ -16,6 +16,4 @@ class OllamaDescriptionProvider(DescriptionProvider):
16
16
  return self._model_id
17
17
 
18
18
  def describe(self, text: str, context: str) -> str:
19
- description = self._service.describe(model=self._model_id, text=text, context=context)
20
-
21
- return description
19
+ return self._service.describe(model=self._model_id, text=text, context=context)
@@ -3,14 +3,8 @@ class OllamaError(Exception):
3
3
 
4
4
 
5
5
  class OllamaTransientError(OllamaError):
6
- """
7
- Errors that are likely temporary (network issues, timeouts, 5xx, etc.).
8
- Typically worth retrying.
9
- """
6
+ """Errors that are likely temporary (network issues, timeouts, 5xx, etc.), typically worth retrying."""
10
7
 
11
8
 
12
9
  class OllamaPermanentError(OllamaError):
13
- """
14
- Errors that are unlikely to succeed on retry without changing inputs
15
- or configuration (4xx, bad response schema, etc.).
16
- """
10
+ """Errors that are unlikely to succeed on retry without changing inputs or configuration (4xx, bad response schema, etc.)."""
@@ -5,6 +5,9 @@ from databao_context_engine.llm.install import resolve_ollama_bin
5
5
  from databao_context_engine.llm.runtime import OllamaRuntime
6
6
  from databao_context_engine.llm.service import OllamaService
7
7
 
8
+ DEFAULT_EMBED_MODEL_ID = "nomic-embed-text:v1.5"
9
+ DEFAULT_DESCRIPTION_GENERATOR_MODEL = "llama3.2:1b"
10
+
8
11
 
9
12
  def _create_ollama_service_common(
10
13
  *,
@@ -39,7 +42,7 @@ def create_ollama_service(
39
42
  def create_ollama_embedding_provider(
40
43
  service: OllamaService,
41
44
  *,
42
- model_id: str = "nomic-embed-text:v1.5",
45
+ model_id: str = DEFAULT_EMBED_MODEL_ID,
43
46
  dim: int = 768,
44
47
  pull_if_needed: bool = True,
45
48
  ) -> OllamaEmbeddingProvider:
@@ -52,7 +55,7 @@ def create_ollama_embedding_provider(
52
55
  def create_ollama_description_provider(
53
56
  service: OllamaService,
54
57
  *,
55
- model_id: str = "llama3.2:1b",
58
+ model_id: str = DEFAULT_DESCRIPTION_GENERATOR_MODEL,
56
59
  pull_if_needed: bool = True,
57
60
  ):
58
61
  if pull_if_needed:
@@ -49,14 +49,15 @@ ARTIFACTS: dict[str, ArtifactInfo] = {
49
49
 
50
50
 
51
51
  def resolve_ollama_bin() -> str:
52
- """
53
- Decide which `ollama` binary to use, in this order:
52
+ """Decide which `ollama` binary to use.
54
53
 
54
+ Here is the priority order:
55
55
  1. DCE_OLLAMA_BIN env var, if set and exists
56
56
  2. `ollama` found on PATH
57
57
  3. Managed installation under MANAGED_OLLAMA_BIN
58
58
 
59
- Returns the full path to the binary
59
+ Returns:
60
+ The full path to the binary
60
61
  """
61
62
  override = os.environ.get("DCE_OLLAMA_BIN")
62
63
  if override:
@@ -76,9 +77,7 @@ def resolve_ollama_bin() -> str:
76
77
 
77
78
 
78
79
  def _detect_platform() -> str:
79
- """
80
- Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'.
81
- """
80
+ """Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'."""
82
81
  os_name = sys.platform.lower()
83
82
  arch = (os.uname().machine if hasattr(os, "uname") else "").lower()
84
83
 
@@ -96,27 +95,25 @@ def _detect_platform() -> str:
96
95
  raise RuntimeError(f"Unsupported OS/arch: os={os_name!r} arch={arch!r}")
97
96
 
98
97
 
99
- def _download_to_temp(url: str) -> Path:
100
- """
101
- Download to a temporary file and return its path.
102
- """
98
+ def _download_artifact_to_temp(artifact_version: str, artifact_name: str) -> Path:
99
+ """Download to a temporary file and return its path."""
103
100
  import urllib.request
104
101
 
102
+ artifact_url = f"https://github.com/ollama/ollama/releases/download/{artifact_version}/{artifact_name}"
103
+
105
104
  tmp_dir = Path(tempfile.mkdtemp(prefix="ollama-download-"))
106
- file_name = url.rsplit("/", 1)[-1]
105
+ file_name = artifact_url.rsplit("/", 1)[-1]
107
106
  dest = tmp_dir / file_name
108
107
 
109
- logger.info("Downloading %s to %s", url, dest)
110
- with urllib.request.urlopen(url) as resp, dest.open("wb") as out:
108
+ logger.info("Downloading %s to %s", artifact_url, dest)
109
+ with urllib.request.urlopen(artifact_url) as resp, dest.open("wb") as out:
111
110
  shutil.copyfileobj(resp, out)
112
111
 
113
112
  return dest
114
113
 
115
114
 
116
115
  def _verify_sha256(path: Path, expected_hex: str) -> None:
117
- """
118
- Verify SHA-256 of path matches expected_hex
119
- """
116
+ """Verify SHA-256 of path matches expected_hex."""
120
117
  h = hashlib.sha256()
121
118
  with path.open("rb") as f:
122
119
  for chunk in iter(lambda: f.read(8192), b""):
@@ -127,36 +124,33 @@ def _verify_sha256(path: Path, expected_hex: str) -> None:
127
124
 
128
125
 
129
126
  def _extract_archive(archive: Path, target_dir: Path) -> None:
130
- """
131
- Extract archive into target_dir.
132
- """
127
+ """Extract archive into target_dir."""
133
128
  name = archive.name.lower()
134
129
  target_dir.mkdir(parents=True, exist_ok=True)
135
130
 
136
131
  if name.endswith(".zip"):
137
132
  with ZipFile(archive, "r") as zf:
138
- zf.extractall(target_dir)
133
+ # There is no built-in protection against zip bombs in ZipFile.
134
+ # However, we previously checked the sha256 of the downloaded archive and we trust the origin (GitHub repo of Ollama)
135
+ zf.extractall(target_dir) # noqa: S202
139
136
  elif name.endswith(".tgz") or name.endswith(".tar.gz"):
140
137
  with tarfile.open(archive, "r:gz") as tf:
141
- tf.extractall(target_dir)
138
+ tf.extractall(target_dir, filter="data")
142
139
  else:
143
140
  raise RuntimeError(f"Unsupported archive format: {archive}")
144
141
 
145
142
 
146
143
  def _ensure_executable(path: Path) -> None:
147
- """
148
- Mark path as executable
149
- """
144
+ """Mark path as executable."""
150
145
  try:
151
146
  mode = path.stat().st_mode
152
147
  path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
153
148
  except Exception:
154
- pass
149
+ logger.debug("Failed to mark %s as executable", path, exc_info=True, stack_info=True)
155
150
 
156
151
 
157
152
  def install_ollama_to(target: Path) -> None:
158
- """
159
- Ensure an Ollama binary exists.
153
+ """Ensure an Ollama binary exist.
160
154
 
161
155
  If it doesn't exist, this will:
162
156
  - detect OS
@@ -164,6 +158,9 @@ def install_ollama_to(target: Path) -> None:
164
158
  - verify its SHA-256 checksum
165
159
  - extract into the installation directory
166
160
  - make the binary executable
161
+
162
+ Raises:
163
+ RuntimeError: If the user's platform is not supported
167
164
  """
168
165
  target = target.expanduser()
169
166
  if target.parent.name == "bin":
@@ -179,8 +176,7 @@ def install_ollama_to(target: Path) -> None:
179
176
  except KeyError as e:
180
177
  raise RuntimeError(f"Unsupported platform: {platform_key}") from e
181
178
 
182
- url = f"https://github.com/ollama/ollama/releases/download/{DEFAULT_VERSION}/{artifact.name}"
183
- archive_path = _download_to_temp(url)
179
+ archive_path = _download_artifact_to_temp(DEFAULT_VERSION, artifact.name)
184
180
 
185
181
  try:
186
182
  _verify_sha256(archive_path, artifact.sha256)
@@ -224,4 +220,4 @@ def install_ollama_to(target: Path) -> None:
224
220
  try:
225
221
  archive_path.unlink(missing_ok=True)
226
222
  except Exception:
227
- pass
223
+ logger.debug("Failed to remove temporary archive %s", archive_path, exc_info=True, stack_info=True)
@@ -26,7 +26,7 @@ class OllamaRuntime:
26
26
 
27
27
  stdout = subprocess.DEVNULL
28
28
 
29
- proc = subprocess.Popen(
29
+ return subprocess.Popen( # noqa: S603 We're always running Ollama
30
30
  cmd,
31
31
  cwd=str(self._config.work_dir) if self._config.work_dir else None,
32
32
  env=env,
@@ -36,8 +36,6 @@ class OllamaRuntime:
36
36
  close_fds=os.name != "nt",
37
37
  )
38
38
 
39
- return proc
40
-
41
39
  def start_and_await(
42
40
  self,
43
41
  *,
@@ -62,11 +60,11 @@ class OllamaRuntime:
62
60
  try:
63
61
  proc.terminate()
64
62
  except Exception:
65
- pass
63
+ logger.debug("Failed to terminate Ollama server", exc_info=True, stack_info=True)
66
64
  try:
67
65
  proc.kill()
68
66
  except Exception:
69
- pass
67
+ logger.debug("Failed to kill Ollama server", exc_info=True, stack_info=True)
70
68
 
71
69
  raise TimeoutError(
72
70
  f"Timed out waiting for Ollama to become healthy at http://{self._config.host}:{self._config.port}"
@@ -36,9 +36,7 @@ class OllamaService:
36
36
  return [float(x) for x in vec]
37
37
 
38
38
  def describe(self, *, model: str, text: str, context: str) -> str:
39
- """
40
- Ask Ollama to generate a short description for `text`
41
- """
39
+ """Ask Ollama to generate a short description for `text`."""
42
40
  prompt = self._build_description_prompt(text=text, context=context)
43
41
 
44
42
  payload: dict[str, Any] = {"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}}
@@ -2,15 +2,17 @@ import logging
2
2
  from pathlib import Path
3
3
 
4
4
  from databao_context_engine.mcp.mcp_server import McpServer, McpTransport
5
+ from databao_context_engine.project.layout import ensure_project_dir
5
6
 
6
7
  logger = logging.getLogger(__name__)
7
8
 
8
9
 
9
10
  def run_mcp_server(
10
11
  project_dir: Path,
11
- run_name: str | None,
12
12
  transport: McpTransport,
13
13
  host: str | None = None,
14
14
  port: int | None = None,
15
15
  ) -> None:
16
- McpServer(project_dir, run_name, host, port).run(transport)
16
+ ensure_project_dir(project_dir=project_dir)
17
+
18
+ McpServer(project_dir, host, port).run(transport)
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  from contextlib import asynccontextmanager
3
+ from datetime import date
3
4
  from pathlib import Path
4
5
  from typing import Literal
5
6
 
@@ -7,8 +8,6 @@ from mcp.server import FastMCP
7
8
  from mcp.types import ToolAnnotations
8
9
 
9
10
  from databao_context_engine import DatabaoContextEngine
10
- from databao_context_engine.mcp.all_results_tool import run_all_results_tool
11
- from databao_context_engine.mcp.retrieve_tool import run_retrieve_tool
12
11
 
13
12
  logger = logging.getLogger(__name__)
14
13
 
@@ -26,12 +25,10 @@ class McpServer:
26
25
  def __init__(
27
26
  self,
28
27
  project_dir: Path,
29
- run_name: str | None,
30
28
  host: str | None = None,
31
29
  port: int | None = None,
32
30
  ):
33
31
  self._databao_context_engine = DatabaoContextEngine(project_dir)
34
- self._run_name = run_name
35
32
 
36
33
  self._mcp_server = self._create_mcp_server(host, port)
37
34
 
@@ -43,19 +40,20 @@ class McpServer:
43
40
  annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
44
41
  )
45
42
  def all_results_tool():
46
- return run_all_results_tool(self._databao_context_engine, self._run_name)
43
+ return self._databao_context_engine.get_all_contexts_formatted()
47
44
 
48
45
  @mcp.tool(
49
46
  description="Retrieve the context built from various resources, including databases, dbt tools, plain and structured files, to retrieve relevant information",
50
47
  annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
51
48
  )
52
49
  def retrieve_tool(text: str, limit: int | None):
53
- return run_retrieve_tool(
54
- databao_context_engine=self._databao_context_engine,
55
- run_name=self._run_name,
56
- text=text,
57
- limit=limit or 50,
58
- )
50
+ retrieve_results = self._databao_context_engine.search_context(retrieve_text=text, limit=limit)
51
+
52
+ display_results = [context_search_result.context_result for context_search_result in retrieve_results]
53
+
54
+ display_results.append(f"\nToday's date is {date.today()}")
55
+
56
+ return "\n".join(display_results)
59
57
 
60
58
  return mcp
61
59