databao-context-engine 0.1.2__py3-none-any.whl → 0.1.4.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. databao_context_engine/__init__.py +18 -6
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +84 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
  8. databao_context_engine/cli/add_datasource_config.py +41 -15
  9. databao_context_engine/cli/commands.py +12 -43
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +137 -0
  12. databao_context_engine/databao_context_project_manager.py +96 -6
  13. databao_context_engine/datasources/add_config.py +34 -0
  14. databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
  15. databao_context_engine/datasources/datasource_context.py +93 -0
  16. databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
  17. databao_context_engine/{project → datasources}/types.py +64 -15
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +67 -53
  20. databao_context_engine/llm/errors.py +2 -8
  21. databao_context_engine/llm/install.py +13 -20
  22. databao_context_engine/llm/service.py +1 -3
  23. databao_context_engine/mcp/mcp_runner.py +4 -2
  24. databao_context_engine/mcp/mcp_server.py +10 -10
  25. databao_context_engine/plugin_loader.py +111 -0
  26. databao_context_engine/pluginlib/build_plugin.py +25 -9
  27. databao_context_engine/pluginlib/config.py +16 -2
  28. databao_context_engine/plugins/base_db_plugin.py +5 -2
  29. databao_context_engine/plugins/databases/athena_introspector.py +85 -22
  30. databao_context_engine/plugins/databases/base_introspector.py +5 -3
  31. databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
  32. databao_context_engine/plugins/databases/duckdb_introspector.py +3 -5
  33. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  34. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  35. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  36. databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
  37. databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
  38. databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
  39. databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
  40. databao_context_engine/plugins/duckdb_tools.py +18 -0
  41. databao_context_engine/plugins/plugin_loader.py +43 -42
  42. databao_context_engine/plugins/resources/parquet_introspector.py +7 -19
  43. databao_context_engine/project/info.py +34 -2
  44. databao_context_engine/project/init_project.py +16 -7
  45. databao_context_engine/project/layout.py +3 -3
  46. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  47. databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
  48. databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
  49. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
  50. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
  51. databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
  52. databao_context_engine/services/chunk_embedding_service.py +23 -11
  53. databao_context_engine/services/factories.py +1 -46
  54. databao_context_engine/services/persistence_service.py +11 -11
  55. databao_context_engine/storage/connection.py +11 -7
  56. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  57. databao_context_engine/storage/migrate.py +2 -4
  58. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  59. databao_context_engine/storage/models.py +2 -23
  60. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  61. databao_context_engine/storage/repositories/factories.py +1 -12
  62. databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
  63. databao_context_engine/system/properties.py +4 -2
  64. databao_context_engine-0.1.4.dev1.dist-info/METADATA +75 -0
  65. databao_context_engine-0.1.4.dev1.dist-info/RECORD +125 -0
  66. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/WHEEL +1 -1
  67. databao_context_engine/build_sources/internal/build_service.py +0 -77
  68. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  69. databao_context_engine/build_sources/internal/export_results.py +0 -43
  70. databao_context_engine/build_sources/public/api.py +0 -4
  71. databao_context_engine/databao_engine.py +0 -85
  72. databao_context_engine/datasource_config/__init__.py +0 -0
  73. databao_context_engine/datasource_config/add_config.py +0 -50
  74. databao_context_engine/datasource_config/datasource_context.py +0 -60
  75. databao_context_engine/mcp/all_results_tool.py +0 -5
  76. databao_context_engine/mcp/retrieve_tool.py +0 -22
  77. databao_context_engine/project/runs.py +0 -39
  78. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  79. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  80. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  81. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  82. databao_context_engine/serialisation/__init__.py +0 -0
  83. databao_context_engine/services/run_name_policy.py +0 -8
  84. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  85. databao_context_engine/storage/repositories/run_repository.py +0 -157
  86. databao_context_engine-0.1.2.dist-info/METADATA +0 -187
  87. databao_context_engine-0.1.2.dist-info/RECORD +0 -135
  88. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  89. /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
  90. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.4.dev1.dist-info}/entry_points.txt +0 -0
databao_context_engine/datasources/types.py

@@ -38,12 +38,16 @@ PreparedDatasource = PreparedConfig | PreparedFile
 
 @dataclass(kw_only=True, frozen=True, eq=True)
 class DatasourceId:
-    """
-    The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
+    """The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
 
     e.g: "databases/my_postgres_datasource.yaml"
 
     Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
+
+    Attributes:
+        datasource_config_folder: The folder where the datasource's config file is located.
+        datasource_name: The name of the datasource.
+        config_file_suffix: The suffix of the config (or raw) file.
     """
 
     datasource_config_folder: str
@@ -78,18 +82,22 @@ class DatasourceId:
         return str(self.relative_path_to_config_file())
 
     def relative_path_to_config_file(self) -> Path:
-        """
-        Returns a path to the config file for this datasource.
+        """Return a path to the config file for this datasource.
 
         The returned path is relative to the src folder in the project.
+
+        Returns:
+            The path to the config file relative to the src folder in the project.
         """
         return Path(self.datasource_config_folder).joinpath(self.datasource_name + self.config_file_suffix)
 
     def relative_path_to_context_file(self) -> Path:
-        """
-        Returns a path to the config file for this datasource.
+        """Return a path to the config file for this datasource.
 
         The returned path is relative to an output run folder in the project.
+
+        Returns:
+            The path to the context file relative to the output folder in the project.
         """
         # Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
         suffix = ".yaml" if self.config_file_suffix == ".yaml" else (self.config_file_suffix + ".yaml")
@@ -97,13 +105,19 @@ class DatasourceId:
         return Path(self.datasource_config_folder).joinpath(self.datasource_name + suffix)
 
     @classmethod
-    def from_string_repr(cls, datasource_id_as_string: str):
-        """
-        Creates a DatasourceId from a string representation.
+    def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
+        """Create a DatasourceId from a string representation.
+
+        Args:
+            datasource_id_as_string: The string representation of a DatasourceId.
+                This is the path to the datasource's config file relative to the src folder in the project.
+                (e.g. "databases/my_postgres_datasource.yaml")
 
-        The string representation of a DatasourceId is the path to the datasource's config file relative to the src folder in the project.
+        Returns:
+            The DatasourceId instance created from the string representation.
 
-        e.g: "databases/my_postgres_datasource.yaml"
+        Raises:
+            ValueError: If the string representation is invalid (e.g. too many parent folders).
         """
         config_file_path = Path(datasource_id_as_string)
 
@@ -115,11 +129,15 @@ class DatasourceId:
         return DatasourceId.from_datasource_config_file_path(config_file_path)
 
     @classmethod
-    def from_datasource_config_file_path(cls, datasource_config_file: Path):
-        """
-        Creates a DatasourceId from a config file path.
+    def from_datasource_config_file_path(cls, datasource_config_file: Path) -> "DatasourceId":
+        """Create a DatasourceId from a config file path.
 
-        The `datasource_config_file` path provided can either be the config file path relative to the src folder or the full path to the config file.
+        Args:
+            datasource_config_file: The path to the datasource config file.
+                This path can either be the config file path relative to the src folder or the full path to the config file.
+
+        Returns:
+            The DatasourceId instance created from the config file path.
         """
         return DatasourceId(
             datasource_config_folder=datasource_config_file.parent.name,
@@ -127,8 +145,39 @@ class DatasourceId:
             config_file_suffix=datasource_config_file.suffix,
         )
 
+    @classmethod
+    def from_datasource_context_file_path(cls, datasource_context_file: Path) -> "DatasourceId":
+        """Create a DatasourceId from a context file path.
+
+        This factory handles the case where the context was generated from a raw file rather than from a config.
+        In that case, the context file name will look like "<my_datasource_name>.<raw_file_extension>.yaml"
+
+        Args:
+            datasource_context_file: The path to the datasource context file.
+
+        Returns:
+            The DatasourceId instance created from the context file path.
+        """
+        if len(datasource_context_file.suffixes) > 1 and datasource_context_file.suffix == ".yaml":
+            # If there is more than 1 suffix, we remove the latest suffix (.yaml) to keep only the actual datasource file suffix
+            context_file_name_without_yaml_extension = datasource_context_file.name[: -len(".yaml")]
+            datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
+
+        return DatasourceId(
+            datasource_config_folder=datasource_context_file.parent.name,
+            datasource_name=datasource_context_file.stem,
+            config_file_suffix=datasource_context_file.suffix,
+        )
+
 
 @dataclass
 class Datasource:
+    """A datasource contained in the project.
+
+    Attributes:
+        id: The unique identifier of the datasource.
+        type: The type of the datasource.
+    """
+
     id: DatasourceId
     type: DatasourceType
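The double-suffix handling in the new from_datasource_context_file_path is easiest to see on a raw-file datasource. A minimal pathlib-only sketch of the same logic (the file name "sales_report.csv.yaml" is a hypothetical example, not taken from the package):

    from pathlib import Path

    # Hypothetical context file produced for a raw CSV datasource:
    # "<datasource_name>.<raw_file_extension>.yaml"
    context_file = Path("files/sales_report.csv.yaml")

    # Mirrors from_datasource_context_file_path: strip the trailing ".yaml"
    # only when more than one suffix is present.
    if len(context_file.suffixes) > 1 and context_file.suffix == ".yaml":
        context_file = context_file.with_name(context_file.name[: -len(".yaml")])

    print(context_file.parent.name)  # "files"        -> datasource_config_folder
    print(context_file.stem)         # "sales_report" -> datasource_name
    print(context_file.suffix)       # ".csv"         -> config_file_suffix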
databao_context_engine/init_project.py

@@ -1,17 +1,39 @@
 from pathlib import Path
 
 from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
-from databao_context_engine.project.init_project import init_project_dir
+from databao_context_engine.project.init_project import InitProjectError, init_project_dir
 from databao_context_engine.project.layout import is_project_dir_valid
 
 
 def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
-    initialised_project_dir = init_project_dir(project_dir=project_dir)
+    """Initialize a Databao Context project in the given directory.
 
-    return DatabaoContextProjectManager(project_dir=initialised_project_dir)
+    Args:
+        project_dir: The directory where the project should be initialized.
+
+    Returns:
+        A DatabaoContextProjectManager for the initialized project.
+
+    Raises:
+        InitProjectError: If the project could not be initialized.
+    """
+    try:
+        initialized_project_dir = init_project_dir(project_dir=project_dir)
+
+        return DatabaoContextProjectManager(project_dir=initialized_project_dir)
+    except InitProjectError as e:
+        raise e
 
 
 def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
+    """Initialize a Databao Context project in the given directory or get a DatabaoContextProjectManager for the project if it already exists.
+
+    Args:
+        project_dir: The directory where the project should be initialized if it doesn't exist yet..
+
+    Returns:
+        A DatabaoContextProjectManager for the project in project_dir.
+    """
     if is_project_dir_valid(project_dir):
         return DatabaoContextProjectManager(project_dir=project_dir)
 
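A short usage sketch of these entry points, assuming both functions are importable from databao_context_engine.init_project as the files-changed list indicates (the project path is hypothetical):

    from pathlib import Path

    from databao_context_engine.init_project import init_or_get_dce_project

    # First call initializes ./my_context_project; later calls detect the
    # existing project via is_project_dir_valid and simply wrap it.
    manager = init_or_get_dce_project(Path("./my_context_project"))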
databao_context_engine/introspection/property_extract.py

@@ -1,11 +1,16 @@
 import types
 from dataclasses import MISSING, fields, is_dataclass
-from typing import Annotated, Any, ForwardRef, Union, get_origin, get_type_hints
+from typing import Annotated, Any, ForwardRef, Union, get_args, get_origin, get_type_hints
 
-from pydantic import BaseModel, _internal
+from pydantic import BaseModel
 from pydantic_core import PydanticUndefinedType
 
-from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation, ConfigPropertyDefinition
+from databao_context_engine.pluginlib.config import (
+    ConfigPropertyAnnotation,
+    ConfigPropertyDefinition,
+    ConfigSinglePropertyDefinition,
+    ConfigUnionPropertyDefinition,
+)
 
 
 def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
@@ -54,18 +59,14 @@ def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyDefinition]:
         raise ValueError(f"{parent_type} is not a dataclass")
 
     dataclass_fields = fields(parent_type)
+    type_hints = get_type_hints(parent_type, include_extras=True)
 
     result = []
     for field in dataclass_fields:
         has_field_default = field.default is not None and field.default != MISSING
 
-        if isinstance(field.type, str):
-            try:
-                property_type = _evaluate_type_string(field.type)
-            except Exception:
-                continue
-        else:
-            property_type = field.type
+        # Use the type hints if the field type wasn't resolved (aka. if it is a ForwardRef or a str)
+        property_type = type_hints[field.name] if isinstance(field.type, ForwardRef | str) else field.type
 
         property_for_field = _create_property(
             property_type=property_type,
@@ -84,6 +85,10 @@ def _get_property_list_from_pydantic_base_model(parent_type: type):
     if not issubclass(parent_type, BaseModel):
         raise ValueError(f"{parent_type} is not a Pydantic BaseModel")
 
+    if any(isinstance(field.annotation, ForwardRef) for field in parent_type.model_fields.values()):
+        # If any field's future type wasn't resolved yet, we rebuild the model to resolve them
+        parent_type.model_rebuild(force=True)
+
     pydantic_fields = parent_type.model_fields
     result = []
 
@@ -123,26 +128,55 @@ def _create_property(
     if annotation is not None and annotation.ignored_for_config_wizard:
         return None
 
-    actual_property_type = _read_actual_property_type(property_type)
+    actual_property_types = _read_actual_property_type(property_type)
 
-    try:
-        nested_properties = _get_property_list_from_type(parent_type=actual_property_type, is_root_type=False)
-    except TypeError:
-        return None
+    required = annotation.required if annotation else is_property_required
+    secret = annotation.secret if annotation else False
 
-    default_value = compute_default_value(
-        annotation=annotation,
-        property_default=property_default,
-        has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
-    )
+    if len(actual_property_types) > 1:
+        type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
 
-    return ConfigPropertyDefinition(
-        property_key=property_name,
-        property_type=actual_property_type if not nested_properties else None,
-        required=annotation.required if annotation else is_property_required,
-        default_value=default_value,
-        nested_properties=nested_properties if nested_properties else None,
-    )
+        for union_type in actual_property_types:
+            try:
+                nested_props = _get_property_list_from_type(
+                    parent_type=union_type,
+                    is_root_type=False,
+                )
+            except TypeError:
+                nested_props = []
+
+            type_properties[union_type] = nested_props
+
+        return ConfigUnionPropertyDefinition(
+            property_key=property_name,
+            types=actual_property_types,
+            type_properties=type_properties,
+        )
+    else:
+        actual_property_type = actual_property_types[0]
+        try:
+            nested_properties = _get_property_list_from_type(
+                parent_type=actual_property_type,
+                is_root_type=False,
+            )
+        except TypeError:
+            return None
+
+        resolved_type = actual_property_type if not nested_properties else None
+        default_value = compute_default_value(
+            annotation=annotation,
+            property_default=property_default,
+            has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
+        )
+
+        return ConfigSinglePropertyDefinition(
+            property_key=property_name,
+            property_type=resolved_type,
+            required=required,
+            default_value=default_value,
+            nested_properties=nested_properties or None,
+            secret=secret,
+        )
 
 
 def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
@@ -155,21 +189,16 @@ def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
     return None
 
 
-def _read_actual_property_type(property_type: type) -> type:
+def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
     property_type_origin = get_origin(property_type)
 
     if property_type_origin is Annotated:
-        return property_type.__origin__  # type: ignore[attr-defined]
-    elif property_type_origin is Union or property_type_origin is types.UnionType:
-        type_args = property_type.__args__  # type: ignore[attr-defined]
-        if len(type_args) == 2 and type(None) in type_args:
-            # Uses the actual type T when the Union is "T | None" (or "None | T")
-            return next(arg for arg in type_args if arg is not None)
-        else:
-            # Ignoring Union types when it is not used as type | None as we wouldn't which type to pick
-            return type(None)
+        return _read_actual_property_type(property_type.__origin__)  # type: ignore[attr-defined]
+    elif property_type_origin in (Union, types.UnionType):
+        args = tuple(arg for arg in get_args(property_type) if arg is not type(None))
+        return args
 
-    return property_type
+    return (property_type,)
 
 
 def compute_default_value(
@@ -185,18 +214,3 @@ def compute_default_value(
         return str(property_default)
 
     return None
-
-
-def _evaluate_type_string(property_type: str) -> type:
-    try:
-        # Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
-        return _internal._typing_extra.eval_type(property_type)
-    except Exception as initial_error:
-        try:
-            # Try to convert it ourselves if Pydantic didn't work
-            return ForwardRef(property_type)._evaluate(  # type: ignore[return-value]
-                globalns=globals(), localns=locals(), recursive_guard=frozenset()
-            )
-        except Exception as e:
-            # Ignore if we didn't manage to convert the str to a type
-            raise e from initial_error
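The key behavioral change is in _read_actual_property_type: instead of collapsing any union that is not exactly "T | None" to type(None), it now unwraps Annotated recursively, drops None, and returns every remaining member. A standalone sketch of that logic (Python 3.10+ for the X | Y syntax; the function is renamed here to mark it as a sketch of the private helper):

    import types
    from typing import Annotated, Optional, Union, get_args, get_origin

    def read_actual_property_type(property_type) -> tuple:
        # Mirrors the new _read_actual_property_type shown above
        origin = get_origin(property_type)
        if origin is Annotated:
            # Unwrap Annotated[...] recursively
            return read_actual_property_type(property_type.__origin__)
        elif origin in (Union, types.UnionType):
            # Drop None and keep every remaining union member
            return tuple(arg for arg in get_args(property_type) if arg is not type(None))
        return (property_type,)

    print(read_actual_property_type(int))                  # (<class 'int'>,)
    print(read_actual_property_type(Optional[str]))        # (<class 'str'>,)
    print(read_actual_property_type(int | str | None))     # (<class 'int'>, <class 'str'>)
    print(read_actual_property_type(Annotated[int, "x"]))  # (<class 'int'>,)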
databao_context_engine/llm/errors.py

@@ -3,14 +3,8 @@ class OllamaError(Exception):
 
 
 class OllamaTransientError(OllamaError):
-    """
-    Errors that are likely temporary (network issues, timeouts, 5xx, etc.).
-    Typically worth retrying.
-    """
+    """Errors that are likely temporary (network issues, timeouts, 5xx, etc.), typically worth retrying."""
 
 
 class OllamaPermanentError(OllamaError):
-    """
-    Errors that are unlikely to succeed on retry without changing inputs
-    or configuration (4xx, bad response schema, etc.).
-    """
+    """Errors that are unlikely to succeed on retry without changing inputs or configuration (4xx, bad response schema, etc.)."""
databao_context_engine/llm/install.py

@@ -49,14 +49,15 @@ ARTIFACTS: dict[str, ArtifactInfo] = {
 
 
 def resolve_ollama_bin() -> str:
-    """
-    Decide which `ollama` binary to use, in this order:
+    """Decide which `ollama` binary to use.
 
+    Here is the priority order:
     1. DCE_OLLAMA_BIN env var, if set and exists
     2. `ollama` found on PATH
     3. Managed installation under MANAGED_OLLAMA_BIN
 
-    Returns the full path to the binary
+    Returns:
+        The full path to the binary
     """
     override = os.environ.get("DCE_OLLAMA_BIN")
     if override:
@@ -76,9 +77,7 @@ def resolve_ollama_bin() -> str:
 
 
 def _detect_platform() -> str:
-    """
-    Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'.
-    """
+    """Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'."""
     os_name = sys.platform.lower()
     arch = (os.uname().machine if hasattr(os, "uname") else "").lower()
 
@@ -97,9 +96,7 @@ def _detect_platform() -> str:
 
 
 def _download_to_temp(url: str) -> Path:
-    """
-    Download to a temporary file and return its path.
-    """
+    """Download to a temporary file and return its path."""
     import urllib.request
 
     tmp_dir = Path(tempfile.mkdtemp(prefix="ollama-download-"))
@@ -114,9 +111,7 @@ def _download_to_temp(url: str) -> Path:
 
 
 def _verify_sha256(path: Path, expected_hex: str) -> None:
-    """
-    Verify SHA-256 of path matches expected_hex
-    """
+    """Verify SHA-256 of path matches expected_hex."""
     h = hashlib.sha256()
     with path.open("rb") as f:
         for chunk in iter(lambda: f.read(8192), b""):
@@ -127,9 +122,7 @@ def _verify_sha256(path: Path, expected_hex: str) -> None:
 
 
 def _extract_archive(archive: Path, target_dir: Path) -> None:
-    """
-    Extract archive into target_dir.
-    """
+    """Extract archive into target_dir."""
     name = archive.name.lower()
     target_dir.mkdir(parents=True, exist_ok=True)
 
@@ -144,9 +137,7 @@ def _extract_archive(archive: Path, target_dir: Path) -> None:
 
 
 def _ensure_executable(path: Path) -> None:
-    """
-    Mark path as executable
-    """
+    """Mark path as executable."""
     try:
         mode = path.stat().st_mode
         path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
@@ -155,8 +146,7 @@ def _ensure_executable(path: Path) -> None:
 
 
 def install_ollama_to(target: Path) -> None:
-    """
-    Ensure an Ollama binary exists.
+    """Ensure an Ollama binary exist.
 
     If it doesn't exist, this will:
     - detect OS
@@ -164,6 +154,9 @@ def install_ollama_to(target: Path) -> None:
     - verify its SHA-256 checksum
     - extract into the installation directory
     - make the binary executable
+
+    Raises:
+        RuntimeError: If the user's platform is not supported
     """
     target = target.expanduser()
     if target.parent.name == "bin":
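The resolution order documented in resolve_ollama_bin maps to a few lines of stdlib code. A sketch under the assumption that MANAGED_OLLAMA_BIN points at the managed install location referenced above (the exact path here is hypothetical):

    import os
    import shutil

    MANAGED_OLLAMA_BIN = os.path.expanduser("~/.dce/ollama/bin/ollama")  # hypothetical location

    def resolve_ollama_bin_sketch() -> str:
        override = os.environ.get("DCE_OLLAMA_BIN")
        if override and os.path.exists(override):
            return override                  # 1. explicit env var override
        found_on_path = shutil.which("ollama")
        if found_on_path:
            return found_on_path             # 2. `ollama` found on PATH
        return MANAGED_OLLAMA_BIN            # 3. managed installation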
databao_context_engine/llm/service.py

@@ -36,9 +36,7 @@ class OllamaService:
         return [float(x) for x in vec]
 
     def describe(self, *, model: str, text: str, context: str) -> str:
-        """
-        Ask Ollama to generate a short description for `text`
-        """
+        """Ask Ollama to generate a short description for `text`."""
         prompt = self._build_description_prompt(text=text, context=context)
 
         payload: dict[str, Any] = {"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}}
databao_context_engine/mcp/mcp_runner.py

@@ -2,15 +2,17 @@ import logging
 from pathlib import Path
 
 from databao_context_engine.mcp.mcp_server import McpServer, McpTransport
+from databao_context_engine.project.layout import ensure_project_dir
 
 logger = logging.getLogger(__name__)
 
 
 def run_mcp_server(
     project_dir: Path,
-    run_name: str | None,
     transport: McpTransport,
     host: str | None = None,
     port: int | None = None,
 ) -> None:
-    McpServer(project_dir, run_name, host, port).run(transport)
+    ensure_project_dir(project_dir=project_dir)
+
+    McpServer(project_dir, host, port).run(transport)
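With run_name gone from the signature, starting the server needs only a project directory and a transport. A usage sketch; "stdio" is assumed here to be a valid McpTransport value, and the path is hypothetical:

    from pathlib import Path

    from databao_context_engine.mcp.mcp_runner import run_mcp_server

    # ensure_project_dir validates the directory before the server starts
    run_mcp_server(project_dir=Path("./my_context_project"), transport="stdio")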
databao_context_engine/mcp/mcp_server.py

@@ -1,5 +1,6 @@
 import logging
 from contextlib import asynccontextmanager
+from datetime import date
 from pathlib import Path
 from typing import Literal
 
@@ -7,8 +8,6 @@ from mcp.server import FastMCP
 from mcp.types import ToolAnnotations
 
 from databao_context_engine import DatabaoContextEngine
-from databao_context_engine.mcp.all_results_tool import run_all_results_tool
-from databao_context_engine.mcp.retrieve_tool import run_retrieve_tool
 
 logger = logging.getLogger(__name__)
 
@@ -26,12 +25,10 @@ class McpServer:
     def __init__(
         self,
         project_dir: Path,
-        run_name: str | None,
         host: str | None = None,
         port: int | None = None,
     ):
         self._databao_context_engine = DatabaoContextEngine(project_dir)
-        self._run_name = run_name
 
         self._mcp_server = self._create_mcp_server(host, port)
 
@@ -43,20 +40,23 @@ class McpServer:
             annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
         )
         def all_results_tool():
-            return run_all_results_tool(self._databao_context_engine, self._run_name)
+            return self._databao_context_engine.get_all_contexts_formatted()
 
         @mcp.tool(
            description="Retrieve the context built from various resources, including databases, dbt tools, plain and structured files, to retrieve relevant information",
            annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
        )
        def retrieve_tool(text: str, limit: int | None):
-            return run_retrieve_tool(
-                databao_context_engine=self._databao_context_engine,
-                run_name=self._run_name,
-                text=text,
-                limit=limit or 50,
+            retrieve_results = self._databao_context_engine.search_context(
+                retrieve_text=text, limit=limit, export_to_file=False
             )
 
+            display_results = [context_search_result.context_result for context_search_result in retrieve_results]
+
+            display_results.append(f"\nToday's date is {date.today()}")
+
+            return "\n".join(display_results)
+
         return mcp
 
     def run(self, transport: McpTransport):
databao_context_engine/plugin_loader.py (new file)

@@ -0,0 +1,111 @@
+from databao_context_engine.introspection.property_extract import get_property_list_from_type
+from databao_context_engine.pluginlib.build_plugin import (
+    BuildDatasourcePlugin,
+    BuildFilePlugin,
+    BuildPlugin,
+    DatasourceType,
+)
+from databao_context_engine.pluginlib.config import ConfigPropertyDefinition, CustomiseConfigProperties
+from databao_context_engine.plugins.plugin_loader import (
+    load_plugins,
+)
+
+
+class DatabaoContextPluginLoader:
+    """Loader for plugins installed in the current environment."""
+
+    def __init__(self, plugins_by_type: dict[DatasourceType, BuildPlugin] | None = None):
+        """Initialize the DatabaoContextEngine.
+
+        Args:
+            plugins_by_type: Override the list of plugins loaded from the environment.
+                Typical usage should not provide this argument and leave it as None.
+        """
+        self._all_plugins_by_type = load_plugins() if plugins_by_type is None else plugins_by_type
+
+    def get_all_supported_datasource_types(self, exclude_file_plugins: bool = False) -> set[DatasourceType]:
+        """Return the list of all supported datasource types.
+
+        Args:
+            exclude_file_plugins: If True, do not return datasource types from plugins that deal with raw files.
+
+        Returns:
+            A set of all DatasourceType supported in the current installation environment.
+        """
+        if exclude_file_plugins:
+            return {
+                datasource_type
+                for (datasource_type, plugin) in self._all_plugins_by_type.items()
+                if not isinstance(plugin, BuildFilePlugin)
+            }
+        else:
+            return set(self._all_plugins_by_type.keys())
+
+    def get_plugin_for_datasource_type(self, datasource_type: DatasourceType) -> BuildPlugin:
+        """Return the plugin able to build a context for the given datasource type.
+
+        Args:
+            datasource_type: The type of datasource for which to retrieve the plugin.
+
+        Returns:
+            The plugin able to build a context for the given datasource type.
+
+        Raises:
+            ValueError: If no plugin is found for the given datasource type.
+        """
+        if datasource_type not in self._all_plugins_by_type:
+            raise ValueError(f"No plugin found for type '{datasource_type.full_type}'")
+
+        return self._all_plugins_by_type[datasource_type]
+
+    def get_config_file_type_for_datasource_type(self, datasource_type: DatasourceType) -> type:
+        """Return the type of the config file for the given datasource type.
+
+        Args:
+            datasource_type: The type of datasource for which to retrieve the config file type.
+
+        Returns:
+            The type of the config file for the given datasource type.
+
+        Raises:
+            ValueError: If no plugin is found for the given datasource type.
+            ValueError: If the plugin does not support config files.
+        """
+        plugin = self.get_plugin_for_datasource_type(datasource_type)
+
+        if isinstance(plugin, BuildDatasourcePlugin):
+            return plugin.config_file_type
+
+        raise ValueError(
+            f'Impossible to get a config file type for datasource type "{datasource_type.full_type}". The corresponding plugin is a {type(plugin).__name__} but should be a BuildDatasourcePlugin'
+        )
+
+    def get_config_file_structure_for_datasource_type(
+        self, datasource_type: DatasourceType
+    ) -> list[ConfigPropertyDefinition]:
+        """Return the property structure of the config file for the given datasource type.
+
+        This can be used to generate a form for the user to fill in the config file.
+
+        Args:
+            datasource_type: The type of datasource for which to retrieve the config file structure.
+
+        Returns:
+            The structure of the config file for the given datasource type.
+            This structure is a list of ConfigPropertyDefinition objects.
+            Each object in the list describes a property of the config file and its potential nested properties.
+
+        Raises:
+            ValueError: If no plugin is found for the given datasource type.
+            ValueError: If the plugin does not support config files.
+        """
+        plugin = self.get_plugin_for_datasource_type(datasource_type)
+
+        if isinstance(plugin, CustomiseConfigProperties):
+            return plugin.get_config_file_properties()
+        elif isinstance(plugin, BuildDatasourcePlugin):
+            return get_property_list_from_type(plugin.config_file_type)
+        else:
+            raise ValueError(
+                f'Impossible to create a config for datasource type "{datasource_type.full_type}". The corresponding plugin is a {type(plugin).__name__} but should be a BuildDatasourcePlugin or CustomiseConfigProperties'
+            )
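A short sketch of driving a config wizard from this new public surface, assuming ConfigPropertyDefinition exposes property_key as the property_extract.py hunks above suggest:

    from databao_context_engine.plugin_loader import DatabaoContextPluginLoader

    loader = DatabaoContextPluginLoader()

    # For every non-file datasource type, list the top-level config
    # properties a wizard would prompt the user for.
    for datasource_type in loader.get_all_supported_datasource_types(exclude_file_plugins=True):
        structure = loader.get_config_file_structure_for_datasource_type(datasource_type)
        print(datasource_type.full_type, [prop.property_key for prop in structure])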