databao-context-engine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. databao_context_engine/__init__.py +35 -0
  2. databao_context_engine/build_sources/__init__.py +0 -0
  3. databao_context_engine/build_sources/internal/__init__.py +0 -0
  4. databao_context_engine/build_sources/internal/build_runner.py +111 -0
  5. databao_context_engine/build_sources/internal/build_service.py +77 -0
  6. databao_context_engine/build_sources/internal/build_wiring.py +52 -0
  7. databao_context_engine/build_sources/internal/export_results.py +43 -0
  8. databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
  9. databao_context_engine/build_sources/public/__init__.py +0 -0
  10. databao_context_engine/build_sources/public/api.py +4 -0
  11. databao_context_engine/cli/__init__.py +0 -0
  12. databao_context_engine/cli/add_datasource_config.py +130 -0
  13. databao_context_engine/cli/commands.py +256 -0
  14. databao_context_engine/cli/datasources.py +64 -0
  15. databao_context_engine/cli/info.py +32 -0
  16. databao_context_engine/config/__init__.py +0 -0
  17. databao_context_engine/config/log_config.yaml +16 -0
  18. databao_context_engine/config/logging.py +43 -0
  19. databao_context_engine/databao_context_project_manager.py +92 -0
  20. databao_context_engine/databao_engine.py +85 -0
  21. databao_context_engine/datasource_config/__init__.py +0 -0
  22. databao_context_engine/datasource_config/add_config.py +50 -0
  23. databao_context_engine/datasource_config/check_config.py +131 -0
  24. databao_context_engine/datasource_config/datasource_context.py +60 -0
  25. databao_context_engine/event_journal/__init__.py +0 -0
  26. databao_context_engine/event_journal/writer.py +29 -0
  27. databao_context_engine/generate_configs_schemas.py +92 -0
  28. databao_context_engine/init_project.py +18 -0
  29. databao_context_engine/introspection/__init__.py +0 -0
  30. databao_context_engine/introspection/property_extract.py +202 -0
  31. databao_context_engine/llm/__init__.py +0 -0
  32. databao_context_engine/llm/config.py +20 -0
  33. databao_context_engine/llm/descriptions/__init__.py +0 -0
  34. databao_context_engine/llm/descriptions/ollama.py +21 -0
  35. databao_context_engine/llm/descriptions/provider.py +10 -0
  36. databao_context_engine/llm/embeddings/__init__.py +0 -0
  37. databao_context_engine/llm/embeddings/ollama.py +37 -0
  38. databao_context_engine/llm/embeddings/provider.py +13 -0
  39. databao_context_engine/llm/errors.py +16 -0
  40. databao_context_engine/llm/factory.py +61 -0
  41. databao_context_engine/llm/install.py +227 -0
  42. databao_context_engine/llm/runtime.py +73 -0
  43. databao_context_engine/llm/service.py +159 -0
  44. databao_context_engine/main.py +19 -0
  45. databao_context_engine/mcp/__init__.py +0 -0
  46. databao_context_engine/mcp/all_results_tool.py +5 -0
  47. databao_context_engine/mcp/mcp_runner.py +16 -0
  48. databao_context_engine/mcp/mcp_server.py +63 -0
  49. databao_context_engine/mcp/retrieve_tool.py +22 -0
  50. databao_context_engine/pluginlib/__init__.py +0 -0
  51. databao_context_engine/pluginlib/build_plugin.py +107 -0
  52. databao_context_engine/pluginlib/config.py +37 -0
  53. databao_context_engine/pluginlib/plugin_utils.py +68 -0
  54. databao_context_engine/plugins/__init__.py +0 -0
  55. databao_context_engine/plugins/athena_db_plugin.py +12 -0
  56. databao_context_engine/plugins/base_db_plugin.py +45 -0
  57. databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
  58. databao_context_engine/plugins/databases/__init__.py +0 -0
  59. databao_context_engine/plugins/databases/athena_introspector.py +101 -0
  60. databao_context_engine/plugins/databases/base_introspector.py +144 -0
  61. databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
  62. databao_context_engine/plugins/databases/database_chunker.py +69 -0
  63. databao_context_engine/plugins/databases/databases_types.py +114 -0
  64. databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
  65. databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
  66. databao_context_engine/plugins/databases/introspection_scope.py +74 -0
  67. databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
  68. databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
  69. databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
  70. databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
  71. databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
  72. databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
  73. databao_context_engine/plugins/mssql_db_plugin.py +12 -0
  74. databao_context_engine/plugins/mysql_db_plugin.py +12 -0
  75. databao_context_engine/plugins/parquet_plugin.py +32 -0
  76. databao_context_engine/plugins/plugin_loader.py +110 -0
  77. databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
  78. databao_context_engine/plugins/resources/__init__.py +0 -0
  79. databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
  80. databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
  81. databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
  82. databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
  83. databao_context_engine/project/__init__.py +0 -0
  84. databao_context_engine/project/datasource_discovery.py +141 -0
  85. databao_context_engine/project/info.py +44 -0
  86. databao_context_engine/project/init_project.py +102 -0
  87. databao_context_engine/project/layout.py +127 -0
  88. databao_context_engine/project/project_config.py +32 -0
  89. databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
  90. databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
  91. databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
  92. databao_context_engine/project/runs.py +39 -0
  93. databao_context_engine/project/types.py +134 -0
  94. databao_context_engine/retrieve_embeddings/__init__.py +0 -0
  95. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  96. databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
  97. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
  98. databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
  99. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
  100. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  101. databao_context_engine/retrieve_embeddings/public/api.py +3 -0
  102. databao_context_engine/serialisation/__init__.py +0 -0
  103. databao_context_engine/serialisation/yaml.py +35 -0
  104. databao_context_engine/services/__init__.py +0 -0
  105. databao_context_engine/services/chunk_embedding_service.py +104 -0
  106. databao_context_engine/services/embedding_shard_resolver.py +64 -0
  107. databao_context_engine/services/factories.py +88 -0
  108. databao_context_engine/services/models.py +12 -0
  109. databao_context_engine/services/persistence_service.py +61 -0
  110. databao_context_engine/services/run_name_policy.py +8 -0
  111. databao_context_engine/services/table_name_policy.py +15 -0
  112. databao_context_engine/storage/__init__.py +0 -0
  113. databao_context_engine/storage/connection.py +32 -0
  114. databao_context_engine/storage/exceptions/__init__.py +0 -0
  115. databao_context_engine/storage/exceptions/exceptions.py +6 -0
  116. databao_context_engine/storage/migrate.py +127 -0
  117. databao_context_engine/storage/migrations/V01__init.sql +63 -0
  118. databao_context_engine/storage/models.py +51 -0
  119. databao_context_engine/storage/repositories/__init__.py +0 -0
  120. databao_context_engine/storage/repositories/chunk_repository.py +130 -0
  121. databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
  122. databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
  123. databao_context_engine/storage/repositories/embedding_repository.py +113 -0
  124. databao_context_engine/storage/repositories/factories.py +35 -0
  125. databao_context_engine/storage/repositories/run_repository.py +157 -0
  126. databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
  127. databao_context_engine/storage/transaction.py +14 -0
  128. databao_context_engine/system/__init__.py +0 -0
  129. databao_context_engine/system/properties.py +13 -0
  130. databao_context_engine/templating/__init__.py +0 -0
  131. databao_context_engine/templating/renderer.py +29 -0
  132. databao_context_engine-0.1.1.dist-info/METADATA +186 -0
  133. databao_context_engine-0.1.1.dist-info/RECORD +135 -0
  134. databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
  135. databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,61 @@
1
+ from databao_context_engine.llm.config import OllamaConfig
2
+ from databao_context_engine.llm.descriptions.ollama import OllamaDescriptionProvider
3
+ from databao_context_engine.llm.embeddings.ollama import OllamaEmbeddingProvider
4
+ from databao_context_engine.llm.install import resolve_ollama_bin
5
+ from databao_context_engine.llm.runtime import OllamaRuntime
6
+ from databao_context_engine.llm.service import OllamaService
7
+
8
+
9
def _create_ollama_service_common(
    *,
    host: str,
    port: int,
    ensure_ready: bool,
) -> OllamaService:
    """
    Build an OllamaService for the given host and port.

    The Ollama binary is located with resolve_ollama_bin(). When `ensure_ready`
    is true, an OllamaRuntime is asked to start the server and wait (up to
    120 seconds) until it is healthy before the service is returned.
    """
    ollama_config = OllamaConfig(host=host, port=port, bin_path=resolve_ollama_bin())
    ollama_service = OllamaService(ollama_config)

    if not ensure_ready:
        return ollama_service

    OllamaRuntime(config=ollama_config, service=ollama_service).start_and_await(timeout=120)
    return ollama_service
24
+
25
+
26
def create_ollama_service(
    *,
    host: str = "127.0.0.1",
    port: int = 11434,
    ensure_ready: bool = True,
) -> OllamaService:
    """
    Public entry point for creating an OllamaService.

    Forwards every argument unchanged to _create_ollama_service_common.
    """
    return _create_ollama_service_common(host=host, port=port, ensure_ready=ensure_ready)
37
+
38
+
39
def create_ollama_embedding_provider(
    service: OllamaService,
    *,
    model_id: str = "nomic-embed-text:v1.5",
    dim: int = 768,
    pull_if_needed: bool = True,
) -> OllamaEmbeddingProvider:
    """
    Create an embedding provider backed by `service`.

    When `pull_if_needed` is true, the service is first asked to pull
    `model_id` if it is not available yet (timeout: 900 seconds).
    """
    if pull_if_needed:
        service.pull_model_if_needed(model=model_id, timeout=900)

    embedding_provider = OllamaEmbeddingProvider(service=service, model_id=model_id, dim=dim)
    return embedding_provider
50
+
51
+
52
def create_ollama_description_provider(
    service: OllamaService,
    *,
    model_id: str = "llama3.2:1b",
    pull_if_needed: bool = True,
) -> OllamaDescriptionProvider:
    """
    Create a description provider backed by `service`.

    When `pull_if_needed` is true, the service is first asked to pull
    `model_id` if it is not available yet (timeout: 900 seconds).
    """
    if pull_if_needed:
        service.pull_model_if_needed(model=model_id, timeout=900)

    return OllamaDescriptionProvider(service=service, model_id=model_id)
@@ -0,0 +1,227 @@
1
+ import hashlib
2
+ import logging
3
+ import os
4
+ import shutil
5
+ import stat
6
+ import sys
7
+ import tarfile
8
+ import tempfile
9
+ from pathlib import Path
10
+ from typing import NamedTuple
11
+ from zipfile import ZipFile
12
+
13
+ from databao_context_engine.system.properties import get_dce_path
14
+
15
+ MANAGED_OLLAMA_BIN = Path(get_dce_path() / "ollama/bin/ollama").expanduser()
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class ArtifactInfo(NamedTuple):
    """
    Describes one downloadable release archive.
    """

    # File name of the archive; appended to the release download URL.
    name: str
    # Expected SHA-256 digest of the archive, as a hex string.
    sha256: str
23
+
24
+
25
# Release tag used to build the download URL in install_ollama_to().
DEFAULT_VERSION = "v0.13.0"

# Archive name and expected SHA-256 for every platform key that
# _detect_platform() can return.
ARTIFACTS: dict[str, ArtifactInfo] = {
    "darwin": ArtifactInfo(
        name="ollama-darwin.tgz",
        sha256="fa4ca04c48453c5ff81447d0630e996ee3e6b6af76a9eba52c69c0732f748161",
    ),
    "linux-amd64": ArtifactInfo(
        name="ollama-linux-amd64.tgz",
        sha256="c5e5b4840008d9c9bf955ec32c32b03afc57c986ac1c382d44c89c9f7dd2cc30",
    ),
    "linux-arm64": ArtifactInfo(
        name="ollama-linux-arm64.tgz",
        sha256="05eb97b87c690fa82626c6f4c7d656ae46ad5f2b7ee6aa324cc19dd88b89982b",
    ),
    "windows-amd64": ArtifactInfo(
        name="ollama-windows-amd64.zip",
        sha256="0fc913fc3763b8d2a490f2be90a51d474491ee22ea5a43ff31f1c58301a89656",
    ),
    "windows-arm64": ArtifactInfo(
        name="ollama-windows-arm64.zip",
        sha256="84c395e4187bd560cfc7c26b0142d970bcbdf0e0214b007bc527b7954430ea21",
    ),
}
49
+
50
+
51
def resolve_ollama_bin() -> str:
    """
    Decide which `ollama` binary to use, in this order:

    1. DCE_OLLAMA_BIN env var, if set and pointing to an existing executable file
    2. `ollama` found on PATH
    3. Managed installation under MANAGED_OLLAMA_BIN (installed on first use)

    Returns the full path to the binary
    """
    override = os.environ.get("DCE_OLLAMA_BIN")
    if override:
        override_path = Path(override).expanduser()
        if override_path.is_file() and os.access(override_path, os.X_OK):
            return str(override_path)
        # The override was set explicitly, so tell the user it is being ignored
        # instead of silently picking a different binary.
        logger.warning(
            "DCE_OLLAMA_BIN is set to %s but it is not an executable file; ignoring it",
            override,
        )

    system_ollama = shutil.which("ollama")
    if system_ollama:
        return system_ollama

    if not MANAGED_OLLAMA_BIN.exists():
        logger.info("No existing Ollama installation detected. We will download and install Ollama.")
        install_ollama_to(MANAGED_OLLAMA_BIN)

    return str(MANAGED_OLLAMA_BIN)
76
+
77
+
78
+ def _detect_platform() -> str:
79
+ """
80
+ Return one of: 'darwin', 'linux-amd64', 'linux-arm64', 'windows-amd64', 'windows-arm64'.
81
+ """
82
+ os_name = sys.platform.lower()
83
+ arch = (os.uname().machine if hasattr(os, "uname") else "").lower()
84
+
85
+ if os_name.startswith("darwin"):
86
+ return "darwin"
87
+ if os_name.startswith("win"):
88
+ if "arm" in arch or "aarch64" in arch:
89
+ return "windows-arm64"
90
+ return "windows-amd64"
91
+ if os_name.startswith("linux"):
92
+ if "arm" in arch or "aarch64" in arch:
93
+ return "linux-arm64"
94
+ return "linux-amd64"
95
+
96
+ raise RuntimeError(f"Unsupported OS/arch: os={os_name!r} arch={arch!r}")
97
+
98
+
99
def _download_to_temp(url: str) -> Path:
    """
    Download to a temporary file and return its path.

    The file is written into a freshly created temporary directory and is named
    after the last path segment of `url`. If the download fails, the temporary
    directory is removed before the error is re-raised.
    """
    import urllib.request

    tmp_dir = Path(tempfile.mkdtemp(prefix="ollama-download-"))
    file_name = url.rsplit("/", 1)[-1]
    dest = tmp_dir / file_name

    logger.info("Downloading %s to %s", url, dest)
    try:
        # The timeout applies to each blocking socket operation, so a stalled
        # connection fails instead of hanging forever.
        with urllib.request.urlopen(url, timeout=60) as resp, dest.open("wb") as out:
            shutil.copyfileobj(resp, out)
    except BaseException:
        # Do not leave a partial download (and its directory) behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise

    return dest
114
+
115
+
116
+ def _verify_sha256(path: Path, expected_hex: str) -> None:
117
+ """
118
+ Verify SHA-256 of path matches expected_hex
119
+ """
120
+ h = hashlib.sha256()
121
+ with path.open("rb") as f:
122
+ for chunk in iter(lambda: f.read(8192), b""):
123
+ h.update(chunk)
124
+ actual = h.hexdigest()
125
+ if actual.lower() != expected_hex.lower():
126
+ raise RuntimeError(f"SHA256 mismatch for {path}: expected {expected_hex}, got {actual}")
127
+
128
+
129
+ def _extract_archive(archive: Path, target_dir: Path) -> None:
130
+ """
131
+ Extract archive into target_dir.
132
+ """
133
+ name = archive.name.lower()
134
+ target_dir.mkdir(parents=True, exist_ok=True)
135
+
136
+ if name.endswith(".zip"):
137
+ with ZipFile(archive, "r") as zf:
138
+ zf.extractall(target_dir)
139
+ elif name.endswith(".tgz") or name.endswith(".tar.gz"):
140
+ with tarfile.open(archive, "r:gz") as tf:
141
+ tf.extractall(target_dir)
142
+ else:
143
+ raise RuntimeError(f"Unsupported archive format: {archive}")
144
+
145
+
146
+ def _ensure_executable(path: Path) -> None:
147
+ """
148
+ Mark path as executable
149
+ """
150
+ try:
151
+ mode = path.stat().st_mode
152
+ path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
153
+ except Exception:
154
+ pass
155
+
156
+
157
def install_ollama_to(target: Path) -> None:
    """
    Download and install the Ollama binary at `target`.

    This will:
    - detect OS
    - download the archive from GitHub
    - verify its SHA-256 checksum
    - extract into the installation directory
    - make the binary executable

    The installation directory is the parent of `target`, or its grandparent
    when `target` lives in a directory named `bin`. The downloaded archive and
    its temporary directory are removed afterwards, whether or not the
    installation succeeded.

    Raises RuntimeError if the platform is unsupported, the checksum does not
    match, or no binary is found after extraction.
    """
    target = target.expanduser()
    install_root = target.parent.parent if target.parent.name == "bin" else target.parent
    install_root.mkdir(parents=True, exist_ok=True)

    platform_key = _detect_platform()
    try:
        artifact = ARTIFACTS[platform_key]
    except KeyError as e:
        raise RuntimeError(f"Unsupported platform: {platform_key}") from e

    url = f"https://github.com/ollama/ollama/releases/download/{DEFAULT_VERSION}/{artifact.name}"
    archive_path = _download_to_temp(url)

    try:
        _verify_sha256(archive_path, artifact.sha256)
        logger.info("Verified SHA256 for %s", archive_path.name)

        _extract_archive(archive_path, install_root)

        # The binary may sit at the archive root or under bin/.
        binary_name = "ollama.exe" if sys.platform.startswith("win") else "ollama"
        candidates = [install_root / binary_name, install_root / "bin" / binary_name]
        binary = next((c for c in candidates if c.exists()), None)

        if binary is None:
            raise RuntimeError(f"Installed Ollama archive but could not find binary under {install_root}")

        if binary.resolve() != target.resolve():
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(binary, target)

        _ensure_executable(target)
        logger.info("Ollama installed at %s", target)

    finally:
        try:
            archive_path.unlink(missing_ok=True)
            # Also drop the temporary download directory. rmdir() only removes
            # an empty directory, so nothing else can be deleted by accident.
            archive_path.parent.rmdir()
        except OSError:
            # Cleanup is best-effort; never mask the real outcome of the install.
            pass
@@ -0,0 +1,73 @@
1
+ import logging
2
+ import os
3
+ import subprocess
4
+
5
+ from databao_context_engine.llm.config import OllamaConfig
6
+ from databao_context_engine.llm.service import OllamaService
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class OllamaRuntime:
    """
    Starts a local `ollama serve` process when the configured server is not
    already answering health checks.
    """

    def __init__(self, service: OllamaService, config: OllamaConfig | None = None):
        self._service = service
        self._config = config or OllamaConfig()

    def start_if_needed(self) -> subprocess.Popen | None:
        """
        Launch `ollama serve` unless the service already reports healthy.

        Returns the spawned process, or None when nothing was started.
        """
        if self._service.is_healthy():
            return None

        logger.info("Ollama server not running. Starting Ollama server...")
        cmd = [self._config.bin_path, "serve"]
        env = os.environ.copy()
        env["OLLAMA_HOST"] = f"{self._config.host}:{self._config.port}"
        if self._config.extra_env:
            env.update(self._config.extra_env)

        # Server output is discarded; stderr is merged into the same sink.
        return subprocess.Popen(
            cmd,
            cwd=str(self._config.work_dir) if self._config.work_dir else None,
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
            text=False,
            close_fds=os.name != "nt",
        )

    def start_and_await(
        self,
        *,
        timeout: float = 60.0,
        poll_interval: float = 0.5,
    ) -> subprocess.Popen | None:
        """
        Start the server if needed and wait until it is healthy.

        Returns the spawned process, or None if the server was already running.
        Raises TimeoutError if the server is not healthy within `timeout`
        seconds; a process started by this call is stopped first.
        """
        # start_if_needed() performs the health check itself, so no separate
        # check is needed before calling it.
        proc = self.start_if_needed()

        ok = self._service.wait_until_healthy(timeout=timeout, poll_interval=poll_interval)
        if ok:
            if proc is not None:
                logger.info("Started Ollama server")
            else:
                logger.debug("Ollama server was already running")
            return proc

        if proc is not None:
            self._stop_process(proc)

        raise TimeoutError(
            f"Timed out waiting for Ollama to become healthy at http://{self._config.host}:{self._config.port}"
        )

    @staticmethod
    def _stop_process(proc: subprocess.Popen, grace_period: float = 5.0) -> None:
        """
        Stop `proc`: terminate, wait up to `grace_period` seconds, then kill.

        The process is always waited on so it does not linger unreaped.
        Failures are logged and never raised.
        """
        if proc.poll() is not None:
            # Already exited; poll() has collected its exit status.
            return
        try:
            proc.terminate()
            try:
                proc.wait(timeout=grace_period)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait(timeout=grace_period)
        except (OSError, subprocess.TimeoutExpired) as e:
            logger.warning("Failed to stop Ollama server process %s: %s", proc.pid, e)
@@ -0,0 +1,159 @@
1
+ import logging
2
+ import textwrap
3
+ import time
4
+ from typing import Any
5
+
6
+ import requests
7
+
8
+ from databao_context_engine.llm.config import OllamaConfig
9
+ from databao_context_engine.llm.errors import OllamaPermanentError, OllamaTransientError
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class OllamaService:
    """
    Thin HTTP client for an Ollama server.

    The base URL, default timeout and headers come from the OllamaConfig
    passed at construction. A requests.Session may be injected; otherwise a
    new one is created.
    """

    def __init__(self, config: OllamaConfig, session: requests.Session | None = None):
        self._base = config.base_url.rstrip("/")
        self._timeout = config.timeout
        self._headers = config.headers
        self._session = session or requests.Session()

    def embed(self, *, model: str, text: str) -> list[float]:
        """
        Request an embedding for `text` from `/api/embeddings`.

        Accepts either a top-level `embedding` list or a `data[0].embedding`
        list in the response. Raises ValueError for any other response shape.
        """
        payload: dict[str, Any] = {
            "model": model,
            "prompt": text,
        }
        data = self._request_json(method="POST", path="/api/embeddings", json=payload)

        vec = data.get("embedding")
        if not isinstance(vec, list) or not all(isinstance(x, (int, float)) for x in vec):
            alt = data.get("data")
            if isinstance(alt, list) and alt and isinstance(alt[0], dict) and isinstance(alt[0].get("embedding"), list):
                vec = alt[0]["embedding"]
            else:
                raise ValueError("Unexpected Ollama embedding response schema")

        return [float(x) for x in vec]

    def describe(self, *, model: str, text: str, context: str) -> str:
        """
        Ask Ollama to generate a short description for `text`

        Raises ValueError if the response has no `response` string.
        """
        prompt = self._build_description_prompt(text=text, context=context)

        payload: dict[str, Any] = {"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}}
        data = self._request_json(method="POST", path="/api/generate", json=payload)

        response_text = data.get("response")
        if not isinstance(response_text, str):
            raise ValueError("Unexpected Ollama generate response schema (missing 'response' string)")
        return response_text.strip()

    def pull_model_if_needed(self, *, model: str, timeout: float = 900.0) -> None:
        """
        Pull `model` unless the server already lists it.
        """
        if self._is_model_available(model_name=model):
            logger.debug("Ollama model %s was already available, skipping pull", model)
            return

        logger.info("Ollama model %s not found locally. Pulling it (this may take several minutes)...", model)
        self.pull_model(model=model, timeout=timeout)
        logger.info("Ollama model %s pulled successfully", model)

    def pull_model(self, *, model: str, timeout: float = 900.0) -> None:
        """
        Pull `model` through `/api/pull` and block until the response is complete.

        Raises OllamaPermanentError if the server answers with an HTTP error or
        if the response body contains an `error` entry.
        """
        import json

        payload: dict[str, Any] = {"name": model}
        resp = self._request(method="POST", path="/api/pull", json=payload, timeout=timeout)

        # The body is read as newline-delimited JSON status objects. A failed
        # pull can be reported in-band as {"error": "..."} rather than through
        # the HTTP status, so the body has to be checked as well.
        for raw_line in resp.text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            try:
                event = json.loads(line)
            except ValueError:
                # Not a JSON line; nothing to check.
                continue
            if isinstance(event, dict) and event.get("error"):
                raise OllamaPermanentError(f"Ollama failed to pull model {model}: {event['error']}")

    def is_healthy(self, *, timeout: float = 3.0) -> bool:
        """
        Return True if `GET /api/tags` answers with a 2xx status.
        """
        url = f"{self._base}/api/tags"
        try:
            r = self._session.get(url, headers=self._headers, timeout=timeout)
            return 200 <= r.status_code < 300
        except requests.RequestException:
            return False

    def wait_until_healthy(self, *, timeout: float = 60.0, poll_interval: float = 0.5) -> bool:
        """
        Poll is_healthy() until it succeeds or `timeout` seconds have passed.

        One last check is made after the deadline; its result is returned.
        """
        deadline = time.monotonic() + float(timeout)
        while time.monotonic() < deadline:
            if self.is_healthy(timeout=min(poll_interval, timeout)):
                return True
            time.sleep(poll_interval)
        return self.is_healthy(timeout=min(poll_interval, timeout))

    def _is_model_available(self, *, model_name: str, timeout: float = 5.0) -> bool:
        """
        Return True if `GET /api/tags` lists a model named `model_name`.

        Any request failure, non-2xx status or unexpected response shape is
        treated as "not available".
        """
        url = f"{self._base}/api/tags"
        try:
            r = self._session.get(url, headers=self._headers, timeout=timeout)
            if not (200 <= r.status_code < 300):
                return False
            body = r.json()
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            return False

        models = body.get("models") if isinstance(body, dict) else None
        if not isinstance(models, list):
            return False

        # A name without a tag refers to the ":latest" tag in Ollama, so accept
        # both spellings.
        wanted = {model_name}
        if ":" not in model_name:
            wanted.add(f"{model_name}:latest")

        return any(isinstance(entry, dict) and entry.get("name") in wanted for entry in models)

    def _request(
        self,
        *,
        method: str,
        path: str,
        timeout: float | None = None,
        **kwargs,
    ) -> requests.Response:
        """
        Send a request to `path` and return the response.

        Uses the configured default timeout when `timeout` is not given.
        Raises OllamaTransientError on timeouts and connection-level failures,
        and OllamaPermanentError on HTTP error statuses.
        """
        url = f"{self._base}{path}"
        # Resolve the timeout once so the error message reports the value that
        # was really used, not `None`.
        effective_timeout = timeout or self._timeout
        try:
            resp = self._session.request(
                method,
                url,
                headers=self._headers,
                timeout=effective_timeout,
                **kwargs,
            )
        except requests.Timeout as e:
            raise OllamaTransientError(f"Ollama request to {path} timed out after {effective_timeout}s") from e
        except requests.RequestException as e:
            raise OllamaTransientError(f"Ollama request to {path} failed: {e}") from e

        try:
            resp.raise_for_status()
        except requests.HTTPError as e:
            raise OllamaPermanentError(f"Ollama error {resp.status_code} for {path}: {resp.text}") from e

        return resp

    def _request_json(
        self,
        *,
        method: str,
        path: str,
        timeout: float | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """
        Like _request(), but decode the response body as JSON.

        Raises OllamaPermanentError if the body is not valid JSON.
        """
        resp = self._request(method=method, path=path, timeout=timeout, **kwargs)
        try:
            return resp.json()
        except ValueError as e:
            raise OllamaPermanentError(f"Invalid JSON from Ollama for {path}") from e

    @staticmethod
    def _build_description_prompt(text: str, context: str) -> str:
        """
        Build the prompt sent to the model by describe().
        """
        base = """
        You are a helpful assistant.

        I will give you some TEXT and CONTEXT.
        Write a concise, human-readable description of the TEXT suitable for displaying in a UI.
        - 1-2 sentences
        - Be factual and avoid speculation
        - No markdown
        - No preambles or labels, just the description itself.
        - Your entire reply MUST be only the description itself. No extra commentary.

        CONTEXT:
        {context}

        TEXT:
        {text}
        """

        # Dedent before formatting so multi-line inputs cannot change the
        # common indentation that dedent() strips.
        return textwrap.dedent(base).format(context=context, text=text).strip()
@@ -0,0 +1,19 @@
1
+ import logging
2
+
3
+ from databao_context_engine.cli.commands import dce
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
def main() -> None:
    """
    Run the `dce` command line interface.

    An unexpected exception is logged (with traceback when DEBUG logging is
    enabled) and the process then exits with status 1, so shells and scripts
    can detect the failure.
    """
    import sys

    try:
        dce(obj={})
    except Exception as e:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception(e)
        else:
            logger.error(str(e))
        # Previously the function returned normally here, which made a failed
        # run exit with status 0.
        sys.exit(1)
16
+
17
+
18
+ if __name__ == "__main__":
19
+ main()
File without changes
@@ -0,0 +1,5 @@
1
+ from databao_context_engine import DatabaoContextEngine
2
+
3
+
4
def run_all_results_tool(databao_context_engine: DatabaoContextEngine, run_name: str | None) -> str:
    """
    Return the engine's formatted contexts for `run_name`.
    """
    formatted_contexts = databao_context_engine.get_all_contexts_formatted(run_name=run_name)
    return formatted_contexts
@@ -0,0 +1,16 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.mcp.mcp_server import McpServer, McpTransport
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
def run_mcp_server(
    project_dir: Path,
    run_name: str | None,
    transport: McpTransport,
    host: str | None = None,
    port: int | None = None,
) -> None:
    """
    Create an McpServer for the project and run it on the given transport.
    """
    server = McpServer(project_dir, run_name, host, port)
    server.run(transport)
@@ -0,0 +1,63 @@
1
+ import logging
2
+ from contextlib import asynccontextmanager
3
+ from pathlib import Path
4
+ from typing import Literal
5
+
6
+ from mcp.server import FastMCP
7
+ from mcp.types import ToolAnnotations
8
+
9
+ from databao_context_engine import DatabaoContextEngine
10
+ from databao_context_engine.mcp.all_results_tool import run_all_results_tool
11
+ from databao_context_engine.mcp.retrieve_tool import run_retrieve_tool
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ McpTransport = Literal["stdio", "streamable-http"]
16
+
17
+
18
@asynccontextmanager
async def mcp_server_lifespan(server: FastMCP):
    """
    Lifespan hook: log one line when the server starts and one when it stops.
    """
    settings = server.settings
    logger.info(f"Starting MCP server on {settings.host}:{settings.port}...")
    yield
    logger.info("Stopping MCP server")
23
+
24
+
25
class McpServer:
    """
    MCP server exposing a project's built context through two read-only tools.
    """

    def __init__(
        self,
        project_dir: Path,
        run_name: str | None,
        host: str | None = None,
        port: int | None = None,
    ):
        self._databao_context_engine = DatabaoContextEngine(project_dir)
        self._run_name = run_name

        self._mcp_server = self._create_mcp_server(host, port)

    def _create_mcp_server(self, host: str | None = None, port: int | None = None) -> FastMCP:
        """
        Build the FastMCP instance and register the tools on it.

        Falls back to 127.0.0.1 and port 8000 when host or port is not given.
        """
        mcp = FastMCP(host=host or "127.0.0.1", port=port or 8000, lifespan=mcp_server_lifespan)

        @mcp.tool(
            description="Retrieve the contents of the all_results file",
            annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
        )
        def all_results_tool():
            return run_all_results_tool(self._databao_context_engine, self._run_name)

        @mcp.tool(
            description="Retrieve the context built from various resources, including databases, dbt tools, plain and structured files, to retrieve relevant information",
            annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True, openWorldHint=False),
        )
        def retrieve_tool(text: str, limit: int | None = None):
            # `limit` defaults to None so callers may omit it; a missing or
            # zero limit falls back to 50 results.
            return run_retrieve_tool(
                databao_context_engine=self._databao_context_engine,
                run_name=self._run_name,
                text=text,
                limit=limit or 50,
            )

        return mcp

    def run(self, transport: McpTransport):
        """
        Run the MCP server on the given transport.
        """
        self._mcp_server.run(transport=transport)
@@ -0,0 +1,22 @@
1
+ import datetime
2
+
3
+ from databao_context_engine import DatabaoContextEngine
4
+
5
+
6
def run_retrieve_tool(
    *, databao_context_engine: DatabaoContextEngine, run_name: str | None, text: str, limit: int | None = None
) -> str:
    """
    Run a context search for MCP and return the results as one string.

    Each result's `context_result` goes on its own line, followed by a final
    line stating today's date.
    """
    search_hits = databao_context_engine.search_context(
        retrieve_text=text, run_name=run_name, limit=limit, export_to_file=False
    )

    date_footer = f"\nToday's date is {datetime.date.today()}"
    output_lines = [hit.context_result for hit in search_hits]
    output_lines.append(date_footer)

    return "\n".join(output_lines)
File without changes