databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. databao_context_engine/__init__.py +32 -7
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +82 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
  8. databao_context_engine/cli/add_datasource_config.py +49 -44
  9. databao_context_engine/cli/commands.py +40 -55
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +127 -0
  12. databao_context_engine/databao_context_project_manager.py +147 -30
  13. databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
  14. databao_context_engine/datasources/datasource_context.py +90 -0
  15. databao_context_engine/datasources/datasource_discovery.py +143 -0
  16. databao_context_engine/datasources/types.py +194 -0
  17. databao_context_engine/generate_configs_schemas.py +4 -5
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +76 -57
  20. databao_context_engine/llm/__init__.py +10 -0
  21. databao_context_engine/llm/api.py +57 -0
  22. databao_context_engine/llm/descriptions/ollama.py +1 -3
  23. databao_context_engine/llm/errors.py +2 -8
  24. databao_context_engine/llm/factory.py +5 -2
  25. databao_context_engine/llm/install.py +26 -30
  26. databao_context_engine/llm/runtime.py +3 -5
  27. databao_context_engine/llm/service.py +1 -3
  28. databao_context_engine/mcp/mcp_runner.py +4 -2
  29. databao_context_engine/mcp/mcp_server.py +9 -11
  30. databao_context_engine/plugin_loader.py +110 -0
  31. databao_context_engine/pluginlib/build_plugin.py +12 -29
  32. databao_context_engine/pluginlib/config.py +16 -2
  33. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  34. databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
  35. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
  36. databao_context_engine/plugins/databases/base_introspector.py +11 -12
  37. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  38. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
  39. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  40. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
  41. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  42. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  43. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  44. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  45. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
  46. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  47. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
  48. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  49. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  50. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
  51. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  52. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  53. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
  54. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  55. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  56. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  57. databao_context_engine/plugins/duckdb_tools.py +18 -0
  58. databao_context_engine/plugins/files/__init__.py +0 -0
  59. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  60. databao_context_engine/plugins/plugin_loader.py +58 -52
  61. databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
  62. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  63. databao_context_engine/project/info.py +34 -2
  64. databao_context_engine/project/init_project.py +16 -7
  65. databao_context_engine/project/layout.py +14 -15
  66. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  67. databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
  68. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
  69. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
  70. databao_context_engine/serialization/__init__.py +0 -0
  71. databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
  72. databao_context_engine/services/chunk_embedding_service.py +23 -11
  73. databao_context_engine/services/factories.py +1 -46
  74. databao_context_engine/services/persistence_service.py +11 -11
  75. databao_context_engine/storage/connection.py +11 -7
  76. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  77. databao_context_engine/storage/migrate.py +3 -5
  78. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  79. databao_context_engine/storage/models.py +2 -23
  80. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  81. databao_context_engine/storage/repositories/factories.py +1 -12
  82. databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
  83. databao_context_engine/system/properties.py +4 -2
  84. databao_context_engine-0.1.5.dist-info/METADATA +228 -0
  85. databao_context_engine-0.1.5.dist-info/RECORD +135 -0
  86. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
  87. databao_context_engine/build_sources/internal/build_service.py +0 -77
  88. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  89. databao_context_engine/build_sources/internal/export_results.py +0 -43
  90. databao_context_engine/build_sources/public/api.py +0 -4
  91. databao_context_engine/databao_engine.py +0 -85
  92. databao_context_engine/datasource_config/add_config.py +0 -50
  93. databao_context_engine/datasource_config/datasource_context.py +0 -60
  94. databao_context_engine/mcp/all_results_tool.py +0 -5
  95. databao_context_engine/mcp/retrieve_tool.py +0 -22
  96. databao_context_engine/plugins/databases/athena_introspector.py +0 -101
  97. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  98. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  99. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  100. databao_context_engine/project/datasource_discovery.py +0 -141
  101. databao_context_engine/project/runs.py +0 -39
  102. databao_context_engine/project/types.py +0 -134
  103. databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
  104. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
  105. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  106. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  107. databao_context_engine/services/run_name_policy.py +0 -8
  108. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  109. databao_context_engine/storage/repositories/run_repository.py +0 -157
  110. databao_context_engine-0.1.1.dist-info/METADATA +0 -186
  111. databao_context_engine-0.1.1.dist-info/RECORD +0 -135
  112. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  113. /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
  114. /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
  115. /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
  116. /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
  117. /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
  118. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
@@ -1,24 +1,13 @@
1
- from _duckdb import DuckDBPyConnection
1
+ from duckdb import DuckDBPyConnection
2
2
 
3
- from databao_context_engine.services.run_name_policy import RunNamePolicy
4
3
  from databao_context_engine.storage.repositories.chunk_repository import ChunkRepository
5
- from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
6
4
  from databao_context_engine.storage.repositories.embedding_model_registry_repository import (
7
5
  EmbeddingModelRegistryRepository,
8
6
  )
9
7
  from databao_context_engine.storage.repositories.embedding_repository import EmbeddingRepository
10
- from databao_context_engine.storage.repositories.run_repository import RunRepository
11
8
  from databao_context_engine.storage.repositories.vector_search_repository import VectorSearchRepository
12
9
 
13
10
 
14
- def create_run_repository(conn: DuckDBPyConnection) -> RunRepository:
15
- return RunRepository(conn, run_name_policy=RunNamePolicy())
16
-
17
-
18
- def create_datasource_run_repository(conn: DuckDBPyConnection) -> DatasourceRunRepository:
19
- return DatasourceRunRepository(conn)
20
-
21
-
22
11
  def create_chunk_repository(conn: DuckDBPyConnection) -> ChunkRepository:
23
12
  return ChunkRepository(conn)
24
13
 
@@ -1,10 +1,11 @@
1
1
  from collections.abc import Sequence
2
2
  from dataclasses import dataclass
3
+ from typing import Any
3
4
 
4
5
  import duckdb
5
6
 
7
+ from databao_context_engine.datasources.types import DatasourceId
6
8
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
7
- from databao_context_engine.project.types import DatasourceId
8
9
 
9
10
 
10
11
  @dataclass(kw_only=True, frozen=True)
@@ -23,32 +24,38 @@ class VectorSearchRepository:
23
24
  self._conn = conn
24
25
 
25
26
  def get_display_texts_by_similarity(
26
- self, *, table_name: str, run_id: int, retrieve_vec: Sequence[float], dimension: int, limit: int
27
+ self,
28
+ *,
29
+ table_name: str,
30
+ retrieve_vec: Sequence[float],
31
+ dimension: int,
32
+ limit: int,
33
+ datasource_ids: list[DatasourceId] | None = None,
27
34
  ) -> list[VectorSearchResult]:
28
- """
29
- Read only similarity search on a specific embedding shard table.
30
- Returns the display text for the closest matches in a given run
31
- """
35
+ """Read only similarity search on a specific embedding shard table."""
36
+ params: list[Any] = [list(retrieve_vec), self._DEFAULT_DISTANCE_THRESHOLD, list(retrieve_vec), limit]
37
+ if datasource_ids:
38
+ params.append([str(datasource_id) for datasource_id in datasource_ids])
39
+
32
40
  rows = self._conn.execute(
33
41
  f"""
34
42
  SELECT
35
43
  COALESCE(c.display_text, c.embeddable_text) AS display_text,
36
- c.embeddable_text AS embeddable_text,
37
- array_cosine_distance(e.vec, CAST(? AS FLOAT[{dimension}])) AS cosine_distance,
38
- dr.full_type,
39
- dr.source_id,
44
+ c.embeddable_text,
45
+ array_cosine_distance(e.vec, CAST($1 AS FLOAT[{dimension}])) AS cosine_distance,
46
+ c.full_type,
47
+ c.datasource_id,
40
48
  FROM
41
49
  {table_name} e
42
50
  JOIN chunk c ON e.chunk_id = c.chunk_id
43
- JOIN datasource_run dr ON c.datasource_run_id = dr.datasource_run_id
44
51
  WHERE
45
- dr.run_id = ?
46
- AND cosine_distance < ?
52
+ cosine_distance < $2
53
+ {"AND c.datasource_id IN $5" if datasource_ids else ""}
47
54
  ORDER BY
48
- array_cosine_distance(e.vec, CAST(? AS FLOAT[{dimension}])) ASC
49
- LIMIT ?
55
+ array_cosine_distance(e.vec, CAST($3 AS FLOAT[{dimension}])) ASC
56
+ LIMIT $4
50
57
  """,
51
- [list(retrieve_vec), run_id, self._DEFAULT_DISTANCE_THRESHOLD, list(retrieve_vec), limit],
58
+ params,
52
59
  ).fetchall()
53
60
 
54
61
  return [
@@ -1,6 +1,8 @@
1
1
  import os
2
2
  from pathlib import Path
3
3
 
4
+ from databao_context_engine.project.layout import get_output_dir
5
+
4
6
  # it's private, so it doesn't get imported directly. This value is mocked in tests
5
7
  _dce_path = Path(os.getenv("DATABAO_CONTEXT_ENGINE_PATH") or "~/.dce").expanduser().resolve()
6
8
 
@@ -9,5 +11,5 @@ def get_dce_path() -> Path:
9
11
  return _dce_path
10
12
 
11
13
 
12
- def get_db_path() -> Path:
13
- return get_dce_path() / "dce.duckdb"
14
+ def get_db_path(project_dir: Path) -> Path:
15
+ return get_output_dir(project_dir) / "dce.duckdb"
@@ -0,0 +1,228 @@
1
+ Metadata-Version: 2.3
2
+ Name: databao-context-engine
3
+ Version: 0.1.5
4
+ Summary: Semantic context for your LLMs — generated automatically
5
+ Requires-Dist: click>=8.3.0
6
+ Requires-Dist: duckdb>=1.4.3
7
+ Requires-Dist: pyyaml>=6.0.3
8
+ Requires-Dist: requests>=2.32.5
9
+ Requires-Dist: mcp>=1.23.3
10
+ Requires-Dist: pydantic>=2.12.4
11
+ Requires-Dist: jinja2>=3.1.6
12
+ Requires-Dist: pyathena>=3.25.0 ; extra == 'athena'
13
+ Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'clickhouse'
14
+ Requires-Dist: mssql-python>=1.0.0 ; extra == 'mssql'
15
+ Requires-Dist: pymysql>=1.1.2 ; extra == 'mysql'
16
+ Requires-Dist: asyncpg>=0.31.0 ; extra == 'postgresql'
17
+ Requires-Dist: snowflake-connector-python>=4.2.0 ; extra == 'snowflake'
18
+ Requires-Python: >=3.12
19
+ Provides-Extra: athena
20
+ Provides-Extra: clickhouse
21
+ Provides-Extra: mssql
22
+ Provides-Extra: mysql
23
+ Provides-Extra: postgresql
24
+ Provides-Extra: snowflake
25
+ Description-Content-Type: text/markdown
26
+
27
+ [![official project](https://jb.gg/badges/official.svg)](https://github.com/JetBrains#jetbrains-on-github)
28
+ [![PyPI version](https://img.shields.io/pypi/v/databao-context-engine.svg)](https://pypi.org/project/databao-context-engine)
29
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/JetBrains/databao-context-engine?tab=License-1-ov-file)
30
+
31
+ [//]: # ([![Python versions]&#40;https://img.shields.io/pypi/pyversions/databao-context-engine.svg&#41;]&#40;https://pypi.org/project/databao-context-engine/&#41;)
32
+
33
+
34
+ <h1 align="center">Databao Context Engine</h1>
35
+ <p align="center">
36
+ <b>Semantic context for your LLMs — generated automatically.</b><br/>
37
+ No more copying schemas. No manual documentation. Just accurate answers.
38
+ </p>
39
+ <p align="center">
40
+ <a href="https://databao.app">Website</a> •
41
+ <a href="#quickstart">Quickstart</a> •
42
+ <a href="#supported-data-sources">Data Sources</a> •
43
+ <a href="#contributing">Contributing</a>
44
+ </p>
45
+
46
+ ---
47
+
48
+ ## What is Databao Context Engine?
49
+
50
+ Databao Context Engine is a CLI tool that **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
51
+
52
+ Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
53
+
54
+ ```
55
+ Your data sources → Context Engine → Unified semantic graph → Any LLM
56
+ ```
57
+
58
+ ## Why choose Databao Context Engine?
59
+
60
+ | Feature | What it means for you |
61
+ |----------------------------|----------------------------------------------------------------|
62
+ | **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
63
+ | **Runs locally** | Your data never leaves your environment |
64
+ | **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
65
+ | **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
66
+ | **Built-in benchmarks** | Measure and improve context quality over time |
67
+ | **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
68
+ | **Governed & versioned** | Track, version, and share context across your team |
69
+ | **Dynamic or static** | Serve context via MCP server or export as artifact |
70
+
71
+ ## Installation
72
+
73
+ Databao Context Engine is [available on PyPI](https://pypi.org/project/databao-context-engine/) and can be installed with uv, pip, or another package manager.
74
+
75
+ ### Using uv
76
+
77
+ 1. Install Databao Context Engine:
78
+
79
+ ```bash
80
+ uv tool install databao-context-engine
81
+ ```
82
+
83
+ 1. Add it to your PATH:
84
+
85
+ ```bash
86
+ uv tool update-shell
87
+ ```
88
+
89
+ 1. Verify the installation:
90
+
91
+ ```bash
92
+ dce --help
93
+ ```
94
+
95
+ ### Using pip
96
+
97
+ 1. Install Databao Context Engine:
98
+
99
+ ```bash
100
+ pip install databao-context-engine
101
+ ```
102
+
103
+ 1. Verify the installation:
104
+
105
+ ```bash
106
+ dce --help
107
+ ```
108
+
109
+ ## Supported data sources
110
+
111
+ * <img src="https://cdn.simpleicons.org/postgresql/316192" width="16" height="16" alt=""> PostgreSQL
112
+ * <img src="https://cdn.simpleicons.org/mysql/4479A1" width="16" height="16" alt=""> MySQL
113
+ * <img src="https://cdn.simpleicons.org/sqlite/003B57" width="16" height="16" alt=""> SQLite
114
+ * <img src="https://cdn.simpleicons.org/duckdb/FFF000" width="16" height="16" alt=""> DuckDB
115
+ * <img src="https://cdn.simpleicons.org/dbt/FF694B" width="16" height="16" alt=""> dbt projects
116
+ * 📄 Documents & spreadsheets *(coming soon)*
117
+
118
+ ## Supported LLMs
119
+
120
+ | Provider | Configuration |
121
+ |---------------|----------------------------------------------|
122
+ | **Ollama** | `languageModel: OLLAMA`: runs locally, free |
123
+ | **OpenAI** | `languageModel: OPENAI`: requires an API key |
124
+ | **Anthropic** | `languageModel: CLAUDE`: requires an API key |
125
+ | **Google** | `languageModel: GEMINI`: requires an API key |
126
+
127
+ ## Quickstart
128
+
129
+ ### 1. Create a project
130
+
131
+ 1. Create a new directory for your project and navigate to it:
132
+
133
+ ```bash
134
+ mkdir dce-project && cd dce-project
135
+ ```
136
+
137
+ 1. Initialize a new project:
138
+
139
+ ```bash
140
+ dce init
141
+ ```
142
+
143
+ ### 2. Configure data sources
144
+
145
+ 1. When prompted, agree to create a new datasource.
146
+ You can also use the `dce datasource add` command.
147
+
148
+ 1. Provide the data source type and its name.
149
+
150
+ 1. Open the config file that was created for you in your editor and fill in the connection details.
151
+
152
+ 1. Repeat these steps for all data sources you want to include in your project.
153
+
154
+ 1. If you have data in Markdown or text files,
155
+ you can add them to the `dce/src/files` directory.
156
+
157
+ ### 3. Build context
158
+
159
+ 1. To build the context, run the following command:
160
+
161
+ ```bash
162
+ dce build
163
+ ```
164
+
165
+ ### 4. Use context with your LLM
166
+
167
+ **Option A: Dynamic via MCP Server**
168
+
169
+ Databao Context Engine exposes the context through a local MCP Server, so your agent can access the latest context at runtime.
170
+
171
+ 1. In **Claude Desktop**, **Cursor**, or another MCP-compatible agent, add the following configuration.
172
+ Replace `dce-project/` with the path to your project directory:
173
+
174
+ ```json
175
+ # claude_desktop_config.json, mcp.json, or similar
176
+
177
+ {
178
+ "mcpServers": {
179
+ "dce": {
180
+ "command": "dce",
181
+ "args": ["mcp", "--project-dir", "dce-project/"]
182
+ }
183
+ }
184
+ }
185
+ ```
186
+
187
+ 1. Save the file and restart your agent.
188
+
189
+ 1. Open a new chat. In the chat window, select the `dce` server and ask questions related to your project context.
190
+
191
+ **Option B: Static artifact**
192
+
193
+ Even if you don’t have Claude or Cursor installed on your local machine,
194
+ you can still use the context built by Databao Context Engine by pasting it directly into your chat with an AI assistant.
195
+
196
+ 1. Navigate to `dce-project/output/` and open the directory with the latest run.
197
+
198
+ 1. Attach the `all_results.yaml` file to your chat with the AI assistant or copy and paste its contents into your chat.
199
+
200
+ ## Contributing
201
+
202
+ We’d love your help! Here’s how to get involved:
203
+
204
+ - ⭐ **Star this repo** — it helps others find us!
205
+ - 🐛 **Found a bug?** [Open an issue](https://github.com/JetBrains/databao-context-engine/issues)
206
+ - 💡 **Have an idea?** We’re all ears — create a feature request
207
+ - 👍 **Upvote issues** you care about — helps us prioritize
208
+ - 🔧 **Submit a PR**
209
+ - 📝 **Improve docs** — typos, examples, tutorials — everything helps!
210
+
211
+ New to open source? No worries! We're friendly and happy to help you get started. 🌱
212
+
213
+ For more details, see [CONTRIBUTING](CONTRIBUTING.md).
214
+
215
+ ## 📄 License
216
+
217
+ Apache 2.0 — use it however you want. See the [LICENSE](LICENSE.md) file for details.
218
+
219
+ ---
220
+
221
+ <p align="center">
222
+ <b>Like Databao Context Engine?</b> Give us a ⭐ — it means a lot!
223
+ </p>
224
+
225
+ <p align="center">
226
+ <a href="https://databao.app">Website</a> •
227
+ <a href="https://discord.gg/databao">Discord</a>
228
+ </p>
@@ -0,0 +1,135 @@
1
+ databao_context_engine/__init__.py,sha256=sPhGoLZhsFvf1n-WLt9gFUL1IZegCQJPkOZrcrJYn2c,2239
2
+ databao_context_engine/build_sources/__init__.py,sha256=FteGp3MvSuX_fh-Fx1pq-jOjFcjYKQSXFSll4tQffvk,224
3
+ databao_context_engine/build_sources/build_runner.py,sha256=aSxp99R4tbnbHlEDFrg8iw4Bi1dGvO_5uTO-mIrB080,3986
4
+ databao_context_engine/build_sources/build_service.py,sha256=eywJzpvZUVrj-JYxXii_kxhTBzVUT0xfbNtdAFcpGWQ,1624
5
+ databao_context_engine/build_sources/build_wiring.py,sha256=Hfdk0CF0LuGiGGQjOl7wVpVBnYRgrLvT_iLUq-Xljng,3174
6
+ databao_context_engine/build_sources/export_results.py,sha256=E3HP4uaQKYDrmAJKFWwDT190j7ZKfO791_pjKebTMQM,1773
7
+ databao_context_engine/build_sources/plugin_execution.py,sha256=wfMCF_9ixuEnV2VMhCDwOwBN5bi8Yz6ap5ayau2TAYk,2090
8
+ databao_context_engine/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ databao_context_engine/cli/add_datasource_config.py,sha256=fK1MIaTCoz1jurldJZD_BpPIgc6kr-NaAyVjVgjsOTU,5523
10
+ databao_context_engine/cli/commands.py,sha256=VJOHB3H28wNjOUuFC8hhF8-rnlIFjsZMlNT3C2ZbIUY,7807
11
+ databao_context_engine/cli/datasources.py,sha256=kHbXhEnDVaYXTRAE3WeaDGohH1Y9cwyYjjcycQ8eh1U,2376
12
+ databao_context_engine/cli/info.py,sha256=2iegfvRAxMae3KkWMDu-BOnljeJMVQ5u8P5k_8nOKpc,1203
13
+ databao_context_engine/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ databao_context_engine/config/log_config.yaml,sha256=hH2NPumufcyig8L-7Z_DbLQ88_G2N6CAEl5fvHafeAw,360
15
+ databao_context_engine/config/logging.py,sha256=66k7x3ftEdHx-CKqbOss4pqZfGn4nMFv7AzVPs9aVnY,1525
16
+ databao_context_engine/databao_context_engine.py,sha256=l8F1JOzDDZYvIannM6Yq_tGR--G23XzjTVZn6dnjSwY,4966
17
+ databao_context_engine/databao_context_project_manager.py,sha256=xF_ZQUd8NxnP6ow-y6YCGxzPO9Gfy5TTSqIeC5N9XOU,8288
18
+ databao_context_engine/datasources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ databao_context_engine/datasources/check_config.py,sha256=YsrWFCwdJv3jvXG_tTuCRr129351GmRTKB0BRYXxtbo,5407
20
+ databao_context_engine/datasources/datasource_context.py,sha256=uRuviRJUsZdHcNan5DYc3ZBpw2SSsHIMuZjR4PbXUkg,3687
21
+ databao_context_engine/datasources/datasource_discovery.py,sha256=ZrjVyKIk93XbBLTVnnNAR4CFBFflmPZgvmBfOq83hZs,5573
22
+ databao_context_engine/datasources/types.py,sha256=gF2BfNC-NSSvXEg-mefGPBKCGsLhUiDySIv56SsvbIM,7449
23
+ databao_context_engine/event_journal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ databao_context_engine/event_journal/writer.py,sha256=abkl2SPHYzO8XAUsjbtrcXm6WQk15zWpXhYm86qb6G0,986
25
+ databao_context_engine/generate_configs_schemas.py,sha256=rUjSg_SnF7C8-OJ8dNwPpbEdPQqxnF2X8qz3LwIo0e0,3007
26
+ databao_context_engine/init_project.py,sha256=enLvYOd5MpGYVdWly7DuLMzPLUEE6130bEuxh0YZft8,1496
27
+ databao_context_engine/introspection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ databao_context_engine/introspection/property_extract.py,sha256=DlTdqvysEmYMsmeJMdaxQuzbBalMxTq7XLqMSwecYCg,7891
29
+ databao_context_engine/llm/__init__.py,sha256=FAGvpG3kBp4ssNYre8ytTtPeCF6XdZYNP7qpEUhpNAk,366
30
+ databao_context_engine/llm/api.py,sha256=eizZD1PGwkJbj0CFHDtdqcPWp3OdbbUSWIIcF3M3MSo,2091
31
+ databao_context_engine/llm/config.py,sha256=geIygR-9maeKbvmcO5J3iNZXKS4LZc7zpKYyaH8vCoI,463
32
+ databao_context_engine/llm/descriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ databao_context_engine/llm/descriptions/ollama.py,sha256=MkRajlkIsVvLzQvkVBsGwjHqj5lNylexu5o-HQSnbQQ,622
34
+ databao_context_engine/llm/descriptions/provider.py,sha256=nojEI0l55TfFuNooQwGNvfa2k6A1vZ1Oi3PBLAr9eFo,227
35
+ databao_context_engine/llm/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
+ databao_context_engine/llm/embeddings/ollama.py,sha256=GQzlMBRJGJe_xreY9-Y3REBFWWaOyKhiGQxWxrR6xfY,949
37
+ databao_context_engine/llm/embeddings/provider.py,sha256=IVrOptwad2Fr1wf1SBOM3-7y66k08mOZnga1NA6yhbA,300
38
+ databao_context_engine/llm/errors.py,sha256=Y4CVHYJKkn2oHnFWVlVL1QJl4gtvW4z11V3FB9qp3Cc,402
39
+ databao_context_engine/llm/factory.py,sha256=Ct6IyC984POktLNEfOyrIw9y1IqZ6YNg4zSx16L7VhQ,1888
40
+ databao_context_engine/llm/install.py,sha256=1NqpoERmLvX3maFFZQHZ0I7uW-Moi3oyySQoTavZqbM,7082
41
+ databao_context_engine/llm/runtime.py,sha256=uEIiGb9OkjGwlZH6arb3lmD3NHRByh1nngJW-4EWQn0,2343
42
+ databao_context_engine/llm/service.py,sha256=rM_DrF2F7NylxBhtfVGporUcv91eR_nF1mF--Zxh2tc,5978
43
+ databao_context_engine/main.py,sha256=3gWwEc9GZJUFANJtW9Fx4smVzib_sDtFkBGPILjisaI,350
44
+ databao_context_engine/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ databao_context_engine/mcp/mcp_runner.py,sha256=bnB2a-_P1zOlDBqpyFIq4n2hWkfwqb6G7dHb3-3Q2I0,468
46
+ databao_context_engine/mcp/mcp_server.py,sha256=Z2pesefHCFhzfSdPa8OsruxynE28dmEKuEmwx3N18kM,2179
47
+ databao_context_engine/plugin_loader.py,sha256=KIHUwxMcWdyPvZlHFxIDGwIMILTm6mO_gJAzw1UIA00,4784
48
+ databao_context_engine/pluginlib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ databao_context_engine/pluginlib/build_plugin.py,sha256=y-guT2X7wS8Yn1C6iBWLF_HDA4NYVFJsQ-oe6IgVvDo,2831
50
+ databao_context_engine/pluginlib/config.py,sha256=fAu1AuxSFI6LOoAyvwuFE5p11bOsHp9EZzn2ST8DrUI,1534
51
+ databao_context_engine/pluginlib/plugin_utils.py,sha256=f76ksdMUG3Wy4KOytUYxLMrq9iOHytu3QF8wc5NzzJE,2471
52
+ databao_context_engine/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ databao_context_engine/plugins/databases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
+ databao_context_engine/plugins/databases/athena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
+ databao_context_engine/plugins/databases/athena/athena_db_plugin.py,sha256=1Jn3kqPExKUxvj4oXqgJQwRoLiu_3uKgSzEESvGm_Qs,463
56
+ databao_context_engine/plugins/databases/athena/athena_introspector.py,sha256=PCdU8pHEAr6UliLwDgLNEqiN9MqwJ0k94H2f6TgBBmA,5627
57
+ databao_context_engine/plugins/databases/base_db_plugin.py,sha256=ivCgxqb5wBjCgGuPxV5-oFF8R0h-zcAt1ckgTQ87V4Q,1680
58
+ databao_context_engine/plugins/databases/base_introspector.py,sha256=08L6nmIBJT3rfWB2fGdm7jSrNrW4un2MMMLt5shorKo,5752
59
+ databao_context_engine/plugins/databases/clickhouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
+ databao_context_engine/plugins/databases/clickhouse/clickhouse_db_plugin.py,sha256=K3cWnZFyKGpFfg5VBJ0016C9H9DuADIBJdomzH9T49E,520
61
+ databao_context_engine/plugins/databases/clickhouse/clickhouse_introspector.py,sha256=wQbriPsb1HHbx7b7P86K-dPDGIziXxF7rjM_gUy_cUs,6267
62
+ databao_context_engine/plugins/databases/database_chunker.py,sha256=dn9Mb9Sw4f8IwBe9RtukQGT0yyKLQt5i4tjvsrFh1ew,2100
63
+ databao_context_engine/plugins/databases/databases_types.py,sha256=kYZEaFa_cnROg_n1k7tIonoC2VunLBe5SQR47GOt_8k,2579
64
+ databao_context_engine/plugins/databases/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
+ databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py,sha256=UyFp-MihNj562Mx9KcogFX33yNbNUMX_fdmvF6MrJ1M,458
66
+ databao_context_engine/plugins/databases/duckdb/duckdb_introspector.py,sha256=dhZBfKKpyETOrfbQBIM8urfztH_qfhTrvzNFlIesC6c,11260
67
+ databao_context_engine/plugins/databases/introspection_model_builder.py,sha256=-i7e4jqwdcsu3NnS-LHVSH_DrG3riN09KX3SDHAGF0c,10350
68
+ databao_context_engine/plugins/databases/introspection_scope.py,sha256=0BZAAmVTiHifOzeuQkCsGKrGRnZCPHRCLqam5nQqr88,2225
69
+ databao_context_engine/plugins/databases/introspection_scope_matcher.py,sha256=gseF4j-SD-7SSPMDlDqHUOTI3tPSaOGKJgp42WDVlsY,3605
70
+ databao_context_engine/plugins/databases/mssql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
+ databao_context_engine/plugins/databases/mssql/mssql_db_plugin.py,sha256=KV_KQw2f_wIj0B7_rtW0XGcesSOCqPy5A9FTYEDY7PI,452
72
+ databao_context_engine/plugins/databases/mssql/mssql_introspector.py,sha256=bqaERs8uRZlTfVQ91A9FygHpDX-KmGZXNUp8iDgxPdg,18704
73
+ databao_context_engine/plugins/databases/mysql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
+ databao_context_engine/plugins/databases/mysql/mysql_db_plugin.py,sha256=40c6wznmQjU7bSs1kStS4WgNyoZ44wU1Ik9uLdvDqfI,452
75
+ databao_context_engine/plugins/databases/mysql/mysql_introspector.py,sha256=-OlXluCVTUTTZzRJPAVvcHeRdocopHcDKgBFWXF9kGo,14461
76
+ databao_context_engine/plugins/databases/postgresql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py,sha256=VHCtLkoZhgbw631Z-cjcZOyI7dQZii-1fkdCJmntLWE,510
78
+ databao_context_engine/plugins/databases/postgresql/postgresql_introspector.py,sha256=OoaVLgpLOB4HDTU21S5upNyZD3H3B-z5fJq5NMOtakU,17275
79
+ databao_context_engine/plugins/databases/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
+ databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py,sha256=DCIpicjpS9qkd2EfumO7UDG6RQERW3s3FDPDSue7ehg,509
81
+ databao_context_engine/plugins/databases/snowflake/snowflake_introspector.py,sha256=8-a478TSu3uL_1z5HeS9nnF4VxZ9DZNnjH1coyqINM0,11962
82
+ databao_context_engine/plugins/databases/sqlite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
+ databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py,sha256=MI896G7Yq8NcQ8sTWErAFD7YK8qfDpZZprxoszJ46l8,460
84
+ databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py,sha256=kKFNzclp1NmQ6BF3ylGn86R5PuQh6aqevj6E7_zAKmQ,8361
85
+ databao_context_engine/plugins/duckdb_tools.py,sha256=46rctnTxDPAhHtaiTp1DxMuuDuRKrtKWJFSSM2w7uUU,645
86
+ databao_context_engine/plugins/files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
+ databao_context_engine/plugins/files/unstructured_files_plugin.py,sha256=eqs1anQhYBZh7xu4CwhfkqXQjGE5gJnKEwyJbtUR78E,2384
88
+ databao_context_engine/plugins/plugin_loader.py,sha256=8j7JAKqtG6_VjX3GbfAD6kjrWuuJ7NnC2nnHPEujux8,4074
89
+ databao_context_engine/plugins/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
+ databao_context_engine/plugins/resources/parquet_chunker.py,sha256=R9WCOBqpKRTVN6t5eeOm_mmnKBOxvjIiQ9zTc8vnUb4,848
91
+ databao_context_engine/plugins/resources/parquet_introspector.py,sha256=Cn_yh6E-dOTOZstlavEGAsV6ZRKZXJraVAl_pzJJuGs,5629
92
+ databao_context_engine/plugins/resources/parquet_plugin.py,sha256=FVRdAqhZkEBvUsW1CkYH4yPPs3P1pRsofR8PdHN26AE,1128
93
+ databao_context_engine/project/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
+ databao_context_engine/project/info.py,sha256=3chVSZ4pV2EsYJSeVfOYTK746POlJImciIBOvrQuYbc,2479
95
+ databao_context_engine/project/init_project.py,sha256=QiLalH--BMQGB-c2WBbPAUKBinDbO0cJkmilUfqPAuI,3970
96
+ databao_context_engine/project/layout.py,sha256=3ttCJDLMh4AS1tBRcSROgT7hG6uOqkI1QGsQEcfGsqg,4087
97
+ databao_context_engine/project/project_config.py,sha256=rI-Wkll7lca7RlYIaFScs5CIKIZ8uujtKRxSjibGIt8,1132
98
+ databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml,sha256=5o6jF_zR4WSvS9QcZnGW-S2FmahsjNp-tMbAkzbNabo,139
99
+ databao_context_engine/project/resources/examples/src/files/documentation.md,sha256=k5G5lrN4S_ie2o7CBxXgLiWGIGtb3v-77c_3n4C7Lvw,1090
100
+ databao_context_engine/project/resources/examples/src/files/notes.txt,sha256=0T1u8lIwIRwNopEsfWmqfIbhfRl-DN3XuiIN7suRAiE,3668
101
+ databao_context_engine/retrieve_embeddings/__init__.py,sha256=__aOhCc3sBfLtjxz0hlVGuSYUMRsEM3PucPYNC6LVq0,126
102
+ databao_context_engine/retrieve_embeddings/retrieve_runner.py,sha256=s7XwPkMupBgN9JADFaHsytM5ztujyqujB7qdmXKv7ms,585
103
+ databao_context_engine/retrieve_embeddings/retrieve_service.py,sha256=0KOSaxyj6DfzLbHtcvxFPUjXO4MD5t91RP95rsVuE6Q,2275
104
+ databao_context_engine/retrieve_embeddings/retrieve_wiring.py,sha256=GQ9Eh6XyQ9MdG8Kb0H72XPj_8-gPDLSM_Bs0NdmSMX4,2012
105
+ databao_context_engine/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
+ databao_context_engine/serialization/yaml.py,sha256=ke-0iHGaA2eynuGf-tG9IZip56XFfMDrNswzi5rNaDE,1219
107
+ databao_context_engine/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
+ databao_context_engine/services/chunk_embedding_service.py,sha256=Dy25u2QSSdq03v1zs2bjjm8Sze-VEd12F49gzoKXjOA,4924
109
+ databao_context_engine/services/embedding_shard_resolver.py,sha256=6oOVm6QJb74lEIbYXijWMqISTx1VfmKQaQjdSHr8zDA,2289
110
+ databao_context_engine/services/factories.py,sha256=IoRmJk9a7q2eszwz_zfqE2hJDOwpB76SmmmgR0DI-qY,1830
111
+ databao_context_engine/services/models.py,sha256=wHmk4eheoSopXkCs3v2ggoDAGoTPdHYcADMZ4gdtSCI,299
112
+ databao_context_engine/services/persistence_service.py,sha256=Kcu4rw-TCqyHiGW1h5rYZfe3F-HZTn0tRAM4_czzEAc,2252
113
+ databao_context_engine/services/table_name_policy.py,sha256=q0scAQ_ZoTrV8V0J6cmxvsMYxTcmUlAqEWpzZUitpLQ,567
114
+ databao_context_engine/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
+ databao_context_engine/storage/connection.py,sha256=UGnJOBJ83jDAmN3fmjVZI60qUw5cAC3YNLToJZLOhGQ,941
116
+ databao_context_engine/storage/exceptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
+ databao_context_engine/storage/exceptions/exceptions.py,sha256=zwJ8n-ekMhIH-bs9dvYqQuP4VfQ9rt0jdsptoojA5oU,174
118
+ databao_context_engine/storage/migrate.py,sha256=rbZDhQEzykQyfGflJodGv-1ohs43oGSGYXEyT6jTVeo,4617
119
+ databao_context_engine/storage/migrations/V01__init.sql,sha256=hJvUoPIW-rQQB5DSDrPjGJQoey9bpDRz0FFoUyTe9Qo,1368
120
+ databao_context_engine/storage/models.py,sha256=TmoeyOhrFL9mu-C43XUtqOWGhkQu2h-sZQdYCjui5HM,580
121
+ databao_context_engine/storage/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
+ databao_context_engine/storage/repositories/chunk_repository.py,sha256=nwDF8NeCwQJLlwtlvqILUccmWmQTfpg6Uf_WCDXm564,3681
123
+ databao_context_engine/storage/repositories/embedding_model_registry_repository.py,sha256=g91WCflYRc5jPDbW9RBp5HxWFsSIta4n8OYNwgoLv9c,2413
124
+ databao_context_engine/storage/repositories/embedding_repository.py,sha256=UCvr5mf2jTJgX_uMdLIMiN5NWa8Zn1U3HEr19TQRXAw,3278
125
+ databao_context_engine/storage/repositories/factories.py,sha256=YUkVrHyf5UV2rZ7H97Fi_n0ZYxTwXy8_w5zJnMkO8Lw,970
126
+ databao_context_engine/storage/repositories/vector_search_repository.py,sha256=jqCRZ18FKuJFVMsZtHRs19RejVlbZ4VDZY9E6ryeQfM,2289
127
+ databao_context_engine/storage/transaction.py,sha256=QpfE1VtgDd3_7OBzFYI3HPJJ7ESXkgCdcWukFkAjbSg,270
128
+ databao_context_engine/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
+ databao_context_engine/system/properties.py,sha256=mQ7-_PZeYSESYn1cMUQ0IK7rJEnbhc7t4WesFjAgo-Q,429
130
+ databao_context_engine/templating/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
+ databao_context_engine/templating/renderer.py,sha256=W2-0IGStAp6oxANmsKs_Z-UoIR6Gt_c4ILYFa3Hruo4,662
132
+ databao_context_engine-0.1.5.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
133
+ databao_context_engine-0.1.5.dist-info/entry_points.txt,sha256=5EeQJ1W8zEFh4HuF1bs2zBeoP408oiwuM9UrkJiurgI,138
134
+ databao_context_engine-0.1.5.dist-info/METADATA,sha256=HfJBADmvbXEEK710S6zzybfWl-mqnDXfm_18LIgh7kk,7773
135
+ databao_context_engine-0.1.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.9.24
2
+ Generator: uv 0.9.7
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,77 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- from datetime import datetime
5
-
6
- from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
7
- from databao_context_engine.pluginlib.build_plugin import (
8
- BuildPlugin,
9
- )
10
- from databao_context_engine.project.types import PreparedDatasource
11
- from databao_context_engine.serialisation.yaml import to_yaml_string
12
- from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
13
- from databao_context_engine.storage.models import RunDTO
14
- from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
15
- from databao_context_engine.storage.repositories.run_repository import RunRepository
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- class BuildService:
21
- def __init__(
22
- self,
23
- *,
24
- run_repo: RunRepository,
25
- datasource_run_repo: DatasourceRunRepository,
26
- chunk_embedding_service: ChunkEmbeddingService,
27
- ) -> None:
28
- self._run_repo = run_repo
29
- self._datasource_run_repo = datasource_run_repo
30
- self._chunk_embedding_service = chunk_embedding_service
31
-
32
- def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
33
- """
34
- Create a new run row and return (run_id, started_at).
35
- """
36
- return self._run_repo.create(project_id=project_id, dce_version=dce_version)
37
-
38
- def finalize_run(self, *, run_id: int):
39
- """
40
- Mark the run as complete (sets ended_at).
41
- """
42
- self._run_repo.update(run_id=run_id, ended_at=datetime.now())
43
-
44
- def process_prepared_source(
45
- self,
46
- *,
47
- run_id: int,
48
- prepared_source: PreparedDatasource,
49
- plugin: BuildPlugin,
50
- ) -> BuiltDatasourceContext:
51
- """
52
- Process a single source.
53
-
54
- 1) Execute the plugin
55
- 2) Divide the results into chunks
56
- 3) Embed and persist the chunks
57
- """
58
- result = execute(prepared_source, plugin)
59
-
60
- chunks = plugin.divide_context_into_chunks(result.context)
61
- if not chunks:
62
- logger.info("No chunks for %s — skipping.", prepared_source.path.name)
63
- return result
64
-
65
- datasource_run = self._datasource_run_repo.create(
66
- run_id=run_id,
67
- plugin=plugin.name,
68
- full_type=prepared_source.datasource_type.full_type,
69
- source_id=result.datasource_id,
70
- storage_directory=str(prepared_source.path.parent),
71
- )
72
-
73
- self._chunk_embedding_service.embed_chunks(
74
- datasource_run_id=datasource_run.datasource_run_id, chunks=chunks, result=to_yaml_string(result.context)
75
- )
76
-
77
- return result
@@ -1,52 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
5
- from databao_context_engine.llm.factory import (
6
- create_ollama_description_provider,
7
- create_ollama_embedding_provider,
8
- create_ollama_service,
9
- )
10
- from databao_context_engine.project.info import get_dce_version
11
- from databao_context_engine.project.layout import ensure_project_dir
12
- from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
13
- from databao_context_engine.services.factories import (
14
- create_build_service,
15
- )
16
- from databao_context_engine.storage.connection import open_duckdb_connection
17
- from databao_context_engine.system.properties import get_db_path
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
23
- """
24
- Public build entrypoint
25
- - Instantiates the build service
26
- - Delegates the actual build logic to the build runner
27
- """
28
- project_layout = ensure_project_dir(project_dir)
29
-
30
- logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")
31
-
32
- with open_duckdb_connection(get_db_path()) as conn:
33
- ollama_service = create_ollama_service()
34
- embedding_provider = create_ollama_embedding_provider(ollama_service)
35
- description_provider = (
36
- create_ollama_description_provider(ollama_service)
37
- if chunk_embedding_mode.should_generate_description()
38
- else None
39
- )
40
- build_service = create_build_service(
41
- conn,
42
- embedding_provider=embedding_provider,
43
- description_provider=description_provider,
44
- chunk_embedding_mode=chunk_embedding_mode,
45
- )
46
- dce_config = project_layout.read_config_file()
47
- return build(
48
- project_dir=project_dir,
49
- build_service=build_service,
50
- project_id=str(dce_config.project_id),
51
- dce_version=get_dce_version(),
52
- )
@@ -1,43 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext
5
- from databao_context_engine.datasource_config.datasource_context import get_context_header_for_datasource
6
- from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME, get_output_dir
7
- from databao_context_engine.project.types import DatasourceId
8
- from databao_context_engine.serialisation.yaml import write_yaml_to_stream
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- def create_run_dir(project_dir: Path, run_name: str) -> Path:
14
- output_dir = get_output_dir(project_dir)
15
-
16
- run_dir = output_dir.joinpath(run_name)
17
- run_dir.mkdir(parents=True, exist_ok=False)
18
-
19
- return run_dir
20
-
21
-
22
- def export_build_result(run_dir: Path, result: BuiltDatasourceContext) -> Path:
23
- datasource_id = DatasourceId.from_string_repr(result.datasource_id)
24
- export_file_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())
25
-
26
- # Make sure the parent folder exists
27
- export_file_path.parent.mkdir(exist_ok=True)
28
-
29
- with export_file_path.open("w") as export_file:
30
- write_yaml_to_stream(data=result, file_stream=export_file)
31
-
32
- logger.info(f"Exported result to {export_file_path.resolve()}")
33
-
34
- return export_file_path
35
-
36
-
37
- def append_result_to_all_results(run_dir: Path, result: BuiltDatasourceContext):
38
- path = run_dir.joinpath(ALL_RESULTS_FILE_NAME)
39
- path.parent.mkdir(parents=True, exist_ok=True)
40
- with path.open("a", encoding="utf-8") as export_file:
41
- export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
42
- write_yaml_to_stream(data=result, file_stream=export_file)
43
- export_file.write("\n")