databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +32 -7
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +82 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
- databao_context_engine/cli/add_datasource_config.py +49 -44
- databao_context_engine/cli/commands.py +40 -55
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +127 -0
- databao_context_engine/databao_context_project_manager.py +147 -30
- databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
- databao_context_engine/datasources/datasource_context.py +90 -0
- databao_context_engine/datasources/datasource_discovery.py +143 -0
- databao_context_engine/datasources/types.py +194 -0
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +76 -57
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +26 -30
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +9 -11
- databao_context_engine/plugin_loader.py +110 -0
- databao_context_engine/pluginlib/build_plugin.py +12 -29
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
- databao_context_engine/plugins/databases/base_introspector.py +11 -12
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +58 -52
- databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +14 -15
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
- databao_context_engine/serialization/__init__.py +0 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +3 -5
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.5.dist-info/METADATA +228 -0
- databao_context_engine-0.1.5.dist-info/RECORD +135 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/project/datasource_discovery.py +0 -141
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/project/types.py +0 -134
- databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- databao_context_engine-0.1.1.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
- /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
- /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,24 +1,13 @@
|
|
|
1
|
-
from
|
|
1
|
+
from duckdb import DuckDBPyConnection
|
|
2
2
|
|
|
3
|
-
from databao_context_engine.services.run_name_policy import RunNamePolicy
|
|
4
3
|
from databao_context_engine.storage.repositories.chunk_repository import ChunkRepository
|
|
5
|
-
from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
|
|
6
4
|
from databao_context_engine.storage.repositories.embedding_model_registry_repository import (
|
|
7
5
|
EmbeddingModelRegistryRepository,
|
|
8
6
|
)
|
|
9
7
|
from databao_context_engine.storage.repositories.embedding_repository import EmbeddingRepository
|
|
10
|
-
from databao_context_engine.storage.repositories.run_repository import RunRepository
|
|
11
8
|
from databao_context_engine.storage.repositories.vector_search_repository import VectorSearchRepository
|
|
12
9
|
|
|
13
10
|
|
|
14
|
-
def create_run_repository(conn: DuckDBPyConnection) -> RunRepository:
|
|
15
|
-
return RunRepository(conn, run_name_policy=RunNamePolicy())
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def create_datasource_run_repository(conn: DuckDBPyConnection) -> DatasourceRunRepository:
|
|
19
|
-
return DatasourceRunRepository(conn)
|
|
20
|
-
|
|
21
|
-
|
|
22
11
|
def create_chunk_repository(conn: DuckDBPyConnection) -> ChunkRepository:
|
|
23
12
|
return ChunkRepository(conn)
|
|
24
13
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
import duckdb
|
|
5
6
|
|
|
7
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
6
8
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
7
|
-
from databao_context_engine.project.types import DatasourceId
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
@dataclass(kw_only=True, frozen=True)
|
|
@@ -23,32 +24,38 @@ class VectorSearchRepository:
|
|
|
23
24
|
self._conn = conn
|
|
24
25
|
|
|
25
26
|
def get_display_texts_by_similarity(
|
|
26
|
-
self,
|
|
27
|
+
self,
|
|
28
|
+
*,
|
|
29
|
+
table_name: str,
|
|
30
|
+
retrieve_vec: Sequence[float],
|
|
31
|
+
dimension: int,
|
|
32
|
+
limit: int,
|
|
33
|
+
datasource_ids: list[DatasourceId] | None = None,
|
|
27
34
|
) -> list[VectorSearchResult]:
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
35
|
+
"""Read only similarity search on a specific embedding shard table."""
|
|
36
|
+
params: list[Any] = [list(retrieve_vec), self._DEFAULT_DISTANCE_THRESHOLD, list(retrieve_vec), limit]
|
|
37
|
+
if datasource_ids:
|
|
38
|
+
params.append([str(datasource_id) for datasource_id in datasource_ids])
|
|
39
|
+
|
|
32
40
|
rows = self._conn.execute(
|
|
33
41
|
f"""
|
|
34
42
|
SELECT
|
|
35
43
|
COALESCE(c.display_text, c.embeddable_text) AS display_text,
|
|
36
|
-
c.embeddable_text
|
|
37
|
-
array_cosine_distance(e.vec, CAST(
|
|
38
|
-
|
|
39
|
-
|
|
44
|
+
c.embeddable_text,
|
|
45
|
+
array_cosine_distance(e.vec, CAST($1 AS FLOAT[{dimension}])) AS cosine_distance,
|
|
46
|
+
c.full_type,
|
|
47
|
+
c.datasource_id,
|
|
40
48
|
FROM
|
|
41
49
|
{table_name} e
|
|
42
50
|
JOIN chunk c ON e.chunk_id = c.chunk_id
|
|
43
|
-
JOIN datasource_run dr ON c.datasource_run_id = dr.datasource_run_id
|
|
44
51
|
WHERE
|
|
45
|
-
|
|
46
|
-
AND
|
|
52
|
+
cosine_distance < $2
|
|
53
|
+
{"AND c.datasource_id IN $5" if datasource_ids else ""}
|
|
47
54
|
ORDER BY
|
|
48
|
-
array_cosine_distance(e.vec, CAST(
|
|
49
|
-
LIMIT
|
|
55
|
+
array_cosine_distance(e.vec, CAST($3 AS FLOAT[{dimension}])) ASC
|
|
56
|
+
LIMIT $4
|
|
50
57
|
""",
|
|
51
|
-
|
|
58
|
+
params,
|
|
52
59
|
).fetchall()
|
|
53
60
|
|
|
54
61
|
return [
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
from databao_context_engine.project.layout import get_output_dir
|
|
5
|
+
|
|
4
6
|
# it's private, so it doesn't get imported directly. This value is mocked in tests
|
|
5
7
|
_dce_path = Path(os.getenv("DATABAO_CONTEXT_ENGINE_PATH") or "~/.dce").expanduser().resolve()
|
|
6
8
|
|
|
@@ -9,5 +11,5 @@ def get_dce_path() -> Path:
|
|
|
9
11
|
return _dce_path
|
|
10
12
|
|
|
11
13
|
|
|
12
|
-
def get_db_path() -> Path:
|
|
13
|
-
return
|
|
14
|
+
def get_db_path(project_dir: Path) -> Path:
|
|
15
|
+
return get_output_dir(project_dir) / "dce.duckdb"
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: databao-context-engine
|
|
3
|
+
Version: 0.1.5
|
|
4
|
+
Summary: Semantic context for your LLMs — generated automatically
|
|
5
|
+
Requires-Dist: click>=8.3.0
|
|
6
|
+
Requires-Dist: duckdb>=1.4.3
|
|
7
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
8
|
+
Requires-Dist: requests>=2.32.5
|
|
9
|
+
Requires-Dist: mcp>=1.23.3
|
|
10
|
+
Requires-Dist: pydantic>=2.12.4
|
|
11
|
+
Requires-Dist: jinja2>=3.1.6
|
|
12
|
+
Requires-Dist: pyathena>=3.25.0 ; extra == 'athena'
|
|
13
|
+
Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'clickhouse'
|
|
14
|
+
Requires-Dist: mssql-python>=1.0.0 ; extra == 'mssql'
|
|
15
|
+
Requires-Dist: pymysql>=1.1.2 ; extra == 'mysql'
|
|
16
|
+
Requires-Dist: asyncpg>=0.31.0 ; extra == 'postgresql'
|
|
17
|
+
Requires-Dist: snowflake-connector-python>=4.2.0 ; extra == 'snowflake'
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Provides-Extra: athena
|
|
20
|
+
Provides-Extra: clickhouse
|
|
21
|
+
Provides-Extra: mssql
|
|
22
|
+
Provides-Extra: mysql
|
|
23
|
+
Provides-Extra: postgresql
|
|
24
|
+
Provides-Extra: snowflake
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
[](https://github.com/JetBrains#jetbrains-on-github)
|
|
28
|
+
[](https://pypi.org/project/databao-context-engine)
|
|
29
|
+
[](https://github.com/JetBrains/databao-context-engine?tab=License-1-ov-file)
|
|
30
|
+
|
|
31
|
+
[//]: # ([](https://pypi.org/project/databao-context-engine/))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
<h1 align="center">Databao Context Engine</h1>
|
|
35
|
+
<p align="center">
|
|
36
|
+
<b>Semantic context for your LLMs — generated automatically.</b><br/>
|
|
37
|
+
No more copying schemas. No manual documentation. Just accurate answers.
|
|
38
|
+
</p>
|
|
39
|
+
<p align="center">
|
|
40
|
+
<a href="https://databao.app">Website</a> •
|
|
41
|
+
<a href="#quickstart">Quickstart</a> •
|
|
42
|
+
<a href="#supported-data-sources">Data Sources</a> •
|
|
43
|
+
<a href="#contributing">Contributing</a>
|
|
44
|
+
</p>
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## What is Databao Context Engine?
|
|
49
|
+
|
|
50
|
+
Databao Context Engine is a CLI tool that **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
|
|
51
|
+
|
|
52
|
+
Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Your data sources → Context Engine → Unified semantic graph → Any LLM
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Why choose Databao Context Engine?
|
|
59
|
+
|
|
60
|
+
| Feature | What it means for you |
|
|
61
|
+
|----------------------------|----------------------------------------------------------------|
|
|
62
|
+
| **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
|
|
63
|
+
| **Runs locally** | Your data never leaves your environment |
|
|
64
|
+
| **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
|
|
65
|
+
| **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
|
|
66
|
+
| **Built-in benchmarks** | Measure and improve context quality over time |
|
|
67
|
+
| **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
|
|
68
|
+
| **Governed & versioned** | Track, version, and share context across your team |
|
|
69
|
+
| **Dynamic or static** | Serve context via MCP server or export as artifact |
|
|
70
|
+
|
|
71
|
+
## Installation
|
|
72
|
+
|
|
73
|
+
Databao Context Engine is [available on PyPI](https://pypi.org/project/databao-context-engine/) and can be installed with uv, pip, or another package manager.
|
|
74
|
+
|
|
75
|
+
### Using uv
|
|
76
|
+
|
|
77
|
+
1. Install Databao Context Engine:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
uv tool install databao-context-engine
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
1. Add it to your PATH:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
uv tool update-shell
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
1. Verify the installation:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
dce --help
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Using pip
|
|
96
|
+
|
|
97
|
+
1. Install Databao Context Engine:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install databao-context-engine
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
1. Verify the installation:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
dce --help
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Supported data sources
|
|
110
|
+
|
|
111
|
+
* <img src="https://cdn.simpleicons.org/postgresql/316192" width="16" height="16" alt=""> PostgreSQL
|
|
112
|
+
* <img src="https://cdn.simpleicons.org/mysql/4479A1" width="16" height="16" alt=""> MySQL
|
|
113
|
+
* <img src="https://cdn.simpleicons.org/sqlite/003B57" width="16" height="16" alt=""> SQLite
|
|
114
|
+
* <img src="https://cdn.simpleicons.org/duckdb/FFF000" width="16" height="16" alt=""> DuckDB
|
|
115
|
+
* <img src="https://cdn.simpleicons.org/dbt/FF694B" width="16" height="16" alt=""> dbt projects
|
|
116
|
+
* 📄 Documents & spreadsheets *(coming soon)*
|
|
117
|
+
|
|
118
|
+
## Supported LLMs
|
|
119
|
+
|
|
120
|
+
| Provider | Configuration |
|
|
121
|
+
|---------------|----------------------------------------------|
|
|
122
|
+
| **Ollama** | `languageModel: OLLAMA`: runs locally, free |
|
|
123
|
+
| **OpenAI** | `languageModel: OPENAI`: requires an API key |
|
|
124
|
+
| **Anthropic** | `languageModel: CLAUDE`: requires an API key |
|
|
125
|
+
| **Google** | `languageModel: GEMINI`: requires an API key |
|
|
126
|
+
|
|
127
|
+
## Quickstart
|
|
128
|
+
|
|
129
|
+
### 1. Create a project
|
|
130
|
+
|
|
131
|
+
1. Create a new directory for your project and navigate to it:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
mkdir dce-project && cd dce-project
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
1. Initialize a new project:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
dce init
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### 2. Configure data sources
|
|
144
|
+
|
|
145
|
+
1. When prompted, agree to create a new datasource.
|
|
146
|
+
You can also use the `dce datasource add` command.
|
|
147
|
+
|
|
148
|
+
1. Provide the data source type and its name.
|
|
149
|
+
|
|
150
|
+
1. Open the config file that was created for you in your editor and fill in the connection details.
|
|
151
|
+
|
|
152
|
+
1. Repeat these steps for all data sources you want to include in your project.
|
|
153
|
+
|
|
154
|
+
1. If you have data in Markdown or text files,
|
|
155
|
+
you can add them to the `dce/src/files` directory.
|
|
156
|
+
|
|
157
|
+
### 3. Build context
|
|
158
|
+
|
|
159
|
+
1. To build the context, run the following command:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
dce build
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### 4. Use Context with Your LLM
|
|
166
|
+
|
|
167
|
+
**Option A: Dynamic via MCP Server**
|
|
168
|
+
|
|
169
|
+
Databao Context Engine exposes the context through a local MCP Server, so your agent can access the latest context at runtime.
|
|
170
|
+
|
|
171
|
+
1. In **Claude Desktop**, **Cursor**, or another MCP-compatible agent, add the following configuration.
|
|
172
|
+
Replace `dce-project/` with the path to your project directory:
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
# claude_desktop_config.json, mcp.json, or similar
|
|
176
|
+
|
|
177
|
+
{
|
|
178
|
+
"mcpServers": {
|
|
179
|
+
"dce": {
|
|
180
|
+
"command": "dce mcp",
|
|
181
|
+
"args": ["--project-dir", "dce-project/"]
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
1. Save the file and restart your agent.
|
|
188
|
+
|
|
189
|
+
1. Open a new chat. In the chat window, select the `dce` server and ask questions related to your project context.
|
|
190
|
+
|
|
191
|
+
**Option B: Static artifact**
|
|
192
|
+
|
|
193
|
+
Even if you don’t have Claude or Cursor installed on your local machine,
|
|
194
|
+
you can still use the context built by Databao Context Engine by pasting it directly into your chat with an AI assistant.
|
|
195
|
+
|
|
196
|
+
1. Navigate to `dce-project/output/` and open the directory with the latest run.
|
|
197
|
+
|
|
198
|
+
1. Attach the `all_results.yaml` file to your chat with the AI assistant or copy and paste its contents into your chat.
|
|
199
|
+
|
|
200
|
+
## Contributing
|
|
201
|
+
|
|
202
|
+
We’d love your help! Here’s how to get involved:
|
|
203
|
+
|
|
204
|
+
- ⭐ **Star this repo** — it helps others find us!
|
|
205
|
+
- 🐛 **Found a bug?** [Open an issue](https://github.com/JetBrains/databao-context-engine/issues)
|
|
206
|
+
- 💡 **Have an idea?** We’re all ears — create a feature request
|
|
207
|
+
- 👍 **Upvote issues** you care about — helps us prioritize
|
|
208
|
+
- 🔧 **Submit a PR**
|
|
209
|
+
- 📝 **Improve docs** — typos, examples, tutorials — everything helps!
|
|
210
|
+
|
|
211
|
+
New to open source? No worries! We're friendly and happy to help you get started. 🌱
|
|
212
|
+
|
|
213
|
+
For more details, see [CONTRIBUTING](CONTRIBUTING.md).
|
|
214
|
+
|
|
215
|
+
## 📄 License
|
|
216
|
+
|
|
217
|
+
Apache 2.0 — use it however you want. See the [LICENSE](LICENSE.md) file for details.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
<p align="center">
|
|
222
|
+
<b>Like Databao Context Engine?</b> Give us a ⭐ — it means a lot!
|
|
223
|
+
</p>
|
|
224
|
+
|
|
225
|
+
<p align="center">
|
|
226
|
+
<a href="https://databao.app">Website</a> •
|
|
227
|
+
<a href="https://discord.gg/databao">Discord</a>
|
|
228
|
+
</p>
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
databao_context_engine/__init__.py,sha256=sPhGoLZhsFvf1n-WLt9gFUL1IZegCQJPkOZrcrJYn2c,2239
|
|
2
|
+
databao_context_engine/build_sources/__init__.py,sha256=FteGp3MvSuX_fh-Fx1pq-jOjFcjYKQSXFSll4tQffvk,224
|
|
3
|
+
databao_context_engine/build_sources/build_runner.py,sha256=aSxp99R4tbnbHlEDFrg8iw4Bi1dGvO_5uTO-mIrB080,3986
|
|
4
|
+
databao_context_engine/build_sources/build_service.py,sha256=eywJzpvZUVrj-JYxXii_kxhTBzVUT0xfbNtdAFcpGWQ,1624
|
|
5
|
+
databao_context_engine/build_sources/build_wiring.py,sha256=Hfdk0CF0LuGiGGQjOl7wVpVBnYRgrLvT_iLUq-Xljng,3174
|
|
6
|
+
databao_context_engine/build_sources/export_results.py,sha256=E3HP4uaQKYDrmAJKFWwDT190j7ZKfO791_pjKebTMQM,1773
|
|
7
|
+
databao_context_engine/build_sources/plugin_execution.py,sha256=wfMCF_9ixuEnV2VMhCDwOwBN5bi8Yz6ap5ayau2TAYk,2090
|
|
8
|
+
databao_context_engine/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
databao_context_engine/cli/add_datasource_config.py,sha256=fK1MIaTCoz1jurldJZD_BpPIgc6kr-NaAyVjVgjsOTU,5523
|
|
10
|
+
databao_context_engine/cli/commands.py,sha256=VJOHB3H28wNjOUuFC8hhF8-rnlIFjsZMlNT3C2ZbIUY,7807
|
|
11
|
+
databao_context_engine/cli/datasources.py,sha256=kHbXhEnDVaYXTRAE3WeaDGohH1Y9cwyYjjcycQ8eh1U,2376
|
|
12
|
+
databao_context_engine/cli/info.py,sha256=2iegfvRAxMae3KkWMDu-BOnljeJMVQ5u8P5k_8nOKpc,1203
|
|
13
|
+
databao_context_engine/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
databao_context_engine/config/log_config.yaml,sha256=hH2NPumufcyig8L-7Z_DbLQ88_G2N6CAEl5fvHafeAw,360
|
|
15
|
+
databao_context_engine/config/logging.py,sha256=66k7x3ftEdHx-CKqbOss4pqZfGn4nMFv7AzVPs9aVnY,1525
|
|
16
|
+
databao_context_engine/databao_context_engine.py,sha256=l8F1JOzDDZYvIannM6Yq_tGR--G23XzjTVZn6dnjSwY,4966
|
|
17
|
+
databao_context_engine/databao_context_project_manager.py,sha256=xF_ZQUd8NxnP6ow-y6YCGxzPO9Gfy5TTSqIeC5N9XOU,8288
|
|
18
|
+
databao_context_engine/datasources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
databao_context_engine/datasources/check_config.py,sha256=YsrWFCwdJv3jvXG_tTuCRr129351GmRTKB0BRYXxtbo,5407
|
|
20
|
+
databao_context_engine/datasources/datasource_context.py,sha256=uRuviRJUsZdHcNan5DYc3ZBpw2SSsHIMuZjR4PbXUkg,3687
|
|
21
|
+
databao_context_engine/datasources/datasource_discovery.py,sha256=ZrjVyKIk93XbBLTVnnNAR4CFBFflmPZgvmBfOq83hZs,5573
|
|
22
|
+
databao_context_engine/datasources/types.py,sha256=gF2BfNC-NSSvXEg-mefGPBKCGsLhUiDySIv56SsvbIM,7449
|
|
23
|
+
databao_context_engine/event_journal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
databao_context_engine/event_journal/writer.py,sha256=abkl2SPHYzO8XAUsjbtrcXm6WQk15zWpXhYm86qb6G0,986
|
|
25
|
+
databao_context_engine/generate_configs_schemas.py,sha256=rUjSg_SnF7C8-OJ8dNwPpbEdPQqxnF2X8qz3LwIo0e0,3007
|
|
26
|
+
databao_context_engine/init_project.py,sha256=enLvYOd5MpGYVdWly7DuLMzPLUEE6130bEuxh0YZft8,1496
|
|
27
|
+
databao_context_engine/introspection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
databao_context_engine/introspection/property_extract.py,sha256=DlTdqvysEmYMsmeJMdaxQuzbBalMxTq7XLqMSwecYCg,7891
|
|
29
|
+
databao_context_engine/llm/__init__.py,sha256=FAGvpG3kBp4ssNYre8ytTtPeCF6XdZYNP7qpEUhpNAk,366
|
|
30
|
+
databao_context_engine/llm/api.py,sha256=eizZD1PGwkJbj0CFHDtdqcPWp3OdbbUSWIIcF3M3MSo,2091
|
|
31
|
+
databao_context_engine/llm/config.py,sha256=geIygR-9maeKbvmcO5J3iNZXKS4LZc7zpKYyaH8vCoI,463
|
|
32
|
+
databao_context_engine/llm/descriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
databao_context_engine/llm/descriptions/ollama.py,sha256=MkRajlkIsVvLzQvkVBsGwjHqj5lNylexu5o-HQSnbQQ,622
|
|
34
|
+
databao_context_engine/llm/descriptions/provider.py,sha256=nojEI0l55TfFuNooQwGNvfa2k6A1vZ1Oi3PBLAr9eFo,227
|
|
35
|
+
databao_context_engine/llm/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
databao_context_engine/llm/embeddings/ollama.py,sha256=GQzlMBRJGJe_xreY9-Y3REBFWWaOyKhiGQxWxrR6xfY,949
|
|
37
|
+
databao_context_engine/llm/embeddings/provider.py,sha256=IVrOptwad2Fr1wf1SBOM3-7y66k08mOZnga1NA6yhbA,300
|
|
38
|
+
databao_context_engine/llm/errors.py,sha256=Y4CVHYJKkn2oHnFWVlVL1QJl4gtvW4z11V3FB9qp3Cc,402
|
|
39
|
+
databao_context_engine/llm/factory.py,sha256=Ct6IyC984POktLNEfOyrIw9y1IqZ6YNg4zSx16L7VhQ,1888
|
|
40
|
+
databao_context_engine/llm/install.py,sha256=1NqpoERmLvX3maFFZQHZ0I7uW-Moi3oyySQoTavZqbM,7082
|
|
41
|
+
databao_context_engine/llm/runtime.py,sha256=uEIiGb9OkjGwlZH6arb3lmD3NHRByh1nngJW-4EWQn0,2343
|
|
42
|
+
databao_context_engine/llm/service.py,sha256=rM_DrF2F7NylxBhtfVGporUcv91eR_nF1mF--Zxh2tc,5978
|
|
43
|
+
databao_context_engine/main.py,sha256=3gWwEc9GZJUFANJtW9Fx4smVzib_sDtFkBGPILjisaI,350
|
|
44
|
+
databao_context_engine/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
+
databao_context_engine/mcp/mcp_runner.py,sha256=bnB2a-_P1zOlDBqpyFIq4n2hWkfwqb6G7dHb3-3Q2I0,468
|
|
46
|
+
databao_context_engine/mcp/mcp_server.py,sha256=Z2pesefHCFhzfSdPa8OsruxynE28dmEKuEmwx3N18kM,2179
|
|
47
|
+
databao_context_engine/plugin_loader.py,sha256=KIHUwxMcWdyPvZlHFxIDGwIMILTm6mO_gJAzw1UIA00,4784
|
|
48
|
+
databao_context_engine/pluginlib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
+
databao_context_engine/pluginlib/build_plugin.py,sha256=y-guT2X7wS8Yn1C6iBWLF_HDA4NYVFJsQ-oe6IgVvDo,2831
|
|
50
|
+
databao_context_engine/pluginlib/config.py,sha256=fAu1AuxSFI6LOoAyvwuFE5p11bOsHp9EZzn2ST8DrUI,1534
|
|
51
|
+
databao_context_engine/pluginlib/plugin_utils.py,sha256=f76ksdMUG3Wy4KOytUYxLMrq9iOHytu3QF8wc5NzzJE,2471
|
|
52
|
+
databao_context_engine/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
+
databao_context_engine/plugins/databases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
databao_context_engine/plugins/databases/athena/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
+
databao_context_engine/plugins/databases/athena/athena_db_plugin.py,sha256=1Jn3kqPExKUxvj4oXqgJQwRoLiu_3uKgSzEESvGm_Qs,463
|
|
56
|
+
databao_context_engine/plugins/databases/athena/athena_introspector.py,sha256=PCdU8pHEAr6UliLwDgLNEqiN9MqwJ0k94H2f6TgBBmA,5627
|
|
57
|
+
databao_context_engine/plugins/databases/base_db_plugin.py,sha256=ivCgxqb5wBjCgGuPxV5-oFF8R0h-zcAt1ckgTQ87V4Q,1680
|
|
58
|
+
databao_context_engine/plugins/databases/base_introspector.py,sha256=08L6nmIBJT3rfWB2fGdm7jSrNrW4un2MMMLt5shorKo,5752
|
|
59
|
+
databao_context_engine/plugins/databases/clickhouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
+
databao_context_engine/plugins/databases/clickhouse/clickhouse_db_plugin.py,sha256=K3cWnZFyKGpFfg5VBJ0016C9H9DuADIBJdomzH9T49E,520
|
|
61
|
+
databao_context_engine/plugins/databases/clickhouse/clickhouse_introspector.py,sha256=wQbriPsb1HHbx7b7P86K-dPDGIziXxF7rjM_gUy_cUs,6267
|
|
62
|
+
databao_context_engine/plugins/databases/database_chunker.py,sha256=dn9Mb9Sw4f8IwBe9RtukQGT0yyKLQt5i4tjvsrFh1ew,2100
|
|
63
|
+
databao_context_engine/plugins/databases/databases_types.py,sha256=kYZEaFa_cnROg_n1k7tIonoC2VunLBe5SQR47GOt_8k,2579
|
|
64
|
+
databao_context_engine/plugins/databases/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
|
+
databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py,sha256=UyFp-MihNj562Mx9KcogFX33yNbNUMX_fdmvF6MrJ1M,458
|
|
66
|
+
databao_context_engine/plugins/databases/duckdb/duckdb_introspector.py,sha256=dhZBfKKpyETOrfbQBIM8urfztH_qfhTrvzNFlIesC6c,11260
|
|
67
|
+
databao_context_engine/plugins/databases/introspection_model_builder.py,sha256=-i7e4jqwdcsu3NnS-LHVSH_DrG3riN09KX3SDHAGF0c,10350
|
|
68
|
+
databao_context_engine/plugins/databases/introspection_scope.py,sha256=0BZAAmVTiHifOzeuQkCsGKrGRnZCPHRCLqam5nQqr88,2225
|
|
69
|
+
databao_context_engine/plugins/databases/introspection_scope_matcher.py,sha256=gseF4j-SD-7SSPMDlDqHUOTI3tPSaOGKJgp42WDVlsY,3605
|
|
70
|
+
databao_context_engine/plugins/databases/mssql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
+
databao_context_engine/plugins/databases/mssql/mssql_db_plugin.py,sha256=KV_KQw2f_wIj0B7_rtW0XGcesSOCqPy5A9FTYEDY7PI,452
|
|
72
|
+
databao_context_engine/plugins/databases/mssql/mssql_introspector.py,sha256=bqaERs8uRZlTfVQ91A9FygHpDX-KmGZXNUp8iDgxPdg,18704
|
|
73
|
+
databao_context_engine/plugins/databases/mysql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
|
+
databao_context_engine/plugins/databases/mysql/mysql_db_plugin.py,sha256=40c6wznmQjU7bSs1kStS4WgNyoZ44wU1Ik9uLdvDqfI,452
|
|
75
|
+
databao_context_engine/plugins/databases/mysql/mysql_introspector.py,sha256=-OlXluCVTUTTZzRJPAVvcHeRdocopHcDKgBFWXF9kGo,14461
|
|
76
|
+
databao_context_engine/plugins/databases/postgresql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
+
databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py,sha256=VHCtLkoZhgbw631Z-cjcZOyI7dQZii-1fkdCJmntLWE,510
|
|
78
|
+
databao_context_engine/plugins/databases/postgresql/postgresql_introspector.py,sha256=OoaVLgpLOB4HDTU21S5upNyZD3H3B-z5fJq5NMOtakU,17275
|
|
79
|
+
databao_context_engine/plugins/databases/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
+
databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py,sha256=DCIpicjpS9qkd2EfumO7UDG6RQERW3s3FDPDSue7ehg,509
|
|
81
|
+
databao_context_engine/plugins/databases/snowflake/snowflake_introspector.py,sha256=8-a478TSu3uL_1z5HeS9nnF4VxZ9DZNnjH1coyqINM0,11962
|
|
82
|
+
databao_context_engine/plugins/databases/sqlite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
|
+
databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py,sha256=MI896G7Yq8NcQ8sTWErAFD7YK8qfDpZZprxoszJ46l8,460
|
|
84
|
+
databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py,sha256=kKFNzclp1NmQ6BF3ylGn86R5PuQh6aqevj6E7_zAKmQ,8361
|
|
85
|
+
databao_context_engine/plugins/duckdb_tools.py,sha256=46rctnTxDPAhHtaiTp1DxMuuDuRKrtKWJFSSM2w7uUU,645
|
|
86
|
+
databao_context_engine/plugins/files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
+
databao_context_engine/plugins/files/unstructured_files_plugin.py,sha256=eqs1anQhYBZh7xu4CwhfkqXQjGE5gJnKEwyJbtUR78E,2384
|
|
88
|
+
databao_context_engine/plugins/plugin_loader.py,sha256=8j7JAKqtG6_VjX3GbfAD6kjrWuuJ7NnC2nnHPEujux8,4074
|
|
89
|
+
databao_context_engine/plugins/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
+
databao_context_engine/plugins/resources/parquet_chunker.py,sha256=R9WCOBqpKRTVN6t5eeOm_mmnKBOxvjIiQ9zTc8vnUb4,848
|
|
91
|
+
databao_context_engine/plugins/resources/parquet_introspector.py,sha256=Cn_yh6E-dOTOZstlavEGAsV6ZRKZXJraVAl_pzJJuGs,5629
|
|
92
|
+
databao_context_engine/plugins/resources/parquet_plugin.py,sha256=FVRdAqhZkEBvUsW1CkYH4yPPs3P1pRsofR8PdHN26AE,1128
|
|
93
|
+
databao_context_engine/project/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
|
+
databao_context_engine/project/info.py,sha256=3chVSZ4pV2EsYJSeVfOYTK746POlJImciIBOvrQuYbc,2479
|
|
95
|
+
databao_context_engine/project/init_project.py,sha256=QiLalH--BMQGB-c2WBbPAUKBinDbO0cJkmilUfqPAuI,3970
|
|
96
|
+
databao_context_engine/project/layout.py,sha256=3ttCJDLMh4AS1tBRcSROgT7hG6uOqkI1QGsQEcfGsqg,4087
|
|
97
|
+
databao_context_engine/project/project_config.py,sha256=rI-Wkll7lca7RlYIaFScs5CIKIZ8uujtKRxSjibGIt8,1132
|
|
98
|
+
databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml,sha256=5o6jF_zR4WSvS9QcZnGW-S2FmahsjNp-tMbAkzbNabo,139
|
|
99
|
+
databao_context_engine/project/resources/examples/src/files/documentation.md,sha256=k5G5lrN4S_ie2o7CBxXgLiWGIGtb3v-77c_3n4C7Lvw,1090
|
|
100
|
+
databao_context_engine/project/resources/examples/src/files/notes.txt,sha256=0T1u8lIwIRwNopEsfWmqfIbhfRl-DN3XuiIN7suRAiE,3668
|
|
101
|
+
databao_context_engine/retrieve_embeddings/__init__.py,sha256=__aOhCc3sBfLtjxz0hlVGuSYUMRsEM3PucPYNC6LVq0,126
|
|
102
|
+
databao_context_engine/retrieve_embeddings/retrieve_runner.py,sha256=s7XwPkMupBgN9JADFaHsytM5ztujyqujB7qdmXKv7ms,585
|
|
103
|
+
databao_context_engine/retrieve_embeddings/retrieve_service.py,sha256=0KOSaxyj6DfzLbHtcvxFPUjXO4MD5t91RP95rsVuE6Q,2275
|
|
104
|
+
databao_context_engine/retrieve_embeddings/retrieve_wiring.py,sha256=GQ9Eh6XyQ9MdG8Kb0H72XPj_8-gPDLSM_Bs0NdmSMX4,2012
|
|
105
|
+
databao_context_engine/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
|
+
databao_context_engine/serialization/yaml.py,sha256=ke-0iHGaA2eynuGf-tG9IZip56XFfMDrNswzi5rNaDE,1219
|
|
107
|
+
databao_context_engine/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
|
+
databao_context_engine/services/chunk_embedding_service.py,sha256=Dy25u2QSSdq03v1zs2bjjm8Sze-VEd12F49gzoKXjOA,4924
|
|
109
|
+
databao_context_engine/services/embedding_shard_resolver.py,sha256=6oOVm6QJb74lEIbYXijWMqISTx1VfmKQaQjdSHr8zDA,2289
|
|
110
|
+
databao_context_engine/services/factories.py,sha256=IoRmJk9a7q2eszwz_zfqE2hJDOwpB76SmmmgR0DI-qY,1830
|
|
111
|
+
databao_context_engine/services/models.py,sha256=wHmk4eheoSopXkCs3v2ggoDAGoTPdHYcADMZ4gdtSCI,299
|
|
112
|
+
databao_context_engine/services/persistence_service.py,sha256=Kcu4rw-TCqyHiGW1h5rYZfe3F-HZTn0tRAM4_czzEAc,2252
|
|
113
|
+
databao_context_engine/services/table_name_policy.py,sha256=q0scAQ_ZoTrV8V0J6cmxvsMYxTcmUlAqEWpzZUitpLQ,567
|
|
114
|
+
databao_context_engine/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
+
databao_context_engine/storage/connection.py,sha256=UGnJOBJ83jDAmN3fmjVZI60qUw5cAC3YNLToJZLOhGQ,941
|
|
116
|
+
databao_context_engine/storage/exceptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
117
|
+
databao_context_engine/storage/exceptions/exceptions.py,sha256=zwJ8n-ekMhIH-bs9dvYqQuP4VfQ9rt0jdsptoojA5oU,174
|
|
118
|
+
databao_context_engine/storage/migrate.py,sha256=rbZDhQEzykQyfGflJodGv-1ohs43oGSGYXEyT6jTVeo,4617
|
|
119
|
+
databao_context_engine/storage/migrations/V01__init.sql,sha256=hJvUoPIW-rQQB5DSDrPjGJQoey9bpDRz0FFoUyTe9Qo,1368
|
|
120
|
+
databao_context_engine/storage/models.py,sha256=TmoeyOhrFL9mu-C43XUtqOWGhkQu2h-sZQdYCjui5HM,580
|
|
121
|
+
databao_context_engine/storage/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
122
|
+
databao_context_engine/storage/repositories/chunk_repository.py,sha256=nwDF8NeCwQJLlwtlvqILUccmWmQTfpg6Uf_WCDXm564,3681
|
|
123
|
+
databao_context_engine/storage/repositories/embedding_model_registry_repository.py,sha256=g91WCflYRc5jPDbW9RBp5HxWFsSIta4n8OYNwgoLv9c,2413
|
|
124
|
+
databao_context_engine/storage/repositories/embedding_repository.py,sha256=UCvr5mf2jTJgX_uMdLIMiN5NWa8Zn1U3HEr19TQRXAw,3278
|
|
125
|
+
databao_context_engine/storage/repositories/factories.py,sha256=YUkVrHyf5UV2rZ7H97Fi_n0ZYxTwXy8_w5zJnMkO8Lw,970
|
|
126
|
+
databao_context_engine/storage/repositories/vector_search_repository.py,sha256=jqCRZ18FKuJFVMsZtHRs19RejVlbZ4VDZY9E6ryeQfM,2289
|
|
127
|
+
databao_context_engine/storage/transaction.py,sha256=QpfE1VtgDd3_7OBzFYI3HPJJ7ESXkgCdcWukFkAjbSg,270
|
|
128
|
+
databao_context_engine/system/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
|
+
databao_context_engine/system/properties.py,sha256=mQ7-_PZeYSESYn1cMUQ0IK7rJEnbhc7t4WesFjAgo-Q,429
|
|
130
|
+
databao_context_engine/templating/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
|
+
databao_context_engine/templating/renderer.py,sha256=W2-0IGStAp6oxANmsKs_Z-UoIR6Gt_c4ILYFa3Hruo4,662
|
|
132
|
+
databao_context_engine-0.1.5.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
133
|
+
databao_context_engine-0.1.5.dist-info/entry_points.txt,sha256=5EeQJ1W8zEFh4HuF1bs2zBeoP408oiwuM9UrkJiurgI,138
|
|
134
|
+
databao_context_engine-0.1.5.dist-info/METADATA,sha256=HfJBADmvbXEEK710S6zzybfWl-mqnDXfm_18LIgh7kk,7773
|
|
135
|
+
databao_context_engine-0.1.5.dist-info/RECORD,,
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
|
|
6
|
-
from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
|
|
7
|
-
from databao_context_engine.pluginlib.build_plugin import (
|
|
8
|
-
BuildPlugin,
|
|
9
|
-
)
|
|
10
|
-
from databao_context_engine.project.types import PreparedDatasource
|
|
11
|
-
from databao_context_engine.serialisation.yaml import to_yaml_string
|
|
12
|
-
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
|
|
13
|
-
from databao_context_engine.storage.models import RunDTO
|
|
14
|
-
from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
|
|
15
|
-
from databao_context_engine.storage.repositories.run_repository import RunRepository
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class BuildService:
|
|
21
|
-
def __init__(
|
|
22
|
-
self,
|
|
23
|
-
*,
|
|
24
|
-
run_repo: RunRepository,
|
|
25
|
-
datasource_run_repo: DatasourceRunRepository,
|
|
26
|
-
chunk_embedding_service: ChunkEmbeddingService,
|
|
27
|
-
) -> None:
|
|
28
|
-
self._run_repo = run_repo
|
|
29
|
-
self._datasource_run_repo = datasource_run_repo
|
|
30
|
-
self._chunk_embedding_service = chunk_embedding_service
|
|
31
|
-
|
|
32
|
-
def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
|
|
33
|
-
"""
|
|
34
|
-
Create a new run row and return (run_id, started_at).
|
|
35
|
-
"""
|
|
36
|
-
return self._run_repo.create(project_id=project_id, dce_version=dce_version)
|
|
37
|
-
|
|
38
|
-
def finalize_run(self, *, run_id: int):
|
|
39
|
-
"""
|
|
40
|
-
Mark the run as complete (sets ended_at).
|
|
41
|
-
"""
|
|
42
|
-
self._run_repo.update(run_id=run_id, ended_at=datetime.now())
|
|
43
|
-
|
|
44
|
-
def process_prepared_source(
|
|
45
|
-
self,
|
|
46
|
-
*,
|
|
47
|
-
run_id: int,
|
|
48
|
-
prepared_source: PreparedDatasource,
|
|
49
|
-
plugin: BuildPlugin,
|
|
50
|
-
) -> BuiltDatasourceContext:
|
|
51
|
-
"""
|
|
52
|
-
Process a single source.
|
|
53
|
-
|
|
54
|
-
1) Execute the plugin
|
|
55
|
-
2) Divide the results into chunks
|
|
56
|
-
3) Embed and persist the chunks
|
|
57
|
-
"""
|
|
58
|
-
result = execute(prepared_source, plugin)
|
|
59
|
-
|
|
60
|
-
chunks = plugin.divide_context_into_chunks(result.context)
|
|
61
|
-
if not chunks:
|
|
62
|
-
logger.info("No chunks for %s — skipping.", prepared_source.path.name)
|
|
63
|
-
return result
|
|
64
|
-
|
|
65
|
-
datasource_run = self._datasource_run_repo.create(
|
|
66
|
-
run_id=run_id,
|
|
67
|
-
plugin=plugin.name,
|
|
68
|
-
full_type=prepared_source.datasource_type.full_type,
|
|
69
|
-
source_id=result.datasource_id,
|
|
70
|
-
storage_directory=str(prepared_source.path.parent),
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
self._chunk_embedding_service.embed_chunks(
|
|
74
|
-
datasource_run_id=datasource_run.datasource_run_id, chunks=chunks, result=to_yaml_string(result.context)
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
return result
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
|
|
5
|
-
from databao_context_engine.llm.factory import (
|
|
6
|
-
create_ollama_description_provider,
|
|
7
|
-
create_ollama_embedding_provider,
|
|
8
|
-
create_ollama_service,
|
|
9
|
-
)
|
|
10
|
-
from databao_context_engine.project.info import get_dce_version
|
|
11
|
-
from databao_context_engine.project.layout import ensure_project_dir
|
|
12
|
-
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
13
|
-
from databao_context_engine.services.factories import (
|
|
14
|
-
create_build_service,
|
|
15
|
-
)
|
|
16
|
-
from databao_context_engine.storage.connection import open_duckdb_connection
|
|
17
|
-
from databao_context_engine.system.properties import get_db_path
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger(__name__)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
|
|
23
|
-
"""
|
|
24
|
-
Public build entrypoint
|
|
25
|
-
- Instantiates the build service
|
|
26
|
-
- Delegates the actual build logic to the build runner
|
|
27
|
-
"""
|
|
28
|
-
project_layout = ensure_project_dir(project_dir)
|
|
29
|
-
|
|
30
|
-
logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")
|
|
31
|
-
|
|
32
|
-
with open_duckdb_connection(get_db_path()) as conn:
|
|
33
|
-
ollama_service = create_ollama_service()
|
|
34
|
-
embedding_provider = create_ollama_embedding_provider(ollama_service)
|
|
35
|
-
description_provider = (
|
|
36
|
-
create_ollama_description_provider(ollama_service)
|
|
37
|
-
if chunk_embedding_mode.should_generate_description()
|
|
38
|
-
else None
|
|
39
|
-
)
|
|
40
|
-
build_service = create_build_service(
|
|
41
|
-
conn,
|
|
42
|
-
embedding_provider=embedding_provider,
|
|
43
|
-
description_provider=description_provider,
|
|
44
|
-
chunk_embedding_mode=chunk_embedding_mode,
|
|
45
|
-
)
|
|
46
|
-
dce_config = project_layout.read_config_file()
|
|
47
|
-
return build(
|
|
48
|
-
project_dir=project_dir,
|
|
49
|
-
build_service=build_service,
|
|
50
|
-
project_id=str(dce_config.project_id),
|
|
51
|
-
dce_version=get_dce_version(),
|
|
52
|
-
)
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext
|
|
5
|
-
from databao_context_engine.datasource_config.datasource_context import get_context_header_for_datasource
|
|
6
|
-
from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME, get_output_dir
|
|
7
|
-
from databao_context_engine.project.types import DatasourceId
|
|
8
|
-
from databao_context_engine.serialisation.yaml import write_yaml_to_stream
|
|
9
|
-
|
|
10
|
-
logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def create_run_dir(project_dir: Path, run_name: str) -> Path:
|
|
14
|
-
output_dir = get_output_dir(project_dir)
|
|
15
|
-
|
|
16
|
-
run_dir = output_dir.joinpath(run_name)
|
|
17
|
-
run_dir.mkdir(parents=True, exist_ok=False)
|
|
18
|
-
|
|
19
|
-
return run_dir
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def export_build_result(run_dir: Path, result: BuiltDatasourceContext) -> Path:
|
|
23
|
-
datasource_id = DatasourceId.from_string_repr(result.datasource_id)
|
|
24
|
-
export_file_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())
|
|
25
|
-
|
|
26
|
-
# Make sure the parent folder exists
|
|
27
|
-
export_file_path.parent.mkdir(exist_ok=True)
|
|
28
|
-
|
|
29
|
-
with export_file_path.open("w") as export_file:
|
|
30
|
-
write_yaml_to_stream(data=result, file_stream=export_file)
|
|
31
|
-
|
|
32
|
-
logger.info(f"Exported result to {export_file_path.resolve()}")
|
|
33
|
-
|
|
34
|
-
return export_file_path
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def append_result_to_all_results(run_dir: Path, result: BuiltDatasourceContext):
|
|
38
|
-
path = run_dir.joinpath(ALL_RESULTS_FILE_NAME)
|
|
39
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
40
|
-
with path.open("a", encoding="utf-8") as export_file:
|
|
41
|
-
export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
|
|
42
|
-
write_yaml_to_stream(data=result, file_stream=export_file)
|
|
43
|
-
export_file.write("\n")
|