databao-context-engine 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. databao_context_engine-0.1.1/PKG-INFO +186 -0
  2. databao_context_engine-0.1.1/README.md +164 -0
  3. databao_context_engine-0.1.1/pyproject.toml +57 -0
  4. databao_context_engine-0.1.1/src/databao_context_engine/__init__.py +35 -0
  5. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/__init__.py +0 -0
  6. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/__init__.py +0 -0
  7. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/build_runner.py +111 -0
  8. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/build_service.py +77 -0
  9. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/build_wiring.py +52 -0
  10. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/export_results.py +43 -0
  11. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
  12. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/public/__init__.py +0 -0
  13. databao_context_engine-0.1.1/src/databao_context_engine/build_sources/public/api.py +4 -0
  14. databao_context_engine-0.1.1/src/databao_context_engine/cli/__init__.py +0 -0
  15. databao_context_engine-0.1.1/src/databao_context_engine/cli/add_datasource_config.py +130 -0
  16. databao_context_engine-0.1.1/src/databao_context_engine/cli/commands.py +256 -0
  17. databao_context_engine-0.1.1/src/databao_context_engine/cli/datasources.py +64 -0
  18. databao_context_engine-0.1.1/src/databao_context_engine/cli/info.py +32 -0
  19. databao_context_engine-0.1.1/src/databao_context_engine/config/__init__.py +0 -0
  20. databao_context_engine-0.1.1/src/databao_context_engine/config/log_config.yaml +16 -0
  21. databao_context_engine-0.1.1/src/databao_context_engine/config/logging.py +43 -0
  22. databao_context_engine-0.1.1/src/databao_context_engine/databao_context_project_manager.py +92 -0
  23. databao_context_engine-0.1.1/src/databao_context_engine/databao_engine.py +85 -0
  24. databao_context_engine-0.1.1/src/databao_context_engine/datasource_config/__init__.py +0 -0
  25. databao_context_engine-0.1.1/src/databao_context_engine/datasource_config/add_config.py +50 -0
  26. databao_context_engine-0.1.1/src/databao_context_engine/datasource_config/check_config.py +131 -0
  27. databao_context_engine-0.1.1/src/databao_context_engine/datasource_config/datasource_context.py +60 -0
  28. databao_context_engine-0.1.1/src/databao_context_engine/event_journal/__init__.py +0 -0
  29. databao_context_engine-0.1.1/src/databao_context_engine/event_journal/writer.py +29 -0
  30. databao_context_engine-0.1.1/src/databao_context_engine/generate_configs_schemas.py +92 -0
  31. databao_context_engine-0.1.1/src/databao_context_engine/init_project.py +18 -0
  32. databao_context_engine-0.1.1/src/databao_context_engine/introspection/__init__.py +0 -0
  33. databao_context_engine-0.1.1/src/databao_context_engine/introspection/property_extract.py +202 -0
  34. databao_context_engine-0.1.1/src/databao_context_engine/llm/__init__.py +0 -0
  35. databao_context_engine-0.1.1/src/databao_context_engine/llm/config.py +20 -0
  36. databao_context_engine-0.1.1/src/databao_context_engine/llm/descriptions/__init__.py +0 -0
  37. databao_context_engine-0.1.1/src/databao_context_engine/llm/descriptions/ollama.py +21 -0
  38. databao_context_engine-0.1.1/src/databao_context_engine/llm/descriptions/provider.py +10 -0
  39. databao_context_engine-0.1.1/src/databao_context_engine/llm/embeddings/__init__.py +0 -0
  40. databao_context_engine-0.1.1/src/databao_context_engine/llm/embeddings/ollama.py +37 -0
  41. databao_context_engine-0.1.1/src/databao_context_engine/llm/embeddings/provider.py +13 -0
  42. databao_context_engine-0.1.1/src/databao_context_engine/llm/errors.py +16 -0
  43. databao_context_engine-0.1.1/src/databao_context_engine/llm/factory.py +61 -0
  44. databao_context_engine-0.1.1/src/databao_context_engine/llm/install.py +227 -0
  45. databao_context_engine-0.1.1/src/databao_context_engine/llm/runtime.py +73 -0
  46. databao_context_engine-0.1.1/src/databao_context_engine/llm/service.py +159 -0
  47. databao_context_engine-0.1.1/src/databao_context_engine/main.py +19 -0
  48. databao_context_engine-0.1.1/src/databao_context_engine/mcp/__init__.py +0 -0
  49. databao_context_engine-0.1.1/src/databao_context_engine/mcp/all_results_tool.py +5 -0
  50. databao_context_engine-0.1.1/src/databao_context_engine/mcp/mcp_runner.py +16 -0
  51. databao_context_engine-0.1.1/src/databao_context_engine/mcp/mcp_server.py +63 -0
  52. databao_context_engine-0.1.1/src/databao_context_engine/mcp/retrieve_tool.py +22 -0
  53. databao_context_engine-0.1.1/src/databao_context_engine/pluginlib/__init__.py +0 -0
  54. databao_context_engine-0.1.1/src/databao_context_engine/pluginlib/build_plugin.py +107 -0
  55. databao_context_engine-0.1.1/src/databao_context_engine/pluginlib/config.py +37 -0
  56. databao_context_engine-0.1.1/src/databao_context_engine/pluginlib/plugin_utils.py +68 -0
  57. databao_context_engine-0.1.1/src/databao_context_engine/plugins/__init__.py +0 -0
  58. databao_context_engine-0.1.1/src/databao_context_engine/plugins/athena_db_plugin.py +12 -0
  59. databao_context_engine-0.1.1/src/databao_context_engine/plugins/base_db_plugin.py +45 -0
  60. databao_context_engine-0.1.1/src/databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
  61. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/__init__.py +0 -0
  62. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/athena_introspector.py +101 -0
  63. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/base_introspector.py +144 -0
  64. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
  65. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/database_chunker.py +69 -0
  66. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/databases_types.py +114 -0
  67. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
  68. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
  69. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/introspection_scope.py +74 -0
  70. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
  71. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
  72. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
  73. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
  74. databao_context_engine-0.1.1/src/databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
  75. databao_context_engine-0.1.1/src/databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
  76. databao_context_engine-0.1.1/src/databao_context_engine/plugins/mssql_db_plugin.py +12 -0
  77. databao_context_engine-0.1.1/src/databao_context_engine/plugins/mysql_db_plugin.py +12 -0
  78. databao_context_engine-0.1.1/src/databao_context_engine/plugins/parquet_plugin.py +32 -0
  79. databao_context_engine-0.1.1/src/databao_context_engine/plugins/plugin_loader.py +110 -0
  80. databao_context_engine-0.1.1/src/databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
  81. databao_context_engine-0.1.1/src/databao_context_engine/plugins/resources/__init__.py +0 -0
  82. databao_context_engine-0.1.1/src/databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
  83. databao_context_engine-0.1.1/src/databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
  84. databao_context_engine-0.1.1/src/databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
  85. databao_context_engine-0.1.1/src/databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
  86. databao_context_engine-0.1.1/src/databao_context_engine/project/__init__.py +0 -0
  87. databao_context_engine-0.1.1/src/databao_context_engine/project/datasource_discovery.py +141 -0
  88. databao_context_engine-0.1.1/src/databao_context_engine/project/info.py +44 -0
  89. databao_context_engine-0.1.1/src/databao_context_engine/project/init_project.py +102 -0
  90. databao_context_engine-0.1.1/src/databao_context_engine/project/layout.py +127 -0
  91. databao_context_engine-0.1.1/src/databao_context_engine/project/project_config.py +32 -0
  92. databao_context_engine-0.1.1/src/databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
  93. databao_context_engine-0.1.1/src/databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
  94. databao_context_engine-0.1.1/src/databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
  95. databao_context_engine-0.1.1/src/databao_context_engine/project/runs.py +39 -0
  96. databao_context_engine-0.1.1/src/databao_context_engine/project/types.py +134 -0
  97. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/__init__.py +0 -0
  98. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  99. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
  100. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
  101. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
  102. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
  103. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  104. databao_context_engine-0.1.1/src/databao_context_engine/retrieve_embeddings/public/api.py +3 -0
  105. databao_context_engine-0.1.1/src/databao_context_engine/serialisation/__init__.py +0 -0
  106. databao_context_engine-0.1.1/src/databao_context_engine/serialisation/yaml.py +35 -0
  107. databao_context_engine-0.1.1/src/databao_context_engine/services/__init__.py +0 -0
  108. databao_context_engine-0.1.1/src/databao_context_engine/services/chunk_embedding_service.py +104 -0
  109. databao_context_engine-0.1.1/src/databao_context_engine/services/embedding_shard_resolver.py +64 -0
  110. databao_context_engine-0.1.1/src/databao_context_engine/services/factories.py +88 -0
  111. databao_context_engine-0.1.1/src/databao_context_engine/services/models.py +12 -0
  112. databao_context_engine-0.1.1/src/databao_context_engine/services/persistence_service.py +61 -0
  113. databao_context_engine-0.1.1/src/databao_context_engine/services/run_name_policy.py +8 -0
  114. databao_context_engine-0.1.1/src/databao_context_engine/services/table_name_policy.py +15 -0
  115. databao_context_engine-0.1.1/src/databao_context_engine/storage/__init__.py +0 -0
  116. databao_context_engine-0.1.1/src/databao_context_engine/storage/connection.py +32 -0
  117. databao_context_engine-0.1.1/src/databao_context_engine/storage/exceptions/__init__.py +0 -0
  118. databao_context_engine-0.1.1/src/databao_context_engine/storage/exceptions/exceptions.py +6 -0
  119. databao_context_engine-0.1.1/src/databao_context_engine/storage/migrate.py +127 -0
  120. databao_context_engine-0.1.1/src/databao_context_engine/storage/migrations/V01__init.sql +63 -0
  121. databao_context_engine-0.1.1/src/databao_context_engine/storage/models.py +51 -0
  122. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/__init__.py +0 -0
  123. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/chunk_repository.py +130 -0
  124. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
  125. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
  126. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/embedding_repository.py +113 -0
  127. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/factories.py +35 -0
  128. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/run_repository.py +157 -0
  129. databao_context_engine-0.1.1/src/databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
  130. databao_context_engine-0.1.1/src/databao_context_engine/storage/transaction.py +14 -0
  131. databao_context_engine-0.1.1/src/databao_context_engine/system/__init__.py +0 -0
  132. databao_context_engine-0.1.1/src/databao_context_engine/system/properties.py +13 -0
  133. databao_context_engine-0.1.1/src/databao_context_engine/templating/__init__.py +0 -0
  134. databao_context_engine-0.1.1/src/databao_context_engine/templating/renderer.py +29 -0
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.3
2
+ Name: databao-context-engine
3
+ Version: 0.1.1
4
+ Summary: Add your description here
5
+ Requires-Dist: click>=8.3.0
6
+ Requires-Dist: duckdb>=1.4.3
7
+ Requires-Dist: pyyaml>=6.0.3
8
+ Requires-Dist: requests>=2.32.5
9
+ Requires-Dist: pymysql>=1.1.2
10
+ Requires-Dist: clickhouse-connect>=0.10.0
11
+ Requires-Dist: mcp>=1.23.3
12
+ Requires-Dist: pyathena>=3.22.0
13
+ Requires-Dist: snowflake-connector-python>=4.1.0
14
+ Requires-Dist: mssql-python>=1.0.0
15
+ Requires-Dist: pydantic>=2.12.4
16
+ Requires-Dist: jinja2>=3.1.6
17
+ Requires-Dist: asyncpg>=0.31.0
18
+ Requires-Dist: asyncio>=4.0.0
19
+ Requires-Dist: asyncpg-stubs>=0.31.1
20
+ Requires-Python: >=3.12
21
+ Description-Content-Type: text/markdown
22
+
23
+ [![official project](https://jb.gg/badges/official.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
24
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/JetBrains/databao-context-engine/blob/main/LICENSE)
25
+
26
+ [//]: # ([![PyPI version](https://img.shields.io/pypi/v/databao-context-engine.svg)](https://pypi.org/project/databao-context-engine))
27
+
28
+ [//]: # ([![Python versions](https://img.shields.io/pypi/pyversions/databao-context-engine.svg)](https://pypi.org/project/databao-context-engine/))
29
+
30
+
31
+ <h1 align="center">Databao Context Engine</h1>
32
+ <p align="center">
33
+ <b>Semantic context for your LLMs — generated automatically.</b><br/>
34
+ No more copying schemas. No manual documentation. Just accurate answers.
35
+ </p>
36
+ <p align="center">
37
+ <a href="https://databao.app">Website</a>
38
+
39
+ [//]: # (•)
40
+
41
+ [//]: # ( <a href="#quickstart">Quickstart</a> •)
42
+
43
+ [//]: # ( <a href="#supported-data-sources">Data Sources</a> •)
44
+
45
+ [//]: # ( <a href="#contributing">Contributing</a>)
46
+ </p>
47
+
48
+ ---
49
+
50
+ ## What is Databao Context Engine?
51
+
52
+ Databao Context Engine **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
53
+
54
+ Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
55
+
56
+ ```
57
+ Your data sources → Context Engine → Unified semantic graph → Any LLM
58
+ ```
59
+
60
+ ## Why choose Databao Context Engine?
61
+
62
+ | Feature | What it means for you |
63
+ |----------------------------|----------------------------------------------------------------|
64
+ | **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
65
+ | **Runs locally** | Your data never leaves your environment |
66
+ | **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
67
+ | **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
68
+ | **Built-in benchmarks** | Measure and improve context quality over time |
69
+ | **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
70
+ | **Governed & versioned** | Track, version, and share context across your team |
71
+ | **Dynamic or static** | Serve context via MCP server or export as artifact |
72
+
73
+ # Prerequisites
74
+
75
+ This README assumes you will use `uv` as your package manager.
76
+
77
+ You can install it following the instructions [here](https://docs.astral.sh/uv/getting-started/installation/)
78
+
79
+ If you are going to push to the repository, please make sure to install git pre-commit hooks by running
80
+
81
+ ```bash
82
+ uv run pre-commit install
83
+ ```
84
+
85
+ # How to run?
86
+
87
+ You can run it with:
88
+
89
+ ```bash
90
+ uv run dce info
91
+ ```
92
+
93
+ Not providing the `info` subcommand or using the `--help` flag will show the help screen for the command.
94
+
95
+ ## Using the dce command directly
96
+
97
+ To be able to use the `dce` command directly (without using `uv run` or `python`) there are two options.
98
+
99
+ ### Installing dce locally
100
+
101
+ For that one needs to:
102
+
103
+ 1. Build the project by running
104
+
105
+ ```bash
106
+ uv build
107
+ ```
108
+
109
+ 2. Install the project on your machine by running:
110
+
111
+ ```bash
112
+ uv tool install -e .
113
+ ```
114
+
115
+ This second step will install the `dce` script on your machine and add it to your PATH.
116
+
117
+ ### Create dce alias using nix
118
+
119
+ This method simply creates a new shell environment with a `dce` alias. To use it, install the `nix` package
120
+ manager (https://nixos.org/download/). After that, simply run the following in the project root:
121
+
122
+ ```bash
123
+ $ nix-shell
124
+ ```
125
+
126
+ which is a short version of `$ nix-shell shell.nix`.
127
+
128
+ Alternatively, one could specify the path to the project repository
129
+
130
+ ```bash
131
+ $ nix-shell {path_to_dce_repository}
132
+ ```
133
+
134
+ After that, you can then directly use:
135
+
136
+ ```bash
137
+ dce --help
138
+ ```
139
+
140
+ Note: when we actually release our built Python package, users that don't use `uv` will still be able to install the CLI
141
+ by using `pipx install` instead.
142
+
143
+ # Running Mypy
144
+
145
+ [mypy](https://mypy.readthedocs.io/en/stable/getting_started.html) has been added to the project for type checking.
146
+
147
+ You can run it with the following:
148
+
149
+ ```bash
150
+ uv run mypy src --exclude "test_*" --exclude dist
151
+ ```
152
+
153
+ NB: the above runs type checking on all files within the `src` directory, excluding all test files.
154
+
155
+ # Running tests
156
+
157
+ You can run the tests with:
158
+
159
+ ```bash
160
+ uv run pytest
161
+ ```
162
+
163
+ (there is currently one test succeeding and one test failing in the project)
164
+
165
+ # Generating JSON Schemas for our plugins' config files
166
+
167
+ To be able to build a datasource, each plugin requires a yaml config file that describes how to connect to the
168
+ datasource,
169
+ as well as other information needed to customise the plugin.
170
+
171
+ To document what each config file should look like, we can generate a JSON schema describing the fields allowed in that
172
+ file.
173
+
174
+ You can generate all JSON schemas for all plugins by running:
175
+
176
+ ```bash
177
+ uv run generate_configs_schemas
178
+ ```
179
+
180
+ Some options can be provided to the command to choose which plugins to include or exclude from the generation.
181
+ To see the options available, you can refer to the help:
182
+
183
+ ```bash
184
+ uv run generate_configs_schemas --help
185
+ ```
186
+
@@ -0,0 +1,164 @@
1
+ [![official project](https://jb.gg/badges/official.svg)](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
2
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/JetBrains/databao-context-engine/blob/main/LICENSE)
3
+
4
+ [//]: # ([![PyPI version]&#40;https://img.shields.io/pypi/v/databao-context-engine.svg&#41;]&#40;https://pypi.org/project/databao-context-engine&#41;)
5
+
6
+ [//]: # ([![Python versions]&#40;https://img.shields.io/pypi/pyversions/databao-context-engine.svg&#41;]&#40;https://pypi.org/project/databao-context-engine/&#41;)
7
+
8
+
9
+ <h1 align="center">Databao Context Engine</h1>
10
+ <p align="center">
11
+ <b>Semantic context for your LLMs — generated automatically.</b><br/>
12
+ No more copying schemas. No manual documentation. Just accurate answers.
13
+ </p>
14
+ <p align="center">
15
+ <a href="https://databao.app">Website</a>
16
+
17
+ [//]: # (•)
18
+
19
+ [//]: # ( <a href="#quickstart">Quickstart</a> •)
20
+
21
+ [//]: # ( <a href="#supported-data-sources">Data Sources</a> •)
22
+
23
+ [//]: # ( <a href="#contributing">Contributing</a>)
24
+ </p>
25
+
26
+ ---
27
+
28
+ ## What is Databao Context Engine?
29
+
30
+ Databao Context Engine **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
31
+
32
+ Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
33
+
34
+ ```
35
+ Your data sources → Context Engine → Unified semantic graph → Any LLM
36
+ ```
37
+
38
+ ## Why choose Databao Context Engine?
39
+
40
+ | Feature | What it means for you |
41
+ |----------------------------|----------------------------------------------------------------|
42
+ | **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
43
+ | **Runs locally** | Your data never leaves your environment |
44
+ | **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
45
+ | **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
46
+ | **Built-in benchmarks** | Measure and improve context quality over time |
47
+ | **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
48
+ | **Governed & versioned** | Track, version, and share context across your team |
49
+ | **Dynamic or static** | Serve context via MCP server or export as artifact |
50
+
51
+ # Prerequisites
52
+
53
+ This README assumes you will use `uv` as your package manager.
54
+
55
+ You can install it following the instructions [here](https://docs.astral.sh/uv/getting-started/installation/)
56
+
57
+ If you are going to push to the repository, please make sure to install git pre-commit hooks by running
58
+
59
+ ```bash
60
+ uv run pre-commit install
61
+ ```
62
+
63
+ # How to run?
64
+
65
+ You can run it with:
66
+
67
+ ```bash
68
+ uv run dce info
69
+ ```
70
+
71
+ Not providing the `info` subcommand or using the `--help` flag will show the help screen for the command.
72
+
73
+ ## Using the dce command directly
74
+
75
+ To be able to use the `dce` command directly (without using `uv run` or `python`) there are two options.
76
+
77
+ ### Installing dce locally
78
+
79
+ For that one needs to:
80
+
81
+ 1. Build the project by running
82
+
83
+ ```bash
84
+ uv build
85
+ ```
86
+
87
+ 2. Install the project on your machine by running:
88
+
89
+ ```bash
90
+ uv tool install -e .
91
+ ```
92
+
93
+ This second step will install the `dce` script on your machine and add it to your PATH.
94
+
95
+ ### Create dce alias using nix
96
+
97
+ This method simply creates a new shell environment with a `dce` alias. To use it, install the `nix` package
98
+ manager (https://nixos.org/download/). After that, simply run the following in the project root:
99
+
100
+ ```bash
101
+ $ nix-shell
102
+ ```
103
+
104
+ which is a short version of `$ nix-shell shell.nix`.
105
+
106
+ Alternatively, one could specify the path to the project repository
107
+
108
+ ```bash
109
+ $ nix-shell {path_to_dce_repository}
110
+ ```
111
+
112
+ After that, you can then directly use:
113
+
114
+ ```bash
115
+ dce --help
116
+ ```
117
+
118
+ Note: when we actually release our built Python package, users that don't use `uv` will still be able to install the CLI
119
+ by using `pipx install` instead.
120
+
121
+ # Running Mypy
122
+
123
+ [mypy](https://mypy.readthedocs.io/en/stable/getting_started.html) has been added to the project for type checking.
124
+
125
+ You can run it with the following:
126
+
127
+ ```bash
128
+ uv run mypy src --exclude "test_*" --exclude dist
129
+ ```
130
+
131
+ NB: the above runs type checking on all files within the `src` directory, excluding all test files.
132
+
133
+ # Running tests
134
+
135
+ You can run the tests with:
136
+
137
+ ```bash
138
+ uv run pytest
139
+ ```
140
+
141
+ (there is currently one test succeeding and one test failing in the project)
142
+
143
+ # Generating JSON Schemas for our plugins' config files
144
+
145
+ To be able to build a datasource, each plugin requires a yaml config file that describes how to connect to the
146
+ datasource,
147
+ as well as other information needed to customise the plugin.
148
+
149
+ To document what each config file should look like, we can generate a JSON schema describing the fields allowed in that
150
+ file.
151
+
152
+ You can generate all JSON schemas for all plugins by running:
153
+
154
+ ```bash
155
+ uv run generate_configs_schemas
156
+ ```
157
+
158
+ Some options can be provided to the command to choose which plugins to include or exclude from the generation.
159
+ To see the options available, you can refer to the help:
160
+
161
+ ```bash
162
+ uv run generate_configs_schemas --help
163
+ ```
164
+
@@ -0,0 +1,57 @@
1
+ [project]
2
+ name = "databao-context-engine"
3
+ version = "0.1.1"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "click>=8.3.0",
9
+ "duckdb>=1.4.3",
10
+ "pyyaml>=6.0.3",
11
+ "requests>=2.32.5",
12
+ "pymysql>=1.1.2",
13
+ "clickhouse-connect>=0.10.0",
14
+ "mcp>=1.23.3",
15
+ "pyathena>=3.22.0",
16
+ "snowflake-connector-python>=4.1.0",
17
+ "mssql-python>=1.0.0",
18
+ "pydantic>=2.12.4",
19
+ "jinja2>=3.1.6",
20
+ "asyncpg>=0.31.0",
21
+ "asyncio>=4.0.0",
22
+ "asyncpg-stubs>=0.31.1",
23
+ ]
24
+
25
+ [build-system]
26
+ requires = ["uv_build>=0.9.6,<0.10.0"]
27
+ build-backend = "uv_build"
28
+
29
+ [dependency-groups]
30
+ dev = [
31
+ "mypy>=1.18.2",
32
+ "pre-commit>=4.3.0",
33
+ "pytest>=8.4.2",
34
+ "pytest-unordered>=0.7.0",
35
+ "ruff>=0.14.2",
36
+ "testcontainers==4.12.0",
37
+ "types-pyyaml>=6.0.12.20250915",
38
+ "types-pymysql>=1.1.0.20250916",
39
+ "pytest-mock>=3.15.1",
40
+ ]
41
+
42
+ [project.scripts]
43
+ dce = "databao_context_engine.main:main"
44
+ generate_configs_schemas = "databao_context_engine.generate_configs_schemas:main"
45
+
46
+ [tool.uv]
47
+ override-dependencies = ["urllib3>=2.6.3"]
48
+
49
+ [tool.uv.build-backend]
50
+ source-exclude = ["tests"]
51
+
52
+ [[tool.mypy.overrides]]
53
+ module = ["requests", "requests.*"]
54
+ ignore_missing_imports = true
55
+
56
+ [tool.pytest]
57
+ addopts = ["--import-mode=importlib"]
@@ -0,0 +1,35 @@
1
+ from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
2
+ from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
3
+ from databao_context_engine.databao_engine import ContextSearchResult, DatabaoContextEngine
4
+ from databao_context_engine.datasource_config.check_config import (
5
+ CheckDatasourceConnectionResult,
6
+ DatasourceConnectionStatus,
7
+ )
8
+ from databao_context_engine.datasource_config.datasource_context import DatasourceContext
9
+ from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
10
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
11
+ from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
12
+ from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
13
+ from databao_context_engine.project.types import Datasource, DatasourceId
14
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
15
+
16
+ __all__ = [
17
+ "DatabaoContextEngine",
18
+ "Datasource",
19
+ "DatasourceId",
20
+ "DatasourceContext",
21
+ "ContextSearchResult",
22
+ "DatabaoContextProjectManager",
23
+ "ChunkEmbeddingMode",
24
+ "BuildContextResult",
25
+ "CheckDatasourceConnectionResult",
26
+ "DatasourceConnectionStatus",
27
+ "DatasourceConfigFile",
28
+ "DatasourceType",
29
+ "get_databao_context_engine_info",
30
+ "DceInfo",
31
+ "init_dce_project",
32
+ "init_or_get_dce_project",
33
+ "InitErrorReason",
34
+ "InitProjectError",
35
+ ]
@@ -0,0 +1,111 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from databao_context_engine.build_sources.internal.build_service import BuildService
7
+ from databao_context_engine.build_sources.internal.export_results import (
8
+ append_result_to_all_results,
9
+ create_run_dir,
10
+ export_build_result,
11
+ )
12
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
+ from databao_context_engine.plugins.plugin_loader import load_plugins
14
+ from databao_context_engine.project.datasource_discovery import discover_datasources, prepare_source
15
+ from databao_context_engine.project.types import DatasourceId
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class BuildContextResult:
    """Summary of one datasource whose context was built successfully by ``build``."""

    # Identifier of the datasource, parsed from the built result's string representation.
    datasource_id: DatasourceId
    # Type of the datasource, wrapped from the built result's full type string.
    datasource_type: DatasourceType
    # Timestamp copied from the built result's ``context_built_at``.
    context_built_at: datetime
    # Path returned by ``export_build_result`` for this datasource's exported context.
    context_file_path: Path
26
+
27
+
28
def build(
    project_dir: Path,
    *,
    build_service: BuildService,
    project_id: str,
    dce_version: str,
) -> list[BuildContextResult]:
    """
    Build entrypoint: build the context of every datasource discovered in a project.

    1) Load available plugins
    2) Discover sources under ``project_dir``
    3) Lazily create a run (and its run directory) once a source with a matching plugin is found
    4) For each source, call ``build_service.process_prepared_source`` and export the result

    A source that has no matching plugin, or whose preparation/build/export raises, is
    logged and counted as failed; it does not prevent the remaining sources from being built.

    Args:
        project_dir: Project directory passed to datasource discovery and used to create
            the run directory.
        build_service: Service used to start and finalize the run and to process each source.
        project_id: Forwarded to ``build_service.start_run``.
        dce_version: Forwarded to ``build_service.start_run``.

    Returns:
        One ``BuildContextResult`` per successfully built source, in discovery order.
        An empty list when no source is discovered.
    """
    plugins = load_plugins()

    datasources = discover_datasources(project_dir)

    if not datasources:
        logger.info("No sources discovered under %s", project_dir)
        return []

    run = None
    run_dir = None

    number_of_failed_builds = 0
    build_result: list[BuildContextResult] = []
    for discovered_datasource in datasources:
        try:
            prepared_source = prepare_source(discovered_datasource)

            logger.info(
                f'Found datasource of type "{prepared_source.datasource_type.full_type}" with name {prepared_source.path.stem}'
            )

            plugin = plugins.get(prepared_source.datasource_type)
            if plugin is None:
                logger.warning(
                    "No plugin for '%s' (datasource=%s) — skipping.",
                    prepared_source.datasource_type.full_type,
                    prepared_source.path,
                )
                number_of_failed_builds += 1
                continue

            # Initialise the run as soon as we find a usable source. The run and its
            # directory are guarded separately: if creating the directory fails, the next
            # source must reuse the run that was already started instead of starting a
            # second one that would never be finalized.
            if run is None:
                run = build_service.start_run(project_id=project_id, dce_version=dce_version)
            if run_dir is None:
                run_dir = create_run_dir(project_dir, run.run_name)

            result = build_service.process_prepared_source(
                run_id=run.run_id,
                prepared_source=prepared_source,
                plugin=plugin,
            )

            context_file_path = export_build_result(run_dir, result)
            append_result_to_all_results(run_dir, result)

            build_result.append(
                BuildContextResult(
                    datasource_id=DatasourceId.from_string_repr(result.datasource_id),
                    datasource_type=DatasourceType(full_type=result.datasource_type),
                    context_built_at=result.context_built_at,
                    context_file_path=context_file_path,
                )
            )
        except Exception as e:
            # Best-effort per source: keep the full traceback at debug level and a short
            # message at info level, then carry on with the next source.
            logger.debug(str(e), exc_info=True, stack_info=True)
            logger.info(f"Failed to build source at ({discovered_datasource.path}): {str(e)}")

            number_of_failed_builds += 1

    # A run only exists if at least one source had a matching plugin.
    if run is not None:
        build_service.finalize_run(run_id=run.run_id)

    logger.debug(
        "Successfully built %d datasources. %s",
        len(build_result),
        f"Failed to build {number_of_failed_builds}." if number_of_failed_builds > 0 else "",
    )

    return build_result
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime
5
+
6
+ from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
7
+ from databao_context_engine.pluginlib.build_plugin import (
8
+ BuildPlugin,
9
+ )
10
+ from databao_context_engine.project.types import PreparedDatasource
11
+ from databao_context_engine.serialisation.yaml import to_yaml_string
12
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
13
+ from databao_context_engine.storage.models import RunDTO
14
+ from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
15
+ from databao_context_engine.storage.repositories.run_repository import RunRepository
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class BuildService:
    """
    Service used while building datasources.

    It records runs through the run repository, executes a build plugin against a
    prepared datasource, and forwards the resulting chunks to the chunk embedding service.
    """

    def __init__(
        self,
        *,
        run_repo: RunRepository,
        datasource_run_repo: DatasourceRunRepository,
        chunk_embedding_service: ChunkEmbeddingService,
    ) -> None:
        # Keep references to the injected collaborators; nothing else happens at construction.
        self._chunk_embedding_service = chunk_embedding_service
        self._datasource_run_repo = datasource_run_repo
        self._run_repo = run_repo

    def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
        """
        Create a new run through the run repository and return the resulting RunDTO.
        """
        created_run = self._run_repo.create(project_id=project_id, dce_version=dce_version)
        return created_run

    def finalize_run(self, *, run_id: int):
        """
        Mark the run as complete by setting ended_at to the current local time.
        """
        finished_at = datetime.now()
        self._run_repo.update(run_id=run_id, ended_at=finished_at)

    def process_prepared_source(
        self,
        *,
        run_id: int,
        prepared_source: PreparedDatasource,
        plugin: BuildPlugin,
    ) -> BuiltDatasourceContext:
        """
        Process a single source.

        1) Execute the plugin
        2) Divide the results into chunks
        3) Embed and persist the chunks

        The built context is returned in every case. When the plugin yields no chunks,
        no datasource run is created and nothing is embedded.
        """
        built_context = execute(prepared_source, plugin)
        context_chunks = plugin.divide_context_into_chunks(built_context.context)

        if context_chunks:
            # Register the datasource run first: its id is what the embedded chunks are attached to.
            datasource_run_record = self._datasource_run_repo.create(
                run_id=run_id,
                plugin=plugin.name,
                full_type=prepared_source.datasource_type.full_type,
                source_id=built_context.datasource_id,
                storage_directory=str(prepared_source.path.parent),
            )
            self._chunk_embedding_service.embed_chunks(
                datasource_run_id=datasource_run_record.datasource_run_id,
                chunks=context_chunks,
                result=to_yaml_string(built_context.context),
            )
        else:
            logger.info("No chunks for %s — skipping.", prepared_source.path.name)

        return built_context
@@ -0,0 +1,52 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
5
+ from databao_context_engine.llm.factory import (
6
+ create_ollama_description_provider,
7
+ create_ollama_embedding_provider,
8
+ create_ollama_service,
9
+ )
10
+ from databao_context_engine.project.info import get_dce_version
11
+ from databao_context_engine.project.layout import ensure_project_dir
12
+ from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
13
+ from databao_context_engine.services.factories import (
14
+ create_build_service,
15
+ )
16
+ from databao_context_engine.storage.connection import open_duckdb_connection
17
+ from databao_context_engine.system.properties import get_db_path
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
    """
    Public build entrypoint
    - Ensures the project directory and opens the DuckDB connection
    - Instantiates the build service with the Ollama embedding provider
      (and a description provider when the embedding mode asks for one)
    - Delegates the actual build logic to the build runner
    """
    layout = ensure_project_dir(project_dir)

    logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")

    with open_duckdb_connection(get_db_path()) as conn:
        ollama = create_ollama_service()
        embedder = create_ollama_embedding_provider(ollama)

        # The description provider is only created when the embedding mode requires descriptions.
        describer = None
        if chunk_embedding_mode.should_generate_description():
            describer = create_ollama_description_provider(ollama)

        service = create_build_service(
            conn,
            embedding_provider=embedder,
            description_provider=describer,
            chunk_embedding_mode=chunk_embedding_mode,
        )

        # The config file is read while the connection is open, matching the original call order.
        config = layout.read_config_file()
        project_identifier = str(config.project_id)
        engine_version = get_dce_version()

        return build(
            project_dir=project_dir,
            build_service=service,
            project_id=project_identifier,
            dce_version=engine_version,
        )