databao-context-engine 0.1.2__tar.gz → 0.1.4.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine-0.1.4.dev1/PKG-INFO +75 -0
- databao_context_engine-0.1.4.dev1/README.md +49 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/pyproject.toml +25 -12
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/__init__.py +18 -6
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources/__init__.py +4 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources}/build_runner.py +27 -23
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources/build_wiring.py +84 -0
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources/export_results.py +41 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/build_sources}/plugin_execution.py +3 -7
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/cli/add_datasource_config.py +41 -15
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/cli/commands.py +12 -43
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/cli/info.py +3 -2
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/databao_context_engine.py +137 -0
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/databao_context_project_manager.py +182 -0
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources/add_config.py +34 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/datasource_config → databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources}/check_config.py +18 -7
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources/datasource_context.py +93 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/project → databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources}/datasource_discovery.py +17 -16
- {databao_context_engine-0.1.2/src/databao_context_engine/project → databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources}/types.py +64 -15
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/init_project.py +40 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/introspection/property_extract.py +67 -53
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/llm/errors.py +10 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/install.py +13 -20
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/service.py +1 -3
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/mcp/mcp_runner.py +4 -2
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/mcp/mcp_server.py +10 -10
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugin_loader.py +111 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/pluginlib/build_plugin.py +25 -9
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/pluginlib/config.py +16 -2
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/base_db_plugin.py +5 -2
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugins/databases/athena_introspector.py +164 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/base_introspector.py +5 -3
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/duckdb_introspector.py +3 -5
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugins/duckdb_tools.py +18 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/plugin_loader.py +43 -42
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/resources/parquet_introspector.py +7 -19
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/project/info.py +76 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/init_project.py +16 -7
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/layout.py +3 -3
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/retrieve_embeddings}/export_results.py +2 -2
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/retrieve_embeddings}/retrieve_runner.py +5 -9
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/retrieve_embeddings}/retrieve_service.py +3 -17
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/serialisation → databao_context_engine-0.1.4.dev1/src/databao_context_engine/serialization}/yaml.py +1 -1
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/chunk_embedding_service.py +23 -11
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/factories.py +1 -46
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/persistence_service.py +11 -11
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/connection.py +11 -7
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/storage/exceptions/exceptions.py +6 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/migrate.py +2 -4
- databao_context_engine-0.1.4.dev1/src/databao_context_engine/storage/migrations/V01__init.sql +38 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/models.py +2 -23
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/repositories/factories.py +1 -12
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.2/PKG-INFO +0 -187
- databao_context_engine-0.1.2/README.md +0 -164
- databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine-0.1.2/src/databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine-0.1.2/src/databao_context_engine/databao_context_project_manager.py +0 -92
- databao_context_engine-0.1.2/src/databao_context_engine/databao_engine.py +0 -85
- databao_context_engine-0.1.2/src/databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine-0.1.2/src/databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine-0.1.2/src/databao_context_engine/init_project.py +0 -18
- databao_context_engine-0.1.2/src/databao_context_engine/llm/errors.py +0 -16
- databao_context_engine-0.1.2/src/databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine-0.1.2/src/databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine-0.1.2/src/databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine-0.1.2/src/databao_context_engine/project/info.py +0 -44
- databao_context_engine-0.1.2/src/databao_context_engine/project/runs.py +0 -39
- databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine-0.1.2/src/databao_context_engine/services/__init__.py +0 -0
- databao_context_engine-0.1.2/src/databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine-0.1.2/src/databao_context_engine/storage/__init__.py +0 -0
- databao_context_engine-0.1.2/src/databao_context_engine/storage/exceptions/__init__.py +0 -0
- databao_context_engine-0.1.2/src/databao_context_engine/storage/exceptions/exceptions.py +0 -6
- databao_context_engine-0.1.2/src/databao_context_engine/storage/migrations/V01__init.sql +0 -63
- databao_context_engine-0.1.2/src/databao_context_engine/storage/repositories/__init__.py +0 -0
- databao_context_engine-0.1.2/src/databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine-0.1.2/src/databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.2/src/databao_context_engine/system/__init__.py +0 -0
- databao_context_engine-0.1.2/src/databao_context_engine/templating/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/build_sources → databao_context_engine-0.1.4.dev1/src/databao_context_engine/cli}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/cli/datasources.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/build_sources/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/config}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/config/log_config.yaml +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/config/logging.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/build_sources/public → databao_context_engine-0.1.4.dev1/src/databao_context_engine/datasources}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/cli → databao_context_engine-0.1.4.dev1/src/databao_context_engine/event_journal}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/event_journal/writer.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/generate_configs_schemas.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/config → databao_context_engine-0.1.4.dev1/src/databao_context_engine/introspection}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/datasource_config → databao_context_engine-0.1.4.dev1/src/databao_context_engine/llm}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/config.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/event_journal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/llm/descriptions}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/descriptions/ollama.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/descriptions/provider.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/introspection → databao_context_engine-0.1.4.dev1/src/databao_context_engine/llm/embeddings}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/embeddings/ollama.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/embeddings/provider.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/factory.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/llm/runtime.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/main.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/llm → databao_context_engine-0.1.4.dev1/src/databao_context_engine/mcp}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/llm/descriptions → databao_context_engine-0.1.4.dev1/src/databao_context_engine/pluginlib}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/pluginlib/plugin_utils.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/llm/embeddings → databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugins}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/athena_db_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/clickhouse_db_plugin.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/mcp → databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugins/databases}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/database_chunker.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/databases_types.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/duckdb_db_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/mssql_db_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/mysql_db_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/parquet_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/postgresql_db_plugin.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/pluginlib → databao_context_engine-0.1.4.dev1/src/databao_context_engine/plugins/resources}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/resources/parquet_chunker.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/snowflake_db_plugin.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/plugins/unstructured_files_plugin.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/plugins → databao_context_engine-0.1.4.dev1/src/databao_context_engine/project}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/project_config.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/resources/examples/src/files/documentation.md +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/project/resources/examples/src/files/notes.txt +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/plugins/databases → databao_context_engine-0.1.4.dev1/src/databao_context_engine/serialization}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/plugins/resources → databao_context_engine-0.1.4.dev1/src/databao_context_engine/services}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/embedding_shard_resolver.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/models.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/services/table_name_policy.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/project → databao_context_engine-0.1.4.dev1/src/databao_context_engine/storage}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings → databao_context_engine-0.1.4.dev1/src/databao_context_engine/storage/exceptions}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/internal → databao_context_engine-0.1.4.dev1/src/databao_context_engine/storage/repositories}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/repositories/embedding_model_registry_repository.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/repositories/embedding_repository.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/storage/transaction.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/retrieve_embeddings/public → databao_context_engine-0.1.4.dev1/src/databao_context_engine/system}/__init__.py +0 -0
- {databao_context_engine-0.1.2/src/databao_context_engine/serialisation → databao_context_engine-0.1.4.dev1/src/databao_context_engine/templating}/__init__.py +0 -0
- {databao_context_engine-0.1.2 → databao_context_engine-0.1.4.dev1}/src/databao_context_engine/templating/renderer.py +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: databao-context-engine
|
|
3
|
+
Version: 0.1.4.dev1
|
|
4
|
+
Summary: Semantic context for your LLMs — generated automatically
|
|
5
|
+
Requires-Dist: click>=8.3.0
|
|
6
|
+
Requires-Dist: duckdb>=1.4.3
|
|
7
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
8
|
+
Requires-Dist: requests>=2.32.5
|
|
9
|
+
Requires-Dist: mcp>=1.23.3
|
|
10
|
+
Requires-Dist: pydantic>=2.12.4
|
|
11
|
+
Requires-Dist: jinja2>=3.1.6
|
|
12
|
+
Requires-Dist: pyathena>=3.25.0 ; extra == 'athena'
|
|
13
|
+
Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'clickhouse'
|
|
14
|
+
Requires-Dist: mssql-python>=1.0.0 ; extra == 'mssql'
|
|
15
|
+
Requires-Dist: pymysql>=1.1.2 ; extra == 'mysql'
|
|
16
|
+
Requires-Dist: asyncpg>=0.31.0 ; extra == 'postgresql'
|
|
17
|
+
Requires-Dist: snowflake-connector-python>=4.2.0 ; extra == 'snowflake'
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Provides-Extra: athena
|
|
20
|
+
Provides-Extra: clickhouse
|
|
21
|
+
Provides-Extra: mssql
|
|
22
|
+
Provides-Extra: mysql
|
|
23
|
+
Provides-Extra: postgresql
|
|
24
|
+
Provides-Extra: snowflake
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
[JetBrains on GitHub](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
|
|
28
|
+
[License](https://github.com/JetBrains/databao-context-engine/blob/main/LICENSE)
|
|
29
|
+
|
|
30
|
+
[//]: # ([](https://pypi.org/project/databao-context-engine))
|
|
31
|
+
|
|
32
|
+
[//]: # ([](https://pypi.org/project/databao-context-engine/))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
<h1 align="center">Databao Context Engine</h1>
|
|
36
|
+
<p align="center">
|
|
37
|
+
<b>Semantic context for your LLMs — generated automatically.</b><br/>
|
|
38
|
+
No more copying schemas. No manual documentation. Just accurate answers.
|
|
39
|
+
</p>
|
|
40
|
+
<p align="center">
|
|
41
|
+
<a href="https://databao.app">Website</a>
|
|
42
|
+
|
|
43
|
+
[//]: # (•)
|
|
44
|
+
|
|
45
|
+
[//]: # ( <a href="#quickstart">Quickstart</a> •)
|
|
46
|
+
|
|
47
|
+
[//]: # ( <a href="#supported-data-sources">Data Sources</a> •)
|
|
48
|
+
|
|
49
|
+
[//]: # ( <a href="#contributing">Contributing</a>)
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## What is Databao Context Engine?
|
|
55
|
+
|
|
56
|
+
Databao Context Engine **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
|
|
57
|
+
|
|
58
|
+
Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
Your data sources → Context Engine → Unified semantic graph → Any LLM
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Why choose Databao Context Engine?
|
|
65
|
+
|
|
66
|
+
| Feature | What it means for you |
|
|
67
|
+
|----------------------------|----------------------------------------------------------------|
|
|
68
|
+
| **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
|
|
69
|
+
| **Runs locally** | Your data never leaves your environment |
|
|
70
|
+
| **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
|
|
71
|
+
| **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
|
|
72
|
+
| **Built-in benchmarks** | Measure and improve context quality over time |
|
|
73
|
+
| **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
|
|
74
|
+
| **Governed & versioned** | Track, version, and share context across your team |
|
|
75
|
+
| **Dynamic or static** | Serve context via MCP server or export as artifact |
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[JetBrains on GitHub](https://confluence.jetbrains.com/display/ALL/JetBrains+on+GitHub)
|
|
2
|
+
[License](https://github.com/JetBrains/databao-context-engine/blob/main/LICENSE)
|
|
3
|
+
|
|
4
|
+
[//]: # ([](https://pypi.org/project/databao-context-engine))
|
|
5
|
+
|
|
6
|
+
[//]: # ([](https://pypi.org/project/databao-context-engine/))
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
<h1 align="center">Databao Context Engine</h1>
|
|
10
|
+
<p align="center">
|
|
11
|
+
<b>Semantic context for your LLMs — generated automatically.</b><br/>
|
|
12
|
+
No more copying schemas. No manual documentation. Just accurate answers.
|
|
13
|
+
</p>
|
|
14
|
+
<p align="center">
|
|
15
|
+
<a href="https://databao.app">Website</a>
|
|
16
|
+
|
|
17
|
+
[//]: # (•)
|
|
18
|
+
|
|
19
|
+
[//]: # ( <a href="#quickstart">Quickstart</a> •)
|
|
20
|
+
|
|
21
|
+
[//]: # ( <a href="#supported-data-sources">Data Sources</a> •)
|
|
22
|
+
|
|
23
|
+
[//]: # ( <a href="#contributing">Contributing</a>)
|
|
24
|
+
</p>
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## What is Databao Context Engine?
|
|
29
|
+
|
|
30
|
+
Databao Context Engine **automatically generates governed semantic context** from your databases, BI tools, documents, and spreadsheets.
|
|
31
|
+
|
|
32
|
+
Integrate it with any LLM to deliver **accurate, context-aware answers** — without copying schemas or writing documentation by hand.
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
Your data sources → Context Engine → Unified semantic graph → Any LLM
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Why choose Databao Context Engine?
|
|
39
|
+
|
|
40
|
+
| Feature | What it means for you |
|
|
41
|
+
|----------------------------|----------------------------------------------------------------|
|
|
42
|
+
| **Auto-generated context** | Extracts schemas, relationships, and semantics automatically |
|
|
43
|
+
| **Runs locally** | Your data never leaves your environment |
|
|
44
|
+
| **MCP integration** | Works with Claude Desktop, Cursor, and any MCP-compatible tool |
|
|
45
|
+
| **Multiple sources** | Databases, dbt projects, spreadsheets, documents |
|
|
46
|
+
| **Built-in benchmarks** | Measure and improve context quality over time |
|
|
47
|
+
| **LLM agnostic** | OpenAI, Anthropic, Ollama, Gemini — use any model |
|
|
48
|
+
| **Governed & versioned** | Track, version, and share context across your team |
|
|
49
|
+
| **Dynamic or static** | Serve context via MCP server or export as artifact |
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "databao-context-engine"
|
|
3
|
-
version = "0.1.2"
|
|
4
|
-
description = "
|
|
3
|
+
version = "0.1.4.dev1"
|
|
4
|
+
description = "Semantic context for your LLMs — generated automatically"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
7
|
dependencies = [
|
|
@@ -9,22 +9,34 @@ dependencies = [
|
|
|
9
9
|
"duckdb>=1.4.3",
|
|
10
10
|
"pyyaml>=6.0.3",
|
|
11
11
|
"requests>=2.32.5",
|
|
12
|
-
"pymysql>=1.1.2",
|
|
13
|
-
"clickhouse-connect>=0.10.0",
|
|
14
12
|
"mcp>=1.23.3",
|
|
15
|
-
"pyathena>=3.22.0",
|
|
16
|
-
"snowflake-connector-python>=4.1.0",
|
|
17
13
|
"pydantic>=2.12.4",
|
|
18
14
|
"jinja2>=3.1.6",
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
"
|
|
15
|
+
]
|
|
16
|
+
default-optional-dependency-keys = [
|
|
17
|
+
"mysql",
|
|
18
|
+
"postgresql"
|
|
22
19
|
]
|
|
23
20
|
|
|
24
21
|
[project.optional-dependencies]
|
|
25
22
|
mssql = [
|
|
26
23
|
"mssql-python>=1.0.0"
|
|
27
24
|
]
|
|
25
|
+
clickhouse = [
|
|
26
|
+
"clickhouse-connect>=0.10.0"
|
|
27
|
+
]
|
|
28
|
+
athena = [
|
|
29
|
+
"pyathena>=3.25.0"
|
|
30
|
+
]
|
|
31
|
+
snowflake = [
|
|
32
|
+
"snowflake-connector-python>=4.2.0",
|
|
33
|
+
]
|
|
34
|
+
mysql = [
|
|
35
|
+
"pymysql>=1.1.2"
|
|
36
|
+
]
|
|
37
|
+
postgresql = [
|
|
38
|
+
"asyncpg>=0.31.0",
|
|
39
|
+
]
|
|
28
40
|
|
|
29
41
|
[build-system]
|
|
30
42
|
requires = ["uv_build>=0.9.6,<0.10.0"]
|
|
@@ -33,14 +45,15 @@ build-backend = "uv_build"
|
|
|
33
45
|
[dependency-groups]
|
|
34
46
|
dev = [
|
|
35
47
|
"mypy>=1.18.2",
|
|
36
|
-
"pre-commit>=4.
|
|
37
|
-
"pytest>=
|
|
48
|
+
"pre-commit>=4.5.1",
|
|
49
|
+
"pytest>=9.0.2",
|
|
38
50
|
"pytest-unordered>=0.7.0",
|
|
39
51
|
"ruff>=0.14.2",
|
|
40
52
|
"testcontainers==4.12.0",
|
|
41
53
|
"types-pyyaml>=6.0.12.20250915",
|
|
42
54
|
"types-pymysql>=1.1.0.20250916",
|
|
43
55
|
"pytest-mock>=3.15.1",
|
|
56
|
+
"asyncpg-stubs>=0.31.1",
|
|
44
57
|
]
|
|
45
58
|
|
|
46
59
|
[project.scripts]
|
|
@@ -48,7 +61,7 @@ dce = "databao_context_engine.main:main"
|
|
|
48
61
|
generate_configs_schemas = "databao_context_engine.generate_configs_schemas:main"
|
|
49
62
|
|
|
50
63
|
[tool.uv]
|
|
51
|
-
override-dependencies = ["urllib3>=2.6.3", "filelock>=3.20.3"]
|
|
64
|
+
override-dependencies = ["urllib3>=2.6.3", "filelock>=3.20.3", "virtualenv>=20.36.1"]
|
|
52
65
|
|
|
53
66
|
[tool.uv.build-backend]
|
|
54
67
|
source-exclude = ["tests"]
|
|
@@ -1,16 +1,23 @@
|
|
|
1
|
-
from databao_context_engine.build_sources
|
|
1
|
+
from databao_context_engine.build_sources import BuildContextResult
|
|
2
|
+
from databao_context_engine.databao_context_engine import ContextSearchResult, DatabaoContextEngine
|
|
2
3
|
from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager, DatasourceConfigFile
|
|
3
|
-
from databao_context_engine.
|
|
4
|
-
from databao_context_engine.datasource_config.check_config import (
|
|
4
|
+
from databao_context_engine.datasources.check_config import (
|
|
5
5
|
CheckDatasourceConnectionResult,
|
|
6
6
|
DatasourceConnectionStatus,
|
|
7
7
|
)
|
|
8
|
-
from databao_context_engine.
|
|
8
|
+
from databao_context_engine.datasources.datasource_context import DatasourceContext
|
|
9
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId
|
|
9
10
|
from databao_context_engine.init_project import init_dce_project, init_or_get_dce_project
|
|
10
|
-
from databao_context_engine.
|
|
11
|
+
from databao_context_engine.plugin_loader import DatabaoContextPluginLoader
|
|
12
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
13
|
+
BuildDatasourcePlugin,
|
|
14
|
+
BuildFilePlugin,
|
|
15
|
+
BuildPlugin,
|
|
16
|
+
DatasourceType,
|
|
17
|
+
)
|
|
18
|
+
from databao_context_engine.pluginlib.config import ConfigPropertyDefinition
|
|
11
19
|
from databao_context_engine.project.info import DceInfo, get_databao_context_engine_info
|
|
12
20
|
from databao_context_engine.project.init_project import InitErrorReason, InitProjectError
|
|
13
|
-
from databao_context_engine.project.types import Datasource, DatasourceId
|
|
14
21
|
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
15
22
|
|
|
16
23
|
__all__ = [
|
|
@@ -32,4 +39,9 @@ __all__ = [
|
|
|
32
39
|
"init_or_get_dce_project",
|
|
33
40
|
"InitErrorReason",
|
|
34
41
|
"InitProjectError",
|
|
42
|
+
"DatabaoContextPluginLoader",
|
|
43
|
+
"ConfigPropertyDefinition",
|
|
44
|
+
"BuildPlugin",
|
|
45
|
+
"BuildDatasourcePlugin",
|
|
46
|
+
"BuildFilePlugin",
|
|
35
47
|
]
|
|
@@ -3,22 +3,32 @@ from dataclasses import dataclass
|
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
from databao_context_engine.build_sources.
|
|
7
|
-
from databao_context_engine.build_sources.
|
|
6
|
+
from databao_context_engine.build_sources.build_service import BuildService
|
|
7
|
+
from databao_context_engine.build_sources.export_results import (
|
|
8
8
|
append_result_to_all_results,
|
|
9
|
-
create_run_dir,
|
|
10
9
|
export_build_result,
|
|
10
|
+
reset_all_results,
|
|
11
11
|
)
|
|
12
|
+
from databao_context_engine.datasources.datasource_discovery import discover_datasources, prepare_source
|
|
13
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
12
14
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
13
15
|
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
14
|
-
from databao_context_engine.project.
|
|
15
|
-
from databao_context_engine.project.types import DatasourceId
|
|
16
|
+
from databao_context_engine.project.layout import get_output_dir
|
|
16
17
|
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
@dataclass
|
|
21
22
|
class BuildContextResult:
|
|
23
|
+
"""Result of a single datasource context build.
|
|
24
|
+
|
|
25
|
+
Attributes:
|
|
26
|
+
datasource_id: The id of the datasource.
|
|
27
|
+
datasource_type: The type of the datasource.
|
|
28
|
+
context_built_at: The timestamp when the context was built.
|
|
29
|
+
context_file_path: The path to the generated context file.
|
|
30
|
+
"""
|
|
31
|
+
|
|
22
32
|
datasource_id: DatasourceId
|
|
23
33
|
datasource_type: DatasourceType
|
|
24
34
|
context_built_at: datetime
|
|
@@ -29,16 +39,19 @@ def build(
|
|
|
29
39
|
project_dir: Path,
|
|
30
40
|
*,
|
|
31
41
|
build_service: BuildService,
|
|
32
|
-
project_id: str,
|
|
33
|
-
dce_version: str,
|
|
34
42
|
) -> list[BuildContextResult]:
|
|
35
|
-
"""
|
|
36
|
-
|
|
43
|
+
"""Build the context for all datasources in the project.
|
|
44
|
+
|
|
45
|
+
Unless you already have access to BuildService, this should not be called directly.
|
|
46
|
+
Instead, internal callers should go through the build_wiring module or directly use DatabaoContextProjectManager.build_context().
|
|
37
47
|
|
|
38
48
|
1) Load available plugins
|
|
39
49
|
2) Discover sources
|
|
40
50
|
3) Create a run
|
|
41
51
|
4) For each source, call process_source
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
A list of all the contexts built.
|
|
42
55
|
"""
|
|
43
56
|
plugins = load_plugins()
|
|
44
57
|
|
|
@@ -48,11 +61,9 @@ def build(
|
|
|
48
61
|
logger.info("No sources discovered under %s", project_dir)
|
|
49
62
|
return []
|
|
50
63
|
|
|
51
|
-
run = None
|
|
52
|
-
run_dir = None
|
|
53
|
-
|
|
54
64
|
number_of_failed_builds = 0
|
|
55
65
|
build_result = []
|
|
66
|
+
reset_all_results(get_output_dir(project_dir))
|
|
56
67
|
for discovered_datasource in datasources:
|
|
57
68
|
try:
|
|
58
69
|
prepared_source = prepare_source(discovered_datasource)
|
|
@@ -71,19 +82,15 @@ def build(
|
|
|
71
82
|
number_of_failed_builds += 1
|
|
72
83
|
continue
|
|
73
84
|
|
|
74
|
-
if run is None or run_dir is None:
|
|
75
|
-
# Initialiase the run as soon as we found a source
|
|
76
|
-
run = build_service.start_run(project_id=project_id, dce_version=dce_version)
|
|
77
|
-
run_dir = create_run_dir(project_dir, run.run_name)
|
|
78
|
-
|
|
79
85
|
result = build_service.process_prepared_source(
|
|
80
|
-
run_id=run.run_id,
|
|
81
86
|
prepared_source=prepared_source,
|
|
82
87
|
plugin=plugin,
|
|
83
88
|
)
|
|
84
89
|
|
|
85
|
-
|
|
86
|
-
|
|
90
|
+
output_dir = get_output_dir(project_dir)
|
|
91
|
+
|
|
92
|
+
context_file_path = export_build_result(output_dir, result)
|
|
93
|
+
append_result_to_all_results(output_dir, result)
|
|
87
94
|
|
|
88
95
|
build_result.append(
|
|
89
96
|
BuildContextResult(
|
|
@@ -99,9 +106,6 @@ def build(
|
|
|
99
106
|
|
|
100
107
|
number_of_failed_builds += 1
|
|
101
108
|
|
|
102
|
-
if run is not None:
|
|
103
|
-
build_service.finalize_run(run_id=run.run_id)
|
|
104
|
-
|
|
105
109
|
logger.debug(
|
|
106
110
|
"Successfully built %d datasources. %s",
|
|
107
111
|
len(build_result),
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext, execute
|
|
6
|
+
from databao_context_engine.datasources.types import PreparedDatasource
|
|
7
|
+
from databao_context_engine.pluginlib.build_plugin import (
|
|
8
|
+
BuildPlugin,
|
|
9
|
+
)
|
|
10
|
+
from databao_context_engine.serialization.yaml import to_yaml_string
|
|
11
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BuildService:
    """Build the context of a single datasource and hand its chunks to the embedding service."""

    def __init__(
        self,
        *,
        chunk_embedding_service: ChunkEmbeddingService,
    ) -> None:
        """Keep a reference to the service used to embed chunks.

        Args:
            chunk_embedding_service: Service whose ``embed_chunks`` is called for
                every built context that yields at least one chunk.
        """
        self._chunk_embedding_service = chunk_embedding_service

    def process_prepared_source(
        self,
        *,
        prepared_source: PreparedDatasource,
        plugin: BuildPlugin,
    ) -> BuiltDatasourceContext:
        """Build the context of one prepared source.

        1) Run the plugin on the prepared source
        2) Ask the plugin to divide the resulting context into chunks
        3) Pass the chunks to the chunk embedding service

        Step 3 is skipped (and logged) when the plugin produces no chunks.

        Args:
            prepared_source: The source to build a context for.
            plugin: The plugin used both to execute the build and to chunk its result.

        Returns:
            The built context, whether or not any chunk was embedded.
        """
        built_context = execute(prepared_source, plugin)
        context_chunks = plugin.divide_context_into_chunks(built_context.context)

        if context_chunks:
            self._chunk_embedding_service.embed_chunks(
                chunks=context_chunks,
                result=to_yaml_string(built_context.context),
                full_type=prepared_source.datasource_type.full_type,
                datasource_id=built_context.datasource_id,
            )
        else:
            logger.info("No chunks for %s — skipping.", prepared_source.path.name)

        return built_context
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from duckdb import DuckDBPyConnection
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.build_sources.build_runner import BuildContextResult, build
|
|
7
|
+
from databao_context_engine.build_sources.build_service import BuildService
|
|
8
|
+
from databao_context_engine.llm.descriptions.provider import DescriptionProvider
|
|
9
|
+
from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
|
|
10
|
+
from databao_context_engine.llm.factory import (
|
|
11
|
+
create_ollama_description_provider,
|
|
12
|
+
create_ollama_embedding_provider,
|
|
13
|
+
create_ollama_service,
|
|
14
|
+
)
|
|
15
|
+
from databao_context_engine.project.layout import ensure_project_dir
|
|
16
|
+
from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
|
|
17
|
+
from databao_context_engine.services.factories import create_chunk_embedding_service
|
|
18
|
+
from databao_context_engine.storage.connection import open_duckdb_connection
|
|
19
|
+
from databao_context_engine.storage.migrate import migrate
|
|
20
|
+
from databao_context_engine.system.properties import get_db_path
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
    """Build the context for all datasources in the project.

    - Recreates the project's database file from scratch
    - Instantiates the build service
    - Delegates the actual build logic to the build runner

    Args:
        project_dir: Root directory of the project. It is passed to
            ``ensure_project_dir`` before anything else happens.
        chunk_embedding_mode: Embedding mode forwarded to the chunk embedding
            service. A description provider is only created when the mode
            reports that descriptions should be generated.

    Returns:
        A list of all the contexts built.
    """
    ensure_project_dir(project_dir)

    # Lazy %-formatting: the message is only rendered when DEBUG is enabled.
    logger.debug("Starting to build datasources in project %s", project_dir.resolve())

    # Think about alternative solutions. This solution will mirror the current behaviour
    # The current behaviour only builds what is currently in the /src folder
    # This will need to change in the future when we can pick which datasources to build
    db_path = get_db_path(project_dir)
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Every build starts from an empty database. A single unlink(missing_ok=True)
    # avoids the check-then-delete race of exists() + unlink().
    db_path.unlink(missing_ok=True)

    with open_duckdb_connection(db_path) as conn:
        migrate(db_path)

        ollama_service = create_ollama_service()
        embedding_provider = create_ollama_embedding_provider(ollama_service)
        # Descriptions are optional: only build the provider when the mode asks for them.
        description_provider = (
            create_ollama_description_provider(ollama_service)
            if chunk_embedding_mode.should_generate_description()
            else None
        )
        build_service = _create_build_service(
            conn,
            embedding_provider=embedding_provider,
            description_provider=description_provider,
            chunk_embedding_mode=chunk_embedding_mode,
        )
        return build(
            project_dir=project_dir,
            build_service=build_service,
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _create_build_service(
    conn: DuckDBPyConnection,
    *,
    embedding_provider: EmbeddingProvider,
    description_provider: DescriptionProvider | None,
    chunk_embedding_mode: ChunkEmbeddingMode,
) -> BuildService:
    """Create a BuildService backed by a chunk embedding service built for ``conn``.

    All arguments are forwarded unchanged to ``create_chunk_embedding_service``.

    Returns:
        A BuildService wrapping the newly created chunk embedding service.
    """
    return BuildService(
        chunk_embedding_service=create_chunk_embedding_service(
            conn,
            embedding_provider=embedding_provider,
            description_provider=description_provider,
            chunk_embedding_mode=chunk_embedding_mode,
        ),
    )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
|
|
5
|
+
from databao_context_engine.datasources.datasource_context import get_context_header_for_datasource
|
|
6
|
+
from databao_context_engine.datasources.types import DatasourceId
|
|
7
|
+
from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME
|
|
8
|
+
from databao_context_engine.serialization.yaml import write_yaml_to_stream
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def export_build_result(output_dir: Path, result: BuiltDatasourceContext) -> Path:
    """Write a built datasource context to its own file under ``output_dir``.

    The file location is derived from the datasource id carried by ``result``;
    missing parent folders are created. An existing file is overwritten.

    Args:
        output_dir: Directory under which the context file is written.
        result: The built context to serialize with ``write_yaml_to_stream``.

    Returns:
        The path of the file that was written.
    """
    datasource_id = DatasourceId.from_string_repr(result.datasource_id)
    export_file_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())

    # Make sure the parent folder exists
    export_file_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding, matching append_result_to_all_results.
    # NOTE(review): confirm readers of this file also decode it as UTF-8.
    with export_file_path.open("w", encoding="utf-8") as export_file:
        write_yaml_to_stream(data=result, file_stream=export_file)

    logger.info("Exported result to %s", export_file_path.resolve())

    return export_file_path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def append_result_to_all_results(output_dir: Path, result: BuiltDatasourceContext):
    """Append one built context to the aggregated results file in ``output_dir``.

    Each appended entry is the datasource header, then the serialized result,
    then a newline. The file and its parent folder are created if missing.

    Args:
        output_dir: Directory holding the aggregated results file.
        result: The built context to append.
    """
    all_results_path = output_dir / ALL_RESULTS_FILE_NAME
    all_results_path.parent.mkdir(parents=True, exist_ok=True)

    with all_results_path.open("a", encoding="utf-8") as all_results_file:
        datasource_id = DatasourceId.from_string_repr(result.datasource_id)
        all_results_file.write(get_context_header_for_datasource(datasource_id))
        write_yaml_to_stream(data=result, file_stream=all_results_file)
        all_results_file.write("\n")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Here temporarily because it needs to be reset between runs.
|
|
38
|
+
# A subsequent PR will remove the existence of the all_results file
|
|
39
|
+
def reset_all_results(output_dir: Path):
    """Delete the aggregated results file from ``output_dir``; do nothing if it is absent."""
    all_results_path = output_dir / ALL_RESULTS_FILE_NAME
    all_results_path.unlink(missing_ok=True)
|
|
@@ -2,20 +2,18 @@ from dataclasses import dataclass
|
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from typing import Any, cast
|
|
4
4
|
|
|
5
|
+
from databao_context_engine.datasources.types import DatasourceId, PreparedConfig, PreparedDatasource
|
|
5
6
|
from databao_context_engine.pluginlib.build_plugin import (
|
|
6
7
|
BuildDatasourcePlugin,
|
|
7
8
|
BuildFilePlugin,
|
|
8
9
|
BuildPlugin,
|
|
9
10
|
)
|
|
10
11
|
from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
|
|
11
|
-
from databao_context_engine.project.types import PreparedConfig, PreparedDatasource, DatasourceId
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@dataclass()
|
|
15
15
|
class BuiltDatasourceContext:
|
|
16
|
-
"""
|
|
17
|
-
Dataclass defining the result of building a datasource's context.
|
|
18
|
-
"""
|
|
16
|
+
"""Dataclass defining the result of building a datasource's context."""
|
|
19
17
|
|
|
20
18
|
datasource_id: str
|
|
21
19
|
"""
|
|
@@ -53,9 +51,7 @@ def execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Bui
|
|
|
53
51
|
|
|
54
52
|
|
|
55
53
|
def _execute(prepared_datasource: PreparedDatasource, plugin: BuildPlugin) -> Any:
|
|
56
|
-
"""
|
|
57
|
-
Run a prepared source through the plugin
|
|
58
|
-
"""
|
|
54
|
+
"""Run a prepared source through the plugin."""
|
|
59
55
|
if isinstance(prepared_datasource, PreparedConfig):
|
|
60
56
|
ds_plugin = cast(BuildDatasourcePlugin, plugin)
|
|
61
57
|
|