databao-context-engine 0.7.1.dev1__tar.gz → 0.7.1.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/PKG-INFO +1 -1
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/pyproject.toml +1 -1
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/__init__.py +2 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/context_loader.py +2 -9
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/plugin_execution.py +5 -3
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/databao_context_engine.py +4 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/build_plugin.py +2 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/database_chunker.py +2 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/config_file.py +5 -1
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/chunk_search_repository.py +60 -11
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_runner.py +3 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_service.py +7 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_wiring.py +3 -1
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/persistence_service.py +4 -1
- databao_context_engine-0.7.1.dev2/src/databao_context_engine/storage/migrations/V04__add_chunk_type.sql +1 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/models.py +1 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/chunk_repository.py +10 -6
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/LICENSE.md +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/README.md +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_runner.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_service.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_wiring.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/export_results.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/commands.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/datasources.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/info.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/user_input_cb_impl.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/log_config.yaml +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/logging.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/databao_context_domain_manager.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/check_config.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/config_wizard.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/datasource_context.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/datasource_discovery.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/execute_sql_query.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/sql_read_only.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/event_journal/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/event_journal/writer.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/generate_configs_schemas.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/init_domain.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/introspection/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/introspection/property_extract.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/api.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/config.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/ollama.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/provider.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/ollama.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/provider.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/errors.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/factory.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/install.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/ollama.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/provider.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/runtime.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/service.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/main.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/mcp_runner.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/mcp_server.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/perf/core.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/config.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/plugin_utils.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/sql/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/sql/sql_types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/context_enricher.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/database_context_explorer.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/databases_types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_model_builder.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_scope.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_scope_matcher.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/sync_asyncpg_connection.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/profiling_config.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sampling_scope.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sampling_scope_matcher.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/config_file.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_connector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/context_filtering.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_chunker.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_context_extractor.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/types_artifacts.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/duckdb_tools.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/docling_chunker.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/pdf_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/unstructured_files_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/plugin_loader.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_chunker.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_introspector.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_plugin.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/types.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/progress/progress.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/info.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/init_project.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/layout.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/project_config.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/files/documentation.md +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/files/notes.txt +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/py.typed +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/serialization/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/serialization/yaml.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/chunk_embedding_service.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/embedding_shard_resolver.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/factories.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/models.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/table_name_policy.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/connection.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/exceptions/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/exceptions/exceptions.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrate.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V01__init.sql +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V02__add_keyword_index_text.sql +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V03__add_indexed_datasource_table.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V03__add_indexed_datasource_table.sql +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/datasource_context_repository.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/embedding_model_registry_repository.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/embedding_repository.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/factories.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/transaction.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/system/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/system/properties.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/templating/__init__.py +0 -0
- {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/templating/renderer.py +0 -0
|
@@ -57,6 +57,7 @@ from databao_context_engine.plugins.databases.snowflake.config_file import (
|
|
|
57
57
|
SnowflakeConfigFile,
|
|
58
58
|
SnowflakeConnectionProperties,
|
|
59
59
|
SnowflakeKeyPairAuth,
|
|
60
|
+
SnowflakeOAuthAuth,
|
|
60
61
|
SnowflakePasswordAuth,
|
|
61
62
|
SnowflakeSSOAuth,
|
|
62
63
|
)
|
|
@@ -134,6 +135,7 @@ __all__ = [
|
|
|
134
135
|
"SnowflakeConfigFile",
|
|
135
136
|
"SnowflakeConnectionProperties",
|
|
136
137
|
"SnowflakeSSOAuth",
|
|
138
|
+
"SnowflakeOAuthAuth",
|
|
137
139
|
"SnowflakeKeyPairAuth",
|
|
138
140
|
"SnowflakePasswordAuth",
|
|
139
141
|
"SQLiteConfigFile",
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from dataclasses import replace
|
|
4
3
|
from typing import Any
|
|
5
4
|
|
|
6
5
|
import yaml
|
|
7
|
-
from pydantic import
|
|
6
|
+
from pydantic import TypeAdapter
|
|
8
7
|
|
|
9
8
|
from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
|
|
10
9
|
from databao_context_engine.datasources.datasource_context import (
|
|
@@ -45,14 +44,8 @@ def deserialize_built_context(
|
|
|
45
44
|
) -> BuiltDatasourceContext:
|
|
46
45
|
"""Parse a datasource output YAML payload and type the embedded context."""
|
|
47
46
|
raw_context = yaml.safe_load(context.context)
|
|
48
|
-
built = TypeAdapter(BuiltDatasourceContext).validate_python(raw_context)
|
|
49
47
|
|
|
50
|
-
|
|
51
|
-
typed_context: Any = context_type.model_validate(built.context)
|
|
52
|
-
else:
|
|
53
|
-
typed_context = TypeAdapter(context_type).validate_python(built.context)
|
|
54
|
-
|
|
55
|
-
return replace(built, context=typed_context)
|
|
48
|
+
return TypeAdapter(BuiltDatasourceContext[context_type]).validate_python(raw_context) # type: ignore[valid-type]
|
|
56
49
|
|
|
57
50
|
|
|
58
51
|
def _load_typed_built_context(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import Any, cast
|
|
2
|
+
from typing import Any, Generic, TypeVar, cast
|
|
3
3
|
|
|
4
4
|
from databao_context_engine.datasources.types import PreparedConfig, PreparedDatasource
|
|
5
5
|
from databao_context_engine.pluginlib.build_plugin import (
|
|
@@ -10,9 +10,11 @@ from databao_context_engine.pluginlib.build_plugin import (
|
|
|
10
10
|
from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
|
|
11
11
|
from databao_context_engine.project.layout import ProjectLayout
|
|
12
12
|
|
|
13
|
+
ContextT = TypeVar("ContextT")
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
@dataclass()
|
|
15
|
-
class BuiltDatasourceContext:
|
|
17
|
+
class BuiltDatasourceContext(Generic[ContextT]):
|
|
16
18
|
"""Dataclass defining the result of building a datasource's context."""
|
|
17
19
|
|
|
18
20
|
datasource_id: str
|
|
@@ -25,7 +27,7 @@ class BuiltDatasourceContext:
|
|
|
25
27
|
The type of the built data source
|
|
26
28
|
"""
|
|
27
29
|
|
|
28
|
-
context:
|
|
30
|
+
context: ContextT
|
|
29
31
|
"""
|
|
30
32
|
A dictionary containing the actual context generated for the data source.
|
|
31
33
|
This dictionary should be serializable in YAML format.
|
|
@@ -25,6 +25,7 @@ from databao_context_engine.plugins.databases.database_context_explorer import (
|
|
|
25
25
|
from databao_context_engine.plugins.plugin_loader import DatabaoContextPluginLoader
|
|
26
26
|
from databao_context_engine.project.layout import ProjectLayout, ensure_project_dir
|
|
27
27
|
from databao_context_engine.search_context import search_context as search_context_internal
|
|
28
|
+
from databao_context_engine.search_context.chunk_search_repository import ChunkType
|
|
28
29
|
from databao_context_engine.search_context.search_service import ContextSearchMode
|
|
29
30
|
|
|
30
31
|
|
|
@@ -131,6 +132,7 @@ class DatabaoContextEngine:
|
|
|
131
132
|
limit: int | None = None,
|
|
132
133
|
datasource_ids: list[DatasourceId] | None = None,
|
|
133
134
|
context_search_mode: ContextSearchMode | None = None,
|
|
135
|
+
chunk_types: list[ChunkType] | None = None,
|
|
134
136
|
) -> list[ContextSearchResult]:
|
|
135
137
|
"""Search in the available context for the closest matches to the given text.
|
|
136
138
|
|
|
@@ -139,6 +141,7 @@ class DatabaoContextEngine:
|
|
|
139
141
|
limit: The maximum number of results to return. If None is provided, a default limit of 10 will be used.
|
|
140
142
|
datasource_ids: If provided, the search results will only come from the datasources with these IDs.
|
|
141
143
|
context_search_mode: Search strategy to use. Defaults to HYBRID_SEARCH if None is provided.
|
|
144
|
+
chunk_types: If provided, the search results will only come from the chunks of these types.
|
|
142
145
|
|
|
143
146
|
Returns:
|
|
144
147
|
A list of the results found for the search, sorted by score.
|
|
@@ -153,6 +156,7 @@ class DatabaoContextEngine:
|
|
|
153
156
|
limit=limit,
|
|
154
157
|
datasource_ids=datasource_ids,
|
|
155
158
|
context_search_mode=context_search_mode,
|
|
159
|
+
chunk_types=chunk_types,
|
|
156
160
|
)
|
|
157
161
|
|
|
158
162
|
return [
|
|
@@ -12,10 +12,12 @@ class EmbeddableChunk:
|
|
|
12
12
|
"""A chunk that will be embedded as a vector and used when searching context from a given AI prompt.
|
|
13
13
|
|
|
14
14
|
Attributes:
|
|
15
|
+
type: The type of the chunk, e.g. "table", "column" to allow for search by chunk type.
|
|
15
16
|
embeddable_text: The text to embed as a vector for search usage
|
|
16
17
|
content: The content to return as a response when the embedding has been selected in a search
|
|
17
18
|
"""
|
|
18
19
|
|
|
20
|
+
type: str | None = None
|
|
19
21
|
embeddable_text: str
|
|
20
22
|
keyword_indexable_text: str | None = None
|
|
21
23
|
content: Any
|
|
@@ -39,6 +39,7 @@ def build_database_chunks(result: DatabaseIntrospectionResult) -> list[Embeddabl
|
|
|
39
39
|
|
|
40
40
|
def _create_table_chunk(catalog_name: str, schema_name: str, table: DatabaseTable) -> EmbeddableChunk:
|
|
41
41
|
return EmbeddableChunk(
|
|
42
|
+
type="table",
|
|
42
43
|
embeddable_text=_build_table_chunk_text(table),
|
|
43
44
|
content=DatabaseTableChunkContent(
|
|
44
45
|
catalog_name=catalog_name,
|
|
@@ -52,6 +53,7 @@ def _create_column_chunk(
|
|
|
52
53
|
catalog_name: str, schema_name: str, table: DatabaseTable, column: DatabaseColumn
|
|
53
54
|
) -> EmbeddableChunk:
|
|
54
55
|
return EmbeddableChunk(
|
|
56
|
+
type="column",
|
|
55
57
|
embeddable_text=_build_column_chunk_text(table, column),
|
|
56
58
|
content=DatabaseColumnChunkContent(
|
|
57
59
|
catalog_name=catalog_name,
|
|
@@ -22,13 +22,17 @@ class SnowflakeSSOAuth(BaseModel):
|
|
|
22
22
|
authenticator: str = Field(description='e.g. "externalbrowser"')
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
class SnowflakeOAuthAuth(BaseModel):
|
|
26
|
+
token: Annotated[str, ConfigPropertyAnnotation(secret=True)]
|
|
27
|
+
|
|
28
|
+
|
|
25
29
|
class SnowflakeConnectionProperties(BaseModel):
|
|
26
30
|
account: Annotated[str, ConfigPropertyAnnotation(required=True)]
|
|
27
31
|
warehouse: str | None = None
|
|
28
32
|
database: str | None = None
|
|
29
33
|
user: str | None = None
|
|
30
34
|
role: str | None = None
|
|
31
|
-
auth: SnowflakePasswordAuth | SnowflakeKeyPairAuth | SnowflakeSSOAuth
|
|
35
|
+
auth: SnowflakePasswordAuth | SnowflakeKeyPairAuth | SnowflakeSSOAuth | SnowflakeOAuthAuth
|
|
32
36
|
additional_properties: dict[str, Any] = {}
|
|
33
37
|
|
|
34
38
|
def to_snowflake_kwargs(self) -> dict[str, Any]:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections.abc import Sequence
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
4
5
|
from typing import Any
|
|
5
6
|
|
|
6
7
|
import duckdb
|
|
@@ -13,9 +14,17 @@ from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
15
16
|
|
|
17
|
+
class ChunkType(str, Enum):
|
|
18
|
+
"""Enum of the supported chunk types to search."""
|
|
19
|
+
|
|
20
|
+
TABLE = "table"
|
|
21
|
+
COLUMN = "column"
|
|
22
|
+
|
|
23
|
+
|
|
16
24
|
@dataclass(kw_only=True, frozen=True)
|
|
17
25
|
class VectorSearchCandidate:
|
|
18
26
|
chunk_id: int
|
|
27
|
+
chunk_type: ChunkType | None
|
|
19
28
|
display_text: str
|
|
20
29
|
embeddable_text: str
|
|
21
30
|
cosine_distance: float
|
|
@@ -26,6 +35,7 @@ class VectorSearchCandidate:
|
|
|
26
35
|
@dataclass(kw_only=True, frozen=True)
|
|
27
36
|
class Bm25SearchCandidate:
|
|
28
37
|
chunk_id: int
|
|
38
|
+
chunk_type: ChunkType | None
|
|
29
39
|
display_text: str
|
|
30
40
|
embeddable_text: str
|
|
31
41
|
bm25_score: float
|
|
@@ -65,6 +75,7 @@ class KeywordSearchScore:
|
|
|
65
75
|
@dataclass(kw_only=True, frozen=True)
|
|
66
76
|
class SearchResult:
|
|
67
77
|
chunk_id: int
|
|
78
|
+
chunk_type: ChunkType | None
|
|
68
79
|
display_text: str
|
|
69
80
|
embeddable_text: str
|
|
70
81
|
datasource_type: DatasourceType
|
|
@@ -90,6 +101,7 @@ class ChunkSearchRepository:
|
|
|
90
101
|
dimension: int,
|
|
91
102
|
limit: int,
|
|
92
103
|
datasource_context_hashes: list[DatasourceContextHash],
|
|
104
|
+
chunk_types: list[ChunkType] | None = None,
|
|
93
105
|
) -> list[SearchResult]:
|
|
94
106
|
"""Read only similarity search on a specific embedding shard table."""
|
|
95
107
|
vector_candidates = self._get_vector_candidates(
|
|
@@ -98,10 +110,12 @@ class ChunkSearchRepository:
|
|
|
98
110
|
dimension=dimension,
|
|
99
111
|
limit=limit,
|
|
100
112
|
datasource_context_hashes=datasource_context_hashes,
|
|
113
|
+
chunk_types=chunk_types,
|
|
101
114
|
)
|
|
102
115
|
return [
|
|
103
116
|
SearchResult(
|
|
104
117
|
chunk_id=candidate.chunk_id,
|
|
118
|
+
chunk_type=candidate.chunk_type,
|
|
105
119
|
display_text=candidate.display_text,
|
|
106
120
|
embeddable_text=candidate.embeddable_text,
|
|
107
121
|
datasource_type=candidate.datasource_type,
|
|
@@ -120,15 +134,26 @@ class ChunkSearchRepository:
|
|
|
120
134
|
dimension: int,
|
|
121
135
|
limit: int,
|
|
122
136
|
datasource_context_hashes: list[DatasourceContextHash],
|
|
137
|
+
chunk_types: list[ChunkType] | None = None,
|
|
123
138
|
) -> list[VectorSearchCandidate]:
|
|
124
139
|
"""Read only vector candidates on a specific embedding shard table."""
|
|
125
140
|
if not datasource_context_hashes:
|
|
126
141
|
return []
|
|
127
142
|
|
|
128
143
|
allowed_hashes_sql, hash_params = self._build_allowed_hashes_values(datasource_context_hashes)
|
|
144
|
+
|
|
145
|
+
chunk_types_param: list[list[ChunkType]]
|
|
146
|
+
if chunk_types:
|
|
147
|
+
search_candidates_chunk_type_filter = "WHERE c.chunk_type IN ?"
|
|
148
|
+
chunk_types_param = [chunk_types]
|
|
149
|
+
else:
|
|
150
|
+
chunk_types_param = []
|
|
151
|
+
search_candidates_chunk_type_filter = ""
|
|
152
|
+
|
|
129
153
|
params: list[Any] = [
|
|
130
154
|
*hash_params,
|
|
131
155
|
list(search_vec),
|
|
156
|
+
*chunk_types_param,
|
|
132
157
|
self._DEFAULT_DISTANCE_THRESHOLD,
|
|
133
158
|
limit,
|
|
134
159
|
]
|
|
@@ -141,6 +166,7 @@ class ChunkSearchRepository:
|
|
|
141
166
|
vector_candidates AS (
|
|
142
167
|
SELECT
|
|
143
168
|
c.chunk_id,
|
|
169
|
+
c.chunk_type,
|
|
144
170
|
COALESCE(c.display_text, c.embeddable_text) AS display_text,
|
|
145
171
|
c.embeddable_text,
|
|
146
172
|
array_cosine_distance(e.vec, CAST(? AS FLOAT[{dimension}])) AS cosine_distance,
|
|
@@ -154,9 +180,11 @@ class ChunkSearchRepository:
|
|
|
154
180
|
ON h.datasource_id = ah.datasource_id
|
|
155
181
|
AND h.hash = ah.hash
|
|
156
182
|
AND h.hash_algorithm = ah.hash_algorithm
|
|
183
|
+
{search_candidates_chunk_type_filter}
|
|
157
184
|
)
|
|
158
185
|
SELECT
|
|
159
186
|
vc.chunk_id,
|
|
187
|
+
vc.chunk_type,
|
|
160
188
|
vc.display_text,
|
|
161
189
|
vc.embeddable_text,
|
|
162
190
|
vc.cosine_distance,
|
|
@@ -176,11 +204,12 @@ class ChunkSearchRepository:
|
|
|
176
204
|
return [
|
|
177
205
|
VectorSearchCandidate(
|
|
178
206
|
chunk_id=row[0],
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
207
|
+
chunk_type=ChunkType(row[1]) if row[1] else None,
|
|
208
|
+
display_text=row[2],
|
|
209
|
+
embeddable_text=row[3],
|
|
210
|
+
cosine_distance=row[4],
|
|
211
|
+
datasource_type=DatasourceType(full_type=row[5]),
|
|
212
|
+
datasource_id=DatasourceId.from_string_repr(row[6]),
|
|
184
213
|
)
|
|
185
214
|
for row in rows
|
|
186
215
|
]
|
|
@@ -195,6 +224,7 @@ class ChunkSearchRepository:
|
|
|
195
224
|
dimension: int,
|
|
196
225
|
limit: int,
|
|
197
226
|
datasource_context_hashes: list[DatasourceContextHash],
|
|
227
|
+
chunk_types: list[ChunkType] | None = None,
|
|
198
228
|
) -> list[SearchResult]:
|
|
199
229
|
"""Hybrid retrieval combining vector similarity and BM25 with Reciprocal Rank Fusion (RRF).
|
|
200
230
|
|
|
@@ -208,12 +238,14 @@ class ChunkSearchRepository:
|
|
|
208
238
|
dimension=dimension,
|
|
209
239
|
limit=candidate_limit,
|
|
210
240
|
datasource_context_hashes=datasource_context_hashes,
|
|
241
|
+
chunk_types=chunk_types,
|
|
211
242
|
)
|
|
212
243
|
|
|
213
244
|
bm25_candidates = self._get_bm25_candidates(
|
|
214
245
|
query_text=search_text,
|
|
215
246
|
limit=candidate_limit,
|
|
216
247
|
datasource_context_hashes=datasource_context_hashes,
|
|
248
|
+
chunk_types=chunk_types,
|
|
217
249
|
)
|
|
218
250
|
return self._fuse_by_rrf(
|
|
219
251
|
vector_candidates=vector_candidates,
|
|
@@ -228,17 +260,20 @@ class ChunkSearchRepository:
|
|
|
228
260
|
query_text: str,
|
|
229
261
|
limit: int,
|
|
230
262
|
datasource_context_hashes: list[DatasourceContextHash],
|
|
263
|
+
chunk_types: list[ChunkType] | None = None,
|
|
231
264
|
) -> list[SearchResult]:
|
|
232
265
|
"""Read only BM25 search over chunk text."""
|
|
233
266
|
bm25_candidates = self._get_bm25_candidates(
|
|
234
267
|
query_text=query_text,
|
|
235
268
|
limit=limit,
|
|
236
269
|
datasource_context_hashes=datasource_context_hashes,
|
|
270
|
+
chunk_types=chunk_types,
|
|
237
271
|
)
|
|
238
272
|
|
|
239
273
|
return [
|
|
240
274
|
SearchResult(
|
|
241
275
|
chunk_id=candidate.chunk_id,
|
|
276
|
+
chunk_type=candidate.chunk_type,
|
|
242
277
|
display_text=candidate.display_text,
|
|
243
278
|
embeddable_text=candidate.embeddable_text,
|
|
244
279
|
datasource_type=candidate.datasource_type,
|
|
@@ -255,12 +290,21 @@ class ChunkSearchRepository:
|
|
|
255
290
|
query_text: str,
|
|
256
291
|
limit: int,
|
|
257
292
|
datasource_context_hashes: list[DatasourceContextHash],
|
|
293
|
+
chunk_types: list[ChunkType] | None = None,
|
|
258
294
|
) -> list[Bm25SearchCandidate]:
|
|
259
295
|
if not datasource_context_hashes:
|
|
260
296
|
return []
|
|
261
297
|
|
|
262
298
|
allowed_hashes_sql, hash_params = self._build_allowed_hashes_values(datasource_context_hashes)
|
|
263
|
-
|
|
299
|
+
chunk_types_param: list[list[ChunkType]]
|
|
300
|
+
if chunk_types:
|
|
301
|
+
search_candidates_chunk_type_filter = "WHERE c.chunk_type IN ?"
|
|
302
|
+
chunk_types_param = [chunk_types]
|
|
303
|
+
else:
|
|
304
|
+
chunk_types_param = []
|
|
305
|
+
search_candidates_chunk_type_filter = ""
|
|
306
|
+
|
|
307
|
+
params: list[Any] = [*hash_params, query_text, *chunk_types_param, limit]
|
|
264
308
|
|
|
265
309
|
rows = self._conn.execute(
|
|
266
310
|
f"""
|
|
@@ -270,6 +314,7 @@ class ChunkSearchRepository:
|
|
|
270
314
|
bm25_candidates AS (
|
|
271
315
|
SELECT
|
|
272
316
|
c.chunk_id,
|
|
317
|
+
c.chunk_type,
|
|
273
318
|
COALESCE(c.display_text, c.embeddable_text) AS display_text,
|
|
274
319
|
c.embeddable_text,
|
|
275
320
|
c.full_type,
|
|
@@ -285,9 +330,11 @@ class ChunkSearchRepository:
|
|
|
285
330
|
ON h.datasource_id = ah.datasource_id
|
|
286
331
|
AND h.hash = ah.hash
|
|
287
332
|
AND h.hash_algorithm = ah.hash_algorithm
|
|
333
|
+
{search_candidates_chunk_type_filter}
|
|
288
334
|
)
|
|
289
335
|
SELECT
|
|
290
336
|
b.chunk_id,
|
|
337
|
+
b.chunk_type,
|
|
291
338
|
b.display_text,
|
|
292
339
|
b.embeddable_text,
|
|
293
340
|
b.bm25_score,
|
|
@@ -307,11 +354,12 @@ class ChunkSearchRepository:
|
|
|
307
354
|
return [
|
|
308
355
|
Bm25SearchCandidate(
|
|
309
356
|
chunk_id=row[0],
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
357
|
+
chunk_type=ChunkType(row[1]) if row[1] else None,
|
|
358
|
+
display_text=row[2],
|
|
359
|
+
embeddable_text=row[3],
|
|
360
|
+
bm25_score=row[4],
|
|
361
|
+
datasource_type=DatasourceType(full_type=row[5]),
|
|
362
|
+
datasource_id=DatasourceId.from_string_repr(row[6]),
|
|
315
363
|
)
|
|
316
364
|
for row in rows
|
|
317
365
|
]
|
|
@@ -365,6 +413,7 @@ class ChunkSearchRepository:
|
|
|
365
413
|
results.append(
|
|
366
414
|
SearchResult(
|
|
367
415
|
chunk_id=chunk_id,
|
|
416
|
+
chunk_type=data_candidate.chunk_type,
|
|
368
417
|
display_text=data_candidate.display_text,
|
|
369
418
|
embeddable_text=data_candidate.embeddable_text,
|
|
370
419
|
datasource_type=data_candidate.datasource_type,
|
|
@@ -5,6 +5,7 @@ from databao_context_engine.datasources.datasource_context import (
|
|
|
5
5
|
)
|
|
6
6
|
from databao_context_engine.datasources.types import DatasourceId
|
|
7
7
|
from databao_context_engine.project.layout import ProjectLayout
|
|
8
|
+
from databao_context_engine.search_context.chunk_search_repository import ChunkType
|
|
8
9
|
from databao_context_engine.search_context.search_service import RAG_MODE, ContextSearchMode, SearchContextService
|
|
9
10
|
|
|
10
11
|
|
|
@@ -18,6 +19,7 @@ def run_context_search(
|
|
|
18
19
|
datasource_ids: list[DatasourceId] | None,
|
|
19
20
|
rag_mode: RAG_MODE,
|
|
20
21
|
context_search_mode: ContextSearchMode,
|
|
22
|
+
chunk_types: list[ChunkType] | None = None,
|
|
21
23
|
):
|
|
22
24
|
context_hashes = (
|
|
23
25
|
get_datasource_context_hashes(project_layout, datasource_ids)
|
|
@@ -33,4 +35,5 @@ def run_context_search(
|
|
|
33
35
|
datasource_context_hashes=context_hashes,
|
|
34
36
|
rag_mode=rag_mode,
|
|
35
37
|
context_search_mode=context_search_mode,
|
|
38
|
+
chunk_types=chunk_types,
|
|
36
39
|
)
|
|
@@ -8,6 +8,7 @@ from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
|
|
|
8
8
|
from databao_context_engine.llm.prompts.provider import PromptProvider
|
|
9
9
|
from databao_context_engine.search_context.chunk_search_repository import (
|
|
10
10
|
ChunkSearchRepository,
|
|
11
|
+
ChunkType,
|
|
11
12
|
SearchResult,
|
|
12
13
|
)
|
|
13
14
|
from databao_context_engine.services.embedding_shard_resolver import EmbeddingShardResolver
|
|
@@ -55,6 +56,7 @@ class SearchContextService:
|
|
|
55
56
|
limit: int | None = None,
|
|
56
57
|
rag_mode: RAG_MODE,
|
|
57
58
|
context_search_mode: ContextSearchMode,
|
|
59
|
+
chunk_types: list[ChunkType] | None = None,
|
|
58
60
|
) -> list[SearchResult]:
|
|
59
61
|
if limit is None:
|
|
60
62
|
limit = 10
|
|
@@ -65,6 +67,7 @@ class SearchContextService:
|
|
|
65
67
|
limit=limit,
|
|
66
68
|
rag_mode=rag_mode,
|
|
67
69
|
context_search_mode=context_search_mode,
|
|
70
|
+
chunk_types=chunk_types,
|
|
68
71
|
)
|
|
69
72
|
|
|
70
73
|
logger.debug(f"Found {len(search_results)} search results")
|
|
@@ -90,6 +93,7 @@ class SearchContextService:
|
|
|
90
93
|
limit: int,
|
|
91
94
|
rag_mode: RAG_MODE,
|
|
92
95
|
context_search_mode: ContextSearchMode,
|
|
96
|
+
chunk_types: list[ChunkType] | None = None,
|
|
93
97
|
) -> list[SearchResult]:
|
|
94
98
|
if context_search_mode == ContextSearchMode.KEYWORD_SEARCH:
|
|
95
99
|
query_text = self._rewrite_search_query(text) if rag_mode == RAG_MODE.REWRITE_QUERY else text
|
|
@@ -98,6 +102,7 @@ class SearchContextService:
|
|
|
98
102
|
query_text=query_text,
|
|
99
103
|
limit=limit,
|
|
100
104
|
datasource_context_hashes=datasource_context_hashes,
|
|
105
|
+
chunk_types=chunk_types,
|
|
101
106
|
)
|
|
102
107
|
|
|
103
108
|
table_name, dimension = self._shard_resolver.resolve(
|
|
@@ -130,6 +135,7 @@ class SearchContextService:
|
|
|
130
135
|
dimension=dimension,
|
|
131
136
|
limit=limit,
|
|
132
137
|
datasource_context_hashes=datasource_context_hashes,
|
|
138
|
+
chunk_types=chunk_types,
|
|
133
139
|
)
|
|
134
140
|
case ContextSearchMode.HYBRID_SEARCH:
|
|
135
141
|
return self._chunk_search_repo.search_chunks_with_hybrid_search(
|
|
@@ -139,6 +145,7 @@ class SearchContextService:
|
|
|
139
145
|
dimension=dimension,
|
|
140
146
|
limit=limit,
|
|
141
147
|
datasource_context_hashes=datasource_context_hashes,
|
|
148
|
+
chunk_types=chunk_types,
|
|
142
149
|
)
|
|
143
150
|
|
|
144
151
|
@perf.perf_span("search_context.rewrite_query")
|
|
@@ -14,7 +14,7 @@ from databao_context_engine.llm.factory import (
|
|
|
14
14
|
from databao_context_engine.llm.prompts.provider import PromptProvider
|
|
15
15
|
from databao_context_engine.plugins.plugin_loader import DatabaoContextPluginLoader
|
|
16
16
|
from databao_context_engine.project.layout import ProjectLayout
|
|
17
|
-
from databao_context_engine.search_context.chunk_search_repository import ChunkSearchRepository, SearchResult
|
|
17
|
+
from databao_context_engine.search_context.chunk_search_repository import ChunkSearchRepository, ChunkType, SearchResult
|
|
18
18
|
from databao_context_engine.search_context.search_runner import run_context_search
|
|
19
19
|
from databao_context_engine.search_context.search_service import RAG_MODE, ContextSearchMode, SearchContextService
|
|
20
20
|
from databao_context_engine.services.factories import create_shard_resolver
|
|
@@ -38,6 +38,7 @@ def search_context(
|
|
|
38
38
|
limit: int | None,
|
|
39
39
|
datasource_ids: list[DatasourceId] | None,
|
|
40
40
|
context_search_mode: ContextSearchMode,
|
|
41
|
+
chunk_types: list[ChunkType] | None = None,
|
|
41
42
|
) -> list[SearchResult]:
|
|
42
43
|
with open_duckdb_connection(project_layout.db_path) as conn:
|
|
43
44
|
ollama_service = create_ollama_service()
|
|
@@ -62,6 +63,7 @@ def search_context(
|
|
|
62
63
|
datasource_ids=datasource_ids,
|
|
63
64
|
rag_mode=rag_mode,
|
|
64
65
|
context_search_mode=context_search_mode,
|
|
66
|
+
chunk_types=chunk_types,
|
|
65
67
|
)
|
|
66
68
|
|
|
67
69
|
|
|
@@ -135,7 +135,10 @@ class PersistenceService:
|
|
|
135
135
|
full_type=full_type,
|
|
136
136
|
datasource_id=datasource_id,
|
|
137
137
|
datasource_context_hash_id=datasource_context_hash_id,
|
|
138
|
-
chunk_contents=[
|
|
138
|
+
chunk_contents=[
|
|
139
|
+
(ce.embedded_text, ce.display_text, ce.keyword_indexable_text, ce.original_chunk.type)
|
|
140
|
+
for ce in chunk_embeddings
|
|
141
|
+
],
|
|
139
142
|
)
|
|
140
143
|
|
|
141
144
|
@perf.perf_span("persistence.bulk_insert_embeddings")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ALTER TABLE chunk ADD COLUMN IF NOT EXISTS chunk_type TEXT;
|
|
@@ -19,6 +19,7 @@ class ChunkRepository:
|
|
|
19
19
|
self,
|
|
20
20
|
*,
|
|
21
21
|
full_type: str,
|
|
22
|
+
chunk_type: str | None = None,
|
|
22
23
|
datasource_id: str,
|
|
23
24
|
embeddable_text: str,
|
|
24
25
|
display_text: Optional[str],
|
|
@@ -30,14 +31,15 @@ class ChunkRepository:
|
|
|
30
31
|
cur=self._conn,
|
|
31
32
|
sql="""
|
|
32
33
|
INSERT INTO
|
|
33
|
-
chunk(full_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
|
|
34
|
+
chunk(full_type, chunk_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
|
|
34
35
|
VALUES
|
|
35
|
-
(?, ?, ?, ?, ?, ?)
|
|
36
|
+
(?, ?, ?, ?, ?, ?, ?)
|
|
36
37
|
RETURNING
|
|
37
38
|
*
|
|
38
39
|
""",
|
|
39
40
|
params=[
|
|
40
41
|
full_type,
|
|
42
|
+
chunk_type,
|
|
41
43
|
datasource_id,
|
|
42
44
|
embeddable_text,
|
|
43
45
|
display_text,
|
|
@@ -181,13 +183,13 @@ class ChunkRepository:
|
|
|
181
183
|
*,
|
|
182
184
|
full_type: str,
|
|
183
185
|
datasource_id: str,
|
|
186
|
+
chunk_contents: Sequence[Tuple[str, Optional[str], str, Optional[str]]],
|
|
184
187
|
datasource_context_hash_id: int,
|
|
185
|
-
chunk_contents: Sequence[Tuple[str, Optional[str], str]],
|
|
186
188
|
) -> Sequence[int]:
|
|
187
|
-
values_sql = ", ".join(["(?, ?, ?, ?, ?, ?)"] * len(chunk_contents))
|
|
189
|
+
values_sql = ", ".join(["(?, ?, ?, ?, ?, ?, ?)"] * len(chunk_contents))
|
|
188
190
|
sql = f"""
|
|
189
191
|
INSERT INTO
|
|
190
|
-
chunk(full_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
|
|
192
|
+
chunk(full_type, chunk_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
|
|
191
193
|
VALUES
|
|
192
194
|
{values_sql}
|
|
193
195
|
RETURNING
|
|
@@ -195,10 +197,11 @@ class ChunkRepository:
|
|
|
195
197
|
"""
|
|
196
198
|
|
|
197
199
|
params: list[Any] = []
|
|
198
|
-
for embeddable_text, display_text, keyword_index_text in chunk_contents:
|
|
200
|
+
for embeddable_text, display_text, keyword_index_text, chunk_type in chunk_contents:
|
|
199
201
|
params.extend(
|
|
200
202
|
[
|
|
201
203
|
full_type,
|
|
204
|
+
chunk_type,
|
|
202
205
|
datasource_id,
|
|
203
206
|
embeddable_text,
|
|
204
207
|
display_text,
|
|
@@ -227,6 +230,7 @@ class ChunkRepository:
|
|
|
227
230
|
return ChunkDTO(
|
|
228
231
|
chunk_id=int(row["chunk_id"]),
|
|
229
232
|
full_type=row["full_type"],
|
|
233
|
+
chunk_type=row["chunk_type"],
|
|
230
234
|
datasource_id=row["datasource_id"],
|
|
231
235
|
embeddable_text=row["embeddable_text"],
|
|
232
236
|
display_text=row["display_text"],
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|