databao-context-engine 0.7.1.dev1__tar.gz → 0.7.1.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/PKG-INFO +1 -1
  2. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/pyproject.toml +1 -1
  3. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/__init__.py +2 -0
  4. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/context_loader.py +2 -9
  5. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/plugin_execution.py +5 -3
  6. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/databao_context_engine.py +4 -0
  7. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/build_plugin.py +2 -0
  8. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/database_chunker.py +2 -0
  9. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/config_file.py +5 -1
  10. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/chunk_search_repository.py +60 -11
  11. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_runner.py +3 -0
  12. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_service.py +7 -0
  13. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/search_wiring.py +3 -1
  14. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/persistence_service.py +4 -1
  15. databao_context_engine-0.7.1.dev2/src/databao_context_engine/storage/migrations/V04__add_chunk_type.sql +1 -0
  16. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/models.py +1 -0
  17. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/chunk_repository.py +10 -6
  18. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/LICENSE.md +0 -0
  19. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/README.md +0 -0
  20. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/__init__.py +0 -0
  21. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_runner.py +0 -0
  22. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_service.py +0 -0
  23. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/build_wiring.py +0 -0
  24. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/export_results.py +0 -0
  25. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/build_sources/types.py +0 -0
  26. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/__init__.py +0 -0
  27. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/commands.py +0 -0
  28. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/datasources.py +0 -0
  29. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/info.py +0 -0
  30. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/cli/user_input_cb_impl.py +0 -0
  31. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/__init__.py +0 -0
  32. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/log_config.yaml +0 -0
  33. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/config/logging.py +0 -0
  34. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/databao_context_domain_manager.py +0 -0
  35. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/__init__.py +0 -0
  36. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/check_config.py +0 -0
  37. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/config_wizard.py +0 -0
  38. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/datasource_context.py +0 -0
  39. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/datasource_discovery.py +0 -0
  40. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/execute_sql_query.py +0 -0
  41. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/sql_read_only.py +0 -0
  42. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/datasources/types.py +0 -0
  43. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/event_journal/__init__.py +0 -0
  44. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/event_journal/writer.py +0 -0
  45. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/generate_configs_schemas.py +0 -0
  46. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/init_domain.py +0 -0
  47. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/introspection/__init__.py +0 -0
  48. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/introspection/property_extract.py +0 -0
  49. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/__init__.py +0 -0
  50. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/api.py +0 -0
  51. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/config.py +0 -0
  52. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/__init__.py +0 -0
  53. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/ollama.py +0 -0
  54. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/descriptions/provider.py +0 -0
  55. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/__init__.py +0 -0
  56. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/ollama.py +0 -0
  57. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/embeddings/provider.py +0 -0
  58. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/errors.py +0 -0
  59. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/factory.py +0 -0
  60. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/install.py +0 -0
  61. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/__init__.py +0 -0
  62. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/ollama.py +0 -0
  63. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/prompts/provider.py +0 -0
  64. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/runtime.py +0 -0
  65. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/llm/service.py +0 -0
  66. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/main.py +0 -0
  67. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/__init__.py +0 -0
  68. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/mcp_runner.py +0 -0
  69. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/mcp/mcp_server.py +0 -0
  70. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/perf/core.py +0 -0
  71. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/__init__.py +0 -0
  72. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/config.py +0 -0
  73. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/plugin_utils.py +0 -0
  74. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/sql/__init__.py +0 -0
  75. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/pluginlib/sql/sql_types.py +0 -0
  76. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/__init__.py +0 -0
  77. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/__init__.py +0 -0
  78. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/__init__.py +0 -0
  79. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_connector.py +0 -0
  80. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_db_plugin.py +0 -0
  81. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/athena_introspector.py +0 -0
  82. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/athena/config_file.py +0 -0
  83. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_connector.py +0 -0
  84. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_db_plugin.py +0 -0
  85. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/base_introspector.py +0 -0
  86. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/__init__.py +0 -0
  87. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_connector.py +0 -0
  88. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_db_plugin.py +0 -0
  89. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/bigquery_introspector.py +0 -0
  90. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/bigquery/config_file.py +0 -0
  91. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/__init__.py +0 -0
  92. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_connector.py +0 -0
  93. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_db_plugin.py +0 -0
  94. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/clickhouse_introspector.py +0 -0
  95. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/clickhouse/config_file.py +0 -0
  96. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/context_enricher.py +0 -0
  97. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/database_context_explorer.py +0 -0
  98. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/databases_types.py +0 -0
  99. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/__init__.py +0 -0
  100. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/config_file.py +0 -0
  101. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_connector.py +0 -0
  102. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +0 -0
  103. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/duckdb/duckdb_introspector.py +0 -0
  104. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_model_builder.py +0 -0
  105. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_scope.py +0 -0
  106. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/introspection_scope_matcher.py +0 -0
  107. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/__init__.py +0 -0
  108. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/config_file.py +0 -0
  109. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_connector.py +0 -0
  110. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_db_plugin.py +0 -0
  111. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mssql/mssql_introspector.py +0 -0
  112. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/__init__.py +0 -0
  113. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/config_file.py +0 -0
  114. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_connector.py +0 -0
  115. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_db_plugin.py +0 -0
  116. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/mysql/mysql_introspector.py +0 -0
  117. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  118. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/config_file.py +0 -0
  119. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_connector.py +0 -0
  120. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +0 -0
  121. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/postgresql_introspector.py +0 -0
  122. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/postgresql/sync_asyncpg_connection.py +0 -0
  123. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/profiling_config.py +0 -0
  124. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sampling_scope.py +0 -0
  125. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sampling_scope_matcher.py +0 -0
  126. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  127. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_connector.py +0 -0
  128. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +0 -0
  129. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/snowflake/snowflake_introspector.py +0 -0
  130. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  131. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/config_file.py +0 -0
  132. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_connector.py +0 -0
  133. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +0 -0
  134. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +0 -0
  135. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/__init__.py +0 -0
  136. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/context_filtering.py +0 -0
  137. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_chunker.py +0 -0
  138. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_context_extractor.py +0 -0
  139. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/dbt_plugin.py +0 -0
  140. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/types.py +0 -0
  141. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/dbt/types_artifacts.py +0 -0
  142. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/duckdb_tools.py +0 -0
  143. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/__init__.py +0 -0
  144. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/docling_chunker.py +0 -0
  145. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/pdf_plugin.py +0 -0
  146. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/files/unstructured_files_plugin.py +0 -0
  147. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/plugin_loader.py +0 -0
  148. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/__init__.py +0 -0
  149. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_chunker.py +0 -0
  150. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_introspector.py +0 -0
  151. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/parquet_plugin.py +0 -0
  152. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/plugins/resources/types.py +0 -0
  153. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/progress/progress.py +0 -0
  154. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/__init__.py +0 -0
  155. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/info.py +0 -0
  156. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/init_project.py +0 -0
  157. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/layout.py +0 -0
  158. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/project_config.py +0 -0
  159. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +0 -0
  160. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/files/documentation.md +0 -0
  161. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/project/resources/examples/src/files/notes.txt +0 -0
  162. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/py.typed +0 -0
  163. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/search_context/__init__.py +0 -0
  164. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/serialization/__init__.py +0 -0
  165. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/serialization/yaml.py +0 -0
  166. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/__init__.py +0 -0
  167. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/chunk_embedding_service.py +0 -0
  168. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/embedding_shard_resolver.py +0 -0
  169. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/factories.py +0 -0
  170. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/models.py +0 -0
  171. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/services/table_name_policy.py +0 -0
  172. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/__init__.py +0 -0
  173. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/connection.py +0 -0
  174. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/exceptions/__init__.py +0 -0
  175. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/exceptions/exceptions.py +0 -0
  176. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrate.py +0 -0
  177. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V01__init.sql +0 -0
  178. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V02__add_keyword_index_text.sql +0 -0
  179. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V03__add_indexed_datasource_table.py +0 -0
  180. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/migrations/V03__add_indexed_datasource_table.sql +0 -0
  181. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/__init__.py +0 -0
  182. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/datasource_context_repository.py +0 -0
  183. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/embedding_model_registry_repository.py +0 -0
  184. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/embedding_repository.py +0 -0
  185. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/repositories/factories.py +0 -0
  186. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/storage/transaction.py +0 -0
  187. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/system/__init__.py +0 -0
  188. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/system/properties.py +0 -0
  189. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/templating/__init__.py +0 -0
  190. {databao_context_engine-0.7.1.dev1 → databao_context_engine-0.7.1.dev2}/src/databao_context_engine/templating/renderer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databao-context-engine
3
- Version: 0.7.1.dev1
3
+ Version: 0.7.1.dev2
4
4
  Summary: Semantic context for your LLMs — generated automatically
5
5
  License-Expression: Apache-2.0 AND LicenseRef-Additional-Terms
6
6
  License-File: LICENSE.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "databao-context-engine"
3
- version = "0.7.1.dev1"
3
+ version = "0.7.1.dev2"
4
4
  description = "Semantic context for your LLMs — generated automatically"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -57,6 +57,7 @@ from databao_context_engine.plugins.databases.snowflake.config_file import (
57
57
  SnowflakeConfigFile,
58
58
  SnowflakeConnectionProperties,
59
59
  SnowflakeKeyPairAuth,
60
+ SnowflakeOAuthAuth,
60
61
  SnowflakePasswordAuth,
61
62
  SnowflakeSSOAuth,
62
63
  )
@@ -134,6 +135,7 @@ __all__ = [
134
135
  "SnowflakeConfigFile",
135
136
  "SnowflakeConnectionProperties",
136
137
  "SnowflakeSSOAuth",
138
+ "SnowflakeOAuthAuth",
137
139
  "SnowflakeKeyPairAuth",
138
140
  "SnowflakePasswordAuth",
139
141
  "SQLiteConfigFile",
@@ -1,10 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- from dataclasses import replace
4
3
  from typing import Any
5
4
 
6
5
  import yaml
7
- from pydantic import BaseModel, TypeAdapter
6
+ from pydantic import TypeAdapter
8
7
 
9
8
  from databao_context_engine.build_sources.plugin_execution import BuiltDatasourceContext
10
9
  from databao_context_engine.datasources.datasource_context import (
@@ -45,14 +44,8 @@ def deserialize_built_context(
45
44
  ) -> BuiltDatasourceContext:
46
45
  """Parse a datasource output YAML payload and type the embedded context."""
47
46
  raw_context = yaml.safe_load(context.context)
48
- built = TypeAdapter(BuiltDatasourceContext).validate_python(raw_context)
49
47
 
50
- if isinstance(context_type, type) and issubclass(context_type, BaseModel):
51
- typed_context: Any = context_type.model_validate(built.context)
52
- else:
53
- typed_context = TypeAdapter(context_type).validate_python(built.context)
54
-
55
- return replace(built, context=typed_context)
48
+ return TypeAdapter(BuiltDatasourceContext[context_type]).validate_python(raw_context) # type: ignore[valid-type]
56
49
 
57
50
 
58
51
  def _load_typed_built_context(
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import Any, cast
2
+ from typing import Any, Generic, TypeVar, cast
3
3
 
4
4
  from databao_context_engine.datasources.types import PreparedConfig, PreparedDatasource
5
5
  from databao_context_engine.pluginlib.build_plugin import (
@@ -10,9 +10,11 @@ from databao_context_engine.pluginlib.build_plugin import (
10
10
  from databao_context_engine.pluginlib.plugin_utils import execute_datasource_plugin, execute_file_plugin
11
11
  from databao_context_engine.project.layout import ProjectLayout
12
12
 
13
+ ContextT = TypeVar("ContextT")
14
+
13
15
 
14
16
  @dataclass()
15
- class BuiltDatasourceContext:
17
+ class BuiltDatasourceContext(Generic[ContextT]):
16
18
  """Dataclass defining the result of building a datasource's context."""
17
19
 
18
20
  datasource_id: str
@@ -25,7 +27,7 @@ class BuiltDatasourceContext:
25
27
  The type of the built data source
26
28
  """
27
29
 
28
- context: Any
30
+ context: ContextT
29
31
  """
30
32
  A dictionary containing the actual context generated for the data source.
31
33
  This dictionary should be serializable in YAML format.
@@ -25,6 +25,7 @@ from databao_context_engine.plugins.databases.database_context_explorer import (
25
25
  from databao_context_engine.plugins.plugin_loader import DatabaoContextPluginLoader
26
26
  from databao_context_engine.project.layout import ProjectLayout, ensure_project_dir
27
27
  from databao_context_engine.search_context import search_context as search_context_internal
28
+ from databao_context_engine.search_context.chunk_search_repository import ChunkType
28
29
  from databao_context_engine.search_context.search_service import ContextSearchMode
29
30
 
30
31
 
@@ -131,6 +132,7 @@ class DatabaoContextEngine:
131
132
  limit: int | None = None,
132
133
  datasource_ids: list[DatasourceId] | None = None,
133
134
  context_search_mode: ContextSearchMode | None = None,
135
+ chunk_types: list[ChunkType] | None = None,
134
136
  ) -> list[ContextSearchResult]:
135
137
  """Search in the available context for the closest matches to the given text.
136
138
 
@@ -139,6 +141,7 @@ class DatabaoContextEngine:
139
141
  limit: The maximum number of results to return. If None is provided, a default limit of 10 will be used.
140
142
  datasource_ids: If provided, the search results will only come from the datasources with these IDs.
141
143
  context_search_mode: Search strategy to use. Defaults to HYBRID_SEARCH if None is provided.
144
+ chunk_types: If provided, the search results will only come from the chunks of these types.
142
145
 
143
146
  Returns:
144
147
  A list of the results found for the search, sorted by score.
@@ -153,6 +156,7 @@ class DatabaoContextEngine:
153
156
  limit=limit,
154
157
  datasource_ids=datasource_ids,
155
158
  context_search_mode=context_search_mode,
159
+ chunk_types=chunk_types,
156
160
  )
157
161
 
158
162
  return [
@@ -12,10 +12,12 @@ class EmbeddableChunk:
12
12
  """A chunk that will be embedded as a vector and used when searching context from a given AI prompt.
13
13
 
14
14
  Attributes:
15
+ type: The type of the chunk, e.g. "table", "column" to allow for search by chunk type.
15
16
  embeddable_text: The text to embed as a vector for search usage
16
17
  content: The content to return as a response when the embedding has been selected in a search
17
18
  """
18
19
 
20
+ type: str | None = None
19
21
  embeddable_text: str
20
22
  keyword_indexable_text: str | None = None
21
23
  content: Any
@@ -39,6 +39,7 @@ def build_database_chunks(result: DatabaseIntrospectionResult) -> list[Embeddabl
39
39
 
40
40
  def _create_table_chunk(catalog_name: str, schema_name: str, table: DatabaseTable) -> EmbeddableChunk:
41
41
  return EmbeddableChunk(
42
+ type="table",
42
43
  embeddable_text=_build_table_chunk_text(table),
43
44
  content=DatabaseTableChunkContent(
44
45
  catalog_name=catalog_name,
@@ -52,6 +53,7 @@ def _create_column_chunk(
52
53
  catalog_name: str, schema_name: str, table: DatabaseTable, column: DatabaseColumn
53
54
  ) -> EmbeddableChunk:
54
55
  return EmbeddableChunk(
56
+ type="column",
55
57
  embeddable_text=_build_column_chunk_text(table, column),
56
58
  content=DatabaseColumnChunkContent(
57
59
  catalog_name=catalog_name,
@@ -22,13 +22,17 @@ class SnowflakeSSOAuth(BaseModel):
22
22
  authenticator: str = Field(description='e.g. "externalbrowser"')
23
23
 
24
24
 
25
+ class SnowflakeOAuthAuth(BaseModel):
26
+ token: Annotated[str, ConfigPropertyAnnotation(secret=True)]
27
+
28
+
25
29
  class SnowflakeConnectionProperties(BaseModel):
26
30
  account: Annotated[str, ConfigPropertyAnnotation(required=True)]
27
31
  warehouse: str | None = None
28
32
  database: str | None = None
29
33
  user: str | None = None
30
34
  role: str | None = None
31
- auth: SnowflakePasswordAuth | SnowflakeKeyPairAuth | SnowflakeSSOAuth
35
+ auth: SnowflakePasswordAuth | SnowflakeKeyPairAuth | SnowflakeSSOAuth | SnowflakeOAuthAuth
32
36
  additional_properties: dict[str, Any] = {}
33
37
 
34
38
  def to_snowflake_kwargs(self) -> dict[str, Any]:
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  from collections.abc import Sequence
3
3
  from dataclasses import dataclass
4
+ from enum import Enum
4
5
  from typing import Any
5
6
 
6
7
  import duckdb
@@ -13,9 +14,17 @@ from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
14
  logger = logging.getLogger(__name__)
14
15
 
15
16
 
17
+ class ChunkType(str, Enum):
18
+ """Enum of the supported chunk types to search."""
19
+
20
+ TABLE = "table"
21
+ COLUMN = "column"
22
+
23
+
16
24
  @dataclass(kw_only=True, frozen=True)
17
25
  class VectorSearchCandidate:
18
26
  chunk_id: int
27
+ chunk_type: ChunkType | None
19
28
  display_text: str
20
29
  embeddable_text: str
21
30
  cosine_distance: float
@@ -26,6 +35,7 @@ class VectorSearchCandidate:
26
35
  @dataclass(kw_only=True, frozen=True)
27
36
  class Bm25SearchCandidate:
28
37
  chunk_id: int
38
+ chunk_type: ChunkType | None
29
39
  display_text: str
30
40
  embeddable_text: str
31
41
  bm25_score: float
@@ -65,6 +75,7 @@ class KeywordSearchScore:
65
75
  @dataclass(kw_only=True, frozen=True)
66
76
  class SearchResult:
67
77
  chunk_id: int
78
+ chunk_type: ChunkType | None
68
79
  display_text: str
69
80
  embeddable_text: str
70
81
  datasource_type: DatasourceType
@@ -90,6 +101,7 @@ class ChunkSearchRepository:
90
101
  dimension: int,
91
102
  limit: int,
92
103
  datasource_context_hashes: list[DatasourceContextHash],
104
+ chunk_types: list[ChunkType] | None = None,
93
105
  ) -> list[SearchResult]:
94
106
  """Read only similarity search on a specific embedding shard table."""
95
107
  vector_candidates = self._get_vector_candidates(
@@ -98,10 +110,12 @@ class ChunkSearchRepository:
98
110
  dimension=dimension,
99
111
  limit=limit,
100
112
  datasource_context_hashes=datasource_context_hashes,
113
+ chunk_types=chunk_types,
101
114
  )
102
115
  return [
103
116
  SearchResult(
104
117
  chunk_id=candidate.chunk_id,
118
+ chunk_type=candidate.chunk_type,
105
119
  display_text=candidate.display_text,
106
120
  embeddable_text=candidate.embeddable_text,
107
121
  datasource_type=candidate.datasource_type,
@@ -120,15 +134,26 @@ class ChunkSearchRepository:
120
134
  dimension: int,
121
135
  limit: int,
122
136
  datasource_context_hashes: list[DatasourceContextHash],
137
+ chunk_types: list[ChunkType] | None = None,
123
138
  ) -> list[VectorSearchCandidate]:
124
139
  """Read only vector candidates on a specific embedding shard table."""
125
140
  if not datasource_context_hashes:
126
141
  return []
127
142
 
128
143
  allowed_hashes_sql, hash_params = self._build_allowed_hashes_values(datasource_context_hashes)
144
+
145
+ chunk_types_param: list[list[ChunkType]]
146
+ if chunk_types:
147
+ search_candidates_chunk_type_filter = "WHERE c.chunk_type IN ?"
148
+ chunk_types_param = [chunk_types]
149
+ else:
150
+ chunk_types_param = []
151
+ search_candidates_chunk_type_filter = ""
152
+
129
153
  params: list[Any] = [
130
154
  *hash_params,
131
155
  list(search_vec),
156
+ *chunk_types_param,
132
157
  self._DEFAULT_DISTANCE_THRESHOLD,
133
158
  limit,
134
159
  ]
@@ -141,6 +166,7 @@ class ChunkSearchRepository:
141
166
  vector_candidates AS (
142
167
  SELECT
143
168
  c.chunk_id,
169
+ c.chunk_type,
144
170
  COALESCE(c.display_text, c.embeddable_text) AS display_text,
145
171
  c.embeddable_text,
146
172
  array_cosine_distance(e.vec, CAST(? AS FLOAT[{dimension}])) AS cosine_distance,
@@ -154,9 +180,11 @@ class ChunkSearchRepository:
154
180
  ON h.datasource_id = ah.datasource_id
155
181
  AND h.hash = ah.hash
156
182
  AND h.hash_algorithm = ah.hash_algorithm
183
+ {search_candidates_chunk_type_filter}
157
184
  )
158
185
  SELECT
159
186
  vc.chunk_id,
187
+ vc.chunk_type,
160
188
  vc.display_text,
161
189
  vc.embeddable_text,
162
190
  vc.cosine_distance,
@@ -176,11 +204,12 @@ class ChunkSearchRepository:
176
204
  return [
177
205
  VectorSearchCandidate(
178
206
  chunk_id=row[0],
179
- display_text=row[1],
180
- embeddable_text=row[2],
181
- cosine_distance=row[3],
182
- datasource_type=DatasourceType(full_type=row[4]),
183
- datasource_id=DatasourceId.from_string_repr(row[5]),
207
+ chunk_type=ChunkType(row[1]) if row[1] else None,
208
+ display_text=row[2],
209
+ embeddable_text=row[3],
210
+ cosine_distance=row[4],
211
+ datasource_type=DatasourceType(full_type=row[5]),
212
+ datasource_id=DatasourceId.from_string_repr(row[6]),
184
213
  )
185
214
  for row in rows
186
215
  ]
@@ -195,6 +224,7 @@ class ChunkSearchRepository:
195
224
  dimension: int,
196
225
  limit: int,
197
226
  datasource_context_hashes: list[DatasourceContextHash],
227
+ chunk_types: list[ChunkType] | None = None,
198
228
  ) -> list[SearchResult]:
199
229
  """Hybrid retrieval combining vector similarity and BM25 with Reciprocal Rank Fusion (RRF).
200
230
 
@@ -208,12 +238,14 @@ class ChunkSearchRepository:
208
238
  dimension=dimension,
209
239
  limit=candidate_limit,
210
240
  datasource_context_hashes=datasource_context_hashes,
241
+ chunk_types=chunk_types,
211
242
  )
212
243
 
213
244
  bm25_candidates = self._get_bm25_candidates(
214
245
  query_text=search_text,
215
246
  limit=candidate_limit,
216
247
  datasource_context_hashes=datasource_context_hashes,
248
+ chunk_types=chunk_types,
217
249
  )
218
250
  return self._fuse_by_rrf(
219
251
  vector_candidates=vector_candidates,
@@ -228,17 +260,20 @@ class ChunkSearchRepository:
228
260
  query_text: str,
229
261
  limit: int,
230
262
  datasource_context_hashes: list[DatasourceContextHash],
263
+ chunk_types: list[ChunkType] | None = None,
231
264
  ) -> list[SearchResult]:
232
265
  """Read only BM25 search over chunk text."""
233
266
  bm25_candidates = self._get_bm25_candidates(
234
267
  query_text=query_text,
235
268
  limit=limit,
236
269
  datasource_context_hashes=datasource_context_hashes,
270
+ chunk_types=chunk_types,
237
271
  )
238
272
 
239
273
  return [
240
274
  SearchResult(
241
275
  chunk_id=candidate.chunk_id,
276
+ chunk_type=candidate.chunk_type,
242
277
  display_text=candidate.display_text,
243
278
  embeddable_text=candidate.embeddable_text,
244
279
  datasource_type=candidate.datasource_type,
@@ -255,12 +290,21 @@ class ChunkSearchRepository:
255
290
  query_text: str,
256
291
  limit: int,
257
292
  datasource_context_hashes: list[DatasourceContextHash],
293
+ chunk_types: list[ChunkType] | None = None,
258
294
  ) -> list[Bm25SearchCandidate]:
259
295
  if not datasource_context_hashes:
260
296
  return []
261
297
 
262
298
  allowed_hashes_sql, hash_params = self._build_allowed_hashes_values(datasource_context_hashes)
263
- params: list[Any] = [*hash_params, query_text, limit]
299
+ chunk_types_param: list[list[ChunkType]]
300
+ if chunk_types:
301
+ search_candidates_chunk_type_filter = "WHERE c.chunk_type IN ?"
302
+ chunk_types_param = [chunk_types]
303
+ else:
304
+ chunk_types_param = []
305
+ search_candidates_chunk_type_filter = ""
306
+
307
+ params: list[Any] = [*hash_params, query_text, *chunk_types_param, limit]
264
308
 
265
309
  rows = self._conn.execute(
266
310
  f"""
@@ -270,6 +314,7 @@ class ChunkSearchRepository:
270
314
  bm25_candidates AS (
271
315
  SELECT
272
316
  c.chunk_id,
317
+ c.chunk_type,
273
318
  COALESCE(c.display_text, c.embeddable_text) AS display_text,
274
319
  c.embeddable_text,
275
320
  c.full_type,
@@ -285,9 +330,11 @@ class ChunkSearchRepository:
285
330
  ON h.datasource_id = ah.datasource_id
286
331
  AND h.hash = ah.hash
287
332
  AND h.hash_algorithm = ah.hash_algorithm
333
+ {search_candidates_chunk_type_filter}
288
334
  )
289
335
  SELECT
290
336
  b.chunk_id,
337
+ b.chunk_type,
291
338
  b.display_text,
292
339
  b.embeddable_text,
293
340
  b.bm25_score,
@@ -307,11 +354,12 @@ class ChunkSearchRepository:
307
354
  return [
308
355
  Bm25SearchCandidate(
309
356
  chunk_id=row[0],
310
- display_text=row[1],
311
- embeddable_text=row[2],
312
- bm25_score=row[3],
313
- datasource_type=DatasourceType(full_type=row[4]),
314
- datasource_id=DatasourceId.from_string_repr(row[5]),
357
+ chunk_type=ChunkType(row[1]) if row[1] else None,
358
+ display_text=row[2],
359
+ embeddable_text=row[3],
360
+ bm25_score=row[4],
361
+ datasource_type=DatasourceType(full_type=row[5]),
362
+ datasource_id=DatasourceId.from_string_repr(row[6]),
315
363
  )
316
364
  for row in rows
317
365
  ]
@@ -365,6 +413,7 @@ class ChunkSearchRepository:
365
413
  results.append(
366
414
  SearchResult(
367
415
  chunk_id=chunk_id,
416
+ chunk_type=data_candidate.chunk_type,
368
417
  display_text=data_candidate.display_text,
369
418
  embeddable_text=data_candidate.embeddable_text,
370
419
  datasource_type=data_candidate.datasource_type,
@@ -5,6 +5,7 @@ from databao_context_engine.datasources.datasource_context import (
5
5
  )
6
6
  from databao_context_engine.datasources.types import DatasourceId
7
7
  from databao_context_engine.project.layout import ProjectLayout
8
+ from databao_context_engine.search_context.chunk_search_repository import ChunkType
8
9
  from databao_context_engine.search_context.search_service import RAG_MODE, ContextSearchMode, SearchContextService
9
10
 
10
11
 
@@ -18,6 +19,7 @@ def run_context_search(
18
19
  datasource_ids: list[DatasourceId] | None,
19
20
  rag_mode: RAG_MODE,
20
21
  context_search_mode: ContextSearchMode,
22
+ chunk_types: list[ChunkType] | None = None,
21
23
  ):
22
24
  context_hashes = (
23
25
  get_datasource_context_hashes(project_layout, datasource_ids)
@@ -33,4 +35,5 @@ def run_context_search(
33
35
  datasource_context_hashes=context_hashes,
34
36
  rag_mode=rag_mode,
35
37
  context_search_mode=context_search_mode,
38
+ chunk_types=chunk_types,
36
39
  )
@@ -8,6 +8,7 @@ from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
8
8
  from databao_context_engine.llm.prompts.provider import PromptProvider
9
9
  from databao_context_engine.search_context.chunk_search_repository import (
10
10
  ChunkSearchRepository,
11
+ ChunkType,
11
12
  SearchResult,
12
13
  )
13
14
  from databao_context_engine.services.embedding_shard_resolver import EmbeddingShardResolver
@@ -55,6 +56,7 @@ class SearchContextService:
55
56
  limit: int | None = None,
56
57
  rag_mode: RAG_MODE,
57
58
  context_search_mode: ContextSearchMode,
59
+ chunk_types: list[ChunkType] | None = None,
58
60
  ) -> list[SearchResult]:
59
61
  if limit is None:
60
62
  limit = 10
@@ -65,6 +67,7 @@ class SearchContextService:
65
67
  limit=limit,
66
68
  rag_mode=rag_mode,
67
69
  context_search_mode=context_search_mode,
70
+ chunk_types=chunk_types,
68
71
  )
69
72
 
70
73
  logger.debug(f"Found {len(search_results)} search results")
@@ -90,6 +93,7 @@ class SearchContextService:
90
93
  limit: int,
91
94
  rag_mode: RAG_MODE,
92
95
  context_search_mode: ContextSearchMode,
96
+ chunk_types: list[ChunkType] | None = None,
93
97
  ) -> list[SearchResult]:
94
98
  if context_search_mode == ContextSearchMode.KEYWORD_SEARCH:
95
99
  query_text = self._rewrite_search_query(text) if rag_mode == RAG_MODE.REWRITE_QUERY else text
@@ -98,6 +102,7 @@ class SearchContextService:
98
102
  query_text=query_text,
99
103
  limit=limit,
100
104
  datasource_context_hashes=datasource_context_hashes,
105
+ chunk_types=chunk_types,
101
106
  )
102
107
 
103
108
  table_name, dimension = self._shard_resolver.resolve(
@@ -130,6 +135,7 @@ class SearchContextService:
130
135
  dimension=dimension,
131
136
  limit=limit,
132
137
  datasource_context_hashes=datasource_context_hashes,
138
+ chunk_types=chunk_types,
133
139
  )
134
140
  case ContextSearchMode.HYBRID_SEARCH:
135
141
  return self._chunk_search_repo.search_chunks_with_hybrid_search(
@@ -139,6 +145,7 @@ class SearchContextService:
139
145
  dimension=dimension,
140
146
  limit=limit,
141
147
  datasource_context_hashes=datasource_context_hashes,
148
+ chunk_types=chunk_types,
142
149
  )
143
150
 
144
151
  @perf.perf_span("search_context.rewrite_query")
@@ -14,7 +14,7 @@ from databao_context_engine.llm.factory import (
14
14
  from databao_context_engine.llm.prompts.provider import PromptProvider
15
15
  from databao_context_engine.plugins.plugin_loader import DatabaoContextPluginLoader
16
16
  from databao_context_engine.project.layout import ProjectLayout
17
- from databao_context_engine.search_context.chunk_search_repository import ChunkSearchRepository, SearchResult
17
+ from databao_context_engine.search_context.chunk_search_repository import ChunkSearchRepository, ChunkType, SearchResult
18
18
  from databao_context_engine.search_context.search_runner import run_context_search
19
19
  from databao_context_engine.search_context.search_service import RAG_MODE, ContextSearchMode, SearchContextService
20
20
  from databao_context_engine.services.factories import create_shard_resolver
@@ -38,6 +38,7 @@ def search_context(
38
38
  limit: int | None,
39
39
  datasource_ids: list[DatasourceId] | None,
40
40
  context_search_mode: ContextSearchMode,
41
+ chunk_types: list[ChunkType] | None = None,
41
42
  ) -> list[SearchResult]:
42
43
  with open_duckdb_connection(project_layout.db_path) as conn:
43
44
  ollama_service = create_ollama_service()
@@ -62,6 +63,7 @@ def search_context(
62
63
  datasource_ids=datasource_ids,
63
64
  rag_mode=rag_mode,
64
65
  context_search_mode=context_search_mode,
66
+ chunk_types=chunk_types,
65
67
  )
66
68
 
67
69
 
@@ -135,7 +135,10 @@ class PersistenceService:
135
135
  full_type=full_type,
136
136
  datasource_id=datasource_id,
137
137
  datasource_context_hash_id=datasource_context_hash_id,
138
- chunk_contents=[(ce.embedded_text, ce.display_text, ce.keyword_indexable_text) for ce in chunk_embeddings],
138
+ chunk_contents=[
139
+ (ce.embedded_text, ce.display_text, ce.keyword_indexable_text, ce.original_chunk.type)
140
+ for ce in chunk_embeddings
141
+ ],
139
142
  )
140
143
 
141
144
  @perf.perf_span("persistence.bulk_insert_embeddings")
@@ -0,0 +1 @@
1
+ ALTER TABLE chunk ADD COLUMN IF NOT EXISTS chunk_type TEXT;
@@ -23,6 +23,7 @@ class ChunkDTO:
23
23
  display_text: Optional[str]
24
24
  created_at: datetime
25
25
  datasource_context_hash_id: int
26
+ chunk_type: str | None = None
26
27
 
27
28
 
28
29
  @dataclass(frozen=True)
@@ -19,6 +19,7 @@ class ChunkRepository:
19
19
  self,
20
20
  *,
21
21
  full_type: str,
22
+ chunk_type: str | None = None,
22
23
  datasource_id: str,
23
24
  embeddable_text: str,
24
25
  display_text: Optional[str],
@@ -30,14 +31,15 @@ class ChunkRepository:
30
31
  cur=self._conn,
31
32
  sql="""
32
33
  INSERT INTO
33
- chunk(full_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
34
+ chunk(full_type, chunk_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
34
35
  VALUES
35
- (?, ?, ?, ?, ?, ?)
36
+ (?, ?, ?, ?, ?, ?, ?)
36
37
  RETURNING
37
38
  *
38
39
  """,
39
40
  params=[
40
41
  full_type,
42
+ chunk_type,
41
43
  datasource_id,
42
44
  embeddable_text,
43
45
  display_text,
@@ -181,13 +183,13 @@ class ChunkRepository:
181
183
  *,
182
184
  full_type: str,
183
185
  datasource_id: str,
186
+ chunk_contents: Sequence[Tuple[str, Optional[str], str, Optional[str]]],
184
187
  datasource_context_hash_id: int,
185
- chunk_contents: Sequence[Tuple[str, Optional[str], str]],
186
188
  ) -> Sequence[int]:
187
- values_sql = ", ".join(["(?, ?, ?, ?, ?, ?)"] * len(chunk_contents))
189
+ values_sql = ", ".join(["(?, ?, ?, ?, ?, ?, ?)"] * len(chunk_contents))
188
190
  sql = f"""
189
191
  INSERT INTO
190
- chunk(full_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
192
+ chunk(full_type, chunk_type, datasource_id, embeddable_text, display_text, keyword_index_text, datasource_context_hash_id)
191
193
  VALUES
192
194
  {values_sql}
193
195
  RETURNING
@@ -195,10 +197,11 @@ class ChunkRepository:
195
197
  """
196
198
 
197
199
  params: list[Any] = []
198
- for embeddable_text, display_text, keyword_index_text in chunk_contents:
200
+ for embeddable_text, display_text, keyword_index_text, chunk_type in chunk_contents:
199
201
  params.extend(
200
202
  [
201
203
  full_type,
204
+ chunk_type,
202
205
  datasource_id,
203
206
  embeddable_text,
204
207
  display_text,
@@ -227,6 +230,7 @@ class ChunkRepository:
227
230
  return ChunkDTO(
228
231
  chunk_id=int(row["chunk_id"]),
229
232
  full_type=row["full_type"],
233
+ chunk_type=row["chunk_type"],
230
234
  datasource_id=row["datasource_id"],
231
235
  embeddable_text=row["embeddable_text"],
232
236
  display_text=row["display_text"],