databao-context-engine 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. databao_context_engine/__init__.py +18 -6
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +84 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
  8. databao_context_engine/cli/add_datasource_config.py +41 -15
  9. databao_context_engine/cli/commands.py +12 -43
  10. databao_context_engine/cli/info.py +2 -2
  11. databao_context_engine/databao_context_engine.py +137 -0
  12. databao_context_engine/databao_context_project_manager.py +96 -6
  13. databao_context_engine/datasources/add_config.py +34 -0
  14. databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
  15. databao_context_engine/datasources/datasource_context.py +93 -0
  16. databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
  17. databao_context_engine/{project → datasources}/types.py +64 -15
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +59 -30
  20. databao_context_engine/llm/errors.py +2 -8
  21. databao_context_engine/llm/install.py +13 -20
  22. databao_context_engine/llm/service.py +1 -3
  23. databao_context_engine/mcp/all_results_tool.py +2 -2
  24. databao_context_engine/mcp/mcp_runner.py +4 -2
  25. databao_context_engine/mcp/mcp_server.py +1 -4
  26. databao_context_engine/mcp/retrieve_tool.py +3 -11
  27. databao_context_engine/plugin_loader.py +111 -0
  28. databao_context_engine/pluginlib/build_plugin.py +25 -9
  29. databao_context_engine/pluginlib/config.py +16 -2
  30. databao_context_engine/plugins/databases/athena_introspector.py +85 -22
  31. databao_context_engine/plugins/databases/base_introspector.py +5 -3
  32. databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
  33. databao_context_engine/plugins/databases/duckdb_introspector.py +1 -1
  34. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  35. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  36. databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
  37. databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
  38. databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
  39. databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
  40. databao_context_engine/plugins/plugin_loader.py +43 -42
  41. databao_context_engine/plugins/resources/parquet_introspector.py +2 -3
  42. databao_context_engine/project/info.py +31 -2
  43. databao_context_engine/project/init_project.py +16 -7
  44. databao_context_engine/project/layout.py +3 -3
  45. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  46. databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
  47. databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
  48. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
  49. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
  50. databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
  51. databao_context_engine/services/chunk_embedding_service.py +23 -11
  52. databao_context_engine/services/factories.py +1 -46
  53. databao_context_engine/services/persistence_service.py +11 -11
  54. databao_context_engine/storage/connection.py +11 -7
  55. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  56. databao_context_engine/storage/migrate.py +2 -4
  57. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  58. databao_context_engine/storage/models.py +2 -23
  59. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  60. databao_context_engine/storage/repositories/factories.py +1 -12
  61. databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
  62. databao_context_engine/system/properties.py +4 -2
  63. databao_context_engine-0.1.3.dist-info/METADATA +75 -0
  64. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/RECORD +68 -77
  65. databao_context_engine/build_sources/internal/build_service.py +0 -77
  66. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  67. databao_context_engine/build_sources/internal/export_results.py +0 -43
  68. databao_context_engine/build_sources/public/api.py +0 -4
  69. databao_context_engine/databao_engine.py +0 -85
  70. databao_context_engine/datasource_config/__init__.py +0 -0
  71. databao_context_engine/datasource_config/add_config.py +0 -50
  72. databao_context_engine/datasource_config/datasource_context.py +0 -60
  73. databao_context_engine/project/runs.py +0 -39
  74. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  75. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  76. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  77. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  78. databao_context_engine/serialisation/__init__.py +0 -0
  79. databao_context_engine/services/run_name_policy.py +0 -8
  80. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  81. databao_context_engine/storage/repositories/run_repository.py +0 -157
  82. databao_context_engine-0.1.2.dist-info/METADATA +0 -187
  83. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  84. /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
  85. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/WHEEL +0 -0
  86. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/entry_points.txt +0 -0
@@ -1,77 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- from datetime import datetime
5
-
6
- from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext, execute
7
- from databao_context_engine.pluginlib.build_plugin import (
8
- BuildPlugin,
9
- )
10
- from databao_context_engine.project.types import PreparedDatasource
11
- from databao_context_engine.serialisation.yaml import to_yaml_string
12
- from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingService
13
- from databao_context_engine.storage.models import RunDTO
14
- from databao_context_engine.storage.repositories.datasource_run_repository import DatasourceRunRepository
15
- from databao_context_engine.storage.repositories.run_repository import RunRepository
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- class BuildService:
21
- def __init__(
22
- self,
23
- *,
24
- run_repo: RunRepository,
25
- datasource_run_repo: DatasourceRunRepository,
26
- chunk_embedding_service: ChunkEmbeddingService,
27
- ) -> None:
28
- self._run_repo = run_repo
29
- self._datasource_run_repo = datasource_run_repo
30
- self._chunk_embedding_service = chunk_embedding_service
31
-
32
- def start_run(self, *, project_id: str, dce_version: str) -> RunDTO:
33
- """
34
- Create a new run row and return (run_id, started_at).
35
- """
36
- return self._run_repo.create(project_id=project_id, dce_version=dce_version)
37
-
38
- def finalize_run(self, *, run_id: int):
39
- """
40
- Mark the run as complete (sets ended_at).
41
- """
42
- self._run_repo.update(run_id=run_id, ended_at=datetime.now())
43
-
44
- def process_prepared_source(
45
- self,
46
- *,
47
- run_id: int,
48
- prepared_source: PreparedDatasource,
49
- plugin: BuildPlugin,
50
- ) -> BuiltDatasourceContext:
51
- """
52
- Process a single source.
53
-
54
- 1) Execute the plugin
55
- 2) Divide the results into chunks
56
- 3) Embed and persist the chunks
57
- """
58
- result = execute(prepared_source, plugin)
59
-
60
- chunks = plugin.divide_context_into_chunks(result.context)
61
- if not chunks:
62
- logger.info("No chunks for %s — skipping.", prepared_source.path.name)
63
- return result
64
-
65
- datasource_run = self._datasource_run_repo.create(
66
- run_id=run_id,
67
- plugin=plugin.name,
68
- full_type=prepared_source.datasource_type.full_type,
69
- source_id=result.datasource_id,
70
- storage_directory=str(prepared_source.path.parent),
71
- )
72
-
73
- self._chunk_embedding_service.embed_chunks(
74
- datasource_run_id=datasource_run.datasource_run_id, chunks=chunks, result=to_yaml_string(result.context)
75
- )
76
-
77
- return result
@@ -1,52 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from databao_context_engine.build_sources.internal.build_runner import BuildContextResult, build
5
- from databao_context_engine.llm.factory import (
6
- create_ollama_description_provider,
7
- create_ollama_embedding_provider,
8
- create_ollama_service,
9
- )
10
- from databao_context_engine.project.info import get_dce_version
11
- from databao_context_engine.project.layout import ensure_project_dir
12
- from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
13
- from databao_context_engine.services.factories import (
14
- create_build_service,
15
- )
16
- from databao_context_engine.storage.connection import open_duckdb_connection
17
- from databao_context_engine.system.properties import get_db_path
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- def build_all_datasources(project_dir: Path, chunk_embedding_mode: ChunkEmbeddingMode) -> list[BuildContextResult]:
23
- """
24
- Public build entrypoint
25
- - Instantiates the build service
26
- - Delegates the actual build logic to the build runner
27
- """
28
- project_layout = ensure_project_dir(project_dir)
29
-
30
- logger.debug(f"Starting to build datasources in project {project_dir.resolve()}")
31
-
32
- with open_duckdb_connection(get_db_path()) as conn:
33
- ollama_service = create_ollama_service()
34
- embedding_provider = create_ollama_embedding_provider(ollama_service)
35
- description_provider = (
36
- create_ollama_description_provider(ollama_service)
37
- if chunk_embedding_mode.should_generate_description()
38
- else None
39
- )
40
- build_service = create_build_service(
41
- conn,
42
- embedding_provider=embedding_provider,
43
- description_provider=description_provider,
44
- chunk_embedding_mode=chunk_embedding_mode,
45
- )
46
- dce_config = project_layout.read_config_file()
47
- return build(
48
- project_dir=project_dir,
49
- build_service=build_service,
50
- project_id=str(dce_config.project_id),
51
- dce_version=get_dce_version(),
52
- )
@@ -1,43 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from databao_context_engine.build_sources.internal.plugin_execution import BuiltDatasourceContext
5
- from databao_context_engine.datasource_config.datasource_context import get_context_header_for_datasource
6
- from databao_context_engine.project.layout import ALL_RESULTS_FILE_NAME, get_output_dir
7
- from databao_context_engine.project.types import DatasourceId
8
- from databao_context_engine.serialisation.yaml import write_yaml_to_stream
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- def create_run_dir(project_dir: Path, run_name: str) -> Path:
14
- output_dir = get_output_dir(project_dir)
15
-
16
- run_dir = output_dir.joinpath(run_name)
17
- run_dir.mkdir(parents=True, exist_ok=False)
18
-
19
- return run_dir
20
-
21
-
22
- def export_build_result(run_dir: Path, result: BuiltDatasourceContext) -> Path:
23
- datasource_id = DatasourceId.from_string_repr(result.datasource_id)
24
- export_file_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())
25
-
26
- # Make sure the parent folder exists
27
- export_file_path.parent.mkdir(exist_ok=True)
28
-
29
- with export_file_path.open("w") as export_file:
30
- write_yaml_to_stream(data=result, file_stream=export_file)
31
-
32
- logger.info(f"Exported result to {export_file_path.resolve()}")
33
-
34
- return export_file_path
35
-
36
-
37
- def append_result_to_all_results(run_dir: Path, result: BuiltDatasourceContext):
38
- path = run_dir.joinpath(ALL_RESULTS_FILE_NAME)
39
- path.parent.mkdir(parents=True, exist_ok=True)
40
- with path.open("a", encoding="utf-8") as export_file:
41
- export_file.write(get_context_header_for_datasource(DatasourceId.from_string_repr(result.datasource_id)))
42
- write_yaml_to_stream(data=result, file_stream=export_file)
43
- export_file.write("\n")
@@ -1,4 +0,0 @@
1
- from databao_context_engine.build_sources.internal.build_runner import BuildContextResult
2
- from databao_context_engine.build_sources.internal.build_wiring import build_all_datasources
3
-
4
- __all__ = ["build_all_datasources", "BuildContextResult"]
@@ -1,85 +0,0 @@
1
- import os
2
- from dataclasses import dataclass
3
- from pathlib import Path
4
- from typing import Any
5
-
6
- from databao_context_engine.datasource_config.datasource_context import (
7
- DatasourceContext,
8
- get_all_contexts,
9
- get_context_header_for_datasource,
10
- get_datasource_context,
11
- )
12
- from databao_context_engine.pluginlib.build_plugin import DatasourceType
13
- from databao_context_engine.project.datasource_discovery import get_datasource_list
14
- from databao_context_engine.project.layout import ensure_project_dir
15
- from databao_context_engine.project.types import Datasource, DatasourceId
16
- from databao_context_engine.retrieve_embeddings.public.api import retrieve_embeddings
17
-
18
-
19
- @dataclass
20
- class ContextSearchResult:
21
- datasource_id: DatasourceId
22
- datasource_type: DatasourceType
23
- distance: float
24
- context_result: str
25
-
26
-
27
- class DatabaoContextEngine:
28
- project_dir: Path
29
-
30
- def __init__(self, project_dir: Path) -> None:
31
- self.project_layout = ensure_project_dir(project_dir=project_dir)
32
- self.project_dir = project_dir
33
-
34
- def get_datasource_list(self) -> list[Datasource]:
35
- # TODO: Should this return the list of built datasources rather than the list of datasources within the src folder?
36
- return get_datasource_list(self.project_dir)
37
-
38
- def get_datasource_context(self, datasource_id: DatasourceId, run_name: str | None = None) -> DatasourceContext:
39
- return get_datasource_context(
40
- project_layout=self.project_layout, datasource_id=datasource_id, run_name=run_name
41
- )
42
-
43
- def get_all_contexts(self, run_name: str | None = None) -> list[DatasourceContext]:
44
- return get_all_contexts(project_layout=self.project_layout, run_name=run_name)
45
-
46
- def get_all_contexts_formatted(self, run_name: str | None = None) -> str:
47
- all_contexts = self.get_all_contexts(run_name=run_name)
48
-
49
- all_results = os.linesep.join(
50
- [f"{get_context_header_for_datasource(context.datasource_id)}{context.context}" for context in all_contexts]
51
- )
52
-
53
- return all_results
54
-
55
- def search_context(
56
- self,
57
- retrieve_text: str,
58
- run_name: str | None,
59
- limit: int | None,
60
- export_to_file: bool,
61
- datasource_ids: list[DatasourceId] | None = None,
62
- ) -> list[ContextSearchResult]:
63
- # TODO: Filter with datasource_ids
64
- # TODO: Remove the need for a run_name
65
-
66
- results = retrieve_embeddings(
67
- project_layout=self.project_layout,
68
- retrieve_text=retrieve_text,
69
- run_name=run_name,
70
- limit=limit,
71
- export_to_file=export_to_file,
72
- )
73
-
74
- return [
75
- ContextSearchResult(
76
- datasource_id=result.datasource_id,
77
- datasource_type=result.datasource_type,
78
- distance=result.cosine_distance,
79
- context_result=result.display_text,
80
- )
81
- for result in results
82
- ]
83
-
84
- def run_sql(self, datasource_id: DatasourceId, sql: str, params: list[str]) -> dict[str, Any]:
85
- raise NotImplementedError("Running SQL is not supported yet")
File without changes
@@ -1,50 +0,0 @@
1
- from pathlib import Path
2
- from typing import Any
3
-
4
- from databao_context_engine.introspection.property_extract import get_property_list_from_type
5
- from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, DatasourceType
6
- from databao_context_engine.pluginlib.config import ConfigPropertyDefinition, CustomiseConfigProperties
7
- from databao_context_engine.plugins.plugin_loader import get_plugin_for_type
8
- from databao_context_engine.project.layout import (
9
- create_datasource_config_file as create_datasource_config_file_internal,
10
- )
11
- from databao_context_engine.project.types import DatasourceId
12
- from databao_context_engine.serialisation.yaml import to_yaml_string
13
-
14
-
15
- def get_config_file_structure_for_datasource_type(datasource_type: DatasourceType) -> list[ConfigPropertyDefinition]:
16
- plugin = get_plugin_for_type(datasource_type)
17
-
18
- if isinstance(plugin, CustomiseConfigProperties):
19
- return plugin.get_config_file_properties()
20
- elif isinstance(plugin, BuildDatasourcePlugin):
21
- return get_property_list_from_type(plugin.config_file_type)
22
- else:
23
- raise ValueError(
24
- f"Impossible to create a config for type {datasource_type.full_type}. The plugin for this type is not a BuildDatasourcePlugin or CustomiseConfigProperties"
25
- )
26
-
27
-
28
- def create_datasource_config_file(
29
- project_dir: Path,
30
- datasource_type: DatasourceType,
31
- datasource_name: str,
32
- config_content: dict[str, Any],
33
- overwrite_existing: bool,
34
- ) -> Path:
35
- basic_config = {"type": datasource_type.subtype, "name": datasource_name}
36
-
37
- return create_datasource_config_file_internal(
38
- project_dir,
39
- get_datasource_id_for_config_file(datasource_type, datasource_name),
40
- to_yaml_string(basic_config | config_content),
41
- overwrite_existing=overwrite_existing,
42
- )
43
-
44
-
45
- def get_datasource_id_for_config_file(datasource_type: DatasourceType, datasource_name: str) -> DatasourceId:
46
- return DatasourceId(
47
- datasource_config_folder=datasource_type.config_folder,
48
- datasource_name=datasource_name,
49
- config_file_suffix=".yaml",
50
- )
@@ -1,60 +0,0 @@
1
- import os
2
- from dataclasses import dataclass
3
- from pathlib import Path
4
-
5
- from databao_context_engine.project.layout import ProjectLayout
6
- from databao_context_engine.project.runs import get_run_dir, resolve_run_name
7
- from databao_context_engine.project.types import DatasourceId
8
-
9
-
10
- @dataclass(eq=True, frozen=True)
11
- class DatasourceContext:
12
- datasource_id: DatasourceId
13
- # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
14
- context: str
15
-
16
-
17
- def get_datasource_context(
18
- project_layout: ProjectLayout, datasource_id: DatasourceId, run_name: str | None = None
19
- ) -> DatasourceContext:
20
- run_dir = _resolve_run_dir(project_layout, run_name)
21
-
22
- context_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())
23
- if not context_path.is_file():
24
- raise ValueError(f"Context file not found for datasource {str(datasource_id)} in run {run_dir.name}")
25
-
26
- context = context_path.read_text()
27
- return DatasourceContext(datasource_id=datasource_id, context=context)
28
-
29
-
30
- def get_all_contexts(project_layout: ProjectLayout, run_name: str | None = None) -> list[DatasourceContext]:
31
- run_dir = _resolve_run_dir(project_layout, run_name)
32
-
33
- result = []
34
- for main_type_dir in sorted((p for p in run_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
35
- for context_path in sorted(
36
- (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
37
- ):
38
- result.append(
39
- DatasourceContext(
40
- # FIXME: The extension will always be yaml here even if the datasource is a file with a different extension
41
- datasource_id=DatasourceId.from_datasource_config_file_path(context_path),
42
- context=context_path.read_text(),
43
- )
44
- )
45
-
46
- return result
47
-
48
-
49
- def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
50
- return f"# ===== {str(datasource_id)} ====={os.linesep}"
51
-
52
-
53
- def _resolve_run_dir(project_layout: ProjectLayout, run_name: str | None) -> Path:
54
- resolved_run_name = resolve_run_name(project_layout=project_layout, run_name=run_name)
55
-
56
- run_dir = get_run_dir(project_dir=project_layout.project_dir, run_name=resolved_run_name)
57
- if not run_dir.is_dir():
58
- raise ValueError(f"Run {resolved_run_name} does not exist at {run_dir.resolve()}")
59
-
60
- return run_dir
@@ -1,39 +0,0 @@
1
- from pathlib import Path
2
-
3
- from databao_context_engine.project.layout import ProjectLayout, get_output_dir
4
- from databao_context_engine.storage.connection import open_duckdb_connection
5
- from databao_context_engine.storage.repositories.factories import create_run_repository
6
- from databao_context_engine.storage.repositories.run_repository import RunRepository
7
- from databao_context_engine.system.properties import get_db_path
8
-
9
-
10
- def resolve_run_name(*, project_layout: ProjectLayout, run_name: str | None) -> str:
11
- project_id = str(project_layout.read_config_file().project_id)
12
-
13
- with open_duckdb_connection(get_db_path()) as conn:
14
- run_repository = create_run_repository(conn)
15
-
16
- return resolve_run_name_from_repo(run_repository=run_repository, project_id=project_id, run_name=run_name)
17
-
18
-
19
- def resolve_run_name_from_repo(*, run_repository: RunRepository, project_id: str, run_name: str | None) -> str:
20
- if run_name is None:
21
- latest = run_repository.get_latest_run_for_project(project_id=project_id)
22
- if latest is None:
23
- raise LookupError(f"No runs found for project '{project_id}'. Run a build first.")
24
- return latest.run_name
25
- else:
26
- run = run_repository.get_by_run_name(project_id=project_id, run_name=run_name)
27
- if run is None:
28
- raise LookupError(f"Run '{run_name}' not found for project '{project_id}'.")
29
- return run.run_name
30
-
31
-
32
- def get_run_dir(project_dir: Path, run_name: str) -> Path:
33
- run_dir = get_output_dir(project_dir).joinpath(run_name)
34
- if not run_dir.is_dir():
35
- raise ValueError(
36
- f"The run with name {run_name} doesn't exist in the project. [project_dir: {project_dir.resolve()}]"
37
- )
38
-
39
- return run_dir
@@ -1,29 +0,0 @@
1
- from databao_context_engine.llm.factory import create_ollama_embedding_provider, create_ollama_service
2
- from databao_context_engine.project.layout import ProjectLayout
3
- from databao_context_engine.retrieve_embeddings.internal.retrieve_runner import retrieve
4
- from databao_context_engine.services.factories import create_retrieve_service
5
- from databao_context_engine.storage.connection import open_duckdb_connection
6
- from databao_context_engine.storage.repositories.vector_search_repository import VectorSearchResult
7
- from databao_context_engine.system.properties import get_db_path
8
-
9
-
10
- def retrieve_embeddings(
11
- project_layout: ProjectLayout,
12
- retrieve_text: str,
13
- run_name: str | None,
14
- limit: int | None,
15
- export_to_file: bool,
16
- ) -> list[VectorSearchResult]:
17
- with open_duckdb_connection(get_db_path()) as conn:
18
- ollama_service = create_ollama_service()
19
- embedding_provider = create_ollama_embedding_provider(ollama_service)
20
- retrieve_service = create_retrieve_service(conn, embedding_provider=embedding_provider)
21
- return retrieve(
22
- project_dir=project_layout.project_dir,
23
- retrieve_service=retrieve_service,
24
- project_id=str(project_layout.read_config_file().project_id),
25
- text=retrieve_text,
26
- run_name=run_name,
27
- limit=limit,
28
- export_to_file=export_to_file,
29
- )
@@ -1,3 +0,0 @@
1
- from databao_context_engine.retrieve_embeddings.internal.retrieve_wiring import retrieve_embeddings
2
-
3
- __all__ = ["retrieve_embeddings"]
File without changes
@@ -1,8 +0,0 @@
1
- from datetime import datetime
2
-
3
-
4
- class RunNamePolicy:
5
- _RUN_DIR_PREFIX = "run-"
6
-
7
- def build(self, *, run_started_at: datetime):
8
- return f"{RunNamePolicy._RUN_DIR_PREFIX}{run_started_at.isoformat(timespec='seconds')}"
@@ -1,136 +0,0 @@
1
- from typing import Any, Optional, Tuple
2
-
3
- import duckdb
4
- from _duckdb import ConstraintException
5
-
6
- from databao_context_engine.storage.exceptions.exceptions import IntegrityError
7
- from databao_context_engine.storage.models import DatasourceRunDTO
8
-
9
-
10
- class DatasourceRunRepository:
11
- def __init__(self, conn: duckdb.DuckDBPyConnection):
12
- self._conn = conn
13
-
14
- def create(
15
- self,
16
- *,
17
- run_id: int,
18
- plugin: str,
19
- full_type: str,
20
- source_id: str,
21
- storage_directory: str,
22
- ) -> DatasourceRunDTO:
23
- try:
24
- row = self._conn.execute(
25
- """
26
- INSERT INTO
27
- datasource_run(run_id, plugin, full_type, source_id, storage_directory)
28
- VALUES
29
- (?, ?, ?, ?, ?)
30
- RETURNING
31
- *
32
- """,
33
- [run_id, plugin, full_type, source_id, storage_directory],
34
- ).fetchone()
35
- if row is None:
36
- raise RuntimeError("datasource_run creation returned no object")
37
- return self._row_to_dto(row)
38
- except ConstraintException as e:
39
- raise IntegrityError from e
40
-
41
- def get(self, datasource_run_id: int) -> Optional[DatasourceRunDTO]:
42
- row = self._conn.execute(
43
- """
44
- SELECT
45
- *
46
- FROM
47
- datasource_run
48
- WHERE
49
- datasource_run_id = ?
50
- """,
51
- [datasource_run_id],
52
- ).fetchone()
53
- return self._row_to_dto(row) if row else None
54
-
55
- def update(
56
- self,
57
- datasource_run_id: int,
58
- *,
59
- plugin: Optional[str] = None,
60
- full_type: Optional[str] = None,
61
- source_id: Optional[str] = None,
62
- storage_directory: Optional[str] = None,
63
- ) -> Optional[DatasourceRunDTO]:
64
- sets: list[Any] = []
65
- params: list[Any] = []
66
-
67
- if plugin is not None:
68
- sets.append("plugin = ?")
69
- params.append(plugin)
70
- if full_type is not None:
71
- sets.append("full_type = ?")
72
- params.append(full_type)
73
- if source_id is not None:
74
- sets.append("source_id = ?")
75
- params.append(source_id)
76
- if storage_directory is not None:
77
- sets.append("storage_directory = ?")
78
- params.append(storage_directory)
79
-
80
- if not sets:
81
- return self.get(datasource_run_id)
82
-
83
- params.append(datasource_run_id)
84
- self._conn.execute(
85
- f"""
86
- UPDATE
87
- datasource_run
88
- SET
89
- {", ".join(sets)}
90
- WHERE
91
- datasource_run_id = ?
92
- """,
93
- params,
94
- )
95
-
96
- return self.get(datasource_run_id)
97
-
98
- def delete(self, datasource_run_id: int) -> int:
99
- row = self._conn.execute(
100
- """
101
- DELETE FROM
102
- datasource_run
103
- WHERE
104
- datasource_run_id = ?
105
- RETURNING
106
- datasource_run_id
107
- """,
108
- [datasource_run_id],
109
- ).fetchone()
110
- return 1 if row else 0
111
-
112
- def list(self) -> list[DatasourceRunDTO]:
113
- rows = self._conn.execute(
114
- """
115
- SELECT
116
- *
117
- FROM
118
- datasource_run
119
- ORDER BY
120
- datasource_run_id DESC
121
- """
122
- ).fetchall()
123
- return [self._row_to_dto(r) for r in rows]
124
-
125
- @staticmethod
126
- def _row_to_dto(row: Tuple) -> DatasourceRunDTO:
127
- datasource_run_id, run_id, plugin, source_id, storage_directory, created_at, full_type = row
128
- return DatasourceRunDTO(
129
- datasource_run_id=int(datasource_run_id),
130
- run_id=int(run_id),
131
- plugin=str(plugin),
132
- full_type=str(full_type),
133
- source_id=str(source_id),
134
- storage_directory=str(storage_directory),
135
- created_at=created_at,
136
- )