databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +32 -7
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +82 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
- databao_context_engine/cli/add_datasource_config.py +49 -44
- databao_context_engine/cli/commands.py +40 -55
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +127 -0
- databao_context_engine/databao_context_project_manager.py +147 -30
- databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
- databao_context_engine/datasources/datasource_context.py +90 -0
- databao_context_engine/datasources/datasource_discovery.py +143 -0
- databao_context_engine/datasources/types.py +194 -0
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +76 -57
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +26 -30
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +9 -11
- databao_context_engine/plugin_loader.py +110 -0
- databao_context_engine/pluginlib/build_plugin.py +12 -29
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
- databao_context_engine/plugins/databases/base_introspector.py +11 -12
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +58 -52
- databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +14 -15
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
- databao_context_engine/serialization/__init__.py +0 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +3 -5
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.5.dist-info/METADATA +228 -0
- databao_context_engine-0.1.5.dist-info/RECORD +135 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/project/datasource_discovery.py +0 -141
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/project/types.py +0 -134
- databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- databao_context_engine-0.1.1.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
- /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
- /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from enum import StrEnum
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Any
|
|
6
|
-
|
|
7
|
-
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class DatasourceKind(StrEnum):
|
|
11
|
-
CONFIG = "config"
|
|
12
|
-
FILE = "file"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass(frozen=True)
class DatasourceDescriptor:
    """Immutable record pairing a datasource path with its kind and main type."""

    # Path associated with the datasource.
    path: Path
    # Which kind of datasource this is (see DatasourceKind).
    kind: DatasourceKind
    # Main type, kept as a plain string.
    # NOTE(review): exact meaning is not visible from this module - confirm with callers.
    main_type: str
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass(frozen=True)
class PreparedConfig:
    """Immutable record for a prepared datasource that carries a config mapping."""

    # Type of the datasource.
    datasource_type: DatasourceType
    # Path associated with the datasource.
    # NOTE(review): presumably the config file path - confirm with callers.
    path: Path
    # Config content as a mapping.
    config: dict[Any, Any]
    # Name of the datasource.
    datasource_name: str
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
@dataclass(frozen=True)
class PreparedFile:
    """Immutable record for a prepared datasource identified only by type and path."""

    # Type of the datasource.
    datasource_type: DatasourceType
    # Path associated with the datasource.
    # NOTE(review): presumably the raw file itself - confirm with callers.
    path: Path
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# Union alias: a prepared datasource is either a PreparedConfig or a PreparedFile.
PreparedDatasource = PreparedConfig | PreparedFile
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@dataclass(kw_only=True, frozen=True, eq=True)
|
|
40
|
-
class DatasourceId:
|
|
41
|
-
"""
|
|
42
|
-
The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
|
|
43
|
-
|
|
44
|
-
e.g: "databases/my_postgres_datasource.yaml"
|
|
45
|
-
|
|
46
|
-
Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
|
|
47
|
-
"""
|
|
48
|
-
|
|
49
|
-
datasource_config_folder: str
|
|
50
|
-
datasource_name: str
|
|
51
|
-
config_file_suffix: str
|
|
52
|
-
|
|
53
|
-
def __post_init__(self):
|
|
54
|
-
if not self.datasource_config_folder.strip():
|
|
55
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_config_folder must not be empty")
|
|
56
|
-
if not self.datasource_name.strip():
|
|
57
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not be empty")
|
|
58
|
-
if not self.config_file_suffix.strip():
|
|
59
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): config_file_suffix must not be empty")
|
|
60
|
-
|
|
61
|
-
if os.sep in self.datasource_config_folder:
|
|
62
|
-
raise ValueError(
|
|
63
|
-
f"Invalid DatasourceId ({str(self)}): datasource_config_folder must not contain a path separator"
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
if os.sep in self.datasource_name:
|
|
67
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not contain a path separator")
|
|
68
|
-
|
|
69
|
-
if not self.config_file_suffix.startswith("."):
|
|
70
|
-
raise ValueError(
|
|
71
|
-
f'Invalid DatasourceId ({str(self)}): config_file_suffix must start with a dot "." (e.g.: .yaml)'
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
if self.datasource_name.endswith(self.config_file_suffix):
|
|
75
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not contain the file suffix")
|
|
76
|
-
|
|
77
|
-
def __str__(self):
|
|
78
|
-
return str(self.relative_path_to_config_file())
|
|
79
|
-
|
|
80
|
-
def relative_path_to_config_file(self) -> Path:
|
|
81
|
-
"""
|
|
82
|
-
Returns a path to the config file for this datasource.
|
|
83
|
-
|
|
84
|
-
The returned path is relative to the src folder in the project.
|
|
85
|
-
"""
|
|
86
|
-
return Path(self.datasource_config_folder).joinpath(self.datasource_name + self.config_file_suffix)
|
|
87
|
-
|
|
88
|
-
def relative_path_to_context_file(self) -> Path:
|
|
89
|
-
"""
|
|
90
|
-
Returns a path to the config file for this datasource.
|
|
91
|
-
|
|
92
|
-
The returned path is relative to an output run folder in the project.
|
|
93
|
-
"""
|
|
94
|
-
# Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
|
|
95
|
-
suffix = ".yaml" if self.config_file_suffix == ".yaml" else (self.config_file_suffix + ".yaml")
|
|
96
|
-
|
|
97
|
-
return Path(self.datasource_config_folder).joinpath(self.datasource_name + suffix)
|
|
98
|
-
|
|
99
|
-
@classmethod
|
|
100
|
-
def from_string_repr(cls, datasource_id_as_string: str):
|
|
101
|
-
"""
|
|
102
|
-
Creates a DatasourceId from a string representation.
|
|
103
|
-
|
|
104
|
-
The string representation of a DatasourceId is the path to the datasource's config file relative to the src folder in the project.
|
|
105
|
-
|
|
106
|
-
e.g: "databases/my_postgres_datasource.yaml"
|
|
107
|
-
"""
|
|
108
|
-
config_file_path = Path(datasource_id_as_string)
|
|
109
|
-
|
|
110
|
-
if len(config_file_path.parents) > 2:
|
|
111
|
-
raise ValueError(
|
|
112
|
-
f"Invalid string representation of a DatasourceId: too many parent folders defined in {datasource_id_as_string}"
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
return DatasourceId.from_datasource_config_file_path(config_file_path)
|
|
116
|
-
|
|
117
|
-
@classmethod
|
|
118
|
-
def from_datasource_config_file_path(cls, datasource_config_file: Path):
|
|
119
|
-
"""
|
|
120
|
-
Creates a DatasourceId from a config file path.
|
|
121
|
-
|
|
122
|
-
The `datasource_config_file` path provided can either be the config file path relative to the src folder or the full path to the config file.
|
|
123
|
-
"""
|
|
124
|
-
return DatasourceId(
|
|
125
|
-
datasource_config_folder=datasource_config_file.parent.name,
|
|
126
|
-
datasource_name=datasource_config_file.stem,
|
|
127
|
-
config_file_suffix=datasource_config_file.suffix,
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
@dataclass
class Datasource:
    """A datasource, described by its identifier and its type."""

    # Identifier of the datasource (see DatasourceId).
    id: DatasourceId
    # Type of the datasource.
    type: DatasourceType
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def export_retrieve_results(run_dir: Path, retrieve_results: list[str]) -> Path:
    """Write the retrieved results to ``context_duckdb.yaml`` inside `run_dir`.

    Each result is written followed by a newline. An existing file is overwritten.

    Args:
        run_dir: Directory in which the export file is created.
        retrieve_results: Texts to write, in order.

    Returns:
        The path of the file that was written.
    """
    path = run_dir.joinpath("context_duckdb.yaml")

    # Explicit UTF-8: without it the locale encoding is used, which can fail on non-ASCII results.
    with path.open("w", encoding="utf-8") as export_file:
        export_file.writelines(result + "\n" for result in retrieve_results)

    return path
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
from databao_context_engine.project.runs import get_run_dir
|
|
5
|
-
from databao_context_engine.retrieve_embeddings.internal.export_results import export_retrieve_results
|
|
6
|
-
from databao_context_engine.retrieve_embeddings.internal.retrieve_service import RetrieveService
|
|
7
|
-
from databao_context_engine.storage.repositories.vector_search_repository import VectorSearchResult
|
|
8
|
-
|
|
9
|
-
logger = logging.getLogger(__name__)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def retrieve(
    project_dir: Path,
    *,
    retrieve_service: RetrieveService,
    project_id: str,
    text: str,
    run_name: str | None,
    limit: int | None,
    export_to_file: bool,
) -> list[VectorSearchResult]:
    """Run a retrieval through `retrieve_service` and optionally export the results.

    The run name is first resolved by the service, then used for the search.
    When `export_to_file` is true, the `display_text` of every result is written
    to a file in the run directory and the file location is logged.

    Returns:
        The results returned by the service, whether or not they were exported.
    """
    effective_run_name = retrieve_service.resolve_run_name(project_id=project_id, run_name=run_name)
    hits = retrieve_service.retrieve(
        project_id=project_id,
        text=text,
        run_name=effective_run_name,
        limit=limit,
    )

    # Nothing to write: hand the results straight back.
    if not export_to_file:
        return hits

    target_dir = get_run_dir(project_dir=project_dir, run_name=effective_run_name)
    written_to = export_retrieve_results(target_dir, [hit.display_text for hit in hits])
    logger.info(f"Exported results to {written_to}")

    return hits
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
from databao_context_engine.llm.factory import create_ollama_embedding_provider, create_ollama_service
|
|
2
|
-
from databao_context_engine.project.layout import ProjectLayout
|
|
3
|
-
from databao_context_engine.retrieve_embeddings.internal.retrieve_runner import retrieve
|
|
4
|
-
from databao_context_engine.services.factories import create_retrieve_service
|
|
5
|
-
from databao_context_engine.storage.connection import open_duckdb_connection
|
|
6
|
-
from databao_context_engine.storage.repositories.vector_search_repository import VectorSearchResult
|
|
7
|
-
from databao_context_engine.system.properties import get_db_path
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def retrieve_embeddings(
    project_layout: ProjectLayout,
    retrieve_text: str,
    run_name: str | None,
    limit: int | None,
    export_to_file: bool,
) -> list[VectorSearchResult]:
    """Wire up the retrieval dependencies and run `retrieve` for a project.

    A DuckDB connection is opened on the path given by `get_db_path()` and kept
    open for the duration of the call. The retrieve service is built on top of
    that connection and an Ollama embedding provider.

    Returns:
        Whatever `retrieve` returns for the given text, run name and limit.
    """
    with open_duckdb_connection(get_db_path()) as conn:
        embedding_provider = create_ollama_embedding_provider(create_ollama_service())
        service = create_retrieve_service(conn, embedding_provider=embedding_provider)

        project_dir = project_layout.project_dir
        # The project id is read from the project's config file and passed on as a string.
        project_id = str(project_layout.read_config_file().project_id)

        return retrieve(
            project_dir=project_dir,
            retrieve_service=service,
            project_id=project_id,
            text=retrieve_text,
            run_name=run_name,
            limit=limit,
            export_to_file=export_to_file,
        )
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
from typing import Any, Optional, Tuple
|
|
2
|
-
|
|
3
|
-
import duckdb
|
|
4
|
-
from _duckdb import ConstraintException
|
|
5
|
-
|
|
6
|
-
from databao_context_engine.storage.exceptions.exceptions import IntegrityError
|
|
7
|
-
from databao_context_engine.storage.models import DatasourceRunDTO
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class DatasourceRunRepository:
    """Create/read/update/delete access to the ``datasource_run`` table over a DuckDB connection."""

    def __init__(self, conn: duckdb.DuckDBPyConnection):
        self._conn = conn

    def create(
        self,
        *,
        run_id: int,
        plugin: str,
        full_type: str,
        source_id: str,
        storage_directory: str,
    ) -> DatasourceRunDTO:
        """Insert a row and return it as a DTO.

        Raises:
            IntegrityError: when the insert raises a ``ConstraintException``.
            RuntimeError: when the insert returns no row.
        """
        insert_sql = """
            INSERT INTO
                datasource_run(run_id, plugin, full_type, source_id, storage_directory)
            VALUES
                (?, ?, ?, ?, ?)
            RETURNING
                *
            """
        values = [run_id, plugin, full_type, source_id, storage_directory]
        try:
            inserted = self._conn.execute(insert_sql, values).fetchone()
            if inserted is None:
                raise RuntimeError("datasource_run creation returned no object")
            return self._row_to_dto(inserted)
        except ConstraintException as e:
            raise IntegrityError from e

    def get(self, datasource_run_id: int) -> Optional[DatasourceRunDTO]:
        """Return the row with the given id, or ``None`` when there is none."""
        select_sql = """
            SELECT
                *
            FROM
                datasource_run
            WHERE
                datasource_run_id = ?
            """
        found = self._conn.execute(select_sql, [datasource_run_id]).fetchone()
        if not found:
            return None
        return self._row_to_dto(found)

    def update(
        self,
        datasource_run_id: int,
        *,
        plugin: Optional[str] = None,
        full_type: Optional[str] = None,
        source_id: Optional[str] = None,
        storage_directory: Optional[str] = None,
    ) -> Optional[DatasourceRunDTO]:
        """Update the columns whose argument is not ``None`` and return the row afterwards.

        When every argument is ``None`` no UPDATE is issued and the current row is returned.
        """
        requested = {
            "plugin": plugin,
            "full_type": full_type,
            "source_id": source_id,
            "storage_directory": storage_directory,
        }
        # Only column names defined above are interpolated into the SQL; values stay bound parameters.
        changes = {column: value for column, value in requested.items() if value is not None}

        if changes:
            assignments = ", ".join(f"{column} = ?" for column in changes)
            self._conn.execute(
                f"""
                UPDATE
                    datasource_run
                SET
                    {assignments}
                WHERE
                    datasource_run_id = ?
                """,
                [*changes.values(), datasource_run_id],
            )

        return self.get(datasource_run_id)

    def delete(self, datasource_run_id: int) -> int:
        """Delete the row with the given id; return 1 when a row was deleted, 0 otherwise."""
        delete_sql = """
            DELETE FROM
                datasource_run
            WHERE
                datasource_run_id = ?
            RETURNING
                datasource_run_id
            """
        removed = self._conn.execute(delete_sql, [datasource_run_id]).fetchone()
        return int(bool(removed))

    def list(self) -> list[DatasourceRunDTO]:
        """Return every row, ordered by ``datasource_run_id`` descending."""
        select_all_sql = """
            SELECT
                *
            FROM
                datasource_run
            ORDER BY
                datasource_run_id DESC
            """
        fetched = self._conn.execute(select_all_sql).fetchall()
        return [self._row_to_dto(record) for record in fetched]

    @staticmethod
    def _row_to_dto(row: Tuple) -> DatasourceRunDTO:
        """Convert a 7-item row into a DTO.

        The unpacking is positional, so it relies on the column order returned by ``*``.
        """
        (
            raw_id,
            raw_run_id,
            raw_plugin,
            raw_source_id,
            raw_storage_directory,
            created_at,
            raw_full_type,
        ) = row
        return DatasourceRunDTO(
            datasource_run_id=int(raw_id),
            run_id=int(raw_run_id),
            plugin=str(raw_plugin),
            full_type=str(raw_full_type),
            source_id=str(raw_source_id),
            storage_directory=str(raw_storage_directory),
            created_at=created_at,
        )
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from typing import Any, Optional
|
|
3
|
-
|
|
4
|
-
import duckdb
|
|
5
|
-
|
|
6
|
-
from databao_context_engine.services.run_name_policy import RunNamePolicy
|
|
7
|
-
from databao_context_engine.storage.models import RunDTO
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class RunRepository:
    """Create/read/update/delete access to the ``run`` table over a DuckDB connection."""

    def __init__(self, conn: duckdb.DuckDBPyConnection, run_name_policy: RunNamePolicy):
        self._conn = conn
        self._run_name_policy = run_name_policy

    def create(
        self, *, project_id: str, dce_version: Optional[str] = None, started_at: datetime | None = None
    ) -> RunDTO:
        """Insert a run and return it as a DTO.

        When `started_at` is ``None`` the current ``datetime.now()`` is used.
        The run name is built by the run name policy from that start time.

        Raises:
            RuntimeError: when the insert returns no row.
        """
        effective_start = started_at if started_at is not None else datetime.now()
        run_name = self._run_name_policy.build(run_started_at=effective_start)

        insert_sql = """
            INSERT INTO
                run (project_id, nemory_version, started_at, run_name)
            VALUES
                (?, ?, ?, ?)
            RETURNING
                *
            """
        inserted = self._conn.execute(
            insert_sql, [project_id, dce_version, effective_start, run_name]
        ).fetchone()
        if inserted is None:
            raise RuntimeError("Run creation returned no object")
        return self._row_to_dto(inserted)

    def get(self, run_id: int) -> Optional[RunDTO]:
        """Return the run with the given id, or ``None`` when there is none."""
        select_sql = """
            SELECT
                *
            FROM
                run
            WHERE
                run_id = ?
            """
        found = self._conn.execute(select_sql, [run_id]).fetchone()
        if not found:
            return None
        return self._row_to_dto(found)

    def get_by_run_name(self, *, project_id: str, run_name: str) -> RunDTO | None:
        """Return the run matching both project id and run name, or ``None``."""
        select_sql = """
            SELECT
                *
            FROM
                run
            WHERE
                run.project_id = ? AND run_name = ?
            """
        found = self._conn.execute(select_sql, [project_id, run_name]).fetchone()
        if not found:
            return None
        return self._row_to_dto(found)

    def get_latest_run_for_project(self, project_id: str) -> RunDTO | None:
        """Return the project's run with the greatest ``started_at``, or ``None``."""
        select_sql = """
            SELECT
                *
            FROM
                run
            WHERE
                run.project_id = ?
            ORDER BY run.started_at DESC
            LIMIT 1
            """
        found = self._conn.execute(select_sql, [project_id]).fetchone()
        if not found:
            return None
        return self._row_to_dto(found)

    def update(
        self,
        run_id: int,
        *,
        project_id: Optional[str] = None,
        ended_at: Optional[datetime] = None,
        dce_version: Optional[str] = None,
    ) -> Optional[RunDTO]:
        """Update the columns whose argument is not ``None`` and return the run afterwards.

        When every argument is ``None`` no UPDATE is issued and the current row is returned.
        """
        requested = {
            "project_id": project_id,
            "ended_at": ended_at,
            # The version is stored in the "nemory_version" column.
            "nemory_version": dce_version,
        }
        # Only column names defined above are interpolated into the SQL; values stay bound parameters.
        changes = {column: value for column, value in requested.items() if value is not None}

        if changes:
            assignments = ", ".join(f"{column} = ?" for column in changes)
            self._conn.execute(
                f"""
                UPDATE
                    run
                SET
                    {assignments}
                WHERE
                    run_id = ?
                """,
                [*changes.values(), run_id],
            )

        return self.get(run_id)

    def delete(self, run_id: int) -> int:
        """Delete the run with the given id; return 1 when a row was deleted, 0 otherwise."""
        delete_sql = """
            DELETE FROM
                run
            WHERE
                run_id = ?
            RETURNING
                run_id
            """
        removed = self._conn.execute(delete_sql, [run_id]).fetchone()
        return int(bool(removed))

    def list(self) -> list[RunDTO]:
        """Return every run, ordered by ``run_id`` descending."""
        select_all_sql = """
            SELECT
                *
            FROM
                run
            ORDER BY
                run_id DESC
            """
        fetched = self._conn.execute(select_all_sql).fetchall()
        return [self._row_to_dto(record) for record in fetched]

    @staticmethod
    def _row_to_dto(row: tuple) -> RunDTO:
        """Convert a 6-item row into a DTO.

        The unpacking is positional, so it relies on the column order returned by ``*``.
        """
        raw_run_id, raw_project_id, started_at, ended_at, dce_version, run_name = row
        return RunDTO(
            run_id=int(raw_run_id),
            run_name=run_name,
            project_id=str(raw_project_id),
            started_at=started_at,
            ended_at=ended_at,
            nemory_version=dce_version,
        )
|