databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +32 -7
- databao_context_engine/build_sources/__init__.py +4 -0
- databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
- databao_context_engine/build_sources/build_service.py +53 -0
- databao_context_engine/build_sources/build_wiring.py +82 -0
- databao_context_engine/build_sources/export_results.py +41 -0
- databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
- databao_context_engine/cli/add_datasource_config.py +49 -44
- databao_context_engine/cli/commands.py +40 -55
- databao_context_engine/cli/info.py +3 -2
- databao_context_engine/databao_context_engine.py +127 -0
- databao_context_engine/databao_context_project_manager.py +147 -30
- databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
- databao_context_engine/datasources/datasource_context.py +90 -0
- databao_context_engine/datasources/datasource_discovery.py +143 -0
- databao_context_engine/datasources/types.py +194 -0
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/init_project.py +25 -3
- databao_context_engine/introspection/property_extract.py +76 -57
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/errors.py +2 -8
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +26 -30
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/llm/service.py +1 -3
- databao_context_engine/mcp/mcp_runner.py +4 -2
- databao_context_engine/mcp/mcp_server.py +9 -11
- databao_context_engine/plugin_loader.py +110 -0
- databao_context_engine/pluginlib/build_plugin.py +12 -29
- databao_context_engine/pluginlib/config.py +16 -2
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
- databao_context_engine/plugins/databases/base_introspector.py +11 -12
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
- databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
- databao_context_engine/plugins/databases/introspection_scope.py +11 -9
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/duckdb_tools.py +18 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +58 -52
- databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/info.py +34 -2
- databao_context_engine/project/init_project.py +16 -7
- databao_context_engine/project/layout.py +14 -15
- databao_context_engine/retrieve_embeddings/__init__.py +3 -0
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
- databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
- databao_context_engine/serialization/__init__.py +0 -0
- databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
- databao_context_engine/services/chunk_embedding_service.py +23 -11
- databao_context_engine/services/factories.py +1 -46
- databao_context_engine/services/persistence_service.py +11 -11
- databao_context_engine/storage/connection.py +11 -7
- databao_context_engine/storage/exceptions/exceptions.py +2 -2
- databao_context_engine/storage/migrate.py +3 -5
- databao_context_engine/storage/migrations/V01__init.sql +6 -31
- databao_context_engine/storage/models.py +2 -23
- databao_context_engine/storage/repositories/chunk_repository.py +16 -12
- databao_context_engine/storage/repositories/factories.py +1 -12
- databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
- databao_context_engine/system/properties.py +4 -2
- databao_context_engine-0.1.5.dist-info/METADATA +228 -0
- databao_context_engine-0.1.5.dist-info/RECORD +135 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
- databao_context_engine/build_sources/internal/build_service.py +0 -77
- databao_context_engine/build_sources/internal/build_wiring.py +0 -52
- databao_context_engine/build_sources/internal/export_results.py +0 -43
- databao_context_engine/build_sources/public/api.py +0 -4
- databao_context_engine/databao_engine.py +0 -85
- databao_context_engine/datasource_config/add_config.py +0 -50
- databao_context_engine/datasource_config/datasource_context.py +0 -60
- databao_context_engine/mcp/all_results_tool.py +0 -5
- databao_context_engine/mcp/retrieve_tool.py +0 -22
- databao_context_engine/plugins/databases/athena_introspector.py +0 -101
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/project/datasource_discovery.py +0 -141
- databao_context_engine/project/runs.py +0 -39
- databao_context_engine/project/types.py +0 -134
- databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
- databao_context_engine/retrieve_embeddings/public/api.py +0 -3
- databao_context_engine/services/run_name_policy.py +0 -8
- databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
- databao_context_engine/storage/repositories/run_repository.py +0 -157
- databao_context_engine-0.1.1.dist-info/METADATA +0 -186
- databao_context_engine-0.1.1.dist-info/RECORD +0 -135
- /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
- /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
- /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
- /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
- /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
- {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
|
@@ -2,25 +2,26 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from enum import Enum
|
|
5
|
-
from pathlib import Path
|
|
6
5
|
|
|
7
6
|
from pydantic import ValidationError
|
|
8
7
|
|
|
9
|
-
from databao_context_engine.
|
|
10
|
-
from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
|
|
11
|
-
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
12
|
-
from databao_context_engine.project.datasource_discovery import (
|
|
8
|
+
from databao_context_engine.datasources.datasource_discovery import (
|
|
13
9
|
discover_datasources,
|
|
14
10
|
get_datasource_descriptors,
|
|
15
11
|
prepare_source,
|
|
16
12
|
)
|
|
17
|
-
from databao_context_engine.
|
|
18
|
-
from databao_context_engine.
|
|
13
|
+
from databao_context_engine.datasources.types import DatasourceId, PreparedConfig
|
|
14
|
+
from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
|
|
15
|
+
from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
|
|
16
|
+
from databao_context_engine.plugins.plugin_loader import load_plugins
|
|
17
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
19
18
|
|
|
20
19
|
logger = logging.getLogger(__name__)
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class DatasourceConnectionStatus(Enum):
|
|
23
|
+
"""Status of the connection to a datasource."""
|
|
24
|
+
|
|
24
25
|
VALID = "Valid"
|
|
25
26
|
INVALID = "Invalid"
|
|
26
27
|
UNKNOWN = "Unknown"
|
|
@@ -28,6 +29,15 @@ class DatasourceConnectionStatus(Enum):
|
|
|
28
29
|
|
|
29
30
|
@dataclass(kw_only=True)
|
|
30
31
|
class CheckDatasourceConnectionResult:
|
|
32
|
+
"""Result of checking the connection status of a datasource.
|
|
33
|
+
|
|
34
|
+
Attributes:
|
|
35
|
+
datasource_id: The id of the datasource.
|
|
36
|
+
connection_status: The connection status of the datasource.
|
|
37
|
+
summary: A summary of the connection status' error, or None if the connection is valid.
|
|
38
|
+
full_message: A detailed message about the connection status' error, or None if the connection is valid.
|
|
39
|
+
"""
|
|
40
|
+
|
|
31
41
|
datasource_id: DatasourceId
|
|
32
42
|
connection_status: DatasourceConnectionStatus
|
|
33
43
|
summary: str | None
|
|
@@ -44,21 +54,19 @@ class CheckDatasourceConnectionResult:
|
|
|
44
54
|
|
|
45
55
|
|
|
46
56
|
def check_datasource_connection(
|
|
47
|
-
|
|
57
|
+
project_layout: ProjectLayout, *, datasource_ids: list[DatasourceId] | None = None
|
|
48
58
|
) -> dict[DatasourceId, CheckDatasourceConnectionResult]:
|
|
49
|
-
ensure_project_dir(project_dir)
|
|
50
|
-
|
|
51
59
|
if datasource_ids:
|
|
52
60
|
logger.info(f"Validating datasource(s): {datasource_ids}")
|
|
53
|
-
datasources_to_traverse = get_datasource_descriptors(
|
|
61
|
+
datasources_to_traverse = get_datasource_descriptors(project_layout, datasource_ids)
|
|
54
62
|
else:
|
|
55
|
-
datasources_to_traverse = discover_datasources(
|
|
63
|
+
datasources_to_traverse = discover_datasources(project_layout)
|
|
56
64
|
|
|
57
65
|
plugins = load_plugins(exclude_file_plugins=True)
|
|
58
66
|
|
|
59
67
|
result = {}
|
|
60
68
|
for discovered_datasource in datasources_to_traverse:
|
|
61
|
-
result_key = DatasourceId.from_datasource_config_file_path(discovered_datasource.path)
|
|
69
|
+
result_key = DatasourceId.from_datasource_config_file_path(project_layout, discovered_datasource.path)
|
|
62
70
|
|
|
63
71
|
try:
|
|
64
72
|
prepared_source = prepare_source(discovered_datasource)
|
|
@@ -103,12 +111,12 @@ def check_datasource_connection(
|
|
|
103
111
|
exc_info=True,
|
|
104
112
|
stack_info=True,
|
|
105
113
|
)
|
|
106
|
-
result[result_key] =
|
|
114
|
+
result[result_key] = _get_validation_result_from_error(result_key, e)
|
|
107
115
|
|
|
108
116
|
return result
|
|
109
117
|
|
|
110
118
|
|
|
111
|
-
def
|
|
119
|
+
def _get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
|
|
112
120
|
if isinstance(e, ValidationError):
|
|
113
121
|
return CheckDatasourceConnectionResult(
|
|
114
122
|
datasource_id=datasource_id,
|
|
@@ -116,16 +124,16 @@ def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
|
|
|
116
124
|
summary="Config file is invalid",
|
|
117
125
|
full_message=str(e),
|
|
118
126
|
)
|
|
119
|
-
|
|
127
|
+
if isinstance(e, NotImplementedError | NotSupportedError):
|
|
120
128
|
return CheckDatasourceConnectionResult(
|
|
121
129
|
datasource_id=datasource_id,
|
|
122
130
|
connection_status=DatasourceConnectionStatus.UNKNOWN,
|
|
123
131
|
summary="Plugin doesn't support validating its config",
|
|
124
132
|
)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
133
|
+
|
|
134
|
+
return CheckDatasourceConnectionResult(
|
|
135
|
+
datasource_id=datasource_id,
|
|
136
|
+
connection_status=DatasourceConnectionStatus.INVALID,
|
|
137
|
+
summary="Connection with the datasource can not be established",
|
|
138
|
+
full_message=str(e),
|
|
139
|
+
)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
|
|
9
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(eq=True, frozen=True)
class DatasourceContext:
    """Immutable pairing of a datasource id with the context generated for it.

    Attributes:
        datasource_id: Identifier of the datasource the context belongs to.
        context: The generated context, held as text.
    """

    datasource_id: DatasourceId
    # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
    context: str
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceType:
    """Read the datasource type recorded in a context file.

    Only the first line starting with ``datasource_type: `` is considered; that single
    line is parsed as YAML to extract the value, so the rest of the file is never parsed.

    Args:
        context_path: Path of the context file to inspect.

    Returns:
        A DatasourceType built from the value found on that line.

    Raises:
        ValueError: If no line of the file starts with ``datasource_type: ``.
    """
    key = "datasource_type"
    line_prefix = f"{key}: "

    with context_path.open("r") as handle:
        # Stop at the first matching line; later occurrences are ignored.
        matching_line = next((text for text in handle if text.startswith(line_prefix)), None)

    if matching_line is None:
        raise ValueError(f"Could not find type in context file {context_path}")

    return DatasourceType(full_type=yaml.safe_load(matching_line)[key])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
    """List the datasources that have a context file in the project's output directory.

    The output directory is walked recursively. Only files whose suffix is one of
    ``DatasourceId.ALLOWED_YAML_SUFFIXES`` are considered, and the datasource type is
    read from the context file itself. Files that raise ``ValueError`` while being
    turned into a Datasource are logged and left out.

    Args:
        project_layout: Layout of the project whose output directory is scanned.

    Returns:
        The datasources found, sorted case-insensitively by the string form of their id.
    """
    output_dir = project_layout.output_dir
    found = []

    for current_dir, _subdirs, file_names in os.walk(output_dir):
        for file_name in file_names:
            if Path(file_name).suffix in DatasourceId.ALLOWED_YAML_SUFFIXES:
                context_file = Path(current_dir).joinpath(file_name)
                relative_context_file = context_file.relative_to(output_dir)
                try:
                    datasource_id = DatasourceId.from_datasource_context_file_path(relative_context_file)
                    datasource_type = _read_datasource_type_from_context_file(context_file)
                    found.append(Datasource(id=datasource_id, type=datasource_type))
                except ValueError as e:
                    # Best effort: one unreadable context file must not hide the others.
                    logger.debug(str(e), exc_info=True, stack_info=True)
                    logger.warning(
                        f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_file.resolve()}"
                    )

    found.sort(key=lambda ds: str(ds.id).lower())
    return found
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
    """Load the generated context of a single datasource.

    Args:
        project_layout: Layout of the project; its output directory holds the context files.
        datasource_id: Id of the datasource whose context is requested.

    Returns:
        A DatasourceContext holding the id and the full text of its context file.

    Raises:
        ValueError: If the expected context file does not exist or is not a regular file.
    """
    context_path = project_layout.output_dir.joinpath(datasource_id.relative_path_to_context_file())

    if context_path.is_file():
        return DatasourceContext(datasource_id=datasource_id, context=context_path.read_text())

    raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
    """Load every generated context found in the project's output directory.

    The output directory is walked recursively and every file whose suffix is one of
    ``DatasourceId.ALLOWED_YAML_SUFFIXES`` is read in full.

    Args:
        project_layout: Layout of the project whose output directory is scanned.

    Returns:
        One DatasourceContext per context file, sorted case-insensitively by the string
        form of the datasource id.
    """
    output_dir = project_layout.output_dir
    contexts = []

    for current_dir, _subdirs, file_names in os.walk(output_dir):
        yaml_names = (name for name in file_names if Path(name).suffix in DatasourceId.ALLOWED_YAML_SUFFIXES)
        for name in yaml_names:
            context_file = Path(current_dir).joinpath(name)
            relative_context_file = context_file.relative_to(output_dir)
            datasource_id = DatasourceId.from_datasource_context_file_path(relative_context_file)
            contexts.append(DatasourceContext(datasource_id=datasource_id, context=context_file.read_text()))

    contexts.sort(key=lambda context: str(context.datasource_id).lower())
    return contexts
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
    """Build the comment header line that introduces a datasource's context.

    Args:
        datasource_id: Id of the datasource; its string form is embedded in the header.

    Returns:
        The header text, terminated by the platform line separator.
    """
    label = str(datasource_id)
    return "# ===== " + label + " =====" + os.linesep
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from databao_context_engine.datasources.types import (
|
|
9
|
+
Datasource,
|
|
10
|
+
DatasourceDescriptor,
|
|
11
|
+
DatasourceId,
|
|
12
|
+
DatasourceKind,
|
|
13
|
+
PreparedConfig,
|
|
14
|
+
PreparedDatasource,
|
|
15
|
+
PreparedFile,
|
|
16
|
+
)
|
|
17
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
18
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
19
|
+
from databao_context_engine.templating.renderer import render_template
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
    """List the datasources configured in the project.

    Every discovered datasource is prepared in order to learn its type. A datasource
    whose preparation fails is logged and left out of the result.

    Args:
        project_layout: Layout of the project whose datasources are listed.

    Returns:
        One Datasource (id and type) per datasource that could be prepared, in
        discovery order.
    """
    datasources = []

    for descriptor in discover_datasources(project_layout=project_layout):
        try:
            prepared = prepare_source(descriptor)
        except Exception as e:
            # Deliberately broad: an invalid source is reported and skipped, not fatal.
            logger.debug(str(e), exc_info=True, stack_info=True)
            logger.info(f"Invalid source at ({descriptor.path}): {str(e)}")
        else:
            datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, descriptor.path)
            datasources.append(Datasource(id=datasource_id, type=prepared.datasource_type))

    return datasources
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def discover_datasources(project_layout: ProjectLayout) -> list[DatasourceDescriptor]:
    """Scan the project's src/ directory and return all discovered datasources.

    The tree under ``project_layout.src_dir`` is walked recursively. Entries rejected
    by ``_is_datasource_file`` (non-files and backup files) and files that cannot be
    turned into a descriptor are skipped.

    Args:
        project_layout: ProjectLayout instance representing the project directory and configuration.

    Returns:
        A list of DatasourceDescriptor instances representing the discovered datasources,
        sorted case-insensitively by their config file path relative to src/.
    """
    datasources: list[DatasourceDescriptor] = []
    for dirpath, _dirnames, filenames in os.walk(project_layout.src_dir):
        for config_file_name in filenames:
            config_file = Path(dirpath).joinpath(config_file_name)
            # Without this filter an editor backup such as "db.yaml~" has a non-empty
            # extension and would be registered as a raw-file datasource.
            if not _is_datasource_file(config_file):
                continue
            datasource = _load_datasource_descriptor(project_layout, config_file)
            if datasource is not None:
                datasources.append(datasource)

    return sorted(datasources, key=lambda ds: str(ds.datasource_id.relative_path_to_config_file()).lower())
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_datasource_file(p: Path) -> bool:
|
|
70
|
+
# ignore backup files
|
|
71
|
+
return p.is_file() and not p.suffix.endswith("~")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_datasource_descriptors(
    project_layout: ProjectLayout, datasource_ids: list[DatasourceId]
) -> list[DatasourceDescriptor]:
    """Resolve explicit datasource ids into descriptors.

    Args:
        project_layout: Layout of the project; config files are looked up under its src directory.
        datasource_ids: Ids of the datasources to resolve.

    Returns:
        Descriptors for the requested datasources, ordered by the string form of their
        id. Ids whose file cannot be turned into a descriptor are left out.

    Raises:
        ValueError: If the config file of a requested datasource does not exist.
    """
    descriptors: list[DatasourceDescriptor] = []

    for datasource_id in sorted(datasource_ids, key=str):
        config_file_path = project_layout.src_dir.joinpath(datasource_id.relative_path_to_config_file())
        if not config_file_path.is_file():
            raise ValueError(f"Datasource config file not found: {config_file_path}")

        descriptor = _load_datasource_descriptor(project_layout, config_file_path)
        if descriptor is None:
            continue
        descriptors.append(descriptor)

    return descriptors
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _load_datasource_descriptor(project_layout: ProjectLayout, config_file: Path) -> DatasourceDescriptor | None:
    """Turn a single file under src/ into a DatasourceDescriptor.

    Classification, in order:
      * a file sitting directly in a first-level ``files`` directory is a FILE datasource;
      * a ``.yaml`` / ``.yml`` file is a CONFIG datasource;
      * any other file with an extension is a FILE datasource;
      * a file without an extension is skipped.

    Args:
        project_layout: Layout of the project; ``config_file`` must live under its src directory.
        config_file: Path of the file to describe.

    Returns:
        The descriptor (with a resolved path), or None if ``config_file`` is not a file
        or has no extension.
    """
    if not config_file.is_file():
        return None

    path_within_src = config_file.relative_to(project_layout.src_dir)
    file_extension = config_file.suffix.lower().lstrip(".")
    directly_under_files = config_file.parent.name == "files" and len(path_within_src.parts) == 2

    if directly_under_files:
        kind = DatasourceKind.FILE
    elif file_extension in {"yaml", "yml"}:
        kind = DatasourceKind.CONFIG
    elif file_extension:
        kind = DatasourceKind.FILE
    else:
        logger.debug("Skipping file without extension: %s", config_file)
        return None

    datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
    return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=kind)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
    """Convert a discovered datasource into a prepared datasource ready for plugin execution.

    Args:
        datasource: Descriptor of the datasource to prepare.

    Returns:
        A PreparedFile for FILE datasources, typed by the lower-cased file extension;
        otherwise a PreparedConfig built from the rendered and parsed config file, named
        after the config file's stem.

    Raises:
        ValueError: If the config file's top level is not a mapping, or if it has no
            non-empty string ``type`` entry.
    """
    if datasource.kind is DatasourceKind.FILE:
        file_subtype = datasource.path.suffix.lower().lstrip(".")
        return PreparedFile(
            datasource_id=datasource.datasource_id,
            datasource_type=DatasourceType(full_type=file_subtype),
            path=datasource.path,
        )

    config = _parse_config_file(datasource.path)
    # YAML may legally hold a list or scalar at the top level; report that as an
    # invalid config instead of failing later with an AttributeError on ``.get``.
    if not isinstance(config, dict):
        raise ValueError(f"Config at {datasource.path} must be a mapping, got {type(config).__name__}")

    ds_type = config.get("type")
    if not ds_type or not isinstance(ds_type, str):
        # Exceptions do not interpolate logging-style "%s" arguments, so format here.
        raise ValueError(f"Config missing 'type' at {datasource.path} - skipping")

    return PreparedConfig(
        datasource_id=datasource.datasource_id,
        datasource_type=DatasourceType(full_type=ds_type),
        path=datasource.path,
        config=config,
        datasource_name=datasource.path.stem,
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _parse_config_file(file_path: Path) -> dict[Any, Any]:
    """Render a config file as a template and parse the result as YAML.

    Args:
        file_path: Path of the config file to read.

    Returns:
        The parsed YAML content, or an empty dict when the parsed content is falsy
        (for instance an empty file).
    """
    raw_text = file_path.read_text()
    parsed = yaml.safe_load(render_template(raw_text))
    return parsed if parsed else {}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import StrEnum
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
7
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DatasourceKind(StrEnum):
|
|
11
|
+
CONFIG = "config"
|
|
12
|
+
FILE = "file"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class DatasourceDescriptor:
    """A datasource found in the project, before its content has been read.

    Attributes:
        datasource_id: Id of the datasource.
        path: Location of the datasource's config file or raw file.
        kind: Whether the datasource is config-backed or a raw file.
    """

    datasource_id: "DatasourceId"
    path: Path
    kind: DatasourceKind
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class PreparedConfig:
    """A config-backed datasource whose config file has been parsed.

    Attributes:
        datasource_id: Id of the datasource.
        datasource_type: Type of the datasource.
        path: Location of the config file.
        config: Parsed content of the config file.
        datasource_name: Name of the datasource.
    """

    datasource_id: "DatasourceId"
    datasource_type: DatasourceType
    path: Path
    config: dict[Any, Any]
    datasource_name: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class PreparedFile:
    """A raw-file datasource ready to be handed to a plugin.

    Attributes:
        datasource_id: Id of the datasource.
        datasource_type: Type of the datasource.
        path: Location of the raw file.
    """

    datasource_id: "DatasourceId"
    datasource_type: DatasourceType
    path: Path
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Either prepared form of a datasource: config-backed (PreparedConfig) or raw file (PreparedFile).
PreparedDatasource = PreparedConfig | PreparedFile
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(kw_only=True, frozen=True, eq=True)
|
|
42
|
+
class DatasourceId:
|
|
43
|
+
"""The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
|
|
44
|
+
|
|
45
|
+
e.g: "databases/my_postgres_datasource.yaml"
|
|
46
|
+
|
|
47
|
+
Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
|
|
48
|
+
|
|
49
|
+
Attributes:
|
|
50
|
+
datasource_path: The path to the datasource relative to project's src folder.
|
|
51
|
+
config_file_suffix: The suffix of the config (or raw) file.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
ALLOWED_YAML_SUFFIXES = [".yaml", ".yml"]
|
|
55
|
+
CONTEXT_FILE_SUFFIX = ".yaml"
|
|
56
|
+
|
|
57
|
+
datasource_path: str
|
|
58
|
+
config_file_suffix: str
|
|
59
|
+
|
|
60
|
+
def __post_init__(self):
|
|
61
|
+
if not self.datasource_path.strip():
|
|
62
|
+
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not be empty")
|
|
63
|
+
if not self.config_file_suffix.strip():
|
|
64
|
+
raise ValueError(f"Invalid DatasourceId ({str(self)}): config_file_suffix must not be empty")
|
|
65
|
+
|
|
66
|
+
if not self.config_file_suffix.startswith("."):
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f'Invalid DatasourceId ({str(self)}): config_file_suffix must start with a dot "." (e.g.: .yaml)'
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
if self.datasource_path.endswith(self.config_file_suffix):
|
|
72
|
+
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not contain the file suffix")
|
|
73
|
+
|
|
74
|
+
def __str__(self):
|
|
75
|
+
return str(self.relative_path_to_config_file())
|
|
76
|
+
|
|
77
|
+
def relative_path_to_config_file(self) -> Path:
|
|
78
|
+
"""Return a path to the config file for this datasource.
|
|
79
|
+
|
|
80
|
+
The returned path is relative to the src folder in the project.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
The path to the config file relative to the src folder in the project.
|
|
84
|
+
"""
|
|
85
|
+
return Path(self.datasource_path + self.config_file_suffix)
|
|
86
|
+
|
|
87
|
+
def relative_path_to_context_file(self) -> Path:
|
|
88
|
+
"""Return a path to the config file for this datasource.
|
|
89
|
+
|
|
90
|
+
The returned path is relative to an output run folder in the project.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
The path to the context file relative to the output folder in the project.
|
|
94
|
+
"""
|
|
95
|
+
# Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
|
|
96
|
+
suffix = (
|
|
97
|
+
DatasourceId.CONTEXT_FILE_SUFFIX
|
|
98
|
+
if self.config_file_suffix in DatasourceId.ALLOWED_YAML_SUFFIXES
|
|
99
|
+
else (self.config_file_suffix + DatasourceId.CONTEXT_FILE_SUFFIX)
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
return Path(self.datasource_path + suffix)
|
|
103
|
+
|
|
104
|
+
@classmethod
|
|
105
|
+
def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
|
|
106
|
+
"""Create a DatasourceId from a string representation.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
datasource_id_as_string: The string representation of a DatasourceId.
|
|
110
|
+
This is the path to the datasource's config file relative to the src folder in the project.
|
|
111
|
+
(e.g. "databases/my_postgres_datasource.yaml")
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
The DatasourceId instance created from the string representation.
|
|
115
|
+
"""
|
|
116
|
+
config_file_path = Path(datasource_id_as_string)
|
|
117
|
+
return DatasourceId._from_relative_datasource_config_file_path(config_file_path)
|
|
118
|
+
|
|
119
|
+
@classmethod
|
|
120
|
+
def from_datasource_config_file_path(
|
|
121
|
+
cls, project_layout: ProjectLayout, datasource_config_file: Path
|
|
122
|
+
) -> "DatasourceId":
|
|
123
|
+
"""Create a DatasourceId from a config file path.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
project_layout: The databao context engine project layout.
|
|
127
|
+
datasource_config_file: The path to the datasource config file.
|
|
128
|
+
This path can either be the config file path relative to the src folder or the full path to the config file.
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
The DatasourceId instance created from the config file path.
|
|
132
|
+
"""
|
|
133
|
+
relative_datasource_config_file = datasource_config_file.relative_to(project_layout.src_dir)
|
|
134
|
+
return DatasourceId._from_relative_datasource_config_file_path(relative_datasource_config_file)
|
|
135
|
+
|
|
136
|
+
@classmethod
|
|
137
|
+
def _from_relative_datasource_config_file_path(cls, relative_datasource_config_file: Path) -> "DatasourceId":
|
|
138
|
+
if relative_datasource_config_file.is_absolute():
|
|
139
|
+
raise ValueError(
|
|
140
|
+
f"Path to datasource config file should be relative to project's src folder: {relative_datasource_config_file}"
|
|
141
|
+
)
|
|
142
|
+
return DatasourceId(
|
|
143
|
+
datasource_path=_extract_datasource_path(relative_datasource_config_file),
|
|
144
|
+
config_file_suffix=relative_datasource_config_file.suffix,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
@classmethod
|
|
148
|
+
def from_datasource_context_file_path(cls, datasource_context_file: Path) -> "DatasourceId":
|
|
149
|
+
"""Create a DatasourceId from a context file path.
|
|
150
|
+
|
|
151
|
+
This factory handles the case where the context was generated from a raw file rather than from a config.
|
|
152
|
+
In that case, the context file name will look like "<my_datasource_name>.<raw_file_extension>.yaml"
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
datasource_context_file: The path to the datasource context file.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
The DatasourceId instance created from the context file path.
|
|
159
|
+
|
|
160
|
+
Raises:
|
|
161
|
+
ValueError: If the wrong datasource_context_file is provided.
|
|
162
|
+
"""
|
|
163
|
+
if (
|
|
164
|
+
len(datasource_context_file.suffixes) > 1
|
|
165
|
+
and datasource_context_file.suffix == DatasourceId.CONTEXT_FILE_SUFFIX
|
|
166
|
+
):
|
|
167
|
+
# If there is more than 1 suffix, we remove the latest suffix (.yaml) to keep only the actual datasource file suffix
|
|
168
|
+
context_file_name_without_yaml_extension = datasource_context_file.name[
|
|
169
|
+
: -len(DatasourceId.CONTEXT_FILE_SUFFIX)
|
|
170
|
+
]
|
|
171
|
+
datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
|
|
172
|
+
if datasource_context_file.is_absolute():
|
|
173
|
+
raise ValueError(f"Path to datasource context file should be a relative path: {datasource_context_file}")
|
|
174
|
+
return DatasourceId(
|
|
175
|
+
datasource_path=_extract_datasource_path(datasource_context_file),
|
|
176
|
+
config_file_suffix=datasource_context_file.suffix,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _extract_datasource_path(datasource_file: Path) -> str:
|
|
181
|
+
return str(datasource_file.with_suffix(""))
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass
class Datasource:
    """A datasource that belongs to the project.

    Attributes:
        id: Unique identifier of the datasource.
        type: Type of the datasource.
    """

    id: DatasourceId
    type: DatasourceType
|
|
@@ -38,9 +38,9 @@ def generate_configs_schemas(
|
|
|
38
38
|
try:
|
|
39
39
|
schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
click.echo(f"{os.linesep}{os.linesep}".join(schema_list))
|
|
42
42
|
except Exception as e:
|
|
43
|
-
|
|
43
|
+
click.echo(str(e))
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def _generate_json_schema_output_for_plugins(
|
|
@@ -54,10 +54,9 @@ def _generate_json_schema_output_for_plugins(
|
|
|
54
54
|
if len(filtered_plugins) == 0:
|
|
55
55
|
if include_plugins:
|
|
56
56
|
raise ValueError(f"No plugin found with id in {include_plugins}")
|
|
57
|
-
|
|
57
|
+
if exclude_plugins:
|
|
58
58
|
raise ValueError(f"No plugin found when excluding {exclude_plugins}")
|
|
59
|
-
|
|
60
|
-
raise ValueError("No plugin found")
|
|
59
|
+
raise ValueError("No plugin found")
|
|
61
60
|
|
|
62
61
|
results = []
|
|
63
62
|
for plugin in filtered_plugins:
|
|
@@ -1,17 +1,39 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
3
|
from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
|
|
4
|
-
from databao_context_engine.project.init_project import init_project_dir
|
|
4
|
+
from databao_context_engine.project.init_project import InitProjectError, init_project_dir
|
|
5
5
|
from databao_context_engine.project.layout import is_project_dir_valid
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
    """Initialize a Databao Context project in the given directory.

    Args:
        project_dir: The directory where the project should be initialized.

    Returns:
        A DatabaoContextProjectManager for the initialized project.

    Raises:
        InitProjectError: If the project could not be initialized.
    """
    # InitProjectError propagates to the caller unchanged; catching it only to
    # re-raise the same exception would add nothing.
    initialized_project_dir = init_project_dir(project_dir=project_dir)

    return DatabaoContextProjectManager(project_dir=initialized_project_dir)
|
|
12
26
|
|
|
13
27
|
|
|
14
28
|
def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
|
|
29
|
+
"""Initialize a Databao Context project in the given directory or get a DatabaoContextProjectManager for the project if it already exists.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
project_dir: The directory where the project should be initialized if it doesn't exist yet..
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
A DatabaoContextProjectManager for the project in project_dir.
|
|
36
|
+
"""
|
|
15
37
|
if is_project_dir_valid(project_dir):
|
|
16
38
|
return DatabaoContextProjectManager(project_dir=project_dir)
|
|
17
39
|
|