databao-context-engine 0.1.4.dev1__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +14 -1
- databao_context_engine/build_sources/build_runner.py +7 -7
- databao_context_engine/build_sources/build_wiring.py +8 -10
- databao_context_engine/build_sources/plugin_execution.py +9 -12
- databao_context_engine/cli/add_datasource_config.py +9 -30
- databao_context_engine/cli/commands.py +29 -13
- databao_context_engine/databao_context_engine.py +3 -13
- databao_context_engine/databao_context_project_manager.py +56 -29
- databao_context_engine/datasources/check_config.py +13 -16
- databao_context_engine/datasources/datasource_context.py +21 -24
- databao_context_engine/datasources/datasource_discovery.py +45 -44
- databao_context_engine/datasources/types.py +53 -42
- databao_context_engine/generate_configs_schemas.py +4 -5
- databao_context_engine/introspection/property_extract.py +52 -47
- databao_context_engine/llm/__init__.py +10 -0
- databao_context_engine/llm/api.py +57 -0
- databao_context_engine/llm/descriptions/ollama.py +1 -3
- databao_context_engine/llm/factory.py +5 -2
- databao_context_engine/llm/install.py +13 -10
- databao_context_engine/llm/runtime.py +3 -5
- databao_context_engine/mcp/mcp_server.py +1 -3
- databao_context_engine/plugin_loader.py +6 -7
- databao_context_engine/pluginlib/build_plugin.py +0 -33
- databao_context_engine/plugins/databases/athena/__init__.py +0 -0
- databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{athena_introspector.py → athena/athena_introspector.py} +2 -5
- databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +1 -3
- databao_context_engine/plugins/databases/base_introspector.py +11 -14
- databao_context_engine/plugins/databases/clickhouse/__init__.py +0 -0
- databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +2 -5
- databao_context_engine/plugins/databases/duckdb/__init__.py +0 -0
- databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +4 -7
- databao_context_engine/plugins/databases/mssql/__init__.py +0 -0
- databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +9 -10
- databao_context_engine/plugins/databases/mysql/__init__.py +0 -0
- databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
- databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +8 -8
- databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
- databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +9 -16
- databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
- databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +8 -9
- databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
- databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
- databao_context_engine/plugins/dbt/__init__.py +0 -0
- databao_context_engine/plugins/dbt/dbt_chunker.py +47 -0
- databao_context_engine/plugins/dbt/dbt_context_extractor.py +106 -0
- databao_context_engine/plugins/dbt/dbt_plugin.py +25 -0
- databao_context_engine/plugins/dbt/types.py +44 -0
- databao_context_engine/plugins/dbt/types_artifacts.py +58 -0
- databao_context_engine/plugins/files/__init__.py +0 -0
- databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
- databao_context_engine/plugins/plugin_loader.py +13 -15
- databao_context_engine/plugins/resources/parquet_introspector.py +1 -1
- databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
- databao_context_engine/project/layout.py +12 -13
- databao_context_engine/retrieve_embeddings/retrieve_runner.py +3 -16
- databao_context_engine/retrieve_embeddings/retrieve_service.py +13 -6
- databao_context_engine/retrieve_embeddings/retrieve_wiring.py +4 -7
- databao_context_engine/serialization/yaml.py +5 -5
- databao_context_engine/storage/migrate.py +1 -1
- databao_context_engine/storage/repositories/vector_search_repository.py +18 -6
- databao_context_engine-0.1.6.dist-info/METADATA +228 -0
- {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/RECORD +71 -55
- databao_context_engine/datasources/add_config.py +0 -34
- databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
- databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
- databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
- databao_context_engine/retrieve_embeddings/export_results.py +0 -12
- databao_context_engine-0.1.4.dev1.dist-info/METADATA +0 -75
- {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/WHEEL +0 -0
- {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/entry_points.txt +0 -0
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
import yaml
|
|
7
7
|
|
|
8
8
|
from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
|
|
9
|
-
from databao_context_engine.project.layout import ProjectLayout
|
|
9
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -37,33 +37,31 @@ def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceTyp
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
|
|
40
|
-
output_dir = get_output_dir(project_dir=project_layout.project_dir)
|
|
41
|
-
|
|
42
40
|
result = []
|
|
43
|
-
for
|
|
44
|
-
for
|
|
45
|
-
|
|
46
|
-
|
|
41
|
+
for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
|
|
42
|
+
for context_file_name in filenames:
|
|
43
|
+
if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
|
|
44
|
+
continue
|
|
45
|
+
context_file = Path(dirpath).joinpath(context_file_name)
|
|
46
|
+
relative_context_file = context_file.relative_to(project_layout.output_dir)
|
|
47
47
|
try:
|
|
48
48
|
result.append(
|
|
49
49
|
Datasource(
|
|
50
|
-
id=DatasourceId.
|
|
51
|
-
type=_read_datasource_type_from_context_file(
|
|
50
|
+
id=DatasourceId.from_datasource_context_file_path(relative_context_file),
|
|
51
|
+
type=_read_datasource_type_from_context_file(context_file),
|
|
52
52
|
)
|
|
53
53
|
)
|
|
54
54
|
except ValueError as e:
|
|
55
55
|
logger.debug(str(e), exc_info=True, stack_info=True)
|
|
56
56
|
logger.warning(
|
|
57
|
-
f"Ignoring introspected datasource: Failed to read datasource_type from context file at {
|
|
57
|
+
f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_file.resolve()}"
|
|
58
58
|
)
|
|
59
59
|
|
|
60
|
-
return result
|
|
60
|
+
return sorted(result, key=lambda ds: str(ds.id).lower())
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
context_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
|
|
64
|
+
context_path = project_layout.output_dir.joinpath(datasource_id.relative_path_to_context_file())
|
|
67
65
|
if not context_path.is_file():
|
|
68
66
|
raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
|
|
69
67
|
|
|
@@ -72,21 +70,20 @@ def get_datasource_context(project_layout: ProjectLayout, datasource_id: Datasou
|
|
|
72
70
|
|
|
73
71
|
|
|
74
72
|
def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
|
|
75
|
-
output_dir = get_output_dir(project_dir=project_layout.project_dir)
|
|
76
|
-
|
|
77
73
|
result = []
|
|
78
|
-
for
|
|
79
|
-
for
|
|
80
|
-
|
|
81
|
-
|
|
74
|
+
for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
|
|
75
|
+
for context_file_name in filenames:
|
|
76
|
+
if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
|
|
77
|
+
continue
|
|
78
|
+
context_file = Path(dirpath).joinpath(context_file_name)
|
|
79
|
+
relative_context_file = context_file.relative_to(project_layout.output_dir)
|
|
82
80
|
result.append(
|
|
83
81
|
DatasourceContext(
|
|
84
|
-
datasource_id=DatasourceId.from_datasource_context_file_path(
|
|
85
|
-
context=
|
|
82
|
+
datasource_id=DatasourceId.from_datasource_context_file_path(relative_context_file),
|
|
83
|
+
context=context_file.read_text(),
|
|
86
84
|
)
|
|
87
85
|
)
|
|
88
|
-
|
|
89
|
-
return result
|
|
86
|
+
return sorted(result, key=lambda context: str(context.datasource_id).lower())
|
|
90
87
|
|
|
91
88
|
|
|
92
89
|
def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Any
|
|
4
5
|
|
|
@@ -14,15 +15,15 @@ from databao_context_engine.datasources.types import (
|
|
|
14
15
|
PreparedFile,
|
|
15
16
|
)
|
|
16
17
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
17
|
-
from databao_context_engine.project.layout import
|
|
18
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
18
19
|
from databao_context_engine.templating.renderer import render_template
|
|
19
20
|
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
22
23
|
|
|
23
|
-
def get_datasource_list(
|
|
24
|
+
def get_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
|
|
24
25
|
result = []
|
|
25
|
-
for discovered_datasource in discover_datasources(
|
|
26
|
+
for discovered_datasource in discover_datasources(project_layout=project_layout):
|
|
26
27
|
try:
|
|
27
28
|
prepared_source = prepare_source(discovered_datasource)
|
|
28
29
|
except Exception as e:
|
|
@@ -32,7 +33,7 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
|
|
|
32
33
|
|
|
33
34
|
result.append(
|
|
34
35
|
Datasource(
|
|
35
|
-
id=DatasourceId.from_datasource_config_file_path(discovered_datasource.path),
|
|
36
|
+
id=DatasourceId.from_datasource_config_file_path(project_layout, discovered_datasource.path),
|
|
36
37
|
type=prepared_source.datasource_type,
|
|
37
38
|
)
|
|
38
39
|
)
|
|
@@ -40,7 +41,7 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
|
|
|
40
41
|
return result
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
def discover_datasources(
|
|
44
|
+
def discover_datasources(project_layout: ProjectLayout) -> list[DatasourceDescriptor]:
|
|
44
45
|
"""Scan the project's src/ directory and return all discovered sources.
|
|
45
46
|
|
|
46
47
|
Rules:
|
|
@@ -48,24 +49,21 @@ def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
|
|
|
48
49
|
- Unsupported or unreadable entries are skipped.
|
|
49
50
|
- The returned list is sorted by directory and then filename
|
|
50
51
|
|
|
52
|
+
Args:
|
|
53
|
+
project_layout: ProjectLayout instance representing the project directory and configuration.
|
|
54
|
+
|
|
51
55
|
Returns:
|
|
52
56
|
A list of DatasourceDescriptor instances representing the discovered datasources.
|
|
53
|
-
|
|
54
|
-
Raises:
|
|
55
|
-
ValueError: If the src directory does not exist in the project directory.
|
|
56
57
|
"""
|
|
57
|
-
src = get_source_dir(project_dir)
|
|
58
|
-
if not src.exists() or not src.is_dir():
|
|
59
|
-
raise ValueError(f"src directory does not exist in {project_dir}")
|
|
60
|
-
|
|
61
58
|
datasources: list[DatasourceDescriptor] = []
|
|
62
|
-
for
|
|
63
|
-
for
|
|
64
|
-
|
|
59
|
+
for dirpath, dirnames, filenames in os.walk(project_layout.src_dir):
|
|
60
|
+
for config_file_name in filenames:
|
|
61
|
+
context_file = Path(dirpath).joinpath(config_file_name)
|
|
62
|
+
datasource = _load_datasource_descriptor(project_layout, context_file)
|
|
65
63
|
if datasource is not None:
|
|
66
64
|
datasources.append(datasource)
|
|
67
65
|
|
|
68
|
-
return datasources
|
|
66
|
+
return sorted(datasources, key=lambda ds: str(ds.datasource_id.relative_path_to_config_file()).lower())
|
|
69
67
|
|
|
70
68
|
|
|
71
69
|
def _is_datasource_file(p: Path) -> bool:
|
|
@@ -73,42 +71,44 @@ def _is_datasource_file(p: Path) -> bool:
|
|
|
73
71
|
return p.is_file() and not p.suffix.endswith("~")
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
def get_datasource_descriptors(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
raise ValueError(f"src directory does not exist in {project_dir}")
|
|
80
|
-
|
|
74
|
+
def get_datasource_descriptors(
|
|
75
|
+
project_layout: ProjectLayout, datasource_ids: list[DatasourceId]
|
|
76
|
+
) -> list[DatasourceDescriptor]:
|
|
81
77
|
datasources: list[DatasourceDescriptor] = []
|
|
82
78
|
for datasource_id in sorted(datasource_ids, key=lambda id: str(id)):
|
|
83
|
-
config_file_path =
|
|
79
|
+
config_file_path = project_layout.src_dir.joinpath(datasource_id.relative_path_to_config_file())
|
|
84
80
|
if not config_file_path.is_file():
|
|
85
81
|
raise ValueError(f"Datasource config file not found: {config_file_path}")
|
|
86
82
|
|
|
87
|
-
datasource = _load_datasource_descriptor(config_file_path)
|
|
83
|
+
datasource = _load_datasource_descriptor(project_layout, config_file_path)
|
|
88
84
|
if datasource is not None:
|
|
89
85
|
datasources.append(datasource)
|
|
90
86
|
|
|
91
87
|
return datasources
|
|
92
88
|
|
|
93
89
|
|
|
94
|
-
def _load_datasource_descriptor(
|
|
90
|
+
def _load_datasource_descriptor(project_layout: ProjectLayout, config_file: Path) -> DatasourceDescriptor | None:
|
|
95
91
|
"""Load a single file with src/<parent_name>/ into a DatasourceDescriptor."""
|
|
96
|
-
if not
|
|
92
|
+
if not config_file.is_file():
|
|
97
93
|
return None
|
|
98
94
|
|
|
99
|
-
parent_name =
|
|
100
|
-
extension =
|
|
95
|
+
parent_name = config_file.parent.name
|
|
96
|
+
extension = config_file.suffix.lower().lstrip(".")
|
|
97
|
+
relative_config_file = config_file.relative_to(project_layout.src_dir)
|
|
101
98
|
|
|
102
|
-
if parent_name == "files":
|
|
103
|
-
|
|
99
|
+
if parent_name == "files" and len(relative_config_file.parts) == 2:
|
|
100
|
+
datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
|
|
101
|
+
return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
|
|
104
102
|
|
|
105
103
|
if extension in {"yaml", "yml"}:
|
|
106
|
-
|
|
104
|
+
datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
|
|
105
|
+
return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.CONFIG)
|
|
107
106
|
|
|
108
107
|
if extension:
|
|
109
|
-
|
|
108
|
+
datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
|
|
109
|
+
return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
|
|
110
110
|
|
|
111
|
-
logger.debug("Skipping file without extension: %s",
|
|
111
|
+
logger.debug("Skipping file without extension: %s", config_file)
|
|
112
112
|
return None
|
|
113
113
|
|
|
114
114
|
|
|
@@ -117,23 +117,24 @@ def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
|
|
|
117
117
|
if datasource.kind is DatasourceKind.FILE:
|
|
118
118
|
file_subtype = datasource.path.suffix.lower().lstrip(".")
|
|
119
119
|
return PreparedFile(
|
|
120
|
-
|
|
120
|
+
datasource_id=datasource.datasource_id,
|
|
121
|
+
datasource_type=DatasourceType(full_type=file_subtype),
|
|
121
122
|
path=datasource.path,
|
|
122
123
|
)
|
|
123
124
|
|
|
124
|
-
|
|
125
|
-
config = _parse_config_file(datasource.path)
|
|
125
|
+
config = _parse_config_file(datasource.path)
|
|
126
126
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
127
|
+
ds_type = config.get("type")
|
|
128
|
+
if not ds_type or not isinstance(ds_type, str):
|
|
129
|
+
raise ValueError("Config missing 'type' at %s - skipping", datasource.path)
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
131
|
+
return PreparedConfig(
|
|
132
|
+
datasource_id=datasource.datasource_id,
|
|
133
|
+
datasource_type=DatasourceType(full_type=ds_type),
|
|
134
|
+
path=datasource.path,
|
|
135
|
+
config=config,
|
|
136
|
+
datasource_name=datasource.path.stem,
|
|
137
|
+
)
|
|
137
138
|
|
|
138
139
|
|
|
139
140
|
def _parse_config_file(file_path: Path) -> dict[Any, Any]:
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from dataclasses import dataclass
|
|
3
2
|
from enum import StrEnum
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from typing import Any
|
|
6
5
|
|
|
7
6
|
from databao_context_engine.pluginlib.build_plugin import DatasourceType
|
|
7
|
+
from databao_context_engine.project.layout import ProjectLayout
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class DatasourceKind(StrEnum):
|
|
@@ -14,13 +14,14 @@ class DatasourceKind(StrEnum):
|
|
|
14
14
|
|
|
15
15
|
@dataclass(frozen=True)
|
|
16
16
|
class DatasourceDescriptor:
|
|
17
|
+
datasource_id: "DatasourceId"
|
|
17
18
|
path: Path
|
|
18
19
|
kind: DatasourceKind
|
|
19
|
-
main_type: str
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@dataclass(frozen=True)
|
|
23
23
|
class PreparedConfig:
|
|
24
|
+
datasource_id: "DatasourceId"
|
|
24
25
|
datasource_type: DatasourceType
|
|
25
26
|
path: Path
|
|
26
27
|
config: dict[Any, Any]
|
|
@@ -29,6 +30,7 @@ class PreparedConfig:
|
|
|
29
30
|
|
|
30
31
|
@dataclass(frozen=True)
|
|
31
32
|
class PreparedFile:
|
|
33
|
+
datasource_id: "DatasourceId"
|
|
32
34
|
datasource_type: DatasourceType
|
|
33
35
|
path: Path
|
|
34
36
|
|
|
@@ -45,38 +47,29 @@ class DatasourceId:
|
|
|
45
47
|
Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
|
|
46
48
|
|
|
47
49
|
Attributes:
|
|
48
|
-
|
|
49
|
-
datasource_name: The name of the datasource.
|
|
50
|
+
datasource_path: The path to the datasource relative to project's src folder.
|
|
50
51
|
config_file_suffix: The suffix of the config (or raw) file.
|
|
51
52
|
"""
|
|
52
53
|
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
ALLOWED_YAML_SUFFIXES = [".yaml", ".yml"]
|
|
55
|
+
CONTEXT_FILE_SUFFIX = ".yaml"
|
|
56
|
+
|
|
57
|
+
datasource_path: str
|
|
55
58
|
config_file_suffix: str
|
|
56
59
|
|
|
57
60
|
def __post_init__(self):
|
|
58
|
-
if not self.
|
|
59
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}):
|
|
60
|
-
if not self.datasource_name.strip():
|
|
61
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not be empty")
|
|
61
|
+
if not self.datasource_path.strip():
|
|
62
|
+
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not be empty")
|
|
62
63
|
if not self.config_file_suffix.strip():
|
|
63
64
|
raise ValueError(f"Invalid DatasourceId ({str(self)}): config_file_suffix must not be empty")
|
|
64
65
|
|
|
65
|
-
if os.sep in self.datasource_config_folder:
|
|
66
|
-
raise ValueError(
|
|
67
|
-
f"Invalid DatasourceId ({str(self)}): datasource_config_folder must not contain a path separator"
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
if os.sep in self.datasource_name:
|
|
71
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not contain a path separator")
|
|
72
|
-
|
|
73
66
|
if not self.config_file_suffix.startswith("."):
|
|
74
67
|
raise ValueError(
|
|
75
68
|
f'Invalid DatasourceId ({str(self)}): config_file_suffix must start with a dot "." (e.g.: .yaml)'
|
|
76
69
|
)
|
|
77
70
|
|
|
78
|
-
if self.
|
|
79
|
-
raise ValueError(f"Invalid DatasourceId ({str(self)}):
|
|
71
|
+
if self.datasource_path.endswith(self.config_file_suffix):
|
|
72
|
+
raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not contain the file suffix")
|
|
80
73
|
|
|
81
74
|
def __str__(self):
|
|
82
75
|
return str(self.relative_path_to_config_file())
|
|
@@ -89,7 +82,7 @@ class DatasourceId:
|
|
|
89
82
|
Returns:
|
|
90
83
|
The path to the config file relative to the src folder in the project.
|
|
91
84
|
"""
|
|
92
|
-
return Path(self.
|
|
85
|
+
return Path(self.datasource_path + self.config_file_suffix)
|
|
93
86
|
|
|
94
87
|
def relative_path_to_context_file(self) -> Path:
|
|
95
88
|
"""Return a path to the config file for this datasource.
|
|
@@ -100,9 +93,13 @@ class DatasourceId:
|
|
|
100
93
|
The path to the context file relative to the output folder in the project.
|
|
101
94
|
"""
|
|
102
95
|
# Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
|
|
103
|
-
suffix =
|
|
96
|
+
suffix = (
|
|
97
|
+
DatasourceId.CONTEXT_FILE_SUFFIX
|
|
98
|
+
if self.config_file_suffix in DatasourceId.ALLOWED_YAML_SUFFIXES
|
|
99
|
+
else (self.config_file_suffix + DatasourceId.CONTEXT_FILE_SUFFIX)
|
|
100
|
+
)
|
|
104
101
|
|
|
105
|
-
return Path(self.
|
|
102
|
+
return Path(self.datasource_path + suffix)
|
|
106
103
|
|
|
107
104
|
@classmethod
|
|
108
105
|
def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
|
|
@@ -115,34 +112,36 @@ class DatasourceId:
|
|
|
115
112
|
|
|
116
113
|
Returns:
|
|
117
114
|
The DatasourceId instance created from the string representation.
|
|
118
|
-
|
|
119
|
-
Raises:
|
|
120
|
-
ValueError: If the string representation is invalid (e.g. too many parent folders).
|
|
121
115
|
"""
|
|
122
116
|
config_file_path = Path(datasource_id_as_string)
|
|
123
|
-
|
|
124
|
-
if len(config_file_path.parents) > 2:
|
|
125
|
-
raise ValueError(
|
|
126
|
-
f"Invalid string representation of a DatasourceId: too many parent folders defined in {datasource_id_as_string}"
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
return DatasourceId.from_datasource_config_file_path(config_file_path)
|
|
117
|
+
return DatasourceId._from_relative_datasource_config_file_path(config_file_path)
|
|
130
118
|
|
|
131
119
|
@classmethod
|
|
132
|
-
def from_datasource_config_file_path(
|
|
120
|
+
def from_datasource_config_file_path(
|
|
121
|
+
cls, project_layout: ProjectLayout, datasource_config_file: Path
|
|
122
|
+
) -> "DatasourceId":
|
|
133
123
|
"""Create a DatasourceId from a config file path.
|
|
134
124
|
|
|
135
125
|
Args:
|
|
126
|
+
project_layout: The databao context engine project layout.
|
|
136
127
|
datasource_config_file: The path to the datasource config file.
|
|
137
128
|
This path can either be the config file path relative to the src folder or the full path to the config file.
|
|
138
129
|
|
|
139
130
|
Returns:
|
|
140
131
|
The DatasourceId instance created from the config file path.
|
|
141
132
|
"""
|
|
133
|
+
relative_datasource_config_file = datasource_config_file.relative_to(project_layout.src_dir)
|
|
134
|
+
return DatasourceId._from_relative_datasource_config_file_path(relative_datasource_config_file)
|
|
135
|
+
|
|
136
|
+
@classmethod
|
|
137
|
+
def _from_relative_datasource_config_file_path(cls, relative_datasource_config_file: Path) -> "DatasourceId":
|
|
138
|
+
if relative_datasource_config_file.is_absolute():
|
|
139
|
+
raise ValueError(
|
|
140
|
+
f"Path to datasource config file should be relative to project's src folder: {relative_datasource_config_file}"
|
|
141
|
+
)
|
|
142
142
|
return DatasourceId(
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
config_file_suffix=datasource_config_file.suffix,
|
|
143
|
+
datasource_path=_extract_datasource_path(relative_datasource_config_file),
|
|
144
|
+
config_file_suffix=relative_datasource_config_file.suffix,
|
|
146
145
|
)
|
|
147
146
|
|
|
148
147
|
@classmethod
|
|
@@ -157,19 +156,31 @@ class DatasourceId:
|
|
|
157
156
|
|
|
158
157
|
Returns:
|
|
159
158
|
The DatasourceId instance created from the context file path.
|
|
159
|
+
|
|
160
|
+
Raises:
|
|
161
|
+
ValueError: If the wrong datasource_context_file is provided.
|
|
160
162
|
"""
|
|
161
|
-
if
|
|
163
|
+
if (
|
|
164
|
+
len(datasource_context_file.suffixes) > 1
|
|
165
|
+
and datasource_context_file.suffix == DatasourceId.CONTEXT_FILE_SUFFIX
|
|
166
|
+
):
|
|
162
167
|
# If there is more than 1 suffix, we remove the latest suffix (.yaml) to keep only the actual datasource file suffix
|
|
163
|
-
context_file_name_without_yaml_extension = datasource_context_file.name[
|
|
168
|
+
context_file_name_without_yaml_extension = datasource_context_file.name[
|
|
169
|
+
: -len(DatasourceId.CONTEXT_FILE_SUFFIX)
|
|
170
|
+
]
|
|
164
171
|
datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
|
|
165
|
-
|
|
172
|
+
if datasource_context_file.is_absolute():
|
|
173
|
+
raise ValueError(f"Path to datasource context file should be a relative path: {datasource_context_file}")
|
|
166
174
|
return DatasourceId(
|
|
167
|
-
|
|
168
|
-
datasource_name=datasource_context_file.stem,
|
|
175
|
+
datasource_path=_extract_datasource_path(datasource_context_file),
|
|
169
176
|
config_file_suffix=datasource_context_file.suffix,
|
|
170
177
|
)
|
|
171
178
|
|
|
172
179
|
|
|
180
|
+
def _extract_datasource_path(datasource_file: Path) -> str:
|
|
181
|
+
return str(datasource_file.with_suffix(""))
|
|
182
|
+
|
|
183
|
+
|
|
173
184
|
@dataclass
|
|
174
185
|
class Datasource:
|
|
175
186
|
"""A datasource contained in the project.
|
|
@@ -38,9 +38,9 @@ def generate_configs_schemas(
|
|
|
38
38
|
try:
|
|
39
39
|
schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
click.echo(f"{os.linesep}{os.linesep}".join(schema_list))
|
|
42
42
|
except Exception as e:
|
|
43
|
-
|
|
43
|
+
click.echo(str(e))
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def _generate_json_schema_output_for_plugins(
|
|
@@ -54,10 +54,9 @@ def _generate_json_schema_output_for_plugins(
|
|
|
54
54
|
if len(filtered_plugins) == 0:
|
|
55
55
|
if include_plugins:
|
|
56
56
|
raise ValueError(f"No plugin found with id in {include_plugins}")
|
|
57
|
-
|
|
57
|
+
if exclude_plugins:
|
|
58
58
|
raise ValueError(f"No plugin found when excluding {exclude_plugins}")
|
|
59
|
-
|
|
60
|
-
raise ValueError("No plugin found")
|
|
59
|
+
raise ValueError("No plugin found")
|
|
61
60
|
|
|
62
61
|
results = []
|
|
63
62
|
for plugin in filtered_plugins:
|
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
import types
|
|
2
2
|
from dataclasses import MISSING, fields, is_dataclass
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import (
|
|
4
|
+
Annotated,
|
|
5
|
+
Any,
|
|
6
|
+
ForwardRef,
|
|
7
|
+
Iterable,
|
|
8
|
+
Mapping,
|
|
9
|
+
Union,
|
|
10
|
+
get_args,
|
|
11
|
+
get_origin,
|
|
12
|
+
get_type_hints,
|
|
13
|
+
)
|
|
4
14
|
|
|
5
15
|
from pydantic import BaseModel
|
|
6
16
|
from pydantic_core import PydanticUndefinedType
|
|
@@ -14,10 +24,10 @@ from databao_context_engine.pluginlib.config import (
|
|
|
14
24
|
|
|
15
25
|
|
|
16
26
|
def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
|
|
17
|
-
return _get_property_list_from_type(parent_type=root_type
|
|
27
|
+
return _get_property_list_from_type(parent_type=root_type)
|
|
18
28
|
|
|
19
29
|
|
|
20
|
-
def _get_property_list_from_type(*, parent_type: type
|
|
30
|
+
def _get_property_list_from_type(*, parent_type: type) -> list[ConfigPropertyDefinition]:
|
|
21
31
|
if is_dataclass(parent_type):
|
|
22
32
|
return _get_property_list_from_dataclass(parent_type=parent_type)
|
|
23
33
|
|
|
@@ -29,20 +39,15 @@ def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> li
|
|
|
29
39
|
# issubclass is raising a TypeError: issubclass() arg 1 must be a class
|
|
30
40
|
pass
|
|
31
41
|
|
|
32
|
-
return _get_property_list_from_type_hints(parent_type=parent_type
|
|
42
|
+
return _get_property_list_from_type_hints(parent_type=parent_type)
|
|
33
43
|
|
|
34
44
|
|
|
35
|
-
def _get_property_list_from_type_hints(*, parent_type: type
|
|
45
|
+
def _get_property_list_from_type_hints(*, parent_type: type) -> list[ConfigPropertyDefinition]:
|
|
36
46
|
try:
|
|
37
47
|
type_hints = get_type_hints(parent_type, include_extras=True)
|
|
38
|
-
except TypeError
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
return []
|
|
42
|
-
else:
|
|
43
|
-
# If we're evaluating a nested property, we want to propagate the exception
|
|
44
|
-
# to let the parent property know that this type should be ignored
|
|
45
|
-
raise e
|
|
48
|
+
except TypeError:
|
|
49
|
+
# Return an empty list of properties for any type that is not an object (e.g: primitives like str or containers like dict, list, tuple, etc.
|
|
50
|
+
return []
|
|
46
51
|
|
|
47
52
|
result = []
|
|
48
53
|
for property_key, property_type in type_hints.items():
|
|
@@ -137,13 +142,7 @@ def _create_property(
|
|
|
137
142
|
type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
|
|
138
143
|
|
|
139
144
|
for union_type in actual_property_types:
|
|
140
|
-
|
|
141
|
-
nested_props = _get_property_list_from_type(
|
|
142
|
-
parent_type=union_type,
|
|
143
|
-
is_root_type=False,
|
|
144
|
-
)
|
|
145
|
-
except TypeError:
|
|
146
|
-
nested_props = []
|
|
145
|
+
nested_props = _get_property_list_from_type(parent_type=union_type)
|
|
147
146
|
|
|
148
147
|
type_properties[union_type] = nested_props
|
|
149
148
|
|
|
@@ -152,31 +151,38 @@ def _create_property(
|
|
|
152
151
|
types=actual_property_types,
|
|
153
152
|
type_properties=type_properties,
|
|
154
153
|
)
|
|
155
|
-
else:
|
|
156
|
-
actual_property_type = actual_property_types[0]
|
|
157
|
-
try:
|
|
158
|
-
nested_properties = _get_property_list_from_type(
|
|
159
|
-
parent_type=actual_property_type,
|
|
160
|
-
is_root_type=False,
|
|
161
|
-
)
|
|
162
|
-
except TypeError:
|
|
163
|
-
return None
|
|
164
|
-
|
|
165
|
-
resolved_type = actual_property_type if not nested_properties else None
|
|
166
|
-
default_value = compute_default_value(
|
|
167
|
-
annotation=annotation,
|
|
168
|
-
property_default=property_default,
|
|
169
|
-
has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
|
|
170
|
-
)
|
|
171
154
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
155
|
+
actual_property_type = actual_property_types[0]
|
|
156
|
+
nested_properties = _get_property_list_from_type(parent_type=actual_property_type)
|
|
157
|
+
|
|
158
|
+
if len(nested_properties) == 0 and _is_mapping_or_iterable(actual_property_type):
|
|
159
|
+
# Ignore Iterables and Mappings for which we didn't resolve nested properties
|
|
160
|
+
# (TypedDict is a Mapping but since we manage to resolve nested properties, it won't be ignored)
|
|
161
|
+
return None
|
|
162
|
+
|
|
163
|
+
resolved_type = actual_property_type if not nested_properties else None
|
|
164
|
+
default_value = compute_default_value(
|
|
165
|
+
annotation=annotation,
|
|
166
|
+
property_default=property_default,
|
|
167
|
+
has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
return ConfigSinglePropertyDefinition(
|
|
171
|
+
property_key=property_name,
|
|
172
|
+
property_type=resolved_type,
|
|
173
|
+
required=required,
|
|
174
|
+
default_value=default_value,
|
|
175
|
+
nested_properties=nested_properties or None,
|
|
176
|
+
secret=secret,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _is_mapping_or_iterable(property_type: type):
|
|
181
|
+
# For types like list[str], we need to get the origin (ie. list) to use in issubclass
|
|
182
|
+
origin = get_origin(property_type)
|
|
183
|
+
|
|
184
|
+
# We make sure to not return True for str, which is an Iterable
|
|
185
|
+
return property_type is not str and issubclass(origin if origin else property_type, (Mapping, Iterable))
|
|
180
186
|
|
|
181
187
|
|
|
182
188
|
def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
|
|
@@ -194,9 +200,8 @@ def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
|
|
|
194
200
|
|
|
195
201
|
if property_type_origin is Annotated:
|
|
196
202
|
return _read_actual_property_type(property_type.__origin__) # type: ignore[attr-defined]
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return args
|
|
203
|
+
if property_type_origin in (Union, types.UnionType):
|
|
204
|
+
return tuple(arg for arg in get_args(property_type) if arg is not type(None))
|
|
200
205
|
|
|
201
206
|
return (property_type,)
|
|
202
207
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from databao_context_engine.llm.api import download_ollama_models_if_needed, install_ollama_if_needed
|
|
2
|
+
from databao_context_engine.llm.errors import OllamaError, OllamaPermanentError, OllamaTransientError
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"install_ollama_if_needed",
|
|
6
|
+
"download_ollama_models_if_needed",
|
|
7
|
+
"OllamaError",
|
|
8
|
+
"OllamaTransientError",
|
|
9
|
+
"OllamaPermanentError",
|
|
10
|
+
]
|