databao-context-engine 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databao_context_engine/__init__.py +35 -0
- databao_context_engine/build_sources/__init__.py +0 -0
- databao_context_engine/build_sources/internal/__init__.py +0 -0
- databao_context_engine/build_sources/internal/build_runner.py +111 -0
- databao_context_engine/build_sources/internal/build_service.py +77 -0
- databao_context_engine/build_sources/internal/build_wiring.py +52 -0
- databao_context_engine/build_sources/internal/export_results.py +43 -0
- databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
- databao_context_engine/build_sources/public/__init__.py +0 -0
- databao_context_engine/build_sources/public/api.py +4 -0
- databao_context_engine/cli/__init__.py +0 -0
- databao_context_engine/cli/add_datasource_config.py +130 -0
- databao_context_engine/cli/commands.py +256 -0
- databao_context_engine/cli/datasources.py +64 -0
- databao_context_engine/cli/info.py +32 -0
- databao_context_engine/config/__init__.py +0 -0
- databao_context_engine/config/log_config.yaml +16 -0
- databao_context_engine/config/logging.py +43 -0
- databao_context_engine/databao_context_project_manager.py +92 -0
- databao_context_engine/databao_engine.py +85 -0
- databao_context_engine/datasource_config/__init__.py +0 -0
- databao_context_engine/datasource_config/add_config.py +50 -0
- databao_context_engine/datasource_config/check_config.py +131 -0
- databao_context_engine/datasource_config/datasource_context.py +60 -0
- databao_context_engine/event_journal/__init__.py +0 -0
- databao_context_engine/event_journal/writer.py +29 -0
- databao_context_engine/generate_configs_schemas.py +92 -0
- databao_context_engine/init_project.py +18 -0
- databao_context_engine/introspection/__init__.py +0 -0
- databao_context_engine/introspection/property_extract.py +202 -0
- databao_context_engine/llm/__init__.py +0 -0
- databao_context_engine/llm/config.py +20 -0
- databao_context_engine/llm/descriptions/__init__.py +0 -0
- databao_context_engine/llm/descriptions/ollama.py +21 -0
- databao_context_engine/llm/descriptions/provider.py +10 -0
- databao_context_engine/llm/embeddings/__init__.py +0 -0
- databao_context_engine/llm/embeddings/ollama.py +37 -0
- databao_context_engine/llm/embeddings/provider.py +13 -0
- databao_context_engine/llm/errors.py +16 -0
- databao_context_engine/llm/factory.py +61 -0
- databao_context_engine/llm/install.py +227 -0
- databao_context_engine/llm/runtime.py +73 -0
- databao_context_engine/llm/service.py +159 -0
- databao_context_engine/main.py +19 -0
- databao_context_engine/mcp/__init__.py +0 -0
- databao_context_engine/mcp/all_results_tool.py +5 -0
- databao_context_engine/mcp/mcp_runner.py +16 -0
- databao_context_engine/mcp/mcp_server.py +63 -0
- databao_context_engine/mcp/retrieve_tool.py +22 -0
- databao_context_engine/pluginlib/__init__.py +0 -0
- databao_context_engine/pluginlib/build_plugin.py +107 -0
- databao_context_engine/pluginlib/config.py +37 -0
- databao_context_engine/pluginlib/plugin_utils.py +68 -0
- databao_context_engine/plugins/__init__.py +0 -0
- databao_context_engine/plugins/athena_db_plugin.py +12 -0
- databao_context_engine/plugins/base_db_plugin.py +45 -0
- databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
- databao_context_engine/plugins/databases/__init__.py +0 -0
- databao_context_engine/plugins/databases/athena_introspector.py +101 -0
- databao_context_engine/plugins/databases/base_introspector.py +144 -0
- databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
- databao_context_engine/plugins/databases/database_chunker.py +69 -0
- databao_context_engine/plugins/databases/databases_types.py +114 -0
- databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
- databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
- databao_context_engine/plugins/databases/introspection_scope.py +74 -0
- databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
- databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
- databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
- databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
- databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
- databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
- databao_context_engine/plugins/mssql_db_plugin.py +12 -0
- databao_context_engine/plugins/mysql_db_plugin.py +12 -0
- databao_context_engine/plugins/parquet_plugin.py +32 -0
- databao_context_engine/plugins/plugin_loader.py +110 -0
- databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
- databao_context_engine/plugins/resources/__init__.py +0 -0
- databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
- databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
- databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
- databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
- databao_context_engine/project/__init__.py +0 -0
- databao_context_engine/project/datasource_discovery.py +141 -0
- databao_context_engine/project/info.py +44 -0
- databao_context_engine/project/init_project.py +102 -0
- databao_context_engine/project/layout.py +127 -0
- databao_context_engine/project/project_config.py +32 -0
- databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
- databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
- databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
- databao_context_engine/project/runs.py +39 -0
- databao_context_engine/project/types.py +134 -0
- databao_context_engine/retrieve_embeddings/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
- databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
- databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
- databao_context_engine/retrieve_embeddings/public/api.py +3 -0
- databao_context_engine/serialisation/__init__.py +0 -0
- databao_context_engine/serialisation/yaml.py +35 -0
- databao_context_engine/services/__init__.py +0 -0
- databao_context_engine/services/chunk_embedding_service.py +104 -0
- databao_context_engine/services/embedding_shard_resolver.py +64 -0
- databao_context_engine/services/factories.py +88 -0
- databao_context_engine/services/models.py +12 -0
- databao_context_engine/services/persistence_service.py +61 -0
- databao_context_engine/services/run_name_policy.py +8 -0
- databao_context_engine/services/table_name_policy.py +15 -0
- databao_context_engine/storage/__init__.py +0 -0
- databao_context_engine/storage/connection.py +32 -0
- databao_context_engine/storage/exceptions/__init__.py +0 -0
- databao_context_engine/storage/exceptions/exceptions.py +6 -0
- databao_context_engine/storage/migrate.py +127 -0
- databao_context_engine/storage/migrations/V01__init.sql +63 -0
- databao_context_engine/storage/models.py +51 -0
- databao_context_engine/storage/repositories/__init__.py +0 -0
- databao_context_engine/storage/repositories/chunk_repository.py +130 -0
- databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
- databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
- databao_context_engine/storage/repositories/embedding_repository.py +113 -0
- databao_context_engine/storage/repositories/factories.py +35 -0
- databao_context_engine/storage/repositories/run_repository.py +157 -0
- databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
- databao_context_engine/storage/transaction.py +14 -0
- databao_context_engine/system/__init__.py +0 -0
- databao_context_engine/system/properties.py +13 -0
- databao_context_engine/templating/__init__.py +0 -0
- databao_context_engine/templating/renderer.py +29 -0
- databao_context_engine-0.1.1.dist-info/METADATA +186 -0
- databao_context_engine-0.1.1.dist-info/RECORD +135 -0
- databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
- databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
databao_context_engine/datasource_config/check_config.py

```diff
@@ -0,0 +1,131 @@
+import logging
+import os
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+
+from pydantic import ValidationError
+
+from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
+from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
+from databao_context_engine.plugins.plugin_loader import load_plugins
+from databao_context_engine.project.datasource_discovery import (
+    discover_datasources,
+    get_datasource_descriptors,
+    prepare_source,
+)
+from databao_context_engine.project.layout import ensure_project_dir
+from databao_context_engine.project.types import DatasourceId, PreparedConfig
+
+logger = logging.getLogger(__name__)
+
+
+class DatasourceConnectionStatus(Enum):
+    VALID = "Valid"
+    INVALID = "Invalid"
+    UNKNOWN = "Unknown"
+
+
+@dataclass(kw_only=True)
+class CheckDatasourceConnectionResult:
+    datasource_id: DatasourceId
+    connection_status: DatasourceConnectionStatus
+    summary: str | None
+    full_message: str | None = None
+
+    def format(self, show_summary_only: bool = True) -> str:
+        formatted_string = str(self.connection_status.value)
+        if self.summary:
+            formatted_string += f" - {self.summary}"
+        if not show_summary_only and self.full_message:
+            formatted_string += f"{os.linesep}{self.full_message}"
+
+        return formatted_string
+
+
+def check_datasource_connection(
+    project_dir: Path, *, datasource_ids: list[DatasourceId] | None = None
+) -> dict[DatasourceId, CheckDatasourceConnectionResult]:
+    ensure_project_dir(project_dir)
+
+    if datasource_ids:
+        logger.info(f"Validating datasource(s): {datasource_ids}")
+        datasources_to_traverse = get_datasource_descriptors(project_dir, datasource_ids)
+    else:
+        datasources_to_traverse = discover_datasources(project_dir)
+
+    plugins = load_plugins(exclude_file_plugins=True)
+
+    result = {}
+    for discovered_datasource in datasources_to_traverse:
+        result_key = DatasourceId.from_datasource_config_file_path(discovered_datasource.path)
+
+        try:
+            prepared_source = prepare_source(discovered_datasource)
+        except Exception as e:
+            result[result_key] = CheckDatasourceConnectionResult(
+                datasource_id=result_key,
+                connection_status=DatasourceConnectionStatus.INVALID,
+                summary="Failed to prepare source",
+                full_message=str(e),
+            )
+            continue
+
+        plugin = plugins.get(prepared_source.datasource_type)
+        if plugin is None:
+            logger.debug(
+                "No plugin for '%s' (datasource=%s) — skipping.",
+                prepared_source.datasource_type.full_type,
+                prepared_source.path,
+            )
+            result[result_key] = CheckDatasourceConnectionResult(
+                datasource_id=result_key,
+                connection_status=DatasourceConnectionStatus.INVALID,
+                summary="No compatible plugin found",
+            )
+            continue
+
+        if isinstance(prepared_source, PreparedConfig) and isinstance(plugin, BuildDatasourcePlugin):
+            try:
+                check_connection_for_datasource(
+                    plugin=plugin,
+                    datasource_type=prepared_source.datasource_type,
+                    config=prepared_source.config,
+                    datasource_name=prepared_source.datasource_name,
+                )
+
+                result[result_key] = CheckDatasourceConnectionResult(
+                    datasource_id=result_key, connection_status=DatasourceConnectionStatus.VALID, summary=None
+                )
+            except Exception as e:
+                logger.debug(
+                    f"Connection failed for {prepared_source.datasource_name} with error: {str(e)}",
+                    exc_info=True,
+                    stack_info=True,
+                )
+                result[result_key] = get_validation_result_from_error(result_key, e)
+
+    return result
+
+
+def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
+    if isinstance(e, ValidationError):
+        return CheckDatasourceConnectionResult(
+            datasource_id=datasource_id,
+            connection_status=DatasourceConnectionStatus.INVALID,
+            summary="Config file is invalid",
+            full_message=str(e),
+        )
+    elif isinstance(e, NotImplementedError | NotSupportedError):
+        return CheckDatasourceConnectionResult(
+            datasource_id=datasource_id,
+            connection_status=DatasourceConnectionStatus.UNKNOWN,
+            summary="Plugin doesn't support validating its config",
+        )
+    else:
+        return CheckDatasourceConnectionResult(
+            datasource_id=datasource_id,
+            connection_status=DatasourceConnectionStatus.INVALID,
+            summary="Connection with the datasource can not be established",
+            full_message=str(e),
+        )
```
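Taken together, `check_datasource_connection` returns a map keyed by `DatasourceId`, and each result's `format()` is designed for CLI output. A minimal usage sketch (the project path is hypothetical and must point at an initialised project):

```python
from pathlib import Path

from databao_context_engine.datasource_config.check_config import check_datasource_connection

# Hypothetical project directory; must already be a valid DCE project.
project_dir = Path("./my-dce-project")

# Check every discovered datasource (pass datasource_ids=[...] to narrow the scan).
results = check_datasource_connection(project_dir)

for datasource_id, result in results.items():
    # show_summary_only=False appends the full error message on a new line.
    print(f"{datasource_id}: {result.format(show_summary_only=False)}")
```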
databao_context_engine/datasource_config/datasource_context.py

```diff
@@ -0,0 +1,60 @@
+import os
+from dataclasses import dataclass
+from pathlib import Path
+
+from databao_context_engine.project.layout import ProjectLayout
+from databao_context_engine.project.runs import get_run_dir, resolve_run_name
+from databao_context_engine.project.types import DatasourceId
+
+
+@dataclass(eq=True, frozen=True)
+class DatasourceContext:
+    datasource_id: DatasourceId
+    # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
+    context: str
+
+
+def get_datasource_context(
+    project_layout: ProjectLayout, datasource_id: DatasourceId, run_name: str | None = None
+) -> DatasourceContext:
+    run_dir = _resolve_run_dir(project_layout, run_name)
+
+    context_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())
+    if not context_path.is_file():
+        raise ValueError(f"Context file not found for datasource {str(datasource_id)} in run {run_dir.name}")
+
+    context = context_path.read_text()
+    return DatasourceContext(datasource_id=datasource_id, context=context)
+
+
+def get_all_contexts(project_layout: ProjectLayout, run_name: str | None = None) -> list[DatasourceContext]:
+    run_dir = _resolve_run_dir(project_layout, run_name)
+
+    result = []
+    for main_type_dir in sorted((p for p in run_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
+        for context_path in sorted(
+            (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
+        ):
+            result.append(
+                DatasourceContext(
+                    # FIXME: The extension will always be yaml here even if the datasource is a file with a different extension
+                    datasource_id=DatasourceId.from_datasource_config_file_path(context_path),
+                    context=context_path.read_text(),
+                )
+            )
+
+    return result
+
+
+def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
+    return f"# ===== {str(datasource_id)} ====={os.linesep}"
+
+
+def _resolve_run_dir(project_layout: ProjectLayout, run_name: str | None) -> Path:
+    resolved_run_name = resolve_run_name(project_layout=project_layout, run_name=run_name)
+
+    run_dir = get_run_dir(project_dir=project_layout.project_dir, run_name=resolved_run_name)
+    if not run_dir.is_dir():
+        raise ValueError(f"Run {resolved_run_name} does not exist at {run_dir.resolve()}")
+
+    return run_dir
```
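`get_all_contexts` pairs naturally with `get_context_header_for_datasource` to assemble one flat context document. A sketch, assuming `ProjectLayout` can be constructed from a project directory as shown; its definition is not part of this excerpt, so treat that constructor call as illustrative:

```python
from pathlib import Path

from databao_context_engine.datasource_config.datasource_context import (
    get_all_contexts,
    get_context_header_for_datasource,
)
from databao_context_engine.project.layout import ProjectLayout

# Assumption: ProjectLayout accepts the project directory like this; the class
# body is not shown in this diff, so this call is illustrative only.
layout = ProjectLayout(project_dir=Path("./my-dce-project"))

# run_name=None selects the default run via resolve_run_name(); pass a name to pin one.
parts = []
for ctx in get_all_contexts(layout, run_name=None):
    parts.append(get_context_header_for_datasource(ctx.datasource_id))
    parts.append(ctx.context)

print("".join(parts))
```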
databao_context_engine/event_journal/writer.py

```diff
@@ -0,0 +1,29 @@
+import datetime
+import json
+from pathlib import Path
+from uuid import UUID, uuid1
+
+from databao_context_engine.system.properties import get_dce_path
+
+
+def get_journal_file(dce_path: Path) -> Path:
+    return dce_path / "event-journal" / "journal.txt"
+
+
+def log_event(*, project_id: UUID, dce_version: str, event_type: str, event_id: UUID = uuid1(), **kwargs):
+    current_timestamp = datetime.datetime.now().isoformat()
+    journal_file = get_journal_file(get_dce_path())
+    journal_file.parent.mkdir(parents=True, exist_ok=True)
+    with journal_file.open("a+", encoding="utf-8") as journal_handle:
+        event_json = json.dumps(
+            {
+                "id": str(event_id),
+                "project_id": str(project_id),
+                "dce_version": dce_version,
+                "timestamp": current_timestamp,
+                "type": event_type,
+                **kwargs,
+            }
+        )
+        journal_handle.write(event_json)
+        journal_handle.write("\n")
```
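One Python subtlety in `log_event`: the default `event_id: UUID = uuid1()` is evaluated once, at import time, so every call that omits `event_id` records the same UUID. Callers that need distinct ids should pass one explicitly, as in this sketch (the event type and extra field are hypothetical):

```python
from uuid import uuid1, uuid4

from databao_context_engine.event_journal.writer import log_event

# project_id and dce_version are illustrative values.
project_id = uuid4()

# Passing event_id explicitly sidesteps the shared import-time default.
log_event(
    project_id=project_id,
    dce_version="0.1.1",
    event_type="build_started",  # hypothetical event type
    event_id=uuid1(),
    datasource_count=3,  # extra kwargs are merged into the JSON record
)
```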
databao_context_engine/generate_configs_schemas.py

```diff
@@ -0,0 +1,92 @@
+import logging
+import os
+from typing import cast
+
+import click
+
+from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin
+from databao_context_engine.pluginlib.plugin_utils import format_json_schema_for_output, generate_json_schema
+from databao_context_engine.plugins.plugin_loader import load_plugins
+
+logger = logging.getLogger(__name__)
+
+
+@click.command()
+@click.option(
+    "-i",
+    "--include-plugins",
+    multiple=True,
+    type=str,
+    help="""
+    Plugin ID for the plugins to include in the json schema generation (-i can be specified multiple times to include multiple plugins).
+    When set, only the plugins in this list are included in the json schema generation.
+    This can't be used in conjunction with --exclude-plugins
+    """,
+)
+@click.option(
+    "-e",
+    "--exclude-plugins",
+    multiple=True,
+    help="""
+    Plugin ID for the plugins to exclude from the json schema generation (-e can be specified multiple times to exclude multiple plugins).
+    This can't be used in conjunction with --include-plugins
+    """,
+)
+def generate_configs_schemas(
+    include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
+):
+    try:
+        schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)
+
+        print(f"{os.linesep}{os.linesep}".join(schema_list))
+    except Exception as e:
+        print(str(e))
+
+
+def _generate_json_schema_output_for_plugins(
+    include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
+) -> list[str]:
+    if include_plugins and exclude_plugins:
+        raise ValueError("Can't use --include-plugins and --exclude-plugins together")
+
+    filtered_plugins = _get_plugins_for_schema_generation(include_plugins, exclude_plugins)
+
+    if len(filtered_plugins) == 0:
+        if include_plugins:
+            raise ValueError(f"No plugin found with id in {include_plugins}")
+        elif exclude_plugins:
+            raise ValueError(f"No plugin found when excluding {exclude_plugins}")
+        else:
+            raise ValueError("No plugin found")
+
+    results = []
+    for plugin in filtered_plugins:
+        if not isinstance(plugin, BuildDatasourcePlugin):
+            continue
+
+        json_schema = generate_json_schema(plugin)
+        if json_schema is not None:
+            results.append(format_json_schema_for_output(plugin, json_schema))
+
+    return results
+
+
+def _get_plugins_for_schema_generation(
+    include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
+) -> list[BuildDatasourcePlugin]:
+    all_plugins = load_plugins(exclude_file_plugins=True).values()
+
+    return [
+        cast(BuildDatasourcePlugin, plugin)
+        for plugin in all_plugins
+        if (plugin.id in include_plugins if include_plugins else True)
+        and (plugin.id not in exclude_plugins if exclude_plugins else True)
+    ]
+
+
+def main():
+    generate_configs_schemas()
+
+
+if __name__ == "__main__":
+    main()
```
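Because `generate_configs_schemas` is a plain `click` command, it can be exercised without the console entry point via click's test runner. A sketch; the plugin id `postgresql` is a guess based on the shipped plugin modules:

```python
from click.testing import CliRunner

from databao_context_engine.generate_configs_schemas import generate_configs_schemas

runner = CliRunner()

# -i may be repeated; here generation is restricted to a single (assumed) plugin id.
result = runner.invoke(generate_configs_schemas, ["-i", "postgresql"])

print(result.exit_code)  # 0 even on handled errors, since the command prints them
print(result.output)     # the JSON schema(s), separated by blank lines
```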
databao_context_engine/init_project.py

```diff
@@ -0,0 +1,18 @@
+from pathlib import Path
+
+from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
+from databao_context_engine.project.init_project import init_project_dir
+from databao_context_engine.project.layout import is_project_dir_valid
+
+
+def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
+    initialised_project_dir = init_project_dir(project_dir=project_dir)
+
+    return DatabaoContextProjectManager(project_dir=initialised_project_dir)
+
+
+def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
+    if is_project_dir_valid(project_dir):
+        return DatabaoContextProjectManager(project_dir=project_dir)
+
+    return init_dce_project(project_dir=project_dir)
```
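`init_or_get_dce_project` is the idempotent entry point: it wraps an already-valid project directory as-is and initialises anything else. For example:

```python
from pathlib import Path

from databao_context_engine.init_project import init_or_get_dce_project

# Safe to call repeatedly: a valid project dir is reused, anything else is initialised first.
manager = init_or_get_dce_project(Path("./my-dce-project"))
```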
databao_context_engine/introspection/property_extract.py

```diff
@@ -0,0 +1,202 @@
+import types
+from dataclasses import MISSING, fields, is_dataclass
+from typing import Annotated, Any, ForwardRef, Union, get_origin, get_type_hints
+
+from pydantic import BaseModel, _internal
+from pydantic_core import PydanticUndefinedType
+
+from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation, ConfigPropertyDefinition
+
+
+def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
+    return _get_property_list_from_type(parent_type=root_type, is_root_type=True)
+
+
+def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
+    if is_dataclass(parent_type):
+        return _get_property_list_from_dataclass(parent_type=parent_type)
+
+    try:
+        if issubclass(parent_type, BaseModel):
+            return _get_property_list_from_pydantic_base_model(parent_type=parent_type)
+    except TypeError:
+        # when trying to compare ABC Metadata classes to BaseModel, e.g: issubclass(Mapping[str, str], BaseModel)
+        # issubclass is raising a TypeError: issubclass() arg 1 must be a class
+        pass
+
+    return _get_property_list_from_type_hints(parent_type=parent_type, is_root_type=is_root_type)
+
+
+def _get_property_list_from_type_hints(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
+    try:
+        type_hints = get_type_hints(parent_type, include_extras=True)
+    except TypeError as e:
+        if is_root_type:
+            # Ignore root types that don't have type hints like dict or list
+            return []
+        else:
+            # If we're evaluating a nested property, we want to propagate the exception
+            # to let the parent property know that this type should be ignored
+            raise e
+
+    result = []
+    for property_key, property_type in type_hints.items():
+        config_property = _create_property(property_type=property_type, property_name=property_key)
+
+        if config_property is not None:
+            result.append(config_property)
+
+    return result
+
+
+def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyDefinition]:
+    if not is_dataclass(parent_type):
+        raise ValueError(f"{parent_type} is not a dataclass")
+
+    dataclass_fields = fields(parent_type)
+
+    result = []
+    for field in dataclass_fields:
+        has_field_default = field.default is not None and field.default != MISSING
+
+        if isinstance(field.type, str):
+            try:
+                property_type = _evaluate_type_string(field.type)
+            except Exception:
+                continue
+        else:
+            property_type = field.type
+
+        property_for_field = _create_property(
+            property_type=property_type,
+            property_name=field.name,
+            property_default=field.default if has_field_default else None,
+            is_property_required=not has_field_default,
+        )
+
+        if property_for_field is not None:
+            result.append(property_for_field)
+
+    return result
+
+
+def _get_property_list_from_pydantic_base_model(parent_type: type):
+    if not issubclass(parent_type, BaseModel):
+        raise ValueError(f"{parent_type} is not a Pydantic BaseModel")
+
+    pydantic_fields = parent_type.model_fields
+    result = []
+
+    for field_name, field_info in pydantic_fields.items():
+        has_field_default = type(field_info.default) is not PydanticUndefinedType
+
+        if field_info.annotation is None:
+            # No type: ignore the field
+            continue
+
+        property_for_field = _create_property(
+            property_type=field_info.annotation,
+            property_name=field_name,
+            property_default=field_info.default if has_field_default else None,
+            is_property_required=not has_field_default,
+            annotation=next(
+                (metadata for metadata in field_info.metadata if isinstance(metadata, ConfigPropertyAnnotation)), None
+            ),
+        )
+
+        if property_for_field is not None:
+            result.append(property_for_field)
+
+    return result
+
+
+def _create_property(
+    *,
+    property_type: type,
+    property_name: str,
+    property_default: Any | None = None,
+    is_property_required: bool = False,
+    annotation: ConfigPropertyAnnotation | None = None,
+) -> ConfigPropertyDefinition | None:
+    annotation = annotation or _get_config_property_annotation(property_type)
+
+    if annotation is not None and annotation.ignored_for_config_wizard:
+        return None
+
+    actual_property_type = _read_actual_property_type(property_type)
+
+    try:
+        nested_properties = _get_property_list_from_type(parent_type=actual_property_type, is_root_type=False)
+    except TypeError:
+        return None
+
+    default_value = compute_default_value(
+        annotation=annotation,
+        property_default=property_default,
+        has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
+    )
+
+    return ConfigPropertyDefinition(
+        property_key=property_name,
+        property_type=actual_property_type if not nested_properties else None,
+        required=annotation.required if annotation else is_property_required,
+        default_value=default_value,
+        nested_properties=nested_properties if nested_properties else None,
+    )
+
+
+def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
+    if get_origin(property_type) is Annotated:
+        return next(
+            (metadata for metadata in property_type.__metadata__ if isinstance(metadata, ConfigPropertyAnnotation)),
+            None,
+        )
+
+    return None
+
+
+def _read_actual_property_type(property_type: type) -> type:
+    property_type_origin = get_origin(property_type)
+
+    if property_type_origin is Annotated:
+        return property_type.__origin__  # type: ignore[attr-defined]
+    elif property_type_origin is Union or property_type_origin is types.UnionType:
+        type_args = property_type.__args__  # type: ignore[attr-defined]
+        if len(type_args) == 2 and type(None) in type_args:
+            # Uses the actual type T when the Union is "T | None" (or "None | T")
+            return next(arg for arg in type_args if arg is not None)
+        else:
+            # Ignoring Union types when it is not used as type | None as we wouldn't know which type to pick
+            return type(None)
+
+    return property_type
+
+
+def compute_default_value(
+    *, annotation: ConfigPropertyAnnotation | None, property_default: Any | None = None, has_nested_properties: bool
+) -> str | None:
+    if has_nested_properties:
+        return None
+
+    if annotation is not None and annotation.default_value is not None:
+        return str(annotation.default_value)
+
+    if property_default is not None:
+        return str(property_default)
+
+    return None
+
+
+def _evaluate_type_string(property_type: str) -> type:
+    try:
+        # Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
+        return _internal._typing_extra.eval_type(property_type)
+    except Exception as initial_error:
+        try:
+            # Try to convert it ourselves if Pydantic didn't work
+            return ForwardRef(property_type)._evaluate(  # type: ignore[return-value]
+                globalns=globals(), localns=locals(), recursive_guard=frozenset()
+            )
+        except Exception as e:
+            # Ignore if we didn't manage to convert the str to a type
+            raise e from initial_error
```
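Only `get_property_list_from_type` is public here; it walks dataclasses, Pydantic models, and plain annotated classes into `ConfigPropertyDefinition` trees. A minimal sketch with a hypothetical dataclass config:

```python
from dataclasses import dataclass

from databao_context_engine.introspection.property_extract import get_property_list_from_type


@dataclass
class ExampleConfig:  # hypothetical plugin config, not part of the package
    host: str
    port: int = 5432
    password: str | None = None


for prop in get_property_list_from_type(ExampleConfig):
    # Fields with non-None defaults come back as optional, with the default stringified.
    print(prop.property_key, prop.required, prop.default_value)
```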
databao_context_engine/llm/config.py

```diff
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class OllamaConfig:
+    host: str = "127.0.0.1"
+    port: int = 11434
+
+    timeout: float = 30.0
+    headers: Optional[dict[str, str]] = None
+
+    bin_path: str = "ollama"
+    work_dir: Optional[Path] = None
+    extra_env: Optional[dict[str, str]] = None
+
+    @property
+    def base_url(self) -> str:
+        return f"http://{self.host}:{self.port}"
```
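`OllamaConfig` is frozen, so endpoint details are fixed at construction and `base_url` is derived from them:

```python
from databao_context_engine.llm.config import OllamaConfig

# Defaults target a local daemon on 127.0.0.1:11434; the host below is illustrative.
config = OllamaConfig(host="ollama.internal", port=11434, timeout=60.0)

print(config.base_url)  # http://ollama.internal:11434
```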
databao_context_engine/llm/descriptions/ollama.py

```diff
@@ -0,0 +1,21 @@
+from databao_context_engine.llm.descriptions.provider import DescriptionProvider
+from databao_context_engine.llm.service import OllamaService
+
+
+class OllamaDescriptionProvider(DescriptionProvider):
+    def __init__(self, *, service: OllamaService, model_id: str):
+        self._service = service
+        self._model_id = model_id
+
+    @property
+    def describer(self) -> str:
+        return "ollama"
+
+    @property
+    def model_id(self) -> str:
+        return self._model_id
+
+    def describe(self, text: str, context: str) -> str:
+        description = self._service.describe(model=self._model_id, text=text, context=context)
+
+        return description
```
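The provider is a thin adapter; the real work happens in `OllamaService.describe`, defined elsewhere in the wheel. For a unit test, a stub exposing the same keyword signature is enough (the stub below is illustrative, not part of the package):

```python
from databao_context_engine.llm.descriptions.ollama import OllamaDescriptionProvider


class StubOllamaService:
    """Test double matching the describe(model=, text=, context=) call shape."""

    def describe(self, *, model: str, text: str, context: str) -> str:
        return f"[{model}] {text} ({context})"


provider = OllamaDescriptionProvider(service=StubOllamaService(), model_id="llama3")  # type: ignore[arg-type]
print(provider.describer)  # "ollama"
print(provider.describe("orders table", "sales schema"))
```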
databao_context_engine/llm/embeddings/ollama.py

```diff
@@ -0,0 +1,37 @@
+from collections.abc import Sequence
+
+from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
+from databao_context_engine.llm.service import OllamaService
+
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    def __init__(
+        self,
+        *,
+        service: OllamaService,
+        model_id: str,
+        dim: int = 768,
+    ):
+        self._service = service
+        self._model_id = model_id
+        self._dim: int = dim
+
+    @property
+    def embedder(self) -> str:
+        return "ollama"
+
+    @property
+    def model_id(self) -> str:
+        return self._model_id
+
+    @property
+    def dim(self) -> int:
+        return self._dim
+
+    def embed(self, text: str) -> Sequence[float]:
+        vec = self._service.embed(model=self._model_id, text=text)
+
+        if len(vec) != self._dim:
+            raise ValueError(f"provider returned dim={len(vec)} but expected {self._dim}")
+
+        return [float(x) for x in vec]
```
databao_context_engine/llm/embeddings/provider.py

```diff
@@ -0,0 +1,13 @@
+from collections.abc import Sequence
+from typing import Protocol
+
+
+class EmbeddingProvider(Protocol):
+    @property
+    def embedder(self) -> str: ...
+    @property
+    def model_id(self) -> str: ...
+    @property
+    def dim(self) -> int: ...
+
+    def embed(self, text: str) -> Sequence[float]: ...
```
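Since `EmbeddingProvider` is a `Protocol`, any structurally matching class satisfies it without subclassing: `OllamaEmbeddingProvider` above is one implementation, and a deterministic fake for tests is another. A sketch:

```python
from collections.abc import Sequence

from databao_context_engine.llm.embeddings.provider import EmbeddingProvider


class FakeEmbeddingProvider:
    """Hypothetical in-memory provider; useful where a real model is overkill."""

    @property
    def embedder(self) -> str:
        return "fake"

    @property
    def model_id(self) -> str:
        return "fake-0"

    @property
    def dim(self) -> int:
        return 4

    def embed(self, text: str) -> Sequence[float]:
        # Derive a stable pseudo-embedding from the text so tests are repeatable.
        return [float((hash(text) >> shift) % 100) / 100 for shift in (0, 8, 16, 24)]


provider: EmbeddingProvider = FakeEmbeddingProvider()  # structural typing: no subclassing needed
print(provider.embed("hello"), provider.dim)
```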
databao_context_engine/llm/errors.py

```diff
@@ -0,0 +1,16 @@
+class OllamaError(Exception):
+    """Base class for Ollama-related errors."""
+
+
+class OllamaTransientError(OllamaError):
+    """
+    Errors that are likely temporary (network issues, timeouts, 5xx, etc.).
+    Typically worth retrying.
+    """
+
+
+class OllamaPermanentError(OllamaError):
+    """
+    Errors that are unlikely to succeed on retry without changing inputs
+    or configuration (4xx, bad response schema, etc.).
+    """
```
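The transient/permanent split exists so callers can retry selectively. A minimal backoff loop over that taxonomy; the `call` placeholder stands in for any Ollama operation:

```python
import time

from databao_context_engine.llm.errors import OllamaTransientError


def with_retries(call, *, attempts: int = 3, base_delay: float = 0.5):
    """Retry only OllamaTransientError; let permanent errors surface immediately."""
    for attempt in range(attempts):
        try:
            return call()
        except OllamaTransientError:
            if attempt == attempts - 1:
                raise
            time.sleep(base_delay * 2**attempt)  # exponential backoff: 0.5s, 1s, 2s...


# Usage (hypothetical): with_retries(lambda: service.embed(model="nomic-embed-text", text="hi"))
```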