databao-context-engine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. databao_context_engine/__init__.py +35 -0
  2. databao_context_engine/build_sources/__init__.py +0 -0
  3. databao_context_engine/build_sources/internal/__init__.py +0 -0
  4. databao_context_engine/build_sources/internal/build_runner.py +111 -0
  5. databao_context_engine/build_sources/internal/build_service.py +77 -0
  6. databao_context_engine/build_sources/internal/build_wiring.py +52 -0
  7. databao_context_engine/build_sources/internal/export_results.py +43 -0
  8. databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
  9. databao_context_engine/build_sources/public/__init__.py +0 -0
  10. databao_context_engine/build_sources/public/api.py +4 -0
  11. databao_context_engine/cli/__init__.py +0 -0
  12. databao_context_engine/cli/add_datasource_config.py +130 -0
  13. databao_context_engine/cli/commands.py +256 -0
  14. databao_context_engine/cli/datasources.py +64 -0
  15. databao_context_engine/cli/info.py +32 -0
  16. databao_context_engine/config/__init__.py +0 -0
  17. databao_context_engine/config/log_config.yaml +16 -0
  18. databao_context_engine/config/logging.py +43 -0
  19. databao_context_engine/databao_context_project_manager.py +92 -0
  20. databao_context_engine/databao_engine.py +85 -0
  21. databao_context_engine/datasource_config/__init__.py +0 -0
  22. databao_context_engine/datasource_config/add_config.py +50 -0
  23. databao_context_engine/datasource_config/check_config.py +131 -0
  24. databao_context_engine/datasource_config/datasource_context.py +60 -0
  25. databao_context_engine/event_journal/__init__.py +0 -0
  26. databao_context_engine/event_journal/writer.py +29 -0
  27. databao_context_engine/generate_configs_schemas.py +92 -0
  28. databao_context_engine/init_project.py +18 -0
  29. databao_context_engine/introspection/__init__.py +0 -0
  30. databao_context_engine/introspection/property_extract.py +202 -0
  31. databao_context_engine/llm/__init__.py +0 -0
  32. databao_context_engine/llm/config.py +20 -0
  33. databao_context_engine/llm/descriptions/__init__.py +0 -0
  34. databao_context_engine/llm/descriptions/ollama.py +21 -0
  35. databao_context_engine/llm/descriptions/provider.py +10 -0
  36. databao_context_engine/llm/embeddings/__init__.py +0 -0
  37. databao_context_engine/llm/embeddings/ollama.py +37 -0
  38. databao_context_engine/llm/embeddings/provider.py +13 -0
  39. databao_context_engine/llm/errors.py +16 -0
  40. databao_context_engine/llm/factory.py +61 -0
  41. databao_context_engine/llm/install.py +227 -0
  42. databao_context_engine/llm/runtime.py +73 -0
  43. databao_context_engine/llm/service.py +159 -0
  44. databao_context_engine/main.py +19 -0
  45. databao_context_engine/mcp/__init__.py +0 -0
  46. databao_context_engine/mcp/all_results_tool.py +5 -0
  47. databao_context_engine/mcp/mcp_runner.py +16 -0
  48. databao_context_engine/mcp/mcp_server.py +63 -0
  49. databao_context_engine/mcp/retrieve_tool.py +22 -0
  50. databao_context_engine/pluginlib/__init__.py +0 -0
  51. databao_context_engine/pluginlib/build_plugin.py +107 -0
  52. databao_context_engine/pluginlib/config.py +37 -0
  53. databao_context_engine/pluginlib/plugin_utils.py +68 -0
  54. databao_context_engine/plugins/__init__.py +0 -0
  55. databao_context_engine/plugins/athena_db_plugin.py +12 -0
  56. databao_context_engine/plugins/base_db_plugin.py +45 -0
  57. databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
  58. databao_context_engine/plugins/databases/__init__.py +0 -0
  59. databao_context_engine/plugins/databases/athena_introspector.py +101 -0
  60. databao_context_engine/plugins/databases/base_introspector.py +144 -0
  61. databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
  62. databao_context_engine/plugins/databases/database_chunker.py +69 -0
  63. databao_context_engine/plugins/databases/databases_types.py +114 -0
  64. databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
  65. databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
  66. databao_context_engine/plugins/databases/introspection_scope.py +74 -0
  67. databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
  68. databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
  69. databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
  70. databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
  71. databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
  72. databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
  73. databao_context_engine/plugins/mssql_db_plugin.py +12 -0
  74. databao_context_engine/plugins/mysql_db_plugin.py +12 -0
  75. databao_context_engine/plugins/parquet_plugin.py +32 -0
  76. databao_context_engine/plugins/plugin_loader.py +110 -0
  77. databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
  78. databao_context_engine/plugins/resources/__init__.py +0 -0
  79. databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
  80. databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
  81. databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
  82. databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
  83. databao_context_engine/project/__init__.py +0 -0
  84. databao_context_engine/project/datasource_discovery.py +141 -0
  85. databao_context_engine/project/info.py +44 -0
  86. databao_context_engine/project/init_project.py +102 -0
  87. databao_context_engine/project/layout.py +127 -0
  88. databao_context_engine/project/project_config.py +32 -0
  89. databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
  90. databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
  91. databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
  92. databao_context_engine/project/runs.py +39 -0
  93. databao_context_engine/project/types.py +134 -0
  94. databao_context_engine/retrieve_embeddings/__init__.py +0 -0
  95. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  96. databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
  97. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
  98. databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
  99. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
  100. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  101. databao_context_engine/retrieve_embeddings/public/api.py +3 -0
  102. databao_context_engine/serialisation/__init__.py +0 -0
  103. databao_context_engine/serialisation/yaml.py +35 -0
  104. databao_context_engine/services/__init__.py +0 -0
  105. databao_context_engine/services/chunk_embedding_service.py +104 -0
  106. databao_context_engine/services/embedding_shard_resolver.py +64 -0
  107. databao_context_engine/services/factories.py +88 -0
  108. databao_context_engine/services/models.py +12 -0
  109. databao_context_engine/services/persistence_service.py +61 -0
  110. databao_context_engine/services/run_name_policy.py +8 -0
  111. databao_context_engine/services/table_name_policy.py +15 -0
  112. databao_context_engine/storage/__init__.py +0 -0
  113. databao_context_engine/storage/connection.py +32 -0
  114. databao_context_engine/storage/exceptions/__init__.py +0 -0
  115. databao_context_engine/storage/exceptions/exceptions.py +6 -0
  116. databao_context_engine/storage/migrate.py +127 -0
  117. databao_context_engine/storage/migrations/V01__init.sql +63 -0
  118. databao_context_engine/storage/models.py +51 -0
  119. databao_context_engine/storage/repositories/__init__.py +0 -0
  120. databao_context_engine/storage/repositories/chunk_repository.py +130 -0
  121. databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
  122. databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
  123. databao_context_engine/storage/repositories/embedding_repository.py +113 -0
  124. databao_context_engine/storage/repositories/factories.py +35 -0
  125. databao_context_engine/storage/repositories/run_repository.py +157 -0
  126. databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
  127. databao_context_engine/storage/transaction.py +14 -0
  128. databao_context_engine/system/__init__.py +0 -0
  129. databao_context_engine/system/properties.py +13 -0
  130. databao_context_engine/templating/__init__.py +0 -0
  131. databao_context_engine/templating/renderer.py +29 -0
  132. databao_context_engine-0.1.1.dist-info/METADATA +186 -0
  133. databao_context_engine-0.1.1.dist-info/RECORD +135 -0
  134. databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
  135. databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,131 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from pathlib import Path
6
+
7
+ from pydantic import ValidationError
8
+
9
+ from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
10
+ from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
11
+ from databao_context_engine.plugins.plugin_loader import load_plugins
12
+ from databao_context_engine.project.datasource_discovery import (
13
+ discover_datasources,
14
+ get_datasource_descriptors,
15
+ prepare_source,
16
+ )
17
+ from databao_context_engine.project.layout import ensure_project_dir
18
+ from databao_context_engine.project.types import DatasourceId, PreparedConfig
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class DatasourceConnectionStatus(Enum):
    """Outcome of a connectivity check for a single datasource."""

    VALID = "Valid"
    INVALID = "Invalid"
    # Used when the plugin cannot validate its config at all (see get_validation_result_from_error).
    UNKNOWN = "Unknown"
27
+
28
+
29
@dataclass(kw_only=True)
class CheckDatasourceConnectionResult:
    """Result of checking the connection for one datasource."""

    datasource_id: DatasourceId
    connection_status: DatasourceConnectionStatus
    summary: str | None
    full_message: str | None = None

    def format(self, show_summary_only: bool = True) -> str:
        """Render the status as a one-liner, optionally followed by the full message on a new line."""
        pieces = [str(self.connection_status.value)]
        if self.summary:
            pieces.append(f" - {self.summary}")
        if not show_summary_only and self.full_message:
            pieces.append(f"{os.linesep}{self.full_message}")
        return "".join(pieces)
44
+
45
+
46
def check_datasource_connection(
    project_dir: Path, *, datasource_ids: list[DatasourceId] | None = None
) -> dict[DatasourceId, CheckDatasourceConnectionResult]:
    """Check connectivity for the given datasources (or all discovered ones).

    Returns a mapping from datasource id to the outcome of its check. An entry
    is produced even when a datasource fails to prepare or has no matching plugin.
    """
    ensure_project_dir(project_dir)

    if datasource_ids:
        logger.info(f"Validating datasource(s): {datasource_ids}")
        datasources_to_traverse = get_datasource_descriptors(project_dir, datasource_ids)
    else:
        datasources_to_traverse = discover_datasources(project_dir)

    # File plugins are excluded: only connection-based datasources can be checked.
    plugins = load_plugins(exclude_file_plugins=True)

    result = {}
    for discovered_datasource in datasources_to_traverse:
        result_key = DatasourceId.from_datasource_config_file_path(discovered_datasource.path)

        try:
            prepared_source = prepare_source(discovered_datasource)
        except Exception as e:
            # Preparation failures (unreadable/invalid config) are reported per-datasource,
            # not raised, so the remaining datasources are still checked.
            result[result_key] = CheckDatasourceConnectionResult(
                datasource_id=result_key,
                connection_status=DatasourceConnectionStatus.INVALID,
                summary="Failed to prepare source",
                full_message=str(e),
            )
            continue

        plugin = plugins.get(prepared_source.datasource_type)
        if plugin is None:
            logger.debug(
                "No plugin for '%s' (datasource=%s) — skipping.",
                prepared_source.datasource_type.full_type,
                prepared_source.path,
            )
            result[result_key] = CheckDatasourceConnectionResult(
                datasource_id=result_key,
                connection_status=DatasourceConnectionStatus.INVALID,
                summary="No compatible plugin found",
            )
            continue

        # Only config-backed sources with a build plugin know how to test a connection.
        # NOTE(review): sources that fail this isinstance check get no entry in the
        # result dict at all — confirm that silently omitting them is intended.
        if isinstance(prepared_source, PreparedConfig) and isinstance(plugin, BuildDatasourcePlugin):
            try:
                check_connection_for_datasource(
                    plugin=plugin,
                    datasource_type=prepared_source.datasource_type,
                    config=prepared_source.config,
                    datasource_name=prepared_source.datasource_name,
                )

                result[result_key] = CheckDatasourceConnectionResult(
                    datasource_id=result_key, connection_status=DatasourceConnectionStatus.VALID, summary=None
                )
            except Exception as e:
                logger.debug(
                    f"Connection failed for {prepared_source.datasource_name} with error: {str(e)}",
                    exc_info=True,
                    stack_info=True,
                )
                result[result_key] = get_validation_result_from_error(result_key, e)

    return result
109
+
110
+
111
def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
    """Map an exception raised during a connection check to a check result."""
    if isinstance(e, ValidationError):
        status = DatasourceConnectionStatus.INVALID
        summary = "Config file is invalid"
        full_message = str(e)
    elif isinstance(e, NotImplementedError | NotSupportedError):
        # The plugin simply cannot validate its config: neither valid nor invalid.
        status = DatasourceConnectionStatus.UNKNOWN
        summary = "Plugin doesn't support validating its config"
        full_message = None
    else:
        status = DatasourceConnectionStatus.INVALID
        summary = "Connection with the datasource can not be established"
        full_message = str(e)

    return CheckDatasourceConnectionResult(
        datasource_id=datasource_id,
        connection_status=status,
        summary=summary,
        full_message=full_message,
    )
@@ -0,0 +1,60 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ from databao_context_engine.project.layout import ProjectLayout
6
+ from databao_context_engine.project.runs import get_run_dir, resolve_run_name
7
+ from databao_context_engine.project.types import DatasourceId
8
+
9
+
10
@dataclass(eq=True, frozen=True)
class DatasourceContext:
    """Immutable pairing of a datasource id with its built context document."""

    datasource_id: DatasourceId
    # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
    context: str
15
+
16
+
17
def get_datasource_context(
    project_layout: ProjectLayout, datasource_id: DatasourceId, run_name: str | None = None
) -> DatasourceContext:
    """Load the built context for a single datasource from a run directory.

    Raises ValueError when the datasource has no context file in the resolved run.
    """
    run_dir = _resolve_run_dir(project_layout, run_name)
    context_path = run_dir.joinpath(datasource_id.relative_path_to_context_file())

    if not context_path.is_file():
        raise ValueError(f"Context file not found for datasource {str(datasource_id)} in run {run_dir.name}")

    return DatasourceContext(datasource_id=datasource_id, context=context_path.read_text())
28
+
29
+
30
def get_all_contexts(project_layout: ProjectLayout, run_name: str | None = None) -> list[DatasourceContext]:
    """Collect every context file from the resolved run directory.

    Traverses type sub-directories and yaml files in case-insensitive name order.
    """
    run_dir = _resolve_run_dir(project_layout, run_name)

    contexts = []
    type_dirs = [entry for entry in run_dir.iterdir() if entry.is_dir()]
    for type_dir in sorted(type_dirs, key=lambda p: p.name.lower()):
        yaml_files = [entry for entry in type_dir.iterdir() if entry.suffix in (".yaml", ".yml")]
        for context_file in sorted(yaml_files, key=lambda p: p.name.lower()):
            contexts.append(
                DatasourceContext(
                    # FIXME: The extension will always be yaml here even if the datasource is a file with a different extension
                    datasource_id=DatasourceId.from_datasource_config_file_path(context_file),
                    context=context_file.read_text(),
                )
            )

    return contexts
47
+
48
+
49
def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
    """Build the comment banner that prefixes a datasource's context in merged output."""
    return "# ===== " + str(datasource_id) + " =====" + os.linesep
51
+
52
+
53
def _resolve_run_dir(project_layout: ProjectLayout, run_name: str | None) -> Path:
    """Resolve run_name (or the project's default run) to an existing run directory.

    Raises ValueError when the run directory does not exist.
    """
    name = resolve_run_name(project_layout=project_layout, run_name=run_name)
    run_dir = get_run_dir(project_dir=project_layout.project_dir, run_name=name)

    if run_dir.is_dir():
        return run_dir
    raise ValueError(f"Run {name} does not exist at {run_dir.resolve()}")
File without changes
@@ -0,0 +1,29 @@
1
+ import datetime
2
+ import json
3
+ from pathlib import Path
4
+ from uuid import UUID, uuid1
5
+
6
+ from databao_context_engine.system.properties import get_dce_path
7
+
8
+
9
def get_journal_file(dce_path: Path) -> Path:
    """Return the path of the append-only event journal under the DCE home directory."""
    return dce_path.joinpath("event-journal", "journal.txt")
11
+
12
+
13
def log_event(*, project_id: UUID, dce_version: str, event_type: str, event_id: UUID | None = None, **kwargs):
    """Append a single JSON event line to the journal file.

    Creates the journal directory on first use. Extra keyword arguments are
    merged into the event payload as-is.

    Args:
        project_id: Identifier of the project the event belongs to.
        dce_version: Version string of the engine emitting the event.
        event_type: Free-form event type tag.
        event_id: Optional explicit id for the event. A fresh UUID is generated
            per call when omitted. (The previous ``event_id: UUID = uuid1()``
            default was evaluated once at import time, so every event written
            without an explicit id shared the same id.)
    """
    if event_id is None:
        event_id = uuid1()
    current_timestamp = datetime.datetime.now().isoformat()
    journal_file = get_journal_file(get_dce_path())
    journal_file.parent.mkdir(parents=True, exist_ok=True)
    with journal_file.open("a+", encoding="utf-8") as journal_handle:
        event_json = json.dumps(
            {
                "id": str(event_id),
                "project_id": str(project_id),
                "dce_version": dce_version,
                "timestamp": current_timestamp,
                "type": event_type,
                **kwargs,
            }
        )
        journal_handle.write(event_json)
        journal_handle.write("\n")
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import os
3
+ from typing import cast
4
+
5
+ import click
6
+
7
+ from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin
8
+ from databao_context_engine.pluginlib.plugin_utils import format_json_schema_for_output, generate_json_schema
9
+ from databao_context_engine.plugins.plugin_loader import load_plugins
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@click.command()
@click.option(
    "-i",
    "--include-plugins",
    multiple=True,
    type=str,
    help="""
    Plugin ID for the plugins to include in the json schema generation (-i can be specified multiple times to include multiple plugins).
    When set, only the plugins in this list are included in the json schema generation.
    This can't be used in conjunction with --exclude-plugins
    """,
)
@click.option(
    "-e",
    "--exclude-plugins",
    multiple=True,
    help="""
    Plugin ID for the plugins to exclude from the json schema generation (-e can be specified multiple times to exclude multiple plugins).
    This can't be used in conjunction with --include-plugins
    """,
)
def generate_configs_schemas(
    include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
):
    """Print the JSON config schema of each selected build plugin to stdout."""
    try:
        schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)

        # Separate each plugin's schema with one blank line.
        print(f"{os.linesep}{os.linesep}".join(schema_list))
    except Exception as e:
        # NOTE(review): errors are printed to stdout and swallowed, so the command
        # always exits 0 — confirm this is intended for scripted consumers.
        print(str(e))
44
+
45
+
46
+ def _generate_json_schema_output_for_plugins(
47
+ include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
48
+ ) -> list[str]:
49
+ if include_plugins and exclude_plugins:
50
+ raise ValueError("Can't use --include-plugins and --exclude-plugins together")
51
+
52
+ filtered_plugins = _get_plugins_for_schema_generation(include_plugins, exclude_plugins)
53
+
54
+ if len(filtered_plugins) == 0:
55
+ if include_plugins:
56
+ raise ValueError(f"No plugin found with id in {include_plugins}")
57
+ elif exclude_plugins:
58
+ raise ValueError(f"No plugin found when excluding {exclude_plugins}")
59
+ else:
60
+ raise ValueError("No plugin found")
61
+
62
+ results = []
63
+ for plugin in filtered_plugins:
64
+ if not isinstance(plugin, BuildDatasourcePlugin):
65
+ continue
66
+
67
+ json_schema = generate_json_schema(plugin)
68
+ if json_schema is not None:
69
+ results.append(format_json_schema_for_output(plugin, json_schema))
70
+
71
+ return results
72
+
73
+
74
def _get_plugins_for_schema_generation(
    include_plugins: tuple[str, ...] | None = None, exclude_plugins: tuple[str, ...] | None = None
) -> list[BuildDatasourcePlugin]:
    """Load all non-file plugins and keep those matching the include/exclude filters."""
    selected: list[BuildDatasourcePlugin] = []
    for plugin in load_plugins(exclude_file_plugins=True).values():
        # An empty/None filter tuple means "no restriction" for that side.
        if include_plugins and plugin.id not in include_plugins:
            continue
        if exclude_plugins and plugin.id in exclude_plugins:
            continue
        selected.append(cast(BuildDatasourcePlugin, plugin))
    return selected
85
+
86
+
87
def main():
    """Console entry point for the schema generation command."""
    generate_configs_schemas()


if __name__ == "__main__":
    main()
@@ -0,0 +1,18 @@
1
+ from pathlib import Path
2
+
3
+ from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
4
+ from databao_context_engine.project.init_project import init_project_dir
5
+ from databao_context_engine.project.layout import is_project_dir_valid
6
+
7
+
8
def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
    """Initialise a new DCE project layout and return a manager bound to it."""
    prepared_dir = init_project_dir(project_dir=project_dir)
    return DatabaoContextProjectManager(project_dir=prepared_dir)
12
+
13
+
14
def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
    """Return a manager for an existing valid project, initialising one first if needed."""
    if not is_project_dir_valid(project_dir):
        return init_dce_project(project_dir=project_dir)
    return DatabaoContextProjectManager(project_dir=project_dir)
File without changes
@@ -0,0 +1,202 @@
1
+ import types
2
+ from dataclasses import MISSING, fields, is_dataclass
3
+ from typing import Annotated, Any, ForwardRef, Union, get_origin, get_type_hints
4
+
5
+ from pydantic import BaseModel, _internal
6
+ from pydantic_core import PydanticUndefinedType
7
+
8
+ from databao_context_engine.pluginlib.config import ConfigPropertyAnnotation, ConfigPropertyDefinition
9
+
10
+
11
def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
    """Extract the configurable properties declared by root_type (public entry point)."""
    return _get_property_list_from_type(parent_type=root_type, is_root_type=True)
13
+
14
+
15
def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
    """Dispatch property extraction based on what kind of type parent_type is.

    Dataclasses and Pydantic models get dedicated handling; anything else falls
    back to plain typing.get_type_hints() introspection.
    """
    if is_dataclass(parent_type):
        return _get_property_list_from_dataclass(parent_type=parent_type)

    try:
        if issubclass(parent_type, BaseModel):
            return _get_property_list_from_pydantic_base_model(parent_type=parent_type)
    except TypeError:
        # when trying to compare ABC Metadata classes to BaseModel, e.g: issubclass(Mapping[str, str], BaseModel)
        # issubclass is raising a TypeError: issubclass() arg 1 must be a class
        pass

    return _get_property_list_from_type_hints(parent_type=parent_type, is_root_type=is_root_type)
28
+
29
+
30
def _get_property_list_from_type_hints(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
    """Build property definitions from typing.get_type_hints() of parent_type."""
    try:
        type_hints = get_type_hints(parent_type, include_extras=True)
    except TypeError:
        # Root types without hints (e.g. dict, list) simply contribute nothing.
        if is_root_type:
            return []
        # For nested properties the error must bubble up so the parent drops them.
        raise

    candidates = (
        _create_property(property_type=hint, property_name=name) for name, hint in type_hints.items()
    )
    return [prop for prop in candidates if prop is not None]
50
+
51
+
52
def _get_property_list_from_dataclass(parent_type: type) -> list[ConfigPropertyDefinition]:
    """Build property definitions from the fields of a dataclass.

    Fields whose string annotations cannot be evaluated are skipped. A field
    counts as having a default only when the default is neither None nor the
    dataclasses MISSING sentinel.
    """
    if not is_dataclass(parent_type):
        raise ValueError(f"{parent_type} is not a dataclass")

    result = []
    for field in fields(parent_type):
        # Compare against the MISSING sentinel by identity: `!=` would invoke the
        # default value's __eq__, which can misbehave or raise for values whose
        # equality is non-standard (e.g. numpy arrays).
        has_field_default = field.default is not None and field.default is not MISSING

        if isinstance(field.type, str):
            # Annotations are stored as strings under `from __future__ import annotations`;
            # resolve them to real types, skipping fields we cannot evaluate.
            try:
                property_type = _evaluate_type_string(field.type)
            except Exception:
                continue
        else:
            property_type = field.type

        property_for_field = _create_property(
            property_type=property_type,
            property_name=field.name,
            property_default=field.default if has_field_default else None,
            is_property_required=not has_field_default,
        )

        if property_for_field is not None:
            result.append(property_for_field)

    return result
81
+
82
+
83
def _get_property_list_from_pydantic_base_model(parent_type: type):
    """Build property definitions from the declared fields of a Pydantic model."""
    if not issubclass(parent_type, BaseModel):
        raise ValueError(f"{parent_type} is not a Pydantic BaseModel")

    result = []
    for field_name, field_info in parent_type.model_fields.items():
        if field_info.annotation is None:
            # Untyped fields cannot be turned into config properties.
            continue

        has_field_default = type(field_info.default) is not PydanticUndefinedType
        config_annotation = next(
            (meta for meta in field_info.metadata if isinstance(meta, ConfigPropertyAnnotation)),
            None,
        )

        definition = _create_property(
            property_type=field_info.annotation,
            property_name=field_name,
            property_default=field_info.default if has_field_default else None,
            is_property_required=not has_field_default,
            annotation=config_annotation,
        )

        if definition is not None:
            result.append(definition)

    return result
111
+
112
+
113
def _create_property(
    *,
    property_type: type,
    property_name: str,
    property_default: Any | None = None,
    is_property_required: bool = False,
    annotation: ConfigPropertyAnnotation | None = None,
) -> ConfigPropertyDefinition | None:
    """Turn one attribute (name + type) into a ConfigPropertyDefinition, or None to skip it."""
    annotation = annotation or _get_config_property_annotation(property_type)
    if annotation is not None and annotation.ignored_for_config_wizard:
        return None

    actual_property_type = _read_actual_property_type(property_type)

    try:
        nested_properties = _get_property_list_from_type(parent_type=actual_property_type, is_root_type=False)
    except TypeError:
        # The nested type cannot be introspected: drop the whole property.
        return None

    has_nested = bool(nested_properties)
    default_value = compute_default_value(
        annotation=annotation,
        property_default=property_default,
        has_nested_properties=has_nested,
    )

    return ConfigPropertyDefinition(
        property_key=property_name,
        property_type=None if has_nested else actual_property_type,
        required=annotation.required if annotation else is_property_required,
        default_value=default_value,
        nested_properties=nested_properties if has_nested else None,
    )
146
+
147
+
148
def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
    """Return the ConfigPropertyAnnotation attached via Annotated[...], if any."""
    if get_origin(property_type) is not Annotated:
        return None

    for metadata in property_type.__metadata__:
        if isinstance(metadata, ConfigPropertyAnnotation):
            return metadata
    return None
156
+
157
+
158
+ def _read_actual_property_type(property_type: type) -> type:
159
+ property_type_origin = get_origin(property_type)
160
+
161
+ if property_type_origin is Annotated:
162
+ return property_type.__origin__ # type: ignore[attr-defined]
163
+ elif property_type_origin is Union or property_type_origin is types.UnionType:
164
+ type_args = property_type.__args__ # type: ignore[attr-defined]
165
+ if len(type_args) == 2 and type(None) in type_args:
166
+ # Uses the actual type T when the Union is "T | None" (or "None | T")
167
+ return next(arg for arg in type_args if arg is not None)
168
+ else:
169
+ # Ignoring Union types when it is not used as type | None as we wouldn't which type to pick
170
+ return type(None)
171
+
172
+ return property_type
173
+
174
+
175
def compute_default_value(
    *, annotation: ConfigPropertyAnnotation | None, property_default: Any | None = None, has_nested_properties: bool
) -> str | None:
    """Pick the default shown for a property: the annotation default wins over the field default.

    Properties with nested children never carry a default of their own.
    """
    if has_nested_properties:
        return None

    candidates = (
        annotation.default_value if annotation is not None else None,
        property_default,
    )
    for candidate in candidates:
        if candidate is not None:
            return str(candidate)

    return None
188
+
189
+
190
def _evaluate_type_string(property_type: str) -> type:
    """Evaluate a stringified annotation (e.g. "int | None") into a real type object.

    Raises when the string cannot be resolved; callers treat that as "skip this field".
    NOTE(review): both strategies are fragile — `pydantic._internal` is a private
    API and `ForwardRef._evaluate`'s signature changes between Python versions;
    confirm against the pinned pydantic/Python versions.
    """
    try:
        # Using a pydantic internal function for this, to avoid having to implement type evaluation manually...
        return _internal._typing_extra.eval_type(property_type)
    except Exception as initial_error:
        try:
            # Try to convert it ourselves if Pydantic didn't work
            return ForwardRef(property_type)._evaluate(  # type: ignore[return-value]
                globalns=globals(), localns=locals(), recursive_guard=frozenset()
            )
        except Exception as e:
            # Ignore if we didn't manage to convert the str to a type
            raise e from initial_error
File without changes
@@ -0,0 +1,20 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+
6
+ @dataclass(frozen=True)
7
+ class OllamaConfig:
8
+ host: str = "127.0.0.1"
9
+ port: int = 11434
10
+
11
+ timeout: float = 30.0
12
+ headers: Optional[dict[str, str]] = None
13
+
14
+ bin_path: str = "ollama"
15
+ work_dir: Optional[Path] = None
16
+ extra_env: Optional[dict[str, str]] = None
17
+
18
+ @property
19
+ def base_url(self) -> str:
20
+ return f"http://{self.host}:{self.port}"
File without changes
@@ -0,0 +1,21 @@
1
+ from databao_context_engine.llm.descriptions.provider import DescriptionProvider
2
+ from databao_context_engine.llm.service import OllamaService
3
+
4
+
5
class OllamaDescriptionProvider(DescriptionProvider):
    """DescriptionProvider implementation backed by a single Ollama model."""

    def __init__(self, *, service: OllamaService, model_id: str):
        self._service = service
        self._model_id = model_id

    @property
    def describer(self) -> str:
        """Name of the provider family."""
        return "ollama"

    @property
    def model_id(self) -> str:
        """Identifier of the Ollama model used for descriptions."""
        return self._model_id

    def describe(self, text: str, context: str) -> str:
        """Delegate description generation for text (with context) to the service."""
        return self._service.describe(model=self._model_id, text=text, context=context)
@@ -0,0 +1,10 @@
1
+ from typing import Protocol
2
+
3
+
4
class DescriptionProvider(Protocol):
    """Structural interface for anything that can generate text descriptions."""

    @property
    def describer(self) -> str: ...
    @property
    def model_id(self) -> str: ...

    def describe(self, text: str, context: str) -> str: ...
File without changes
@@ -0,0 +1,37 @@
1
+ from collections.abc import Sequence
2
+
3
+ from databao_context_engine.llm.embeddings.provider import EmbeddingProvider
4
+ from databao_context_engine.llm.service import OllamaService
5
+
6
+
7
class OllamaEmbeddingProvider(EmbeddingProvider):
    """EmbeddingProvider implementation backed by a single Ollama embedding model."""

    def __init__(
        self,
        *,
        service: OllamaService,
        model_id: str,
        dim: int = 768,
    ):
        self._service = service
        self._model_id = model_id
        self._dim: int = dim

    @property
    def embedder(self) -> str:
        """Name of the provider family."""
        return "ollama"

    @property
    def model_id(self) -> str:
        """Identifier of the Ollama embedding model."""
        return self._model_id

    @property
    def dim(self) -> int:
        """Expected dimensionality of returned vectors."""
        return self._dim

    def embed(self, text: str) -> Sequence[float]:
        """Embed text and validate the vector has the configured dimensionality."""
        raw = self._service.embed(model=self._model_id, text=text)

        if len(raw) != self._dim:
            raise ValueError(f"provider returned dim={len(raw)} but expected {self._dim}")

        # Normalise every component to a plain float.
        return [float(component) for component in raw]
@@ -0,0 +1,13 @@
1
+ from collections.abc import Sequence
2
+ from typing import Protocol
3
+
4
+
5
class EmbeddingProvider(Protocol):
    """Structural interface for anything that can embed text into fixed-size vectors."""

    @property
    def embedder(self) -> str: ...
    @property
    def model_id(self) -> str: ...
    @property
    def dim(self) -> int: ...

    def embed(self, text: str) -> Sequence[float]: ...
@@ -0,0 +1,16 @@
1
class OllamaError(Exception):
    """Common base for every error raised by the Ollama integration."""


class OllamaTransientError(OllamaError):
    """A likely-temporary failure (network hiccup, timeout, 5xx, etc.); retrying may help."""


class OllamaPermanentError(OllamaError):
    """
    A failure that will not go away on retry without changed inputs or
    configuration (4xx, malformed response schema, etc.).
    """
+ """