databao-context-engine 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. databao_context_engine/__init__.py +32 -7
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +31 -27
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +82 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +11 -18
  8. databao_context_engine/cli/add_datasource_config.py +49 -44
  9. databao_context_engine/cli/commands.py +40 -55
  10. databao_context_engine/cli/info.py +3 -2
  11. databao_context_engine/databao_context_engine.py +127 -0
  12. databao_context_engine/databao_context_project_manager.py +147 -30
  13. databao_context_engine/{datasource_config → datasources}/check_config.py +31 -23
  14. databao_context_engine/datasources/datasource_context.py +90 -0
  15. databao_context_engine/datasources/datasource_discovery.py +143 -0
  16. databao_context_engine/datasources/types.py +194 -0
  17. databao_context_engine/generate_configs_schemas.py +4 -5
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +76 -57
  20. databao_context_engine/llm/__init__.py +10 -0
  21. databao_context_engine/llm/api.py +57 -0
  22. databao_context_engine/llm/descriptions/ollama.py +1 -3
  23. databao_context_engine/llm/errors.py +2 -8
  24. databao_context_engine/llm/factory.py +5 -2
  25. databao_context_engine/llm/install.py +26 -30
  26. databao_context_engine/llm/runtime.py +3 -5
  27. databao_context_engine/llm/service.py +1 -3
  28. databao_context_engine/mcp/mcp_runner.py +4 -2
  29. databao_context_engine/mcp/mcp_server.py +9 -11
  30. databao_context_engine/plugin_loader.py +110 -0
  31. databao_context_engine/pluginlib/build_plugin.py +12 -29
  32. databao_context_engine/pluginlib/config.py +16 -2
  33. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  34. databao_context_engine/plugins/databases/athena/athena_introspector.py +161 -0
  35. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +6 -5
  36. databao_context_engine/plugins/databases/base_introspector.py +11 -12
  37. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  38. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +24 -16
  39. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  40. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +7 -12
  41. databao_context_engine/plugins/databases/introspection_model_builder.py +1 -1
  42. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  43. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  44. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  45. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +29 -21
  46. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  47. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +26 -15
  48. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  49. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  50. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +11 -18
  51. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  52. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  53. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +49 -17
  54. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  55. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  56. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  57. databao_context_engine/plugins/duckdb_tools.py +18 -0
  58. databao_context_engine/plugins/files/__init__.py +0 -0
  59. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  60. databao_context_engine/plugins/plugin_loader.py +58 -52
  61. databao_context_engine/plugins/resources/parquet_introspector.py +8 -20
  62. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  63. databao_context_engine/project/info.py +34 -2
  64. databao_context_engine/project/init_project.py +16 -7
  65. databao_context_engine/project/layout.py +14 -15
  66. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  67. databao_context_engine/retrieve_embeddings/retrieve_runner.py +17 -0
  68. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +12 -19
  69. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +46 -0
  70. databao_context_engine/serialization/__init__.py +0 -0
  71. databao_context_engine/{serialisation → serialization}/yaml.py +6 -6
  72. databao_context_engine/services/chunk_embedding_service.py +23 -11
  73. databao_context_engine/services/factories.py +1 -46
  74. databao_context_engine/services/persistence_service.py +11 -11
  75. databao_context_engine/storage/connection.py +11 -7
  76. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  77. databao_context_engine/storage/migrate.py +3 -5
  78. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  79. databao_context_engine/storage/models.py +2 -23
  80. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  81. databao_context_engine/storage/repositories/factories.py +1 -12
  82. databao_context_engine/storage/repositories/vector_search_repository.py +23 -16
  83. databao_context_engine/system/properties.py +4 -2
  84. databao_context_engine-0.1.5.dist-info/METADATA +228 -0
  85. databao_context_engine-0.1.5.dist-info/RECORD +135 -0
  86. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/WHEEL +1 -1
  87. databao_context_engine/build_sources/internal/build_service.py +0 -77
  88. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  89. databao_context_engine/build_sources/internal/export_results.py +0 -43
  90. databao_context_engine/build_sources/public/api.py +0 -4
  91. databao_context_engine/databao_engine.py +0 -85
  92. databao_context_engine/datasource_config/add_config.py +0 -50
  93. databao_context_engine/datasource_config/datasource_context.py +0 -60
  94. databao_context_engine/mcp/all_results_tool.py +0 -5
  95. databao_context_engine/mcp/retrieve_tool.py +0 -22
  96. databao_context_engine/plugins/databases/athena_introspector.py +0 -101
  97. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  98. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  99. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  100. databao_context_engine/project/datasource_discovery.py +0 -141
  101. databao_context_engine/project/runs.py +0 -39
  102. databao_context_engine/project/types.py +0 -134
  103. databao_context_engine/retrieve_embeddings/internal/export_results.py +0 -12
  104. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +0 -34
  105. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  106. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  107. databao_context_engine/services/run_name_policy.py +0 -8
  108. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  109. databao_context_engine/storage/repositories/run_repository.py +0 -157
  110. databao_context_engine-0.1.1.dist-info/METADATA +0 -186
  111. databao_context_engine-0.1.1.dist-info/RECORD +0 -135
  112. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  113. /databao_context_engine/{build_sources/public → plugins/databases/athena}/__init__.py +0 -0
  114. /databao_context_engine/{datasource_config → plugins/databases/clickhouse}/__init__.py +0 -0
  115. /databao_context_engine/{retrieve_embeddings/internal → plugins/databases/duckdb}/__init__.py +0 -0
  116. /databao_context_engine/{retrieve_embeddings/public → plugins/databases/mssql}/__init__.py +0 -0
  117. /databao_context_engine/{serialisation → plugins/databases/mysql}/__init__.py +0 -0
  118. {databao_context_engine-0.1.1.dist-info → databao_context_engine-0.1.5.dist-info}/entry_points.txt +0 -0
@@ -2,25 +2,26 @@ import logging
  import os
  from dataclasses import dataclass
  from enum import Enum
- from pathlib import Path

  from pydantic import ValidationError

- from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
- from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
- from databao_context_engine.plugins.plugin_loader import load_plugins
- from databao_context_engine.project.datasource_discovery import (
+ from databao_context_engine.datasources.datasource_discovery import (
  discover_datasources,
  get_datasource_descriptors,
  prepare_source,
  )
- from databao_context_engine.project.layout import ensure_project_dir
- from databao_context_engine.project.types import DatasourceId, PreparedConfig
+ from databao_context_engine.datasources.types import DatasourceId, PreparedConfig
+ from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
+ from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
+ from databao_context_engine.plugins.plugin_loader import load_plugins
+ from databao_context_engine.project.layout import ProjectLayout

  logger = logging.getLogger(__name__)


  class DatasourceConnectionStatus(Enum):
+ """Status of the connection to a datasource."""
+
  VALID = "Valid"
  INVALID = "Invalid"
  UNKNOWN = "Unknown"
@@ -28,6 +29,15 @@ class DatasourceConnectionStatus(Enum):

  @dataclass(kw_only=True)
  class CheckDatasourceConnectionResult:
+ """Result of checking the connection status of a datasource.
+
+ Attributes:
+ datasource_id: The id of the datasource.
+ connection_status: The connection status of the datasource.
+ summary: A summary of the connection status' error, or None if the connection is valid.
+ full_message: A detailed message about the connection status' error, or None if the connection is valid.
+ """
+
  datasource_id: DatasourceId
  connection_status: DatasourceConnectionStatus
  summary: str | None
@@ -44,21 +54,19 @@ class CheckDatasourceConnectionResult:


  def check_datasource_connection(
- project_dir: Path, *, datasource_ids: list[DatasourceId] | None = None
+ project_layout: ProjectLayout, *, datasource_ids: list[DatasourceId] | None = None
  ) -> dict[DatasourceId, CheckDatasourceConnectionResult]:
- ensure_project_dir(project_dir)
-
  if datasource_ids:
  logger.info(f"Validating datasource(s): {datasource_ids}")
- datasources_to_traverse = get_datasource_descriptors(project_dir, datasource_ids)
+ datasources_to_traverse = get_datasource_descriptors(project_layout, datasource_ids)
  else:
- datasources_to_traverse = discover_datasources(project_dir)
+ datasources_to_traverse = discover_datasources(project_layout)

  plugins = load_plugins(exclude_file_plugins=True)

  result = {}
  for discovered_datasource in datasources_to_traverse:
- result_key = DatasourceId.from_datasource_config_file_path(discovered_datasource.path)
+ result_key = DatasourceId.from_datasource_config_file_path(project_layout, discovered_datasource.path)

  try:
  prepared_source = prepare_source(discovered_datasource)
@@ -103,12 +111,12 @@ def check_datasource_connection(
  exc_info=True,
  stack_info=True,
  )
- result[result_key] = get_validation_result_from_error(result_key, e)
+ result[result_key] = _get_validation_result_from_error(result_key, e)

  return result


- def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
+ def _get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
  if isinstance(e, ValidationError):
  return CheckDatasourceConnectionResult(
  datasource_id=datasource_id,
@@ -116,16 +124,16 @@ def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
  summary="Config file is invalid",
  full_message=str(e),
  )
- elif isinstance(e, NotImplementedError | NotSupportedError):
+ if isinstance(e, NotImplementedError | NotSupportedError):
  return CheckDatasourceConnectionResult(
  datasource_id=datasource_id,
  connection_status=DatasourceConnectionStatus.UNKNOWN,
  summary="Plugin doesn't support validating its config",
  )
- else:
- return CheckDatasourceConnectionResult(
- datasource_id=datasource_id,
- connection_status=DatasourceConnectionStatus.INVALID,
- summary="Connection with the datasource can not be established",
- full_message=str(e),
- )
+
+ return CheckDatasourceConnectionResult(
+ datasource_id=datasource_id,
+ connection_status=DatasourceConnectionStatus.INVALID,
+ summary="Connection with the datasource can not be established",
+ full_message=str(e),
+ )
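
A minimal usage sketch of the reworked check_datasource_connection API above. It assumes a ProjectLayout instance named layout is already available (its construction is not shown in this hunk); the loop and variable names are illustrative only.

    from databao_context_engine.datasources.check_config import (
        DatasourceConnectionStatus,
        check_datasource_connection,
    )

    # layout: an already-constructed ProjectLayout for the project (not built here).
    # Passing datasource_ids=None checks every discovered datasource.
    results = check_datasource_connection(layout, datasource_ids=None)
    for datasource_id, check in results.items():
        if check.connection_status is not DatasourceConnectionStatus.VALID:
            print(f"{datasource_id}: {check.summary}")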
@@ -0,0 +1,90 @@
+ import logging
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import yaml
+
+ from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
+ from databao_context_engine.project.layout import ProjectLayout
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(eq=True, frozen=True)
+ class DatasourceContext:
+ """A generated Context for a Datasource.
+
+ Attributes:
+ datasource_id: The id of the datasource.
+ context: The context generated for the datasource.
+ """
+
+ datasource_id: DatasourceId
+ # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
+ context: str
+
+
+ def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceType:
+ with context_path.open("r") as context_file:
+ type_key = "datasource_type"
+ for line in context_file:
+ if line.startswith(f"{type_key}: "):
+ datasource_type = yaml.safe_load(line)[type_key]
+ return DatasourceType(full_type=datasource_type)
+
+ raise ValueError(f"Could not find type in context file {context_path}")
+
+
+ def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
+ result = []
+ for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
+ for context_file_name in filenames:
+ if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
+ continue
+ context_file = Path(dirpath).joinpath(context_file_name)
+ relative_context_file = context_file.relative_to(project_layout.output_dir)
+ try:
+ result.append(
+ Datasource(
+ id=DatasourceId.from_datasource_context_file_path(relative_context_file),
+ type=_read_datasource_type_from_context_file(context_file),
+ )
+ )
+ except ValueError as e:
+ logger.debug(str(e), exc_info=True, stack_info=True)
+ logger.warning(
+ f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_file.resolve()}"
+ )
+
+ return sorted(result, key=lambda ds: str(ds.id).lower())
+
+
+ def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
+ context_path = project_layout.output_dir.joinpath(datasource_id.relative_path_to_context_file())
+ if not context_path.is_file():
+ raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
+
+ context = context_path.read_text()
+ return DatasourceContext(datasource_id=datasource_id, context=context)
+
+
+ def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
+ result = []
+ for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
+ for context_file_name in filenames:
+ if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
+ continue
+ context_file = Path(dirpath).joinpath(context_file_name)
+ relative_context_file = context_file.relative_to(project_layout.output_dir)
+ result.append(
+ DatasourceContext(
+ datasource_id=DatasourceId.from_datasource_context_file_path(relative_context_file),
+ context=context_file.read_text(),
+ )
+ )
+ return sorted(result, key=lambda context: str(context.datasource_id).lower())
+
+
+ def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
+ return f"# ===== {str(datasource_id)} ====={os.linesep}"
@@ -0,0 +1,143 @@
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Any
+
+ import yaml
+
+ from databao_context_engine.datasources.types import (
+ Datasource,
+ DatasourceDescriptor,
+ DatasourceId,
+ DatasourceKind,
+ PreparedConfig,
+ PreparedDatasource,
+ PreparedFile,
+ )
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
+ from databao_context_engine.project.layout import ProjectLayout
+ from databao_context_engine.templating.renderer import render_template
+
+ logger = logging.getLogger(__name__)
+
+
+ def get_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
+ result = []
+ for discovered_datasource in discover_datasources(project_layout=project_layout):
+ try:
+ prepared_source = prepare_source(discovered_datasource)
+ except Exception as e:
+ logger.debug(str(e), exc_info=True, stack_info=True)
+ logger.info(f"Invalid source at ({discovered_datasource.path}): {str(e)}")
+ continue
+
+ result.append(
+ Datasource(
+ id=DatasourceId.from_datasource_config_file_path(project_layout, discovered_datasource.path),
+ type=prepared_source.datasource_type,
+ )
+ )
+
+ return result
+
+
+ def discover_datasources(project_layout: ProjectLayout) -> list[DatasourceDescriptor]:
+ """Scan the project's src/ directory and return all discovered sources.
+
+ Rules:
+ - Each first-level directory under src/ is treated as a main_type
+ - Unsupported or unreadable entries are skipped.
+ - The returned list is sorted by directory and then filename
+
+ Args:
+ project_layout: ProjectLayout instance representing the project directory and configuration.
+
+ Returns:
+ A list of DatasourceDescriptor instances representing the discovered datasources.
+ """
+ datasources: list[DatasourceDescriptor] = []
+ for dirpath, dirnames, filenames in os.walk(project_layout.src_dir):
+ for config_file_name in filenames:
+ context_file = Path(dirpath).joinpath(config_file_name)
+ datasource = _load_datasource_descriptor(project_layout, context_file)
+ if datasource is not None:
+ datasources.append(datasource)
+
+ return sorted(datasources, key=lambda ds: str(ds.datasource_id.relative_path_to_config_file()).lower())
+
+
+ def _is_datasource_file(p: Path) -> bool:
+ # ignore backup files
+ return p.is_file() and not p.suffix.endswith("~")
+
+
+ def get_datasource_descriptors(
+ project_layout: ProjectLayout, datasource_ids: list[DatasourceId]
+ ) -> list[DatasourceDescriptor]:
+ datasources: list[DatasourceDescriptor] = []
+ for datasource_id in sorted(datasource_ids, key=lambda id: str(id)):
+ config_file_path = project_layout.src_dir.joinpath(datasource_id.relative_path_to_config_file())
+ if not config_file_path.is_file():
+ raise ValueError(f"Datasource config file not found: {config_file_path}")
+
+ datasource = _load_datasource_descriptor(project_layout, config_file_path)
+ if datasource is not None:
+ datasources.append(datasource)
+
+ return datasources
+
+
+ def _load_datasource_descriptor(project_layout: ProjectLayout, config_file: Path) -> DatasourceDescriptor | None:
+ """Load a single file with src/<parent_name>/ into a DatasourceDescriptor."""
+ if not config_file.is_file():
+ return None
+
+ parent_name = config_file.parent.name
+ extension = config_file.suffix.lower().lstrip(".")
+ relative_config_file = config_file.relative_to(project_layout.src_dir)
+
+ if parent_name == "files" and len(relative_config_file.parts) == 2:
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
+
+ if extension in {"yaml", "yml"}:
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.CONFIG)
+
+ if extension:
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
+
+ logger.debug("Skipping file without extension: %s", config_file)
+ return None
+
+
+ def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
+ """Convert a discovered datasource into a prepared datasource ready for plugin execution."""
+ if datasource.kind is DatasourceKind.FILE:
+ file_subtype = datasource.path.suffix.lower().lstrip(".")
+ return PreparedFile(
+ datasource_id=datasource.datasource_id,
+ datasource_type=DatasourceType(full_type=file_subtype),
+ path=datasource.path,
+ )
+
+ config = _parse_config_file(datasource.path)
+
+ ds_type = config.get("type")
+ if not ds_type or not isinstance(ds_type, str):
+ raise ValueError("Config missing 'type' at %s - skipping", datasource.path)
+
+ return PreparedConfig(
+ datasource_id=datasource.datasource_id,
+ datasource_type=DatasourceType(full_type=ds_type),
+ path=datasource.path,
+ config=config,
+ datasource_name=datasource.path.stem,
+ )
+
+
+ def _parse_config_file(file_path: Path) -> dict[Any, Any]:
+ rendered_file = render_template(file_path.read_text())
+
+ return yaml.safe_load(rendered_file) or {}
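
A hedged sketch of the discovery flow implemented above: discover sources under src/, then prepare each one for plugin execution. It assumes an existing ProjectLayout named layout; the error handling simply mirrors what get_datasource_list does in this hunk.

    from databao_context_engine.datasources.datasource_discovery import (
        discover_datasources,
        prepare_source,
    )

    for descriptor in discover_datasources(layout):
        try:
            # PreparedConfig for YAML configs, PreparedFile for raw files.
            prepared = prepare_source(descriptor)
        except Exception:
            continue  # invalid or unreadable sources are skipped
        print(descriptor.datasource_id, prepared.datasource_type)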
@@ -0,0 +1,194 @@
+ from dataclasses import dataclass
+ from enum import StrEnum
+ from pathlib import Path
+ from typing import Any
+
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
+ from databao_context_engine.project.layout import ProjectLayout
+
+
+ class DatasourceKind(StrEnum):
+ CONFIG = "config"
+ FILE = "file"
+
+
+ @dataclass(frozen=True)
+ class DatasourceDescriptor:
+ datasource_id: "DatasourceId"
+ path: Path
+ kind: DatasourceKind
+
+
+ @dataclass(frozen=True)
+ class PreparedConfig:
+ datasource_id: "DatasourceId"
+ datasource_type: DatasourceType
+ path: Path
+ config: dict[Any, Any]
+ datasource_name: str
+
+
+ @dataclass(frozen=True)
+ class PreparedFile:
+ datasource_id: "DatasourceId"
+ datasource_type: DatasourceType
+ path: Path
+
+
+ PreparedDatasource = PreparedConfig | PreparedFile
+
+
+ @dataclass(kw_only=True, frozen=True, eq=True)
+ class DatasourceId:
+ """The ID of a datasource. The ID is the path to the datasource's config file relative to the src folder in the project.
+
+ e.g: "databases/my_postgres_datasource.yaml"
+
+ Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
+
+ Attributes:
+ datasource_path: The path to the datasource relative to project's src folder.
+ config_file_suffix: The suffix of the config (or raw) file.
+ """
+
+ ALLOWED_YAML_SUFFIXES = [".yaml", ".yml"]
+ CONTEXT_FILE_SUFFIX = ".yaml"
+
+ datasource_path: str
+ config_file_suffix: str
+
+ def __post_init__(self):
+ if not self.datasource_path.strip():
+ raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not be empty")
+ if not self.config_file_suffix.strip():
+ raise ValueError(f"Invalid DatasourceId ({str(self)}): config_file_suffix must not be empty")
+
+ if not self.config_file_suffix.startswith("."):
+ raise ValueError(
+ f'Invalid DatasourceId ({str(self)}): config_file_suffix must start with a dot "." (e.g.: .yaml)'
+ )
+
+ if self.datasource_path.endswith(self.config_file_suffix):
+ raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not contain the file suffix")
+
+ def __str__(self):
+ return str(self.relative_path_to_config_file())
+
+ def relative_path_to_config_file(self) -> Path:
+ """Return a path to the config file for this datasource.
+
+ The returned path is relative to the src folder in the project.
+
+ Returns:
+ The path to the config file relative to the src folder in the project.
+ """
+ return Path(self.datasource_path + self.config_file_suffix)
+
+ def relative_path_to_context_file(self) -> Path:
+ """Return a path to the config file for this datasource.
+
+ The returned path is relative to an output run folder in the project.
+
+ Returns:
+ The path to the context file relative to the output folder in the project.
+ """
+ # Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
+ suffix = (
+ DatasourceId.CONTEXT_FILE_SUFFIX
+ if self.config_file_suffix in DatasourceId.ALLOWED_YAML_SUFFIXES
+ else (self.config_file_suffix + DatasourceId.CONTEXT_FILE_SUFFIX)
+ )
+
+ return Path(self.datasource_path + suffix)
+
+ @classmethod
+ def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
+ """Create a DatasourceId from a string representation.
+
+ Args:
+ datasource_id_as_string: The string representation of a DatasourceId.
+ This is the path to the datasource's config file relative to the src folder in the project.
+ (e.g. "databases/my_postgres_datasource.yaml")
+
+ Returns:
+ The DatasourceId instance created from the string representation.
+ """
+ config_file_path = Path(datasource_id_as_string)
+ return DatasourceId._from_relative_datasource_config_file_path(config_file_path)
+
+ @classmethod
+ def from_datasource_config_file_path(
+ cls, project_layout: ProjectLayout, datasource_config_file: Path
+ ) -> "DatasourceId":
+ """Create a DatasourceId from a config file path.
+
+ Args:
+ project_layout: The databao context engine project layout.
+ datasource_config_file: The path to the datasource config file.
+ This path can either be the config file path relative to the src folder or the full path to the config file.
+
+ Returns:
+ The DatasourceId instance created from the config file path.
+ """
+ relative_datasource_config_file = datasource_config_file.relative_to(project_layout.src_dir)
+ return DatasourceId._from_relative_datasource_config_file_path(relative_datasource_config_file)
+
+ @classmethod
+ def _from_relative_datasource_config_file_path(cls, relative_datasource_config_file: Path) -> "DatasourceId":
+ if relative_datasource_config_file.is_absolute():
+ raise ValueError(
+ f"Path to datasource config file should be relative to project's src folder: {relative_datasource_config_file}"
+ )
+ return DatasourceId(
+ datasource_path=_extract_datasource_path(relative_datasource_config_file),
+ config_file_suffix=relative_datasource_config_file.suffix,
+ )
+
+ @classmethod
+ def from_datasource_context_file_path(cls, datasource_context_file: Path) -> "DatasourceId":
+ """Create a DatasourceId from a context file path.
+
+ This factory handles the case where the context was generated from a raw file rather than from a config.
+ In that case, the context file name will look like "<my_datasource_name>.<raw_file_extension>.yaml"
+
+ Args:
+ datasource_context_file: The path to the datasource context file.
+
+ Returns:
+ The DatasourceId instance created from the context file path.
+
+ Raises:
+ ValueError: If the wrong datasource_context_file is provided.
+ """
+ if (
+ len(datasource_context_file.suffixes) > 1
+ and datasource_context_file.suffix == DatasourceId.CONTEXT_FILE_SUFFIX
+ ):
+ # If there is more than 1 suffix, we remove the latest suffix (.yaml) to keep only the actual datasource file suffix
+ context_file_name_without_yaml_extension = datasource_context_file.name[
+ : -len(DatasourceId.CONTEXT_FILE_SUFFIX)
+ ]
+ datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
+ if datasource_context_file.is_absolute():
+ raise ValueError(f"Path to datasource context file should be a relative path: {datasource_context_file}")
+ return DatasourceId(
+ datasource_path=_extract_datasource_path(datasource_context_file),
+ config_file_suffix=datasource_context_file.suffix,
+ )
+
+
+ def _extract_datasource_path(datasource_file: Path) -> str:
+ return str(datasource_file.with_suffix(""))
+
+
+ @dataclass
+ class Datasource:
+ """A datasource contained in the project.
+
+ Attributes:
+ id: The unique identifier of the datasource.
+ type: The type of the datasource.
+ """
+
+ id: DatasourceId
+ type: DatasourceType
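
The DatasourceId docstrings above describe the mapping between config paths and context paths; a small illustration using the factory methods defined in this file (the first path comes from the docstring example, the parquet path is a made-up name for illustration):

    from databao_context_engine.datasources.types import DatasourceId

    ds_id = DatasourceId.from_string_repr("databases/my_postgres_datasource.yaml")
    print(ds_id.relative_path_to_config_file())   # databases/my_postgres_datasource.yaml
    print(ds_id.relative_path_to_context_file())  # databases/my_postgres_datasource.yaml

    # For a raw file, the original suffix is kept in the context file name:
    raw_id = DatasourceId.from_string_repr("files/report.parquet")
    print(raw_id.relative_path_to_context_file())  # files/report.parquet.yaml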
@@ -38,9 +38,9 @@ def generate_configs_schemas(
  try:
  schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)

- print(f"{os.linesep}{os.linesep}".join(schema_list))
+ click.echo(f"{os.linesep}{os.linesep}".join(schema_list))
  except Exception as e:
- print(str(e))
+ click.echo(str(e))


  def _generate_json_schema_output_for_plugins(
@@ -54,10 +54,9 @@ def _generate_json_schema_output_for_plugins(
  if len(filtered_plugins) == 0:
  if include_plugins:
  raise ValueError(f"No plugin found with id in {include_plugins}")
- elif exclude_plugins:
+ if exclude_plugins:
  raise ValueError(f"No plugin found when excluding {exclude_plugins}")
- else:
- raise ValueError("No plugin found")
+ raise ValueError("No plugin found")

  results = []
  for plugin in filtered_plugins:
@@ -1,17 +1,39 @@
  from pathlib import Path

  from databao_context_engine.databao_context_project_manager import DatabaoContextProjectManager
- from databao_context_engine.project.init_project import init_project_dir
+ from databao_context_engine.project.init_project import InitProjectError, init_project_dir
  from databao_context_engine.project.layout import is_project_dir_valid


  def init_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
- initialised_project_dir = init_project_dir(project_dir=project_dir)
+ """Initialize a Databao Context project in the given directory.

- return DatabaoContextProjectManager(project_dir=initialised_project_dir)
+ Args:
+ project_dir: The directory where the project should be initialized.
+
+ Returns:
+ A DatabaoContextProjectManager for the initialized project.
+
+ Raises:
+ InitProjectError: If the project could not be initialized.
+ """
+ try:
+ initialized_project_dir = init_project_dir(project_dir=project_dir)
+
+ return DatabaoContextProjectManager(project_dir=initialized_project_dir)
+ except InitProjectError as e:
+ raise e


  def init_or_get_dce_project(project_dir: Path) -> DatabaoContextProjectManager:
+ """Initialize a Databao Context project in the given directory or get a DatabaoContextProjectManager for the project if it already exists.
+
+ Args:
+ project_dir: The directory where the project should be initialized if it doesn't exist yet..
+
+ Returns:
+ A DatabaoContextProjectManager for the project in project_dir.
+ """
  if is_project_dir_valid(project_dir):
  return DatabaoContextProjectManager(project_dir=project_dir)
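
A brief usage sketch of the initialization helpers documented in this last hunk. The hunk does not label which module it belongs to, so the import path below is an assumption, and the project path is illustrative.

    from pathlib import Path

    from databao_context_engine.init_project import init_or_get_dce_project  # assumed import path

    # Creates a new project on first run, returns a manager for the existing one afterwards.
    manager = init_or_get_dce_project(Path("./my_databao_project"))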