databao-context-engine 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. databao_context_engine/__init__.py +18 -6
  2. databao_context_engine/build_sources/__init__.py +4 -0
  3. databao_context_engine/build_sources/{internal/build_runner.py → build_runner.py} +27 -23
  4. databao_context_engine/build_sources/build_service.py +53 -0
  5. databao_context_engine/build_sources/build_wiring.py +84 -0
  6. databao_context_engine/build_sources/export_results.py +41 -0
  7. databao_context_engine/build_sources/{internal/plugin_execution.py → plugin_execution.py} +3 -7
  8. databao_context_engine/cli/add_datasource_config.py +41 -15
  9. databao_context_engine/cli/commands.py +12 -43
  10. databao_context_engine/cli/info.py +2 -2
  11. databao_context_engine/databao_context_engine.py +137 -0
  12. databao_context_engine/databao_context_project_manager.py +96 -6
  13. databao_context_engine/datasources/add_config.py +34 -0
  14. databao_context_engine/{datasource_config → datasources}/check_config.py +18 -7
  15. databao_context_engine/datasources/datasource_context.py +93 -0
  16. databao_context_engine/{project → datasources}/datasource_discovery.py +17 -16
  17. databao_context_engine/{project → datasources}/types.py +64 -15
  18. databao_context_engine/init_project.py +25 -3
  19. databao_context_engine/introspection/property_extract.py +59 -30
  20. databao_context_engine/llm/errors.py +2 -8
  21. databao_context_engine/llm/install.py +13 -20
  22. databao_context_engine/llm/service.py +1 -3
  23. databao_context_engine/mcp/all_results_tool.py +2 -2
  24. databao_context_engine/mcp/mcp_runner.py +4 -2
  25. databao_context_engine/mcp/mcp_server.py +1 -4
  26. databao_context_engine/mcp/retrieve_tool.py +3 -11
  27. databao_context_engine/plugin_loader.py +111 -0
  28. databao_context_engine/pluginlib/build_plugin.py +25 -9
  29. databao_context_engine/pluginlib/config.py +16 -2
  30. databao_context_engine/plugins/databases/athena_introspector.py +85 -22
  31. databao_context_engine/plugins/databases/base_introspector.py +5 -3
  32. databao_context_engine/plugins/databases/clickhouse_introspector.py +22 -11
  33. databao_context_engine/plugins/databases/duckdb_introspector.py +1 -1
  34. databao_context_engine/plugins/databases/introspection_scope.py +11 -9
  35. databao_context_engine/plugins/databases/introspection_scope_matcher.py +2 -5
  36. databao_context_engine/plugins/databases/mssql_introspector.py +26 -17
  37. databao_context_engine/plugins/databases/mysql_introspector.py +23 -12
  38. databao_context_engine/plugins/databases/postgresql_introspector.py +2 -2
  39. databao_context_engine/plugins/databases/snowflake_introspector.py +43 -10
  40. databao_context_engine/plugins/plugin_loader.py +43 -42
  41. databao_context_engine/plugins/resources/parquet_introspector.py +2 -3
  42. databao_context_engine/project/info.py +31 -2
  43. databao_context_engine/project/init_project.py +16 -7
  44. databao_context_engine/project/layout.py +3 -3
  45. databao_context_engine/retrieve_embeddings/__init__.py +3 -0
  46. databao_context_engine/retrieve_embeddings/{internal/export_results.py → export_results.py} +2 -2
  47. databao_context_engine/retrieve_embeddings/{internal/retrieve_runner.py → retrieve_runner.py} +5 -9
  48. databao_context_engine/retrieve_embeddings/{internal/retrieve_service.py → retrieve_service.py} +3 -17
  49. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +49 -0
  50. databao_context_engine/{serialisation → serialization}/yaml.py +1 -1
  51. databao_context_engine/services/chunk_embedding_service.py +23 -11
  52. databao_context_engine/services/factories.py +1 -46
  53. databao_context_engine/services/persistence_service.py +11 -11
  54. databao_context_engine/storage/connection.py +11 -7
  55. databao_context_engine/storage/exceptions/exceptions.py +2 -2
  56. databao_context_engine/storage/migrate.py +2 -4
  57. databao_context_engine/storage/migrations/V01__init.sql +6 -31
  58. databao_context_engine/storage/models.py +2 -23
  59. databao_context_engine/storage/repositories/chunk_repository.py +16 -12
  60. databao_context_engine/storage/repositories/factories.py +1 -12
  61. databao_context_engine/storage/repositories/vector_search_repository.py +8 -13
  62. databao_context_engine/system/properties.py +4 -2
  63. databao_context_engine-0.1.3.dist-info/METADATA +75 -0
  64. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/RECORD +68 -77
  65. databao_context_engine/build_sources/internal/build_service.py +0 -77
  66. databao_context_engine/build_sources/internal/build_wiring.py +0 -52
  67. databao_context_engine/build_sources/internal/export_results.py +0 -43
  68. databao_context_engine/build_sources/public/api.py +0 -4
  69. databao_context_engine/databao_engine.py +0 -85
  70. databao_context_engine/datasource_config/__init__.py +0 -0
  71. databao_context_engine/datasource_config/add_config.py +0 -50
  72. databao_context_engine/datasource_config/datasource_context.py +0 -60
  73. databao_context_engine/project/runs.py +0 -39
  74. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  75. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +0 -29
  76. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  77. databao_context_engine/retrieve_embeddings/public/api.py +0 -3
  78. databao_context_engine/serialisation/__init__.py +0 -0
  79. databao_context_engine/services/run_name_policy.py +0 -8
  80. databao_context_engine/storage/repositories/datasource_run_repository.py +0 -136
  81. databao_context_engine/storage/repositories/run_repository.py +0 -157
  82. databao_context_engine-0.1.2.dist-info/METADATA +0 -187
  83. /databao_context_engine/{build_sources/internal → datasources}/__init__.py +0 -0
  84. /databao_context_engine/{build_sources/public → serialization}/__init__.py +0 -0
  85. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/WHEEL +0 -0
  86. {databao_context_engine-0.1.2.dist-info → databao_context_engine-0.1.3.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,137 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from databao_context_engine.datasources.datasource_context import (
7
+ DatasourceContext,
8
+ get_all_contexts,
9
+ get_context_header_for_datasource,
10
+ get_datasource_context,
11
+ get_introspected_datasource_list,
12
+ )
13
+ from databao_context_engine.datasources.types import Datasource, DatasourceId
14
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
15
+ from databao_context_engine.project.layout import ProjectLayout, ensure_project_dir
16
+ from databao_context_engine.retrieve_embeddings import retrieve_embeddings
17
+
18
+
19
+ @dataclass
20
+ class ContextSearchResult:
21
+ """The result of a search in the project's contexts.
22
+
23
+ Attributes:
24
+ datasource_id: The ID of the datasource that generated the result.
25
+ datasource_type: The type of the datasource that generated the result.
26
+ distance: The distance between the search text and the result.
27
+ context_result: The actual content of the result that was found as a YAML string.
28
+ This content will be a subpart of the full context of the datasource.
29
+ In some cases, its content won't contain the exact same attributes as what can be
30
+ found directly in the full context.
31
+ """
32
+
33
+ datasource_id: DatasourceId
34
+ datasource_type: DatasourceType
35
+ distance: float
36
+ context_result: str
37
+
38
+
39
+ class DatabaoContextEngine:
40
+ """Engine for reading and using the contexts generated in a Databao Context Project.
41
+
42
+ The Databao Context Project should already have datasources configured and built (see DatabaoContextProjectManager), so that they can be used in the Engine.
43
+
44
+ Attributes:
45
+ project_dir: The root directory of the Databao Context Project.
46
+ """
47
+
48
+ project_dir: Path
49
+ _project_layout: ProjectLayout
50
+
51
+ def __init__(self, project_dir: Path) -> None:
52
+ """Initialize the DatabaoContextEngine.
53
+
54
+ Args:
55
+ project_dir: The root directory of the Databao Context Project.
56
+ There must be a valid DatabaoContextProject in this directory.
57
+ """
58
+ self._project_layout = ensure_project_dir(project_dir=project_dir)
59
+ self.project_dir = project_dir
60
+
61
+ def get_introspected_datasource_list(self) -> list[Datasource]:
62
+ """Return the list of datasources for which a context is available.
63
+
64
+ Returns:
65
+ A list of the datasources for which a context is available.
66
+ """
67
+ return get_introspected_datasource_list(self._project_layout)
68
+
69
+ def get_datasource_context(self, datasource_id: DatasourceId) -> DatasourceContext:
70
+ """Return the context available for a given datasource.
71
+
72
+ Args:
73
+ datasource_id: The ID of the datasource.
74
+
75
+ Returns:
76
+ The context for this datasource.
77
+ """
78
+ return get_datasource_context(project_layout=self._project_layout, datasource_id=datasource_id)
79
+
80
+ def get_all_contexts(self) -> list[DatasourceContext]:
81
+ """Return all contexts generated in the project.
82
+
83
+ Returns:
84
+ A list of all contexts generated in the project.
85
+ """
86
+ return get_all_contexts(project_layout=self._project_layout)
87
+
88
+ def get_all_contexts_formatted(self) -> str:
89
+ all_contexts = self.get_all_contexts()
90
+
91
+ all_results = os.linesep.join(
92
+ [f"{get_context_header_for_datasource(context.datasource_id)}{context.context}" for context in all_contexts]
93
+ )
94
+
95
+ return all_results
96
+
97
+ def search_context(
98
+ self,
99
+ retrieve_text: str,
100
+ limit: int | None = None,
101
+ export_to_file: bool = False,
102
+ datasource_ids: list[DatasourceId] | None = None,
103
+ ) -> list[ContextSearchResult]:
104
+ """Search in the available context for the closest matches to the given text.
105
+
106
+ Args:
107
+ retrieve_text: The text to search for in the contexts.
108
+ limit: The maximum number of results to return. If None is provided, a default limit of 10 will be used.
109
+ export_to_file: Whether the results should be exported to a file as a side-effect. If True, the results will be exported in a file in the run directory.
110
+ datasource_ids: Not implemented yet: providing this argument has no effect on the search.
111
+
112
+ Returns:
113
+ A list of the results found for the search, sorted by distance.
114
+ """
115
+ # TODO: Filter with datasource_ids
116
+ # TODO: When no run_name is required, we can extract the "export_to_file" side-effect and let the caller (the CLI) do it themselves
117
+
118
+ results = retrieve_embeddings(
119
+ project_layout=self._project_layout,
120
+ retrieve_text=retrieve_text,
121
+ limit=limit,
122
+ export_to_file=export_to_file,
123
+ )
124
+
125
+ return [
126
+ ContextSearchResult(
127
+ datasource_id=result.datasource_id,
128
+ datasource_type=result.datasource_type,
129
+ distance=result.cosine_distance,
130
+ context_result=result.display_text,
131
+ )
132
+ for result in results
133
+ ]
134
+
135
+ def run_sql(self, datasource_id: DatasourceId, sql: str, params: list[str]) -> dict[str, Any]:
136
+ """Not implemented yet. This will allow running a SQL query against a datasource (if the datasource supports it)."""
137
+ raise NotImplementedError("Running SQL is not supported yet")
@@ -2,43 +2,100 @@ from dataclasses import dataclass
2
2
  from pathlib import Path
3
3
  from typing import Any, overload
4
4
 
5
- from databao_context_engine.build_sources.public.api import BuildContextResult, build_all_datasources
6
- from databao_context_engine.datasource_config.add_config import (
5
+ from databao_context_engine.build_sources import BuildContextResult, build_all_datasources
6
+ from databao_context_engine.databao_context_engine import DatabaoContextEngine
7
+ from databao_context_engine.datasources.add_config import (
7
8
  create_datasource_config_file,
8
9
  get_datasource_id_for_config_file,
9
10
  )
10
- from databao_context_engine.datasource_config.check_config import (
11
+ from databao_context_engine.datasources.check_config import (
11
12
  CheckDatasourceConnectionResult,
13
+ )
14
+ from databao_context_engine.datasources.check_config import (
12
15
  check_datasource_connection as check_datasource_connection_internal,
13
16
  )
17
+ from databao_context_engine.datasources.datasource_discovery import get_datasource_list
18
+ from databao_context_engine.datasources.types import Datasource, DatasourceId
14
19
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
15
- from databao_context_engine.project.datasource_discovery import DatasourceId
16
20
  from databao_context_engine.project.layout import ensure_datasource_config_file_doesnt_exist, ensure_project_dir
17
21
  from databao_context_engine.services.chunk_embedding_service import ChunkEmbeddingMode
18
22
 
19
23
 
20
24
  @dataclass
21
25
  class DatasourceConfigFile:
26
+ """A datasource config file that was created by the DatabaoContextProjectManager.
27
+
28
+ Attributes:
29
+ datasource_id: The unique identifier for the datasource.
30
+ config_file_path: The path to the datasource configuration file.
31
+ """
32
+
22
33
  datasource_id: DatasourceId
23
34
  config_file_path: Path
24
35
 
25
36
 
26
37
  class DatabaoContextProjectManager:
38
+ """Project Manager for Databao Context Projects.
39
+
40
+ This project manager is responsible for configuring and building a Databao Context Project.
41
+ The project_dir should already have been initialized before a Project manager can be used.
42
+
43
+ Attributes:
44
+ project_dir: The root directory of the Databao Context Project.
45
+ """
46
+
27
47
  project_dir: Path
28
48
 
29
49
  def __init__(self, project_dir: Path) -> None:
50
+ """Initialize the DatabaoContextProjectManager.
51
+
52
+ Args:
53
+ project_dir: The root directory of the Databao Context Project.
54
+ """
30
55
  ensure_project_dir(project_dir=project_dir)
31
56
  self.project_dir = project_dir
32
57
 
58
+ def get_configured_datasource_list(self) -> list[Datasource]:
59
+ """Return the list of datasources configured in the project.
60
+
61
+ This method returns all datasources configured in the src folder of the project,
62
+ no matter whether the datasource configuration is valid or not.
63
+
64
+ Returns:
65
+ The list of datasources configured in the project.
66
+ """
67
+ return get_datasource_list(self.project_dir)
68
+
33
69
  def build_context(
34
- self, datasource_ids: list[DatasourceId] | None, chunk_embedding_mode: ChunkEmbeddingMode
70
+ self,
71
+ datasource_ids: list[DatasourceId] | None = None,
72
+ chunk_embedding_mode: ChunkEmbeddingMode = ChunkEmbeddingMode.EMBEDDABLE_TEXT_ONLY,
35
73
  ) -> list[BuildContextResult]:
74
+ """Build the context for datasources in the project.
75
+
76
+ Any datasource with an invalid configuration will be skipped.
77
+
78
+ Args:
79
+ datasource_ids: The list of datasource ids to build. If None, all datasources will be built.
80
+ chunk_embedding_mode: The mode to use for chunk embedding.
81
+
82
+ Returns:
83
+ The list of all built results.
84
+ """
36
85
  # TODO: Filter which datasources to build by datasource_ids
37
86
  return build_all_datasources(project_dir=self.project_dir, chunk_embedding_mode=chunk_embedding_mode)
38
87
 
39
88
  def check_datasource_connection(
40
- self, datasource_ids: list[DatasourceId] | None
89
+ self, datasource_ids: list[DatasourceId] | None = None
41
90
  ) -> list[CheckDatasourceConnectionResult]:
91
+ """Check the connection for datasources in the project.
92
+
93
+ Args:
94
+ datasource_ids: The list of datasource ids to check. If None, all datasources will be checked.
95
+
96
+ Returns:
97
+ The list of all connection check results, sorted by datasource id.
98
+ """
42
99
  return sorted(
43
100
  check_datasource_connection_internal(project_dir=self.project_dir, datasource_ids=datasource_ids).values(),
44
101
  key=lambda result: str(result.datasource_id),
@@ -51,6 +108,18 @@ class DatabaoContextProjectManager:
51
108
  config_content: dict[str, Any],
52
109
  overwrite_existing: bool = False,
53
110
  ) -> DatasourceConfigFile:
111
+ """Create a new datasource configuration file in the project.
112
+
113
+ Args:
114
+ datasource_type: The type of the datasource to create.
115
+ datasource_name: The name of the datasource to create.
116
+ config_content: The content of the datasource configuration. This is a dictionary that will be written as-is in a yaml file.
117
+ The actual content of the configuration is not checked and might not be valid for the requested type.
118
+ overwrite_existing: Whether to overwrite an existing datasource configuration file if it already exists.
119
+
120
+ Returns:
121
+ The path to the created datasource configuration file.
122
+ """
54
123
  # TODO: Before creating the datasource, validate the config content based on which plugin will be used
55
124
  config_file_path = create_datasource_config_file(
56
125
  project_dir=self.project_dir,
@@ -77,6 +146,19 @@ class DatabaoContextProjectManager:
77
146
  datasource_name: str | None = None,
78
147
  datasource_id: DatasourceId | None = None,
79
148
  ) -> bool:
149
+ """Check if a datasource configuration file already exists in the project.
150
+
151
+ Args:
152
+ datasource_type: The type of the datasource. This must be used in conjunction with datasource_name.
153
+ datasource_name: The name of the datasource. This must be used in conjunction with datasource_type.
154
+ datasource_id: The id of the datasource. If provided, datasource_type and datasource_name will be ignored.
155
+
156
+ Returns:
157
+ True if there is already a datasource configuration file for this datasource, False otherwise.
158
+
159
+ Raises:
160
+ ValueError: If the wrong set of arguments is provided.
161
+ """
80
162
  if datasource_type is not None and datasource_name is not None:
81
163
  datasource_id = get_datasource_id_for_config_file(datasource_type, datasource_name)
82
164
  elif datasource_id is None:
@@ -90,3 +172,11 @@ class DatabaoContextProjectManager:
90
172
  return False
91
173
  except ValueError:
92
174
  return True
175
+
176
+ def get_engine_for_project(self) -> DatabaoContextEngine:
177
+ """Instantiate a DatabaoContextEngine for the project.
178
+
179
+ Returns:
180
+ A DatabaoContextEngine instance for the project.
181
+ """
182
+ return DatabaoContextEngine(project_dir=self.project_dir)
@@ -0,0 +1,34 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ from databao_context_engine.datasources.types import DatasourceId
5
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
6
+ from databao_context_engine.project.layout import (
7
+ create_datasource_config_file as create_datasource_config_file_internal,
8
+ )
9
+ from databao_context_engine.serialization.yaml import to_yaml_string
10
+
11
+
12
+ def create_datasource_config_file(
13
+ project_dir: Path,
14
+ datasource_type: DatasourceType,
15
+ datasource_name: str,
16
+ config_content: dict[str, Any],
17
+ overwrite_existing: bool,
18
+ ) -> Path:
19
+ basic_config = {"type": datasource_type.subtype, "name": datasource_name}
20
+
21
+ return create_datasource_config_file_internal(
22
+ project_dir,
23
+ get_datasource_id_for_config_file(datasource_type, datasource_name),
24
+ to_yaml_string(basic_config | config_content),
25
+ overwrite_existing=overwrite_existing,
26
+ )
27
+
28
+
29
+ def get_datasource_id_for_config_file(datasource_type: DatasourceType, datasource_name: str) -> DatasourceId:
30
+ return DatasourceId(
31
+ datasource_config_folder=datasource_type.config_folder,
32
+ datasource_name=datasource_name,
33
+ config_file_suffix=".yaml",
34
+ )
@@ -6,21 +6,23 @@ from pathlib import Path
6
6
 
7
7
  from pydantic import ValidationError
8
8
 
9
- from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
10
- from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
11
- from databao_context_engine.plugins.plugin_loader import load_plugins
12
- from databao_context_engine.project.datasource_discovery import (
9
+ from databao_context_engine.datasources.datasource_discovery import (
13
10
  discover_datasources,
14
11
  get_datasource_descriptors,
15
12
  prepare_source,
16
13
  )
14
+ from databao_context_engine.datasources.types import DatasourceId, PreparedConfig
15
+ from databao_context_engine.pluginlib.build_plugin import BuildDatasourcePlugin, NotSupportedError
16
+ from databao_context_engine.pluginlib.plugin_utils import check_connection_for_datasource
17
+ from databao_context_engine.plugins.plugin_loader import load_plugins
17
18
  from databao_context_engine.project.layout import ensure_project_dir
18
- from databao_context_engine.project.types import DatasourceId, PreparedConfig
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
 
23
23
  class DatasourceConnectionStatus(Enum):
24
+ """Status of the connection to a datasource."""
25
+
24
26
  VALID = "Valid"
25
27
  INVALID = "Invalid"
26
28
  UNKNOWN = "Unknown"
@@ -28,6 +30,15 @@ class DatasourceConnectionStatus(Enum):
28
30
 
29
31
  @dataclass(kw_only=True)
30
32
  class CheckDatasourceConnectionResult:
33
+ """Result of checking the connection status of a datasource.
34
+
35
+ Attributes:
36
+ datasource_id: The id of the datasource.
37
+ connection_status: The connection status of the datasource.
38
+ summary: A summary of the connection status' error, or None if the connection is valid.
39
+ full_message: A detailed message about the connection status' error, or None if the connection is valid.
40
+ """
41
+
31
42
  datasource_id: DatasourceId
32
43
  connection_status: DatasourceConnectionStatus
33
44
  summary: str | None
@@ -103,12 +114,12 @@ def check_datasource_connection(
103
114
  exc_info=True,
104
115
  stack_info=True,
105
116
  )
106
- result[result_key] = get_validation_result_from_error(result_key, e)
117
+ result[result_key] = _get_validation_result_from_error(result_key, e)
107
118
 
108
119
  return result
109
120
 
110
121
 
111
- def get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
122
+ def _get_validation_result_from_error(datasource_id: DatasourceId, e: Exception):
112
123
  if isinstance(e, ValidationError):
113
124
  return CheckDatasourceConnectionResult(
114
125
  datasource_id=datasource_id,
@@ -0,0 +1,93 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ import yaml
7
+
8
+ from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
9
+ from databao_context_engine.project.layout import ProjectLayout, get_output_dir
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ @dataclass(eq=True, frozen=True)
15
+ class DatasourceContext:
16
+ """A generated Context for a Datasource.
17
+
18
+ Attributes:
19
+ datasource_id: The id of the datasource.
20
+ context: The context generated for the datasource.
21
+ """
22
+
23
+ datasource_id: DatasourceId
24
+ # TODO: Read the context as a BuildExecutionResult instead of a Yaml string?
25
+ context: str
26
+
27
+
28
+ def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceType:
29
+ with context_path.open("r") as context_file:
30
+ type_key = "datasource_type"
31
+ for line in context_file:
32
+ if line.startswith(f"{type_key}: "):
33
+ datasource_type = yaml.safe_load(line)[type_key]
34
+ return DatasourceType(full_type=datasource_type)
35
+
36
+ raise ValueError(f"Could not find type in context file {context_path}")
37
+
38
+
39
+ def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
40
+ output_dir = get_output_dir(project_dir=project_layout.project_dir)
41
+
42
+ result = []
43
+ for main_type_dir in sorted((p for p in output_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
44
+ for context_path in sorted(
45
+ (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
46
+ ):
47
+ try:
48
+ result.append(
49
+ Datasource(
50
+ id=DatasourceId.from_datasource_config_file_path(context_path),
51
+ type=_read_datasource_type_from_context_file(context_path),
52
+ )
53
+ )
54
+ except ValueError as e:
55
+ logger.debug(str(e), exc_info=True, stack_info=True)
56
+ logger.warning(
57
+ f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_path.resolve()}"
58
+ )
59
+
60
+ return result
61
+
62
+
63
+ def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
64
+ output_dir = get_output_dir(project_dir=project_layout.project_dir)
65
+
66
+ context_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
67
+ if not context_path.is_file():
68
+ raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
69
+
70
+ context = context_path.read_text()
71
+ return DatasourceContext(datasource_id=datasource_id, context=context)
72
+
73
+
74
+ def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
75
+ output_dir = get_output_dir(project_dir=project_layout.project_dir)
76
+
77
+ result = []
78
+ for main_type_dir in sorted((p for p in output_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
79
+ for context_path in sorted(
80
+ (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
81
+ ):
82
+ result.append(
83
+ DatasourceContext(
84
+ datasource_id=DatasourceId.from_datasource_context_file_path(context_path),
85
+ context=context_path.read_text(),
86
+ )
87
+ )
88
+
89
+ return result
90
+
91
+
92
+ def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
93
+ return f"# ===== {str(datasource_id)} ====={os.linesep}"
@@ -4,17 +4,17 @@ from typing import Any
4
4
 
5
5
  import yaml
6
6
 
7
- from databao_context_engine.pluginlib.build_plugin import DatasourceType
8
- from databao_context_engine.project.layout import get_source_dir
9
- from databao_context_engine.project.types import (
7
+ from databao_context_engine.datasources.types import (
8
+ Datasource,
10
9
  DatasourceDescriptor,
10
+ DatasourceId,
11
11
  DatasourceKind,
12
12
  PreparedConfig,
13
13
  PreparedDatasource,
14
14
  PreparedFile,
15
- DatasourceId,
16
- Datasource,
17
15
  )
16
+ from databao_context_engine.pluginlib.build_plugin import DatasourceType
17
+ from databao_context_engine.project.layout import get_source_dir
18
18
  from databao_context_engine.templating.renderer import render_template
19
19
 
20
20
  logger = logging.getLogger(__name__)
@@ -41,13 +41,18 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
41
41
 
42
42
 
43
43
  def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
44
- """
45
- Scan the project's src/ directory and return all discovered sources.
44
+ """Scan the project's src/ directory and return all discovered sources.
46
45
 
47
46
  Rules:
48
47
  - Each first-level directory under src/ is treated as a main_type
49
48
  - Unsupported or unreadable entries are skipped.
50
49
  - The returned list is sorted by directory and then filename
50
+
51
+ Returns:
52
+ A list of DatasourceDescriptor instances representing the discovered datasources.
53
+
54
+ Raises:
55
+ ValueError: If the src directory does not exist in the project directory.
51
56
  """
52
57
  src = get_source_dir(project_dir)
53
58
  if not src.exists() or not src.is_dir():
@@ -56,7 +61,7 @@ def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
56
61
  datasources: list[DatasourceDescriptor] = []
57
62
  for main_dir in sorted((p for p in src.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
58
63
  for path in sorted((p for p in main_dir.iterdir() if _is_datasource_file(p)), key=lambda p: p.name.lower()):
59
- datasource = load_datasource_descriptor(path)
64
+ datasource = _load_datasource_descriptor(path)
60
65
  if datasource is not None:
61
66
  datasources.append(datasource)
62
67
 
@@ -79,17 +84,15 @@ def get_datasource_descriptors(project_dir: Path, datasource_ids: list[Datasourc
79
84
  if not config_file_path.is_file():
80
85
  raise ValueError(f"Datasource config file not found: {config_file_path}")
81
86
 
82
- datasource = load_datasource_descriptor(config_file_path)
87
+ datasource = _load_datasource_descriptor(config_file_path)
83
88
  if datasource is not None:
84
89
  datasources.append(datasource)
85
90
 
86
91
  return datasources
87
92
 
88
93
 
89
- def load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
90
- """
91
- Load a single file with src/<parent_name>/ into a DatasourceDescriptor
92
- """
94
+ def _load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
95
+ """Load a single file with src/<parent_name>/ into a DatasourceDescriptor."""
93
96
  if not path.is_file():
94
97
  return None
95
98
 
@@ -110,9 +113,7 @@ def load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
110
113
 
111
114
 
112
115
  def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
113
- """
114
- Convert a discovered datasource into a prepared datasource ready for plugin execution
115
- """
116
+ """Convert a discovered datasource into a prepared datasource ready for plugin execution."""
116
117
  if datasource.kind is DatasourceKind.FILE:
117
118
  file_subtype = datasource.path.suffix.lower().lstrip(".")
118
119
  return PreparedFile(