databao-context-engine 0.1.4.dev1__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. databao_context_engine/__init__.py +14 -1
  2. databao_context_engine/build_sources/build_runner.py +7 -7
  3. databao_context_engine/build_sources/build_wiring.py +8 -10
  4. databao_context_engine/build_sources/plugin_execution.py +9 -12
  5. databao_context_engine/cli/add_datasource_config.py +9 -30
  6. databao_context_engine/cli/commands.py +29 -13
  7. databao_context_engine/databao_context_engine.py +3 -13
  8. databao_context_engine/databao_context_project_manager.py +56 -29
  9. databao_context_engine/datasources/check_config.py +13 -16
  10. databao_context_engine/datasources/datasource_context.py +21 -24
  11. databao_context_engine/datasources/datasource_discovery.py +45 -44
  12. databao_context_engine/datasources/types.py +53 -42
  13. databao_context_engine/generate_configs_schemas.py +4 -5
  14. databao_context_engine/introspection/property_extract.py +52 -47
  15. databao_context_engine/llm/__init__.py +10 -0
  16. databao_context_engine/llm/api.py +57 -0
  17. databao_context_engine/llm/descriptions/ollama.py +1 -3
  18. databao_context_engine/llm/factory.py +5 -2
  19. databao_context_engine/llm/install.py +13 -10
  20. databao_context_engine/llm/runtime.py +3 -5
  21. databao_context_engine/mcp/mcp_server.py +1 -3
  22. databao_context_engine/plugin_loader.py +6 -7
  23. databao_context_engine/pluginlib/build_plugin.py +0 -33
  24. databao_context_engine/plugins/databases/athena/__init__.py +0 -0
  25. databao_context_engine/plugins/{athena_db_plugin.py → databases/athena/athena_db_plugin.py} +3 -3
  26. databao_context_engine/plugins/databases/{athena_introspector.py → athena/athena_introspector.py} +2 -5
  27. databao_context_engine/plugins/{base_db_plugin.py → databases/base_db_plugin.py} +1 -3
  28. databao_context_engine/plugins/databases/base_introspector.py +11 -14
  29. databao_context_engine/plugins/databases/clickhouse/__init__.py +0 -0
  30. databao_context_engine/plugins/{clickhouse_db_plugin.py → databases/clickhouse/clickhouse_db_plugin.py} +3 -3
  31. databao_context_engine/plugins/databases/{clickhouse_introspector.py → clickhouse/clickhouse_introspector.py} +2 -5
  32. databao_context_engine/plugins/databases/duckdb/__init__.py +0 -0
  33. databao_context_engine/plugins/databases/duckdb/duckdb_db_plugin.py +12 -0
  34. databao_context_engine/plugins/databases/{duckdb_introspector.py → duckdb/duckdb_introspector.py} +4 -7
  35. databao_context_engine/plugins/databases/mssql/__init__.py +0 -0
  36. databao_context_engine/plugins/{mssql_db_plugin.py → databases/mssql/mssql_db_plugin.py} +3 -3
  37. databao_context_engine/plugins/databases/{mssql_introspector.py → mssql/mssql_introspector.py} +9 -10
  38. databao_context_engine/plugins/databases/mysql/__init__.py +0 -0
  39. databao_context_engine/plugins/{mysql_db_plugin.py → databases/mysql/mysql_db_plugin.py} +3 -3
  40. databao_context_engine/plugins/databases/{mysql_introspector.py → mysql/mysql_introspector.py} +8 -8
  41. databao_context_engine/plugins/databases/postgresql/__init__.py +0 -0
  42. databao_context_engine/plugins/databases/postgresql/postgresql_db_plugin.py +15 -0
  43. databao_context_engine/plugins/databases/{postgresql_introspector.py → postgresql/postgresql_introspector.py} +9 -16
  44. databao_context_engine/plugins/databases/snowflake/__init__.py +0 -0
  45. databao_context_engine/plugins/databases/snowflake/snowflake_db_plugin.py +15 -0
  46. databao_context_engine/plugins/databases/{snowflake_introspector.py → snowflake/snowflake_introspector.py} +8 -9
  47. databao_context_engine/plugins/databases/sqlite/__init__.py +0 -0
  48. databao_context_engine/plugins/databases/sqlite/sqlite_db_plugin.py +12 -0
  49. databao_context_engine/plugins/databases/sqlite/sqlite_introspector.py +241 -0
  50. databao_context_engine/plugins/dbt/__init__.py +0 -0
  51. databao_context_engine/plugins/dbt/dbt_chunker.py +47 -0
  52. databao_context_engine/plugins/dbt/dbt_context_extractor.py +106 -0
  53. databao_context_engine/plugins/dbt/dbt_plugin.py +25 -0
  54. databao_context_engine/plugins/dbt/types.py +44 -0
  55. databao_context_engine/plugins/dbt/types_artifacts.py +58 -0
  56. databao_context_engine/plugins/files/__init__.py +0 -0
  57. databao_context_engine/plugins/{unstructured_files_plugin.py → files/unstructured_files_plugin.py} +1 -1
  58. databao_context_engine/plugins/plugin_loader.py +13 -15
  59. databao_context_engine/plugins/resources/parquet_introspector.py +1 -1
  60. databao_context_engine/plugins/{parquet_plugin.py → resources/parquet_plugin.py} +1 -3
  61. databao_context_engine/project/layout.py +12 -13
  62. databao_context_engine/retrieve_embeddings/retrieve_runner.py +3 -16
  63. databao_context_engine/retrieve_embeddings/retrieve_service.py +13 -6
  64. databao_context_engine/retrieve_embeddings/retrieve_wiring.py +4 -7
  65. databao_context_engine/serialization/yaml.py +5 -5
  66. databao_context_engine/storage/migrate.py +1 -1
  67. databao_context_engine/storage/repositories/vector_search_repository.py +18 -6
  68. databao_context_engine-0.1.6.dist-info/METADATA +228 -0
  69. {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/RECORD +71 -55
  70. databao_context_engine/datasources/add_config.py +0 -34
  71. databao_context_engine/plugins/duckdb_db_plugin.py +0 -12
  72. databao_context_engine/plugins/postgresql_db_plugin.py +0 -12
  73. databao_context_engine/plugins/snowflake_db_plugin.py +0 -12
  74. databao_context_engine/retrieve_embeddings/export_results.py +0 -12
  75. databao_context_engine-0.1.4.dev1.dist-info/METADATA +0 -75
  76. {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/WHEEL +0 -0
  77. {databao_context_engine-0.1.4.dev1.dist-info → databao_context_engine-0.1.6.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  import yaml
7
7
 
8
8
  from databao_context_engine.datasources.types import Datasource, DatasourceId, DatasourceType
9
- from databao_context_engine.project.layout import ProjectLayout, get_output_dir
9
+ from databao_context_engine.project.layout import ProjectLayout
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -37,33 +37,31 @@ def _read_datasource_type_from_context_file(context_path: Path) -> DatasourceTyp
37
37
 
38
38
 
39
39
  def get_introspected_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
40
- output_dir = get_output_dir(project_dir=project_layout.project_dir)
41
-
42
40
  result = []
43
- for main_type_dir in sorted((p for p in output_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
44
- for context_path in sorted(
45
- (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
46
- ):
41
+ for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
42
+ for context_file_name in filenames:
43
+ if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
44
+ continue
45
+ context_file = Path(dirpath).joinpath(context_file_name)
46
+ relative_context_file = context_file.relative_to(project_layout.output_dir)
47
47
  try:
48
48
  result.append(
49
49
  Datasource(
50
- id=DatasourceId.from_datasource_config_file_path(context_path),
51
- type=_read_datasource_type_from_context_file(context_path),
50
+ id=DatasourceId.from_datasource_context_file_path(relative_context_file),
51
+ type=_read_datasource_type_from_context_file(context_file),
52
52
  )
53
53
  )
54
54
  except ValueError as e:
55
55
  logger.debug(str(e), exc_info=True, stack_info=True)
56
56
  logger.warning(
57
- f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_path.resolve()}"
57
+ f"Ignoring introspected datasource: Failed to read datasource_type from context file at {context_file.resolve()}"
58
58
  )
59
59
 
60
- return result
60
+ return sorted(result, key=lambda ds: str(ds.id).lower())
61
61
 
62
62
 
63
63
  def get_datasource_context(project_layout: ProjectLayout, datasource_id: DatasourceId) -> DatasourceContext:
64
- output_dir = get_output_dir(project_dir=project_layout.project_dir)
65
-
66
- context_path = output_dir.joinpath(datasource_id.relative_path_to_context_file())
64
+ context_path = project_layout.output_dir.joinpath(datasource_id.relative_path_to_context_file())
67
65
  if not context_path.is_file():
68
66
  raise ValueError(f"Context file not found for datasource {str(datasource_id)}")
69
67
 
@@ -72,21 +70,20 @@ def get_datasource_context(project_layout: ProjectLayout, datasource_id: Datasou
72
70
 
73
71
 
74
72
  def get_all_contexts(project_layout: ProjectLayout) -> list[DatasourceContext]:
75
- output_dir = get_output_dir(project_dir=project_layout.project_dir)
76
-
77
73
  result = []
78
- for main_type_dir in sorted((p for p in output_dir.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
79
- for context_path in sorted(
80
- (p for p in main_type_dir.iterdir() if p.suffix in [".yaml", ".yml"]), key=lambda p: p.name.lower()
81
- ):
74
+ for dirpath, dirnames, filenames in os.walk(project_layout.output_dir):
75
+ for context_file_name in filenames:
76
+ if Path(context_file_name).suffix not in DatasourceId.ALLOWED_YAML_SUFFIXES:
77
+ continue
78
+ context_file = Path(dirpath).joinpath(context_file_name)
79
+ relative_context_file = context_file.relative_to(project_layout.output_dir)
82
80
  result.append(
83
81
  DatasourceContext(
84
- datasource_id=DatasourceId.from_datasource_context_file_path(context_path),
85
- context=context_path.read_text(),
82
+ datasource_id=DatasourceId.from_datasource_context_file_path(relative_context_file),
83
+ context=context_file.read_text(),
86
84
  )
87
85
  )
88
-
89
- return result
86
+ return sorted(result, key=lambda context: str(context.datasource_id).lower())
90
87
 
91
88
 
92
89
  def get_context_header_for_datasource(datasource_id: DatasourceId) -> str:
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
  from pathlib import Path
3
4
  from typing import Any
4
5
 
@@ -14,15 +15,15 @@ from databao_context_engine.datasources.types import (
14
15
  PreparedFile,
15
16
  )
16
17
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
17
- from databao_context_engine.project.layout import get_source_dir
18
+ from databao_context_engine.project.layout import ProjectLayout
18
19
  from databao_context_engine.templating.renderer import render_template
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
22
23
 
23
- def get_datasource_list(project_dir: Path) -> list[Datasource]:
24
+ def get_datasource_list(project_layout: ProjectLayout) -> list[Datasource]:
24
25
  result = []
25
- for discovered_datasource in discover_datasources(project_dir=project_dir):
26
+ for discovered_datasource in discover_datasources(project_layout=project_layout):
26
27
  try:
27
28
  prepared_source = prepare_source(discovered_datasource)
28
29
  except Exception as e:
@@ -32,7 +33,7 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
32
33
 
33
34
  result.append(
34
35
  Datasource(
35
- id=DatasourceId.from_datasource_config_file_path(discovered_datasource.path),
36
+ id=DatasourceId.from_datasource_config_file_path(project_layout, discovered_datasource.path),
36
37
  type=prepared_source.datasource_type,
37
38
  )
38
39
  )
@@ -40,7 +41,7 @@ def get_datasource_list(project_dir: Path) -> list[Datasource]:
40
41
  return result
41
42
 
42
43
 
43
- def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
44
+ def discover_datasources(project_layout: ProjectLayout) -> list[DatasourceDescriptor]:
44
45
  """Scan the project's src/ directory and return all discovered sources.
45
46
 
46
47
  Rules:
@@ -48,24 +49,21 @@ def discover_datasources(project_dir: Path) -> list[DatasourceDescriptor]:
48
49
  - Unsupported or unreadable entries are skipped.
49
50
  - The returned list is sorted by directory and then filename
50
51
 
52
+ Args:
53
+ project_layout: ProjectLayout instance representing the project directory and configuration.
54
+
51
55
  Returns:
52
56
  A list of DatasourceDescriptor instances representing the discovered datasources.
53
-
54
- Raises:
55
- ValueError: If the src directory does not exist in the project directory.
56
57
  """
57
- src = get_source_dir(project_dir)
58
- if not src.exists() or not src.is_dir():
59
- raise ValueError(f"src directory does not exist in {project_dir}")
60
-
61
58
  datasources: list[DatasourceDescriptor] = []
62
- for main_dir in sorted((p for p in src.iterdir() if p.is_dir()), key=lambda p: p.name.lower()):
63
- for path in sorted((p for p in main_dir.iterdir() if _is_datasource_file(p)), key=lambda p: p.name.lower()):
64
- datasource = _load_datasource_descriptor(path)
59
+ for dirpath, dirnames, filenames in os.walk(project_layout.src_dir):
60
+ for config_file_name in filenames:
61
+ context_file = Path(dirpath).joinpath(config_file_name)
62
+ datasource = _load_datasource_descriptor(project_layout, context_file)
65
63
  if datasource is not None:
66
64
  datasources.append(datasource)
67
65
 
68
- return datasources
66
+ return sorted(datasources, key=lambda ds: str(ds.datasource_id.relative_path_to_config_file()).lower())
69
67
 
70
68
 
71
69
  def _is_datasource_file(p: Path) -> bool:
@@ -73,42 +71,44 @@ def _is_datasource_file(p: Path) -> bool:
73
71
  return p.is_file() and not p.suffix.endswith("~")
74
72
 
75
73
 
76
- def get_datasource_descriptors(project_dir: Path, datasource_ids: list[DatasourceId]):
77
- src = get_source_dir(project_dir)
78
- if not src.exists() or not src.is_dir():
79
- raise ValueError(f"src directory does not exist in {project_dir}")
80
-
74
+ def get_datasource_descriptors(
75
+ project_layout: ProjectLayout, datasource_ids: list[DatasourceId]
76
+ ) -> list[DatasourceDescriptor]:
81
77
  datasources: list[DatasourceDescriptor] = []
82
78
  for datasource_id in sorted(datasource_ids, key=lambda id: str(id)):
83
- config_file_path = src.joinpath(datasource_id.relative_path_to_config_file())
79
+ config_file_path = project_layout.src_dir.joinpath(datasource_id.relative_path_to_config_file())
84
80
  if not config_file_path.is_file():
85
81
  raise ValueError(f"Datasource config file not found: {config_file_path}")
86
82
 
87
- datasource = _load_datasource_descriptor(config_file_path)
83
+ datasource = _load_datasource_descriptor(project_layout, config_file_path)
88
84
  if datasource is not None:
89
85
  datasources.append(datasource)
90
86
 
91
87
  return datasources
92
88
 
93
89
 
94
- def _load_datasource_descriptor(path: Path) -> DatasourceDescriptor | None:
90
+ def _load_datasource_descriptor(project_layout: ProjectLayout, config_file: Path) -> DatasourceDescriptor | None:
95
91
  """Load a single file with src/<parent_name>/ into a DatasourceDescriptor."""
96
- if not path.is_file():
92
+ if not config_file.is_file():
97
93
  return None
98
94
 
99
- parent_name = path.parent.name
100
- extension = path.suffix.lower().lstrip(".")
95
+ parent_name = config_file.parent.name
96
+ extension = config_file.suffix.lower().lstrip(".")
97
+ relative_config_file = config_file.relative_to(project_layout.src_dir)
101
98
 
102
- if parent_name == "files":
103
- return DatasourceDescriptor(path=path.resolve(), main_type=parent_name, kind=DatasourceKind.FILE)
99
+ if parent_name == "files" and len(relative_config_file.parts) == 2:
100
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
101
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
104
102
 
105
103
  if extension in {"yaml", "yml"}:
106
- return DatasourceDescriptor(path=path.resolve(), main_type=parent_name, kind=DatasourceKind.CONFIG)
104
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
105
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.CONFIG)
107
106
 
108
107
  if extension:
109
- return DatasourceDescriptor(path=path.resolve(), main_type=parent_name, kind=DatasourceKind.FILE)
108
+ datasource_id = DatasourceId.from_datasource_config_file_path(project_layout, config_file)
109
+ return DatasourceDescriptor(datasource_id=datasource_id, path=config_file.resolve(), kind=DatasourceKind.FILE)
110
110
 
111
- logger.debug("Skipping file without extension: %s", path)
111
+ logger.debug("Skipping file without extension: %s", config_file)
112
112
  return None
113
113
 
114
114
 
@@ -117,23 +117,24 @@ def prepare_source(datasource: DatasourceDescriptor) -> PreparedDatasource:
117
117
  if datasource.kind is DatasourceKind.FILE:
118
118
  file_subtype = datasource.path.suffix.lower().lstrip(".")
119
119
  return PreparedFile(
120
- datasource_type=DatasourceType.from_main_and_subtypes(main_type=datasource.main_type, subtype=file_subtype),
120
+ datasource_id=datasource.datasource_id,
121
+ datasource_type=DatasourceType(full_type=file_subtype),
121
122
  path=datasource.path,
122
123
  )
123
124
 
124
- else:
125
- config = _parse_config_file(datasource.path)
125
+ config = _parse_config_file(datasource.path)
126
126
 
127
- subtype = config.get("type")
128
- if not subtype or not isinstance(subtype, str):
129
- raise ValueError("Config missing 'type' at %s - skipping", datasource.path)
127
+ ds_type = config.get("type")
128
+ if not ds_type or not isinstance(ds_type, str):
129
+ raise ValueError("Config missing 'type' at %s - skipping", datasource.path)
130
130
 
131
- return PreparedConfig(
132
- datasource_type=DatasourceType.from_main_and_subtypes(main_type=datasource.main_type, subtype=subtype),
133
- path=datasource.path,
134
- config=config,
135
- datasource_name=datasource.path.stem,
136
- )
131
+ return PreparedConfig(
132
+ datasource_id=datasource.datasource_id,
133
+ datasource_type=DatasourceType(full_type=ds_type),
134
+ path=datasource.path,
135
+ config=config,
136
+ datasource_name=datasource.path.stem,
137
+ )
137
138
 
138
139
 
139
140
  def _parse_config_file(file_path: Path) -> dict[Any, Any]:
@@ -1,10 +1,10 @@
1
- import os
2
1
  from dataclasses import dataclass
3
2
  from enum import StrEnum
4
3
  from pathlib import Path
5
4
  from typing import Any
6
5
 
7
6
  from databao_context_engine.pluginlib.build_plugin import DatasourceType
7
+ from databao_context_engine.project.layout import ProjectLayout
8
8
 
9
9
 
10
10
  class DatasourceKind(StrEnum):
@@ -14,13 +14,14 @@ class DatasourceKind(StrEnum):
14
14
 
15
15
  @dataclass(frozen=True)
16
16
  class DatasourceDescriptor:
17
+ datasource_id: "DatasourceId"
17
18
  path: Path
18
19
  kind: DatasourceKind
19
- main_type: str
20
20
 
21
21
 
22
22
  @dataclass(frozen=True)
23
23
  class PreparedConfig:
24
+ datasource_id: "DatasourceId"
24
25
  datasource_type: DatasourceType
25
26
  path: Path
26
27
  config: dict[Any, Any]
@@ -29,6 +30,7 @@ class PreparedConfig:
29
30
 
30
31
  @dataclass(frozen=True)
31
32
  class PreparedFile:
33
+ datasource_id: "DatasourceId"
32
34
  datasource_type: DatasourceType
33
35
  path: Path
34
36
 
@@ -45,38 +47,29 @@ class DatasourceId:
45
47
  Use the provided factory methods `from_string_repr` and `from_datasource_config_file_path` to create a DatasourceId, rather than its constructor.
46
48
 
47
49
  Attributes:
48
- datasource_config_folder: The folder where the datasource's config file is located.
49
- datasource_name: The name of the datasource.
50
+ datasource_path: The path to the datasource relative to project's src folder.
50
51
  config_file_suffix: The suffix of the config (or raw) file.
51
52
  """
52
53
 
53
- datasource_config_folder: str
54
- datasource_name: str
54
+ ALLOWED_YAML_SUFFIXES = [".yaml", ".yml"]
55
+ CONTEXT_FILE_SUFFIX = ".yaml"
56
+
57
+ datasource_path: str
55
58
  config_file_suffix: str
56
59
 
57
60
  def __post_init__(self):
58
- if not self.datasource_config_folder.strip():
59
- raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_config_folder must not be empty")
60
- if not self.datasource_name.strip():
61
- raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not be empty")
61
+ if not self.datasource_path.strip():
62
+ raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not be empty")
62
63
  if not self.config_file_suffix.strip():
63
64
  raise ValueError(f"Invalid DatasourceId ({str(self)}): config_file_suffix must not be empty")
64
65
 
65
- if os.sep in self.datasource_config_folder:
66
- raise ValueError(
67
- f"Invalid DatasourceId ({str(self)}): datasource_config_folder must not contain a path separator"
68
- )
69
-
70
- if os.sep in self.datasource_name:
71
- raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not contain a path separator")
72
-
73
66
  if not self.config_file_suffix.startswith("."):
74
67
  raise ValueError(
75
68
  f'Invalid DatasourceId ({str(self)}): config_file_suffix must start with a dot "." (e.g.: .yaml)'
76
69
  )
77
70
 
78
- if self.datasource_name.endswith(self.config_file_suffix):
79
- raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_name must not contain the file suffix")
71
+ if self.datasource_path.endswith(self.config_file_suffix):
72
+ raise ValueError(f"Invalid DatasourceId ({str(self)}): datasource_path must not contain the file suffix")
80
73
 
81
74
  def __str__(self):
82
75
  return str(self.relative_path_to_config_file())
@@ -89,7 +82,7 @@ class DatasourceId:
89
82
  Returns:
90
83
  The path to the config file relative to the src folder in the project.
91
84
  """
92
- return Path(self.datasource_config_folder).joinpath(self.datasource_name + self.config_file_suffix)
85
+ return Path(self.datasource_path + self.config_file_suffix)
93
86
 
94
87
  def relative_path_to_context_file(self) -> Path:
95
88
  """Return a path to the context file for this datasource.
@@ -100,9 +93,13 @@ class DatasourceId:
100
93
  The path to the context file relative to the output folder in the project.
101
94
  """
102
95
  # Keep the suffix in the filename if this datasource is a raw file, to handle multiple files with the same name and different extensions
103
- suffix = ".yaml" if self.config_file_suffix == ".yaml" else (self.config_file_suffix + ".yaml")
96
+ suffix = (
97
+ DatasourceId.CONTEXT_FILE_SUFFIX
98
+ if self.config_file_suffix in DatasourceId.ALLOWED_YAML_SUFFIXES
99
+ else (self.config_file_suffix + DatasourceId.CONTEXT_FILE_SUFFIX)
100
+ )
104
101
 
105
- return Path(self.datasource_config_folder).joinpath(self.datasource_name + suffix)
102
+ return Path(self.datasource_path + suffix)
106
103
 
107
104
  @classmethod
108
105
  def from_string_repr(cls, datasource_id_as_string: str) -> "DatasourceId":
@@ -115,34 +112,36 @@ class DatasourceId:
115
112
 
116
113
  Returns:
117
114
  The DatasourceId instance created from the string representation.
118
-
119
- Raises:
120
- ValueError: If the string representation is invalid (e.g. too many parent folders).
121
115
  """
122
116
  config_file_path = Path(datasource_id_as_string)
123
-
124
- if len(config_file_path.parents) > 2:
125
- raise ValueError(
126
- f"Invalid string representation of a DatasourceId: too many parent folders defined in {datasource_id_as_string}"
127
- )
128
-
129
- return DatasourceId.from_datasource_config_file_path(config_file_path)
117
+ return DatasourceId._from_relative_datasource_config_file_path(config_file_path)
130
118
 
131
119
  @classmethod
132
- def from_datasource_config_file_path(cls, datasource_config_file: Path) -> "DatasourceId":
120
+ def from_datasource_config_file_path(
121
+ cls, project_layout: ProjectLayout, datasource_config_file: Path
122
+ ) -> "DatasourceId":
133
123
  """Create a DatasourceId from a config file path.
134
124
 
135
125
  Args:
126
+ project_layout: The databao context engine project layout.
136
127
  datasource_config_file: The path to the datasource config file.
137
128
  This path can either be the config file path relative to the src folder or the full path to the config file.
138
129
 
139
130
  Returns:
140
131
  The DatasourceId instance created from the config file path.
141
132
  """
133
+ relative_datasource_config_file = datasource_config_file.relative_to(project_layout.src_dir)
134
+ return DatasourceId._from_relative_datasource_config_file_path(relative_datasource_config_file)
135
+
136
+ @classmethod
137
+ def _from_relative_datasource_config_file_path(cls, relative_datasource_config_file: Path) -> "DatasourceId":
138
+ if relative_datasource_config_file.is_absolute():
139
+ raise ValueError(
140
+ f"Path to datasource config file should be relative to project's src folder: {relative_datasource_config_file}"
141
+ )
142
142
  return DatasourceId(
143
- datasource_config_folder=datasource_config_file.parent.name,
144
- datasource_name=datasource_config_file.stem,
145
- config_file_suffix=datasource_config_file.suffix,
143
+ datasource_path=_extract_datasource_path(relative_datasource_config_file),
144
+ config_file_suffix=relative_datasource_config_file.suffix,
146
145
  )
147
146
 
148
147
  @classmethod
@@ -157,19 +156,31 @@ class DatasourceId:
157
156
 
158
157
  Returns:
159
158
  The DatasourceId instance created from the context file path.
159
+
160
+ Raises:
161
+ ValueError: If datasource_context_file is invalid, e.g. an absolute path instead of a relative one.
160
162
  """
161
- if len(datasource_context_file.suffixes) > 1 and datasource_context_file.suffix == ".yaml":
163
+ if (
164
+ len(datasource_context_file.suffixes) > 1
165
+ and datasource_context_file.suffix == DatasourceId.CONTEXT_FILE_SUFFIX
166
+ ):
162
167
  # If there is more than 1 suffix, we remove the latest suffix (.yaml) to keep only the actual datasource file suffix
163
- context_file_name_without_yaml_extension = datasource_context_file.name[: -len(".yaml")]
168
+ context_file_name_without_yaml_extension = datasource_context_file.name[
169
+ : -len(DatasourceId.CONTEXT_FILE_SUFFIX)
170
+ ]
164
171
  datasource_context_file = datasource_context_file.with_name(context_file_name_without_yaml_extension)
165
-
172
+ if datasource_context_file.is_absolute():
173
+ raise ValueError(f"Path to datasource context file should be a relative path: {datasource_context_file}")
166
174
  return DatasourceId(
167
- datasource_config_folder=datasource_context_file.parent.name,
168
- datasource_name=datasource_context_file.stem,
175
+ datasource_path=_extract_datasource_path(datasource_context_file),
169
176
  config_file_suffix=datasource_context_file.suffix,
170
177
  )
171
178
 
172
179
 
180
+ def _extract_datasource_path(datasource_file: Path) -> str:
181
+ return str(datasource_file.with_suffix(""))
182
+
183
+
173
184
  @dataclass
174
185
  class Datasource:
175
186
  """A datasource contained in the project.
@@ -38,9 +38,9 @@ def generate_configs_schemas(
38
38
  try:
39
39
  schema_list = _generate_json_schema_output_for_plugins(include_plugins, exclude_plugins)
40
40
 
41
- print(f"{os.linesep}{os.linesep}".join(schema_list))
41
+ click.echo(f"{os.linesep}{os.linesep}".join(schema_list))
42
42
  except Exception as e:
43
- print(str(e))
43
+ click.echo(str(e))
44
44
 
45
45
 
46
46
  def _generate_json_schema_output_for_plugins(
@@ -54,10 +54,9 @@ def _generate_json_schema_output_for_plugins(
54
54
  if len(filtered_plugins) == 0:
55
55
  if include_plugins:
56
56
  raise ValueError(f"No plugin found with id in {include_plugins}")
57
- elif exclude_plugins:
57
+ if exclude_plugins:
58
58
  raise ValueError(f"No plugin found when excluding {exclude_plugins}")
59
- else:
60
- raise ValueError("No plugin found")
59
+ raise ValueError("No plugin found")
61
60
 
62
61
  results = []
63
62
  for plugin in filtered_plugins:
@@ -1,6 +1,16 @@
1
1
  import types
2
2
  from dataclasses import MISSING, fields, is_dataclass
3
- from typing import Annotated, Any, ForwardRef, Union, get_args, get_origin, get_type_hints
3
+ from typing import (
4
+ Annotated,
5
+ Any,
6
+ ForwardRef,
7
+ Iterable,
8
+ Mapping,
9
+ Union,
10
+ get_args,
11
+ get_origin,
12
+ get_type_hints,
13
+ )
4
14
 
5
15
  from pydantic import BaseModel
6
16
  from pydantic_core import PydanticUndefinedType
@@ -14,10 +24,10 @@ from databao_context_engine.pluginlib.config import (
14
24
 
15
25
 
16
26
  def get_property_list_from_type(root_type: type) -> list[ConfigPropertyDefinition]:
17
- return _get_property_list_from_type(parent_type=root_type, is_root_type=True)
27
+ return _get_property_list_from_type(parent_type=root_type)
18
28
 
19
29
 
20
- def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
30
+ def _get_property_list_from_type(*, parent_type: type) -> list[ConfigPropertyDefinition]:
21
31
  if is_dataclass(parent_type):
22
32
  return _get_property_list_from_dataclass(parent_type=parent_type)
23
33
 
@@ -29,20 +39,15 @@ def _get_property_list_from_type(*, parent_type: type, is_root_type: bool) -> li
29
39
  # issubclass is raising a TypeError: issubclass() arg 1 must be a class
30
40
  pass
31
41
 
32
- return _get_property_list_from_type_hints(parent_type=parent_type, is_root_type=is_root_type)
42
+ return _get_property_list_from_type_hints(parent_type=parent_type)
33
43
 
34
44
 
35
- def _get_property_list_from_type_hints(*, parent_type: type, is_root_type: bool) -> list[ConfigPropertyDefinition]:
45
+ def _get_property_list_from_type_hints(*, parent_type: type) -> list[ConfigPropertyDefinition]:
36
46
  try:
37
47
  type_hints = get_type_hints(parent_type, include_extras=True)
38
- except TypeError as e:
39
- if is_root_type:
40
- # Ignore root types that don't have type hints like dict or list
41
- return []
42
- else:
43
- # If we're evaluating a nested property, we want to propagate the exception
44
- # to let the parent property know that this type should be ignored
45
- raise e
48
+ except TypeError:
49
+ # Return an empty list of properties for any type that is not an object (e.g. primitives like str or containers like dict, list, tuple, etc.).
50
+ return []
46
51
 
47
52
  result = []
48
53
  for property_key, property_type in type_hints.items():
@@ -137,13 +142,7 @@ def _create_property(
137
142
  type_properties: dict[type, list[ConfigPropertyDefinition]] = {}
138
143
 
139
144
  for union_type in actual_property_types:
140
- try:
141
- nested_props = _get_property_list_from_type(
142
- parent_type=union_type,
143
- is_root_type=False,
144
- )
145
- except TypeError:
146
- nested_props = []
145
+ nested_props = _get_property_list_from_type(parent_type=union_type)
147
146
 
148
147
  type_properties[union_type] = nested_props
149
148
 
@@ -152,31 +151,38 @@ def _create_property(
152
151
  types=actual_property_types,
153
152
  type_properties=type_properties,
154
153
  )
155
- else:
156
- actual_property_type = actual_property_types[0]
157
- try:
158
- nested_properties = _get_property_list_from_type(
159
- parent_type=actual_property_type,
160
- is_root_type=False,
161
- )
162
- except TypeError:
163
- return None
164
-
165
- resolved_type = actual_property_type if not nested_properties else None
166
- default_value = compute_default_value(
167
- annotation=annotation,
168
- property_default=property_default,
169
- has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
170
- )
171
154
 
172
- return ConfigSinglePropertyDefinition(
173
- property_key=property_name,
174
- property_type=resolved_type,
175
- required=required,
176
- default_value=default_value,
177
- nested_properties=nested_properties or None,
178
- secret=secret,
179
- )
155
+ actual_property_type = actual_property_types[0]
156
+ nested_properties = _get_property_list_from_type(parent_type=actual_property_type)
157
+
158
+ if len(nested_properties) == 0 and _is_mapping_or_iterable(actual_property_type):
159
+ # Ignore Iterables and Mappings for which we didn't resolve nested properties
160
+ # (TypedDict is a Mapping but since we manage to resolve nested properties, it won't be ignored)
161
+ return None
162
+
163
+ resolved_type = actual_property_type if not nested_properties else None
164
+ default_value = compute_default_value(
165
+ annotation=annotation,
166
+ property_default=property_default,
167
+ has_nested_properties=nested_properties is not None and len(nested_properties) > 0,
168
+ )
169
+
170
+ return ConfigSinglePropertyDefinition(
171
+ property_key=property_name,
172
+ property_type=resolved_type,
173
+ required=required,
174
+ default_value=default_value,
175
+ nested_properties=nested_properties or None,
176
+ secret=secret,
177
+ )
178
+
179
+
180
+ def _is_mapping_or_iterable(property_type: type):
181
+ # For types like list[str], we need to get the origin (ie. list) to use in issubclass
182
+ origin = get_origin(property_type)
183
+
184
+ # We make sure to not return True for str, which is an Iterable
185
+ return property_type is not str and issubclass(origin if origin else property_type, (Mapping, Iterable))
180
186
 
181
187
 
182
188
  def _get_config_property_annotation(property_type) -> ConfigPropertyAnnotation | None:
@@ -194,9 +200,8 @@ def _read_actual_property_type(property_type: type) -> tuple[type, ...]:
194
200
 
195
201
  if property_type_origin is Annotated:
196
202
  return _read_actual_property_type(property_type.__origin__) # type: ignore[attr-defined]
197
- elif property_type_origin in (Union, types.UnionType):
198
- args = tuple(arg for arg in get_args(property_type) if arg is not type(None))
199
- return args
203
+ if property_type_origin in (Union, types.UnionType):
204
+ return tuple(arg for arg in get_args(property_type) if arg is not type(None))
200
205
 
201
206
  return (property_type,)
202
207
 
@@ -0,0 +1,10 @@
1
+ from databao_context_engine.llm.api import download_ollama_models_if_needed, install_ollama_if_needed
2
+ from databao_context_engine.llm.errors import OllamaError, OllamaPermanentError, OllamaTransientError
3
+
4
+ __all__ = [
5
+ "install_ollama_if_needed",
6
+ "download_ollama_models_if_needed",
7
+ "OllamaError",
8
+ "OllamaTransientError",
9
+ "OllamaPermanentError",
10
+ ]