databao-context-engine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. databao_context_engine/__init__.py +35 -0
  2. databao_context_engine/build_sources/__init__.py +0 -0
  3. databao_context_engine/build_sources/internal/__init__.py +0 -0
  4. databao_context_engine/build_sources/internal/build_runner.py +111 -0
  5. databao_context_engine/build_sources/internal/build_service.py +77 -0
  6. databao_context_engine/build_sources/internal/build_wiring.py +52 -0
  7. databao_context_engine/build_sources/internal/export_results.py +43 -0
  8. databao_context_engine/build_sources/internal/plugin_execution.py +74 -0
  9. databao_context_engine/build_sources/public/__init__.py +0 -0
  10. databao_context_engine/build_sources/public/api.py +4 -0
  11. databao_context_engine/cli/__init__.py +0 -0
  12. databao_context_engine/cli/add_datasource_config.py +130 -0
  13. databao_context_engine/cli/commands.py +256 -0
  14. databao_context_engine/cli/datasources.py +64 -0
  15. databao_context_engine/cli/info.py +32 -0
  16. databao_context_engine/config/__init__.py +0 -0
  17. databao_context_engine/config/log_config.yaml +16 -0
  18. databao_context_engine/config/logging.py +43 -0
  19. databao_context_engine/databao_context_project_manager.py +92 -0
  20. databao_context_engine/databao_engine.py +85 -0
  21. databao_context_engine/datasource_config/__init__.py +0 -0
  22. databao_context_engine/datasource_config/add_config.py +50 -0
  23. databao_context_engine/datasource_config/check_config.py +131 -0
  24. databao_context_engine/datasource_config/datasource_context.py +60 -0
  25. databao_context_engine/event_journal/__init__.py +0 -0
  26. databao_context_engine/event_journal/writer.py +29 -0
  27. databao_context_engine/generate_configs_schemas.py +92 -0
  28. databao_context_engine/init_project.py +18 -0
  29. databao_context_engine/introspection/__init__.py +0 -0
  30. databao_context_engine/introspection/property_extract.py +202 -0
  31. databao_context_engine/llm/__init__.py +0 -0
  32. databao_context_engine/llm/config.py +20 -0
  33. databao_context_engine/llm/descriptions/__init__.py +0 -0
  34. databao_context_engine/llm/descriptions/ollama.py +21 -0
  35. databao_context_engine/llm/descriptions/provider.py +10 -0
  36. databao_context_engine/llm/embeddings/__init__.py +0 -0
  37. databao_context_engine/llm/embeddings/ollama.py +37 -0
  38. databao_context_engine/llm/embeddings/provider.py +13 -0
  39. databao_context_engine/llm/errors.py +16 -0
  40. databao_context_engine/llm/factory.py +61 -0
  41. databao_context_engine/llm/install.py +227 -0
  42. databao_context_engine/llm/runtime.py +73 -0
  43. databao_context_engine/llm/service.py +159 -0
  44. databao_context_engine/main.py +19 -0
  45. databao_context_engine/mcp/__init__.py +0 -0
  46. databao_context_engine/mcp/all_results_tool.py +5 -0
  47. databao_context_engine/mcp/mcp_runner.py +16 -0
  48. databao_context_engine/mcp/mcp_server.py +63 -0
  49. databao_context_engine/mcp/retrieve_tool.py +22 -0
  50. databao_context_engine/pluginlib/__init__.py +0 -0
  51. databao_context_engine/pluginlib/build_plugin.py +107 -0
  52. databao_context_engine/pluginlib/config.py +37 -0
  53. databao_context_engine/pluginlib/plugin_utils.py +68 -0
  54. databao_context_engine/plugins/__init__.py +0 -0
  55. databao_context_engine/plugins/athena_db_plugin.py +12 -0
  56. databao_context_engine/plugins/base_db_plugin.py +45 -0
  57. databao_context_engine/plugins/clickhouse_db_plugin.py +15 -0
  58. databao_context_engine/plugins/databases/__init__.py +0 -0
  59. databao_context_engine/plugins/databases/athena_introspector.py +101 -0
  60. databao_context_engine/plugins/databases/base_introspector.py +144 -0
  61. databao_context_engine/plugins/databases/clickhouse_introspector.py +162 -0
  62. databao_context_engine/plugins/databases/database_chunker.py +69 -0
  63. databao_context_engine/plugins/databases/databases_types.py +114 -0
  64. databao_context_engine/plugins/databases/duckdb_introspector.py +325 -0
  65. databao_context_engine/plugins/databases/introspection_model_builder.py +270 -0
  66. databao_context_engine/plugins/databases/introspection_scope.py +74 -0
  67. databao_context_engine/plugins/databases/introspection_scope_matcher.py +103 -0
  68. databao_context_engine/plugins/databases/mssql_introspector.py +433 -0
  69. databao_context_engine/plugins/databases/mysql_introspector.py +338 -0
  70. databao_context_engine/plugins/databases/postgresql_introspector.py +428 -0
  71. databao_context_engine/plugins/databases/snowflake_introspector.py +287 -0
  72. databao_context_engine/plugins/duckdb_db_plugin.py +12 -0
  73. databao_context_engine/plugins/mssql_db_plugin.py +12 -0
  74. databao_context_engine/plugins/mysql_db_plugin.py +12 -0
  75. databao_context_engine/plugins/parquet_plugin.py +32 -0
  76. databao_context_engine/plugins/plugin_loader.py +110 -0
  77. databao_context_engine/plugins/postgresql_db_plugin.py +12 -0
  78. databao_context_engine/plugins/resources/__init__.py +0 -0
  79. databao_context_engine/plugins/resources/parquet_chunker.py +23 -0
  80. databao_context_engine/plugins/resources/parquet_introspector.py +154 -0
  81. databao_context_engine/plugins/snowflake_db_plugin.py +12 -0
  82. databao_context_engine/plugins/unstructured_files_plugin.py +68 -0
  83. databao_context_engine/project/__init__.py +0 -0
  84. databao_context_engine/project/datasource_discovery.py +141 -0
  85. databao_context_engine/project/info.py +44 -0
  86. databao_context_engine/project/init_project.py +102 -0
  87. databao_context_engine/project/layout.py +127 -0
  88. databao_context_engine/project/project_config.py +32 -0
  89. databao_context_engine/project/resources/examples/src/databases/example_postgres.yaml +7 -0
  90. databao_context_engine/project/resources/examples/src/files/documentation.md +30 -0
  91. databao_context_engine/project/resources/examples/src/files/notes.txt +20 -0
  92. databao_context_engine/project/runs.py +39 -0
  93. databao_context_engine/project/types.py +134 -0
  94. databao_context_engine/retrieve_embeddings/__init__.py +0 -0
  95. databao_context_engine/retrieve_embeddings/internal/__init__.py +0 -0
  96. databao_context_engine/retrieve_embeddings/internal/export_results.py +12 -0
  97. databao_context_engine/retrieve_embeddings/internal/retrieve_runner.py +34 -0
  98. databao_context_engine/retrieve_embeddings/internal/retrieve_service.py +68 -0
  99. databao_context_engine/retrieve_embeddings/internal/retrieve_wiring.py +29 -0
  100. databao_context_engine/retrieve_embeddings/public/__init__.py +0 -0
  101. databao_context_engine/retrieve_embeddings/public/api.py +3 -0
  102. databao_context_engine/serialisation/__init__.py +0 -0
  103. databao_context_engine/serialisation/yaml.py +35 -0
  104. databao_context_engine/services/__init__.py +0 -0
  105. databao_context_engine/services/chunk_embedding_service.py +104 -0
  106. databao_context_engine/services/embedding_shard_resolver.py +64 -0
  107. databao_context_engine/services/factories.py +88 -0
  108. databao_context_engine/services/models.py +12 -0
  109. databao_context_engine/services/persistence_service.py +61 -0
  110. databao_context_engine/services/run_name_policy.py +8 -0
  111. databao_context_engine/services/table_name_policy.py +15 -0
  112. databao_context_engine/storage/__init__.py +0 -0
  113. databao_context_engine/storage/connection.py +32 -0
  114. databao_context_engine/storage/exceptions/__init__.py +0 -0
  115. databao_context_engine/storage/exceptions/exceptions.py +6 -0
  116. databao_context_engine/storage/migrate.py +127 -0
  117. databao_context_engine/storage/migrations/V01__init.sql +63 -0
  118. databao_context_engine/storage/models.py +51 -0
  119. databao_context_engine/storage/repositories/__init__.py +0 -0
  120. databao_context_engine/storage/repositories/chunk_repository.py +130 -0
  121. databao_context_engine/storage/repositories/datasource_run_repository.py +136 -0
  122. databao_context_engine/storage/repositories/embedding_model_registry_repository.py +87 -0
  123. databao_context_engine/storage/repositories/embedding_repository.py +113 -0
  124. databao_context_engine/storage/repositories/factories.py +35 -0
  125. databao_context_engine/storage/repositories/run_repository.py +157 -0
  126. databao_context_engine/storage/repositories/vector_search_repository.py +63 -0
  127. databao_context_engine/storage/transaction.py +14 -0
  128. databao_context_engine/system/__init__.py +0 -0
  129. databao_context_engine/system/properties.py +13 -0
  130. databao_context_engine/templating/__init__.py +0 -0
  131. databao_context_engine/templating/renderer.py +29 -0
  132. databao_context_engine-0.1.1.dist-info/METADATA +186 -0
  133. databao_context_engine-0.1.1.dist-info/RECORD +135 -0
  134. databao_context_engine-0.1.1.dist-info/WHEEL +4 -0
  135. databao_context_engine-0.1.1.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,270 @@
1
+ from collections import defaultdict
2
+ from typing import Any, Iterable, cast
3
+
4
+ from databao_context_engine.plugins.databases.databases_types import (
5
+ CheckConstraint,
6
+ DatabaseColumn,
7
+ DatabasePartitionInfo,
8
+ DatabaseTable,
9
+ DatasetKind,
10
+ ForeignKey,
11
+ ForeignKeyColumnMap,
12
+ Index,
13
+ KeyConstraint,
14
+ DatabaseSchema,
15
+ )
16
+
17
+
18
class IntrospectionModelBuilder:
    """
    Accumulates flat introspection rows (plain dicts) into ``DatabaseTable`` models.

    Each ``apply_*`` method reads one kind of row and merges it into the table named by the
    row's ``table_name``, creating that table the first time it is mentioned.
    ``finish`` returns the accumulated tables sorted by table name.
    """

    def __init__(self) -> None:
        # Tables built so far, keyed by table name.
        self.by_table: dict[str, DatabaseTable] = {}

    @classmethod
    def build_schemas_from_components(
        cls,
        *,
        schemas: list[str],
        rels: list[dict] | None = None,
        cols: list[dict] | None = None,
        pk_cols: list[dict] | None = None,
        uq_cols: list[dict] | None = None,
        checks: list[dict] | None = None,
        fk_cols: list[dict] | None = None,
        idx_cols: list[dict] | None = None,
        partitions: list[dict] | None = None,
        schema_field: str = "schema_name",
    ) -> list[DatabaseSchema]:
        """
        Build one ``DatabaseSchema`` per requested schema from rows that span several schemas.

        Rows are bucketed by ``row[schema_field]``; rows whose schema value is missing, empty or
        not a string are dropped. Each bucket is handed to ``build_tables_from_components``.
        Schemas that end up without any table are left out. Output order follows ``schemas``.
        """

        def group_by_schema(rows: list[dict] | None) -> dict[str, list[dict]]:
            buckets: dict[str, list[dict]] = defaultdict(list)
            for row in rows or []:
                schema_name = row.get(schema_field)
                if isinstance(schema_name, str) and schema_name:
                    buckets[schema_name].append(row)
            return buckets

        rels_by_schema = group_by_schema(rels)
        cols_by_schema = group_by_schema(cols)
        pk_by_schema = group_by_schema(pk_cols)
        uq_by_schema = group_by_schema(uq_cols)
        checks_by_schema = group_by_schema(checks)
        fks_by_schema = group_by_schema(fk_cols)
        idx_by_schema = group_by_schema(idx_cols)
        parts_by_schema = group_by_schema(partitions)

        out: list[DatabaseSchema] = []
        for schema in schemas:
            # .get() (not indexing) so that looking up an absent schema does not insert a bucket.
            tables = cls.build_tables_from_components(
                rels=rels_by_schema.get(schema, []),
                cols=cols_by_schema.get(schema, []),
                pk_cols=pk_by_schema.get(schema, []),
                uq_cols=uq_by_schema.get(schema, []),
                checks=checks_by_schema.get(schema, []),
                fk_cols=fks_by_schema.get(schema, []),
                idx_cols=idx_by_schema.get(schema, []),
                partitions=parts_by_schema.get(schema, []),
            )
            if tables:
                out.append(DatabaseSchema(name=schema, tables=tables))

        return out

    @classmethod
    def build_tables_from_components(
        cls,
        *,
        rels: list[dict] | None = None,
        cols: list[dict] | None = None,
        pk_cols: list[dict] | None = None,
        uq_cols: list[dict] | None = None,
        checks: list[dict] | None = None,
        fk_cols: list[dict] | None = None,
        idx_cols: list[dict] | None = None,
        partitions: list[dict] | None = None,
    ) -> list[DatabaseTable]:
        """
        Build the tables of a single schema by running every ``apply_*`` step on a fresh builder.

        Every argument is optional; ``None`` is treated as "no rows of that kind".
        Returns the tables sorted by name (see ``finish``).
        """
        b = cls()
        b.apply_relations(rels)
        b.apply_columns(cols)
        b.apply_primary_keys(pk_cols)
        b.apply_unique_constraints(uq_cols)
        b.apply_checks(checks)
        b.apply_foreign_keys(fk_cols)
        b.apply_indexes(idx_cols)
        b.apply_partitions(partitions)
        return b.finish()

    def get_or_create_table(self, table_name: str) -> DatabaseTable:
        """
        Return the table registered under ``table_name``, registering an empty one if needed.

        A newly created table has no columns, no samples, no partition info, no description
        and kind ``DatasetKind.TABLE``.
        """
        t = self.by_table.get(table_name)
        if t is None:
            t = self.by_table[table_name] = DatabaseTable(
                name=table_name,
                columns=[],
                samples=[],
                partition_info=None,
                description=None,
                kind=DatasetKind.TABLE,
            )
        return t

    def apply_relations(self, rels: list[dict] | None) -> None:
        """
        Set kind and description of each relation.

        Row keys: ``table_name`` (required), ``kind`` (optional, treated as "table" when missing
        or empty, lower-cased before ``DatasetKind.from_raw``), ``description`` (optional, only
        applied when truthy).
        """
        for r in rels or []:
            t = self.get_or_create_table(r["table_name"])
            t.kind = DatasetKind.from_raw((r.get("kind") or "table").lower())
            if desc := r.get("description"):
                t.description = desc

    def apply_columns(self, cols: list[dict] | None) -> None:
        """
        Append columns to their tables, ordered by ``ordinal_position``.

        Row keys: ``table_name``, ``column_name`` and ``data_type`` (required);
        ``ordinal_position``, ``is_nullable`` (defaults to nullable), ``description``,
        ``default_expression`` and ``generated`` (optional).
        Rows without an ordinal position are placed after the positioned ones.
        """
        cols_by_table = group_rows(cols, ("table_name",))
        for (table_name,), grp in cols_by_table.items():
            # Same ordering helper as the constraint/index steps: positions are compared as
            # integers, so string or mixed-type positions neither mis-sort nor raise.
            grp.sort(key=lambda r: sort_position_by_key(r, "ordinal_position"))
            t = self.get_or_create_table(table_name)
            for c in grp:
                t.columns.append(
                    DatabaseColumn(
                        name=c["column_name"],
                        type=c["data_type"],
                        nullable=bool(coerce_bool(c.get("is_nullable"), default=True)),
                        description=c.get("description"),
                        default_expression=c.get("default_expression"),
                        generated=c.get("generated"),
                        checks=[],
                    )
                )

    def apply_primary_keys(self, pk_cols: list[dict] | None) -> None:
        """
        Set the primary key of each table from per-column rows.

        Rows are grouped by (``table_name``, ``constraint_name``) and ordered by ``position``;
        rows whose ``column_name`` is None are skipped. If a table appears under several
        constraint names, the group processed last overwrites the earlier ones.
        """
        pk_groups = group_rows(pk_cols, ("table_name", "constraint_name"))
        for (table_name, cname), grp in pk_groups.items():
            grp.sort(key=lambda r: sort_position_by_key(r, "position"))
            self.get_or_create_table(table_name).primary_key = KeyConstraint(
                name=cname,
                columns=[r["column_name"] for r in grp if r.get("column_name") is not None],
                validated=True,
            )

    def apply_unique_constraints(self, uq_cols: list[dict] | None) -> None:
        """
        Replace each mentioned table's unique constraints with those found in ``uq_cols``.

        Rows are grouped by (``table_name``, ``constraint_name``) and ordered by ``position``;
        rows whose ``column_name`` is None are skipped.
        """
        uq_groups = group_rows(uq_cols, ("table_name", "constraint_name"))
        by_table: dict[str, list[KeyConstraint]] = defaultdict(list)

        for (table_name, cname), grp in uq_groups.items():
            grp.sort(key=lambda r: sort_position_by_key(r, "position"))
            by_table[table_name].append(
                KeyConstraint(
                    name=cname,
                    columns=[r["column_name"] for r in grp if r.get("column_name") is not None],
                    validated=True,
                )
            )

        for table_name, uqs in by_table.items():
            self.get_or_create_table(table_name).unique_constraints = uqs

    def apply_checks(self, checks: list[dict] | None) -> None:
        """
        Append one ``CheckConstraint`` per row to the row's table.

        Row keys: ``table_name`` and ``constraint_name`` (required), ``expression`` (optional),
        ``validated`` (optional, defaults to True).
        """
        for r in checks or []:
            self.get_or_create_table(r["table_name"]).checks.append(
                CheckConstraint(
                    name=r["constraint_name"],
                    expression=cast(str, r.get("expression")),
                    validated=coerce_bool(r.get("validated"), default=True),
                )
            )

    def apply_foreign_keys(self, fk_cols: list[dict] | None) -> None:
        """
        Replace each mentioned table's foreign keys with those found in ``fk_cols``.

        Rows are grouped by (``table_name``, ``constraint_name``) and ordered by ``position``.
        Every row contributes one ``from_column`` -> ``to_column`` pair (both required); the
        constraint-level attributes (``ref_schema``, ``ref_table``, ``on_update``, ``on_delete``,
        ``enforced``, ``validated``) are read from the first row of the group.
        """
        fk_groups = group_rows(fk_cols, ("table_name", "constraint_name"))
        by_table: dict[str, list[ForeignKey]] = defaultdict(list)

        for (table_name, cname), grp in fk_groups.items():
            grp.sort(key=lambda r: sort_position_by_key(r, "position"))
            first = grp[0]

            ref_schema = first.get("ref_schema")
            ref_table = first.get("ref_table")
            if ref_schema and ref_table:
                referenced = f"{ref_schema}.{ref_table}"
            else:
                # Without a schema, keep the bare table name instead of discarding the target.
                referenced = ref_table or ""

            by_table[table_name].append(
                ForeignKey(
                    name=cname,
                    mapping=[ForeignKeyColumnMap(from_column=r["from_column"], to_column=r["to_column"]) for r in grp],
                    referenced_table=referenced,
                    on_update=first.get("on_update"),
                    on_delete=first.get("on_delete"),
                    enforced=coerce_bool(first.get("enforced"), default=True),
                    validated=coerce_bool(first.get("validated"), default=True),
                )
            )

        for table_name, fks in by_table.items():
            self.get_or_create_table(table_name).foreign_keys = fks

    def apply_indexes(self, idx_cols: list[dict] | None) -> None:
        """
        Replace each mentioned table's indexes with those found in ``idx_cols``.

        Rows are grouped by (``table_name``, ``index_name``) and ordered by ``position``.
        Each row's ``expr`` becomes one index column (rows with a None ``expr`` are skipped);
        ``is_unique`` (defaults to False), ``method`` and ``predicate`` come from the first row.
        """
        idx_groups = group_rows(idx_cols, ("table_name", "index_name"))
        by_table: dict[str, list[Index]] = defaultdict(list)

        for (table_name, idx_name), grp in idx_groups.items():
            grp.sort(key=lambda r: sort_position_by_key(r, "position"))
            first = grp[0]

            by_table[table_name].append(
                Index(
                    name=idx_name,
                    columns=[cast(str, r.get("expr")) for r in grp if r.get("expr") is not None],
                    unique=bool(coerce_bool(first.get("is_unique"), default=False)),
                    method=first.get("method"),
                    predicate=first.get("predicate"),
                )
            )

        for table_name, idxs in by_table.items():
            self.get_or_create_table(table_name).indexes = idxs

    def apply_partitions(self, partitions: list[dict] | None) -> None:
        """
        Attach partition info to each table named in ``partitions``.

        ``partition_tables`` (None entries removed) becomes the partition table list; every
        other key except ``table_name`` and ``schema_name`` is copied into ``meta``.
        A later row for the same table overwrites the earlier one.
        """
        for r in partitions or []:
            t = self.get_or_create_table(r["table_name"])

            meta = {k: v for k, v in r.items() if k not in ("table_name", "partition_tables", "schema_name")}
            part_tables_list = [p for p in (r.get("partition_tables") or []) if p is not None]

            t.partition_info = DatabasePartitionInfo(
                meta=meta,
                partition_tables=part_tables_list,
            )

    def finish(self) -> list[DatabaseTable]:
        """Return all accumulated tables, sorted by table name."""
        return [self.by_table[k] for k in sorted(self.by_table)]
239
+
240
+
241
+ def coerce_bool(value: Any, default: bool | None = None) -> bool | None:
242
+ if value is None:
243
+ return default
244
+ if isinstance(value, bool):
245
+ return value
246
+ if isinstance(value, int):
247
+ return bool(value)
248
+ if isinstance(value, str):
249
+ v = value.strip().lower()
250
+ if v in {"yes", "true", "1"}:
251
+ return True
252
+ if v in {"no", "false", "0"}:
253
+ return False
254
+ return bool(value)
255
+
256
+
257
def sort_position_by_key(r: dict, pos_field: str) -> tuple[bool, int]:
    """
    Sort key for ordering rows by the integer stored under ``pos_field``.

    The first tuple element is True only when the position is missing/None, which makes such
    rows sort after positioned ones. A position that cannot be converted with ``int()`` counts
    as 0.
    """
    raw = r.get(pos_field)
    if raw is None:
        return True, 0
    try:
        return False, int(raw)
    except (TypeError, ValueError):
        return False, 0
264
+
265
+
266
+ def group_rows(rows: Iterable[dict] | None, key_fields: tuple[str, ...]) -> dict[tuple[Any, ...], list[dict]]:
267
+ grouped: dict[tuple[Any, ...], list[dict]] = defaultdict(list)
268
+ for r in rows or []:
269
+ grouped[tuple(r.get(f) for f in key_fields)].append(r)
270
+ return grouped
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from pydantic import BaseModel, ConfigDict, field_validator, model_validator
6
+
7
+
8
class ScopeIncludeRule(BaseModel):
    """
    Allowlist rule.

    Fields:
    - catalog: optional glob pattern
    - schemas: optional list of glob patterns; a bare string is accepted and wrapped in a list

    At least one of ``catalog`` / ``schemas`` has to be given. Unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    catalog: str | None = None
    schemas: list[str] | None = None

    @field_validator("schemas", mode="before")
    @classmethod
    def _normalize_schemas(cls, v: Any) -> Any:
        # Runs before type validation: a lone string becomes a one-item list,
        # None and every other value are passed through unchanged.
        return [v] if isinstance(v, str) else v

    @model_validator(mode="after")
    def _validate_rule(self) -> ScopeIncludeRule:
        # A rule with neither selector would not select anything meaningful, so reject it.
        if self.catalog is not None or self.schemas is not None:
            return self
        raise ValueError("Include rule must specify at least 'catalog' or 'schemas'")
36
+
37
+
38
class ScopeExcludeRule(BaseModel):
    """
    Denylist rule.

    Fields:
    - catalog: optional glob pattern
    - schemas: optional list of glob patterns (a bare string is also accepted)
    - except_schemas: optional list of glob patterns (a bare string is also accepted)

    A target that matches the rule but also matches ``except_schemas`` is NOT excluded by this rule.
    At least one of ``catalog`` / ``schemas`` has to be given. Unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    catalog: str | None = None
    schemas: list[str] | None = None
    except_schemas: list[str] | None = None

    @field_validator("schemas", "except_schemas", mode="before")
    @classmethod
    def _normalize_lists(cls, v: Any) -> Any:
        # Runs before type validation: a lone string becomes a one-item list,
        # None and every other value are passed through unchanged.
        return [v] if isinstance(v, str) else v

    @model_validator(mode="after")
    def _validate_rule(self) -> ScopeExcludeRule:
        # except_schemas alone is not enough: the rule needs a catalog or schemas selector.
        if self.catalog is not None or self.schemas is not None:
            return self
        raise ValueError("Exclude rule must specify at least 'catalog' or 'schemas'")
68
+
69
+
70
class IntrospectionScope(BaseModel):
    """
    Pair of rule lists describing what to introspect.

    - include: allowlist rules (empty by default)
    - exclude: denylist rules (empty by default)

    Unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    include: list[ScopeIncludeRule] = []
    exclude: list[ScopeExcludeRule] = []
@@ -0,0 +1,103 @@
1
+ import fnmatch
2
+ from dataclasses import dataclass
3
+
4
+ from databao_context_engine.plugins.databases.introspection_scope import (
5
+ IntrospectionScope,
6
+ ScopeExcludeRule,
7
+ ScopeIncludeRule,
8
+ )
9
+
10
+
11
@dataclass(frozen=True)
class ScopeSelection:
    """
    Immutable result of scope filtering: the catalogs and, per catalog, the schemas to introspect.
    """

    # Catalogs that are part of the selection.
    catalogs: list[str]
    # Selected schema names, keyed by catalog name.
    schemas_per_catalog: dict[str, list[str]]
19
+
20
+
21
class IntrospectionScopeMatcher:
    """
    Filters discovered catalogs/schemas through include/exclude rules (case-insensitive globs).

    Semantics:
    - No include rules => every schema is a candidate
    - Some include rules => only schemas matched by at least one include rule are candidates
    - Exclude rules are applied afterwards (exclude wins)
    - except_schemas on an exclude rule shields matching schemas from that rule only
    - Schemas listed in ignored_schemas (compared case-insensitively) are always dropped
    """

    def __init__(
        self,
        scope: IntrospectionScope | None,
        *,
        ignored_schemas: set[str] | None = None,
    ) -> None:
        self._scope = scope or IntrospectionScope()
        # Stored lower-cased so membership tests can be case-insensitive.
        self._ignored_schemas = {name.lower() for name in (ignored_schemas or set())}

    def filter_scopes(
        self,
        catalogs: list[str],
        schemas_per_catalog: dict[str, list[str]],
    ) -> ScopeSelection:
        """
        Return the catalogs and schemas that survive the ignore list and the include/exclude rules.

        Input order is preserved; catalogs left without any schema are dropped.
        """
        includes = self._scope.include
        excludes = self._scope.exclude

        def keep(catalog: str, schema: str) -> bool:
            if schema.lower() in self._ignored_schemas:
                return False
            # Include rules only restrict the selection when at least one is configured.
            if includes and not self._is_included(includes, catalog, schema):
                return False
            return not self._is_excluded(excludes, catalog, schema)

        selected: dict[str, list[str]] = {}
        for catalog in catalogs:
            survivors = [s for s in schemas_per_catalog.get(catalog, []) if keep(catalog, s)]
            if survivors:
                selected[catalog] = survivors

        return ScopeSelection(
            catalogs=[c for c in catalogs if c in selected],
            schemas_per_catalog=selected,
        )

    @staticmethod
    def _glob_match(pattern: str, value: str) -> bool:
        # Lower-case both sides and use the case-sensitive matcher: the result is
        # case-insensitive regardless of platform.
        return fnmatch.fnmatchcase(value.lower(), pattern.lower())

    def _matches_any(self, patterns: list[str] | None, value: str) -> bool:
        # None means "no restriction" and matches everything; an empty list matches nothing.
        return patterns is None or any(self._glob_match(p, value) for p in patterns)

    def _include_rule_matches(self, rule: ScopeIncludeRule, catalog: str, schema: str) -> bool:
        catalog_ok = rule.catalog is None or self._glob_match(rule.catalog, catalog)
        return catalog_ok and self._matches_any(rule.schemas, schema)

    def _exclude_rule_excludes(self, rule: ScopeExcludeRule, catalog: str, schema: str) -> bool:
        if rule.catalog is not None and not self._glob_match(rule.catalog, catalog):
            return False
        if not self._matches_any(rule.schemas, schema):
            return False

        # The rule applies; it excludes unless the schema is explicitly exempted.
        exempted = rule.except_schemas is not None and self._matches_any(rule.except_schemas, schema)
        return not exempted

    def _is_included(self, rules: list[ScopeIncludeRule], catalog: str, schema: str) -> bool:
        for rule in rules:
            if self._include_rule_matches(rule, catalog, schema):
                return True
        return False

    def _is_excluded(self, rules: list[ScopeExcludeRule], catalog: str, schema: str) -> bool:
        for rule in rules:
            if self._exclude_rule_excludes(rule, catalog, schema):
                return True
        return False