dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1036 @@
1
+ # =============================================================================
2
+ # DVT Catalog Store
3
+ # =============================================================================
4
+ # DuckDB-based catalog store for DVT projects.
5
+ #
6
+ # This store contains PROJECT CATALOG data:
7
+ # - targets: Available connections from profiles.yml
8
+ # - source_definitions: Sources with connections from manifest
9
+ # - model_definitions: Models with targets from manifest
10
+ # - catalog_nodes: Enriched catalog for docs visualization
11
+ # - lineage_edges: DAG lineage for visualization
12
+ #
13
+ # Location: <project>/.dvt/catalog.duckdb
14
+ #
15
+ # SEPARATION OF CONCERNS:
16
+ # - catalog.duckdb: Project structure (targets, sources, models, lineage)
17
+ # - metastore.duckdb: Runtime data (profile_results, column_metadata, row_counts)
18
+ #
19
+ # This separation ensures that catalog operations don't interfere with
20
+ # runtime operations like profiling, run, build, etc.
21
+ #
22
+ # DVT v0.59.0: Initial implementation
23
+ # =============================================================================
24
+
25
+ import json
26
+ from pathlib import Path
27
+ from typing import Any, Dict, List, Optional, Tuple
28
+ from dataclasses import dataclass, asdict
29
+ from datetime import datetime
30
+
31
+ try:
32
+ import duckdb
33
+ HAS_DUCKDB = True
34
+ except ImportError:
35
+ HAS_DUCKDB = False
36
+
37
+
38
+ # =============================================================================
39
+ # Data Classes
40
+ # =============================================================================
41
+
42
@dataclass
class TargetDefinition:
    """Target/connection definition from profiles.yml.

    Mirrors one row of the ``targets`` table. JSON-typed columns are kept
    as serialized strings here, not parsed dicts.
    """
    name: str  # primary key in the targets table
    adapter_type: str
    database: Optional[str] = None
    schema_name: Optional[str] = None
    is_default: bool = False  # at most one target is expected to be default
    host: Optional[str] = None  # Sanitized, no secrets
    port: Optional[int] = None
    meta: Optional[str] = None  # JSON
    last_verified: Optional[datetime] = None
54
+
55
+
56
@dataclass
class SourceTableDefinition:
    """Source table definition from manifest (sources.yml).

    Mirrors one row of the ``source_definitions`` table. JSON-typed columns
    (meta, freshness, columns) are stored as serialized strings.
    """
    unique_id: str  # source.project.source_name.table_name
    source_name: str
    table_name: str
    connection_name: str  # Target/output to use for this source
    database: Optional[str] = None
    schema_name: Optional[str] = None
    adapter_type: Optional[str] = None
    identifier: Optional[str] = None  # Physical table name if different
    description: Optional[str] = None
    loader: Optional[str] = None
    meta: Optional[str] = None  # JSON
    freshness: Optional[str] = None  # JSON
    columns: Optional[str] = None  # JSON array
    created_at: Optional[datetime] = None  # populated by DB default on insert
    updated_at: Optional[datetime] = None  # refreshed on every save
74
+
75
+
76
@dataclass
class ModelDefinition:
    """Model definition from manifest.

    Mirrors one row of the ``model_definitions`` table. JSON-typed columns
    (tags, meta, config, columns, depends_on_nodes) are serialized strings.
    """
    unique_id: str
    name: str
    connection_name: str  # Target for this model
    database: Optional[str] = None
    schema_name: Optional[str] = None
    adapter_type: Optional[str] = None
    materialized: Optional[str] = None
    description: Optional[str] = None
    tags: Optional[str] = None  # JSON array
    meta: Optional[str] = None  # JSON
    config: Optional[str] = None  # JSON
    columns: Optional[str] = None  # JSON array
    depends_on_nodes: Optional[str] = None  # JSON array
    compiled_sql_hash: Optional[str] = None  # For change detection
    created_at: Optional[datetime] = None  # populated by DB default on insert
    updated_at: Optional[datetime] = None  # refreshed on every save
95
+
96
+
97
@dataclass
class CatalogNode:
    """Enriched catalog node for dvt docs generate.

    Mirrors one row of the ``catalog_nodes`` table; combines manifest
    metadata with display hints (icon/color) and size stats.
    """
    unique_id: str
    resource_type: str  # 'model', 'source', 'test', 'seed', 'snapshot'
    name: str
    schema_name: Optional[str] = None
    database: Optional[str] = None
    connection_name: Optional[str] = None
    adapter_type: Optional[str] = None
    description: Optional[str] = None
    icon_type: Optional[str] = None  # docs UI hint
    color_hex: Optional[str] = None  # docs UI hint
    materialized: Optional[str] = None
    tags: Optional[str] = None  # JSON array
    meta: Optional[str] = None  # JSON object
    columns: Optional[str] = None  # JSON array
    row_count: Optional[int] = None
    bytes_stored: Optional[int] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
118
+
119
+
120
@dataclass
class LineageEdge:
    """Lineage edge representing a dependency between nodes.

    Mirrors one row of the ``lineage_edges`` table; `id` is assigned by a
    DB sequence, so it is None until the row is persisted.
    """
    id: Optional[int] = None  # sequence-assigned; None before insert
    source_node_id: str = ""
    target_node_id: str = ""
    edge_type: str = ""  # 'ref', 'source', 'depends_on'
    is_cross_connection: bool = False  # True when the two ends use different targets
    source_connection: Optional[str] = None
    target_connection: Optional[str] = None
130
+
131
+
132
class CatalogStore:
    """
    DuckDB-based catalog store for a DVT project.

    Location: <project_root>/.dvt/catalog.duckdb

    Tables:
    - targets: Available connections from profiles.yml
    - source_definitions: Sources with connections from manifest
    - model_definitions: Models with targets from manifest
    - catalog_nodes: Enriched catalog for docs
    - lineage_edges: DAG lineage for visualization

    This store is SEPARATE from metastore.duckdb to avoid interference
    between catalog operations and runtime operations.
    """

    # Path components of <project_root>/.dvt/catalog.duckdb
    DVT_DIR = ".dvt"
    CATALOG_DB = "catalog.duckdb"
151
+
152
def __init__(self, project_root: Path):
    """
    Initialize the catalog store.

    Does not open a connection; that happens lazily via ``conn``.

    Args:
        project_root: Path to the DVT project root directory

    Raises:
        ImportError: if the optional ``duckdb`` dependency is missing.
    """
    if not HAS_DUCKDB:
        raise ImportError(
            "DuckDB is required for catalog store. "
            "Install with: pip install duckdb"
        )

    root = Path(project_root)
    self.project_root = root
    self.dvt_dir = root / self.DVT_DIR
    self.db_path = self.dvt_dir / self.CATALOG_DB
    # Lazily-created connection; see the `conn` property.
    self._conn: Optional["duckdb.DuckDBPyConnection"] = None
169
+
170
@property
def conn(self) -> "duckdb.DuckDBPyConnection":
    """Return the cached DuckDB connection, opening it on first use."""
    if self._conn is not None:
        return self._conn
    self._conn = duckdb.connect(str(self.db_path))
    return self._conn
176
+
177
def close(self) -> None:
    """Close the connection if one is open and forget it."""
    if self._conn is None:
        return
    self._conn.close()
    self._conn = None
182
+
183
def __enter__(self) -> "CatalogStore":
    """Context-manager entry; the connection still opens lazily."""
    return self

def __exit__(self, exc_type, exc_val, exc_tb) -> None:
    """Context-manager exit: close the connection; exceptions propagate."""
    self.close()
188
+
189
+ # =========================================================================
190
+ # Initialization
191
+ # =========================================================================
192
+
193
def initialize(self, drop_existing: bool = True) -> None:
    """
    Create (or reset) the catalog database and its schema.

    Ensures the .dvt/ directory exists, optionally drops every existing
    catalog table, then creates all tables/indexes with empty contents.

    Args:
        drop_existing: When True (the default, used by `dvt init`), all
            existing tables are dropped first so the schema is recreated
            from a clean slate.
    """
    # The directory must exist before DuckDB can create the database file.
    self.dvt_dir.mkdir(parents=True, exist_ok=True)

    if drop_existing:
        self._drop_all_tables()
    self._create_schema()
213
+
214
def _drop_all_tables(self) -> None:
    """Drop every catalog table and sequence, resetting to an empty state."""
    # Dependents first, then the base tables.
    for table_name in (
        "lineage_edges",
        "catalog_nodes",
        "model_definitions",
        "source_definitions",
        "targets",
    ):
        self.conn.execute(f"DROP TABLE IF EXISTS {table_name}")

    # The lineage id sequence lives outside the tables and needs its own drop.
    self.conn.execute("DROP SEQUENCE IF EXISTS seq_lineage_edges_id")
228
+
229
def _create_schema(self) -> None:
    """Create all catalog tables, the lineage id sequence, and indexes.

    Every statement uses IF NOT EXISTS, so running this against an
    already-initialized database is a no-op.
    """
    ddl_statements = [
        # targets: available connections from profiles.yml
        """
        CREATE TABLE IF NOT EXISTS targets (
            name VARCHAR PRIMARY KEY,
            adapter_type VARCHAR NOT NULL,
            database VARCHAR,
            schema_name VARCHAR,
            is_default BOOLEAN DEFAULT FALSE,
            host VARCHAR,
            port INTEGER,
            meta JSON,
            last_verified TIMESTAMP,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # source_definitions: sources with their connections from manifest
        """
        CREATE TABLE IF NOT EXISTS source_definitions (
            unique_id VARCHAR PRIMARY KEY,
            source_name VARCHAR NOT NULL,
            table_name VARCHAR NOT NULL,
            connection_name VARCHAR NOT NULL,
            database VARCHAR,
            schema_name VARCHAR,
            adapter_type VARCHAR,
            identifier VARCHAR,
            description TEXT,
            loader VARCHAR,
            meta JSON,
            freshness JSON,
            columns JSON,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # model_definitions: models with their targets from manifest
        """
        CREATE TABLE IF NOT EXISTS model_definitions (
            unique_id VARCHAR PRIMARY KEY,
            name VARCHAR NOT NULL,
            connection_name VARCHAR NOT NULL,
            database VARCHAR,
            schema_name VARCHAR,
            adapter_type VARCHAR,
            materialized VARCHAR,
            description TEXT,
            tags JSON,
            meta JSON,
            config JSON,
            columns JSON,
            depends_on_nodes JSON,
            compiled_sql_hash VARCHAR,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # catalog_nodes: enriched catalog for docs visualization
        """
        CREATE TABLE IF NOT EXISTS catalog_nodes (
            unique_id VARCHAR PRIMARY KEY,
            resource_type VARCHAR NOT NULL,
            name VARCHAR NOT NULL,
            schema_name VARCHAR,
            database VARCHAR,
            connection_name VARCHAR,
            adapter_type VARCHAR,
            description TEXT,
            icon_type VARCHAR,
            color_hex VARCHAR,
            materialized VARCHAR,
            tags JSON,
            meta JSON,
            columns JSON,
            row_count BIGINT,
            bytes_stored BIGINT,
            created_at TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # lineage_edges: DAG lineage; id comes from an explicit sequence
        """
        CREATE SEQUENCE IF NOT EXISTS seq_lineage_edges_id START 1;
        CREATE TABLE IF NOT EXISTS lineage_edges (
            id INTEGER PRIMARY KEY DEFAULT nextval('seq_lineage_edges_id'),
            source_node_id VARCHAR NOT NULL,
            target_node_id VARCHAR NOT NULL,
            edge_type VARCHAR NOT NULL,
            is_cross_connection BOOLEAN DEFAULT FALSE,
            source_connection VARCHAR,
            target_connection VARCHAR
        )
        """,
        # Indexes for the common lookup paths (by connection, type, node id).
        """
        CREATE INDEX IF NOT EXISTS idx_source_definitions_connection
        ON source_definitions(connection_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_model_definitions_connection
        ON model_definitions(connection_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_catalog_nodes_type
        ON catalog_nodes(resource_type)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_catalog_nodes_connection
        ON catalog_nodes(connection_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_lineage_edges_source
        ON lineage_edges(source_node_id)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_lineage_edges_target
        ON lineage_edges(target_node_id)
        """,
    ]
    for statement in ddl_statements:
        self.conn.execute(statement)
355
+
356
+ # =========================================================================
357
+ # Target Operations
358
+ # =========================================================================
359
+
360
def save_target(self, target: TargetDefinition) -> None:
    """Upsert one target row; updated_at is refreshed by the database."""
    params = [
        target.name,
        target.adapter_type,
        target.database,
        target.schema_name,
        target.is_default,
        target.host,
        target.port,
        target.meta,
        target.last_verified,
    ]
    self.conn.execute(
        """
        INSERT OR REPLACE INTO targets
        (name, adapter_type, database, schema_name, is_default,
        host, port, meta, last_verified, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        """,
        params,
    )
372
+
373
def save_targets_batch(self, targets: List[TargetDefinition]) -> None:
    """Upsert each given target in turn (one statement per row)."""
    for item in targets:
        self.save_target(item)
377
+
378
def get_target(self, name: str) -> Optional[TargetDefinition]:
    """Look up a single target by name; None if absent."""
    row = self.conn.execute(
        """
        SELECT name, adapter_type, database, schema_name, is_default,
        host, port, meta, last_verified
        FROM targets WHERE name = ?
        """,
        [name],
    ).fetchone()

    if row is None:
        return None
    # SELECT column order matches TargetDefinition field order.
    return TargetDefinition(*row)
393
+
394
def get_all_targets(self) -> List[TargetDefinition]:
    """Return every target, default target(s) first, then by name."""
    rows = self.conn.execute(
        """
        SELECT name, adapter_type, database, schema_name, is_default,
        host, port, meta, last_verified
        FROM targets ORDER BY is_default DESC, name
        """
    ).fetchall()

    # SELECT column order matches TargetDefinition field order.
    return [TargetDefinition(*row) for row in rows]
410
+
411
def get_default_target(self) -> Optional[TargetDefinition]:
    """Return the (first) target flagged as default, or None."""
    row = self.conn.execute(
        """
        SELECT name, adapter_type, database, schema_name, is_default,
        host, port, meta, last_verified
        FROM targets WHERE is_default = TRUE LIMIT 1
        """
    ).fetchone()

    if row is None:
        return None
    # SELECT column order matches TargetDefinition field order.
    return TargetDefinition(*row)
426
+
427
def clear_targets(self) -> None:
    """Delete every row from the targets table (schema is kept)."""
    self.conn.execute("DELETE FROM targets")
430
+
431
+ # =========================================================================
432
+ # Source Definition Operations
433
+ # =========================================================================
434
+
435
def save_source_definition(self, source: SourceTableDefinition) -> None:
    """Upsert one source row; updated_at is refreshed by the database."""
    params = [
        source.unique_id,
        source.source_name,
        source.table_name,
        source.connection_name,
        source.database,
        source.schema_name,
        source.adapter_type,
        source.identifier,
        source.description,
        source.loader,
        source.meta,
        source.freshness,
        source.columns,
    ]
    self.conn.execute(
        """
        INSERT OR REPLACE INTO source_definitions
        (unique_id, source_name, table_name, connection_name,
        database, schema_name, adapter_type, identifier,
        description, loader, meta, freshness, columns, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        """,
        params,
    )
449
+
450
def save_source_definitions_batch(self, sources: List[SourceTableDefinition]) -> None:
    """Upsert each given source definition in turn (one statement per row)."""
    for item in sources:
        self.save_source_definition(item)
454
+
455
def get_source_definition(self, unique_id: str) -> Optional[SourceTableDefinition]:
    """Look up a single source definition by unique ID; None if absent."""
    row = self.conn.execute(
        """
        SELECT unique_id, source_name, table_name, connection_name,
        database, schema_name, adapter_type, identifier,
        description, loader, meta, freshness, columns,
        created_at, updated_at
        FROM source_definitions WHERE unique_id = ?
        """,
        [unique_id],
    ).fetchone()

    if row is None:
        return None
    # SELECT column order matches SourceTableDefinition field order.
    return SourceTableDefinition(*row)
474
+
475
def get_sources_by_connection(self, connection_name: str) -> List[SourceTableDefinition]:
    """Return every source bound to *connection_name*, ordered by source/table."""
    rows = self.conn.execute(
        """
        SELECT unique_id, source_name, table_name, connection_name,
        database, schema_name, adapter_type, identifier,
        description, loader, meta, freshness, columns,
        created_at, updated_at
        FROM source_definitions
        WHERE connection_name = ?
        ORDER BY source_name, table_name
        """,
        [connection_name],
    ).fetchall()

    # SELECT column order matches SourceTableDefinition field order.
    return [SourceTableDefinition(*row) for row in rows]
497
+
498
def get_all_source_definitions(self) -> List[SourceTableDefinition]:
    """Return every source definition, ordered by source and table name."""
    rows = self.conn.execute(
        """
        SELECT unique_id, source_name, table_name, connection_name,
        database, schema_name, adapter_type, identifier,
        description, loader, meta, freshness, columns,
        created_at, updated_at
        FROM source_definitions
        ORDER BY source_name, table_name
        """
    ).fetchall()

    # SELECT column order matches SourceTableDefinition field order.
    return [SourceTableDefinition(*row) for row in rows]
519
+
520
def clear_source_definitions(self) -> None:
    """Delete every row from source_definitions (schema is kept)."""
    self.conn.execute("DELETE FROM source_definitions")
523
+
524
+ # =========================================================================
525
+ # Model Definition Operations
526
+ # =========================================================================
527
+
528
def save_model_definition(self, model: ModelDefinition) -> None:
    """Insert or overwrite one model definition row.

    The ``updated_at`` column is stamped by the database, not the caller.
    """
    # Bind order matches the column list in the INSERT statement.
    params = [
        model.unique_id, model.name, model.connection_name,
        model.database, model.schema_name, model.adapter_type,
        model.materialized, model.description, model.tags, model.meta,
        model.config, model.columns, model.depends_on_nodes,
        model.compiled_sql_hash,
    ]
    self.conn.execute("""
        INSERT OR REPLACE INTO model_definitions
        (unique_id, name, connection_name, database, schema_name,
         adapter_type, materialized, description, tags, meta,
         config, columns, depends_on_nodes, compiled_sql_hash, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    """, params)
543
+
544
def save_model_definitions_batch(self, models: List[ModelDefinition]) -> None:
    """Persist many model definitions in a single batch.

    Uses ``executemany`` so the driver can prepare the statement once
    instead of re-parsing it per model (the previous implementation
    issued one ``execute`` per row).  An empty list is a no-op, matching
    the old behavior.

    NOTE: the statement must stay in sync with ``save_model_definition``.
    """
    if not models:
        return
    self.conn.executemany("""
        INSERT OR REPLACE INTO model_definitions
        (unique_id, name, connection_name, database, schema_name,
         adapter_type, materialized, description, tags, meta,
         config, columns, depends_on_nodes, compiled_sql_hash, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    """, [
        [
            model.unique_id, model.name, model.connection_name,
            model.database, model.schema_name, model.adapter_type,
            model.materialized, model.description, model.tags, model.meta,
            model.config, model.columns, model.depends_on_nodes,
            model.compiled_sql_hash,
        ]
        for model in models
    ])
548
+
549
def get_model_definition(self, unique_id: str) -> Optional[ModelDefinition]:
    """Look up a single model definition; ``None`` when the ID is unknown."""
    row = self.conn.execute("""
        SELECT unique_id, name, connection_name, database, schema_name,
               adapter_type, materialized, description, tags, meta,
               config, columns, depends_on_nodes, compiled_sql_hash,
               created_at, updated_at
        FROM model_definitions WHERE unique_id = ?
    """, [unique_id]).fetchone()

    # Guard clause: no matching row.
    if row is None:
        return None
    return ModelDefinition(
        unique_id=row[0], name=row[1], connection_name=row[2],
        database=row[3], schema_name=row[4], adapter_type=row[5],
        materialized=row[6], description=row[7], tags=row[8],
        meta=row[9], config=row[10], columns=row[11],
        depends_on_nodes=row[12], compiled_sql_hash=row[13],
        created_at=row[14], updated_at=row[15],
    )
569
+
570
def get_models_by_connection(self, connection_name: str) -> List[ModelDefinition]:
    """Return the model definitions attached to one connection, ordered by name."""
    def _hydrate(row) -> ModelDefinition:
        # Column order mirrors the SELECT list below.
        return ModelDefinition(
            unique_id=row[0], name=row[1], connection_name=row[2],
            database=row[3], schema_name=row[4], adapter_type=row[5],
            materialized=row[6], description=row[7], tags=row[8],
            meta=row[9], config=row[10], columns=row[11],
            depends_on_nodes=row[12], compiled_sql_hash=row[13],
            created_at=row[14], updated_at=row[15],
        )

    cursor = self.conn.execute("""
        SELECT unique_id, name, connection_name, database, schema_name,
               adapter_type, materialized, description, tags, meta,
               config, columns, depends_on_nodes, compiled_sql_hash,
               created_at, updated_at
        FROM model_definitions
        WHERE connection_name = ?
        ORDER BY name
    """, [connection_name])
    return [_hydrate(row) for row in cursor.fetchall()]
593
+
594
def get_all_model_definitions(self) -> List[ModelDefinition]:
    """Fetch every stored model definition, ordered by name."""
    rows = self.conn.execute("""
        SELECT unique_id, name, connection_name, database, schema_name,
               adapter_type, materialized, description, tags, meta,
               config, columns, depends_on_nodes, compiled_sql_hash,
               created_at, updated_at
        FROM model_definitions
        ORDER BY name
    """).fetchall()

    models = []
    for row in rows:
        models.append(
            ModelDefinition(
                unique_id=row[0], name=row[1], connection_name=row[2],
                database=row[3], schema_name=row[4], adapter_type=row[5],
                materialized=row[6], description=row[7], tags=row[8],
                meta=row[9], config=row[10], columns=row[11],
                depends_on_nodes=row[12], compiled_sql_hash=row[13],
                created_at=row[14], updated_at=row[15],
            )
        )
    return models
616
+
617
def clear_model_definitions(self) -> None:
    """Delete every row from the model_definitions table."""
    self.conn.execute("DELETE FROM model_definitions")
620
+
621
+ # =========================================================================
622
+ # Catalog Node Operations
623
+ # =========================================================================
624
+
625
def save_catalog_node(self, node: CatalogNode) -> None:
    """Insert or overwrite one catalog node row.

    ``created_at`` comes from the node; ``updated_at`` is stamped by the
    database via CURRENT_TIMESTAMP.
    """
    # Bind order matches the column list in the INSERT statement.
    params = [
        node.unique_id, node.resource_type, node.name,
        node.schema_name, node.database,
        node.connection_name, node.adapter_type,
        node.description, node.icon_type, node.color_hex,
        node.materialized, node.tags, node.meta, node.columns,
        node.row_count, node.bytes_stored, node.created_at,
    ]
    self.conn.execute("""
        INSERT OR REPLACE INTO catalog_nodes
        (unique_id, resource_type, name, schema_name, database,
         connection_name, adapter_type, description, icon_type, color_hex,
         materialized, tags, meta, columns, row_count, bytes_stored,
         created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    """, params)
642
+
643
def get_catalog_node(self, unique_id: str) -> Optional[CatalogNode]:
    """Look up a single catalog node; ``None`` when the ID is unknown."""
    row = self.conn.execute("""
        SELECT unique_id, resource_type, name, schema_name, database,
               connection_name, adapter_type, description, icon_type, color_hex,
               materialized, tags, meta, columns, row_count, bytes_stored,
               created_at, updated_at
        FROM catalog_nodes WHERE unique_id = ?
    """, [unique_id]).fetchone()

    # Guard clause: no matching row.
    if row is None:
        return None
    return CatalogNode(
        unique_id=row[0], resource_type=row[1], name=row[2],
        schema_name=row[3], database=row[4],
        connection_name=row[5], adapter_type=row[6],
        description=row[7], icon_type=row[8], color_hex=row[9],
        materialized=row[10], tags=row[11], meta=row[12],
        columns=row[13], row_count=row[14], bytes_stored=row[15],
        created_at=row[16], updated_at=row[17],
    )
664
+
665
def get_catalog_nodes_by_type(self, resource_type: str) -> List[CatalogNode]:
    """Return all catalog nodes of one resource type, ordered by name."""
    def _hydrate(row) -> CatalogNode:
        # Column order mirrors the SELECT list below.
        return CatalogNode(
            unique_id=row[0], resource_type=row[1], name=row[2],
            schema_name=row[3], database=row[4],
            connection_name=row[5], adapter_type=row[6],
            description=row[7], icon_type=row[8], color_hex=row[9],
            materialized=row[10], tags=row[11], meta=row[12],
            columns=row[13], row_count=row[14], bytes_stored=row[15],
            created_at=row[16], updated_at=row[17],
        )

    cursor = self.conn.execute("""
        SELECT unique_id, resource_type, name, schema_name, database,
               connection_name, adapter_type, description, icon_type, color_hex,
               materialized, tags, meta, columns, row_count, bytes_stored,
               created_at, updated_at
        FROM catalog_nodes WHERE resource_type = ? ORDER BY name
    """, [resource_type])
    return [_hydrate(row) for row in cursor.fetchall()]
687
+
688
def get_catalog_nodes_by_connection(self, connection_name: str) -> List[CatalogNode]:
    """Return all catalog nodes for one connection, ordered by type then name."""
    rows = self.conn.execute("""
        SELECT unique_id, resource_type, name, schema_name, database,
               connection_name, adapter_type, description, icon_type, color_hex,
               materialized, tags, meta, columns, row_count, bytes_stored,
               created_at, updated_at
        FROM catalog_nodes WHERE connection_name = ? ORDER BY resource_type, name
    """, [connection_name]).fetchall()

    nodes = []
    for row in rows:
        nodes.append(
            CatalogNode(
                unique_id=row[0], resource_type=row[1], name=row[2],
                schema_name=row[3], database=row[4],
                connection_name=row[5], adapter_type=row[6],
                description=row[7], icon_type=row[8], color_hex=row[9],
                materialized=row[10], tags=row[11], meta=row[12],
                columns=row[13], row_count=row[14], bytes_stored=row[15],
                created_at=row[16], updated_at=row[17],
            )
        )
    return nodes
710
+
711
def get_all_catalog_nodes(self) -> List[CatalogNode]:
    """Fetch every catalog node, ordered by resource type then name."""
    def _hydrate(row) -> CatalogNode:
        # Column order mirrors the SELECT list below.
        return CatalogNode(
            unique_id=row[0], resource_type=row[1], name=row[2],
            schema_name=row[3], database=row[4],
            connection_name=row[5], adapter_type=row[6],
            description=row[7], icon_type=row[8], color_hex=row[9],
            materialized=row[10], tags=row[11], meta=row[12],
            columns=row[13], row_count=row[14], bytes_stored=row[15],
            created_at=row[16], updated_at=row[17],
        )

    cursor = self.conn.execute("""
        SELECT unique_id, resource_type, name, schema_name, database,
               connection_name, adapter_type, description, icon_type, color_hex,
               materialized, tags, meta, columns, row_count, bytes_stored,
               created_at, updated_at
        FROM catalog_nodes ORDER BY resource_type, name
    """)
    return [_hydrate(row) for row in cursor.fetchall()]
733
+
734
def clear_catalog_nodes(self) -> None:
    """Delete every row from the catalog_nodes table."""
    self.conn.execute("DELETE FROM catalog_nodes")
737
+
738
+ # =========================================================================
739
+ # Lineage Edge Operations
740
+ # =========================================================================
741
+
742
def save_lineage_edge(self, edge: LineageEdge) -> int:
    """Persist one lineage edge and return its ID.

    An edge that already carries an ID is upserted under that ID; an edge
    without one is inserted and the database-assigned ID is returned
    (0 if the driver yields no RETURNING row).
    """
    # The six non-ID bind values are shared by both branches.
    params = [
        edge.source_node_id, edge.target_node_id, edge.edge_type,
        edge.is_cross_connection, edge.source_connection, edge.target_connection,
    ]
    if edge.id:
        self.conn.execute("""
            INSERT OR REPLACE INTO lineage_edges
            (id, source_node_id, target_node_id, edge_type,
             is_cross_connection, source_connection, target_connection)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, [edge.id] + params)
        return edge.id
    row = self.conn.execute("""
        INSERT INTO lineage_edges
        (source_node_id, target_node_id, edge_type,
         is_cross_connection, source_connection, target_connection)
        VALUES (?, ?, ?, ?, ?, ?)
        RETURNING id
    """, params).fetchone()
    return row[0] if row else 0
767
+
768
def get_upstream_edges(self, node_id: str) -> List[LineageEdge]:
    """Return the edges pointing INTO ``node_id`` (its upstream dependencies)."""
    rows = self.conn.execute("""
        SELECT id, source_node_id, target_node_id, edge_type,
               is_cross_connection, source_connection, target_connection
        FROM lineage_edges WHERE target_node_id = ?
    """, [node_id]).fetchall()

    edges = []
    for row in rows:
        edges.append(
            LineageEdge(
                id=row[0], source_node_id=row[1], target_node_id=row[2],
                edge_type=row[3], is_cross_connection=row[4],
                source_connection=row[5], target_connection=row[6],
            )
        )
    return edges
784
+
785
def get_downstream_edges(self, node_id: str) -> List[LineageEdge]:
    """Return the edges leaving ``node_id`` (its downstream dependents)."""
    def _hydrate(row) -> LineageEdge:
        return LineageEdge(
            id=row[0], source_node_id=row[1], target_node_id=row[2],
            edge_type=row[3], is_cross_connection=row[4],
            source_connection=row[5], target_connection=row[6],
        )

    cursor = self.conn.execute("""
        SELECT id, source_node_id, target_node_id, edge_type,
               is_cross_connection, source_connection, target_connection
        FROM lineage_edges WHERE source_node_id = ?
    """, [node_id])
    return [_hydrate(row) for row in cursor.fetchall()]
801
+
802
def get_all_lineage_edges(self) -> List[LineageEdge]:
    """Fetch every lineage edge, ordered by source then target node ID."""
    rows = self.conn.execute("""
        SELECT id, source_node_id, target_node_id, edge_type,
               is_cross_connection, source_connection, target_connection
        FROM lineage_edges ORDER BY source_node_id, target_node_id
    """).fetchall()

    edges = []
    for row in rows:
        edges.append(
            LineageEdge(
                id=row[0], source_node_id=row[1], target_node_id=row[2],
                edge_type=row[3], is_cross_connection=row[4],
                source_connection=row[5], target_connection=row[6],
            )
        )
    return edges
818
+
819
def get_cross_connection_edges(self) -> List[LineageEdge]:
    """Return only the edges whose endpoints live on different connections."""
    def _hydrate(row) -> LineageEdge:
        return LineageEdge(
            id=row[0], source_node_id=row[1], target_node_id=row[2],
            edge_type=row[3], is_cross_connection=row[4],
            source_connection=row[5], target_connection=row[6],
        )

    cursor = self.conn.execute("""
        SELECT id, source_node_id, target_node_id, edge_type,
               is_cross_connection, source_connection, target_connection
        FROM lineage_edges WHERE is_cross_connection = TRUE
        ORDER BY source_node_id, target_node_id
    """)
    return [_hydrate(row) for row in cursor.fetchall()]
836
+
837
def clear_lineage_edges(self) -> None:
    """Delete every lineage edge, then try to restart the ID sequence."""
    self.conn.execute("DELETE FROM lineage_edges")
    try:
        # Best effort: the sequence only exists once an edge has been
        # auto-numbered, so a failure here is expected and harmless.
        self.conn.execute("ALTER SEQUENCE seq_lineage_edges_id RESTART WITH 1")
    except Exception:
        pass
845
+
846
+ # =========================================================================
847
+ # Batch Operations
848
+ # =========================================================================
849
+
850
def clear_all(self) -> None:
    """Wipe every table managed by the catalog store."""
    # Same order as the individual clear_* helpers were called before.
    for wipe in (
        self.clear_targets,
        self.clear_source_definitions,
        self.clear_model_definitions,
        self.clear_catalog_nodes,
        self.clear_lineage_edges,
    ):
        wipe()
857
+
858
def populate_from_manifest(
    self,
    manifest_data: Dict[str, Any],
    default_target: str,
    targets_info: Dict[str, Dict[str, Any]]
) -> None:
    """
    Populate the catalog store from manifest data.

    Runs in four ordered phases: (1) clear and re-save targets,
    (2) save source definitions, (3) save model definitions,
    (4) rebuild lineage edges from each model's depends_on list.
    Existing source/model/edge rows are wiped first, so this is a
    full rebuild, not an incremental merge.

    Args:
        manifest_data: Parsed manifest.json data
        default_target: Default target name
        targets_info: Dict of target name -> {adapter_type, database, schema, ...}
    """
    # Clear existing data
    self.clear_source_definitions()
    self.clear_model_definitions()
    self.clear_lineage_edges()

    # Save targets
    self.clear_targets()
    for target_name, target_info in targets_info.items():
        target = TargetDefinition(
            name=target_name,
            adapter_type=target_info.get('type', 'unknown'),
            database=target_info.get('database'),
            schema_name=target_info.get('schema'),
            # Exactly one target matches default_target and is flagged default.
            is_default=(target_name == default_target),
            host=target_info.get('host'),
            port=target_info.get('port'),
        )
        self.save_target(target)

    # Process sources
    for unique_id, source_data in manifest_data.get('sources', {}).items():
        # A source with no explicit connection falls back to the default target.
        connection = source_data.get('connection') or default_target
        # adapter_type is None when the connection has no entry in targets_info.
        adapter_type = targets_info.get(connection, {}).get('type')

        source = SourceTableDefinition(
            unique_id=unique_id,
            source_name=source_data.get('source_name', ''),
            table_name=source_data.get('name', ''),
            connection_name=connection,
            database=source_data.get('database'),
            schema_name=source_data.get('schema'),
            adapter_type=adapter_type,
            identifier=source_data.get('identifier'),
            description=source_data.get('description'),
            loader=source_data.get('loader'),
            # meta/columns are stored as JSON text; empty dicts collapse to NULL.
            meta=json.dumps(source_data.get('meta')) if source_data.get('meta') else None,
            columns=json.dumps(source_data.get('columns')) if source_data.get('columns') else None,
        )
        self.save_source_definition(source)

    # Process models
    for unique_id, node_data in manifest_data.get('nodes', {}).items():
        # manifest 'nodes' also holds tests/seeds/etc.; only models are stored.
        if node_data.get('resource_type') != 'model':
            continue

        config = node_data.get('config', {})
        # A model's connection comes from its config 'target', else the default.
        connection = config.get('target') or default_target
        adapter_type = targets_info.get(connection, {}).get('type')

        model = ModelDefinition(
            unique_id=unique_id,
            name=node_data.get('name', ''),
            connection_name=connection,
            database=node_data.get('database'),
            schema_name=node_data.get('schema'),
            adapter_type=adapter_type,
            materialized=config.get('materialized'),
            description=node_data.get('description'),
            # tags may arrive as a non-list iterable; normalized via list().
            tags=json.dumps(list(node_data.get('tags', []))) if node_data.get('tags') else None,
            meta=json.dumps(node_data.get('meta')) if node_data.get('meta') else None,
            config=json.dumps(config) if config else None,
            columns=json.dumps(node_data.get('columns')) if node_data.get('columns') else None,
            # Always serialized, even when empty, unlike the fields above.
            depends_on_nodes=json.dumps(node_data.get('depends_on', {}).get('nodes', [])),
        )
        self.save_model_definition(model)

    # Build lineage edges
    # node_connections maps every known node ID to its connection so edges
    # can be flagged as cross-connection below.
    node_connections = {}

    # Map sources to connections
    for unique_id, source_data in manifest_data.get('sources', {}).items():
        node_connections[unique_id] = source_data.get('connection') or default_target

    # Map models to connections
    for unique_id, node_data in manifest_data.get('nodes', {}).items():
        if node_data.get('resource_type') == 'model':
            config = node_data.get('config', {})
            node_connections[unique_id] = config.get('target') or default_target

    # Create edges
    for unique_id, node_data in manifest_data.get('nodes', {}).items():
        if node_data.get('resource_type') != 'model':
            continue

        target_connection = node_connections.get(unique_id, default_target)
        depends_on = node_data.get('depends_on', {}).get('nodes', [])

        for dep_id in depends_on:
            # Dependencies on unknown nodes default to the default target.
            source_connection = node_connections.get(dep_id, default_target)

            # Edge type is inferred from the dependency's unique_id prefix.
            if dep_id.startswith('source.'):
                edge_type = 'source'
            elif dep_id.startswith('model.'):
                edge_type = 'ref'
            else:
                edge_type = 'depends_on'

            is_cross = source_connection != target_connection

            edge = LineageEdge(
                source_node_id=dep_id,
                target_node_id=unique_id,
                edge_type=edge_type,
                is_cross_connection=is_cross,
                source_connection=source_connection,
                target_connection=target_connection,
            )
            self.save_lineage_edge(edge)
980
+
981
+ # =========================================================================
982
+ # Utility Methods
983
+ # =========================================================================
984
+
985
def exists(self) -> bool:
    """Report whether the backing database file is present on disk."""
    return self.db_path.exists()
988
+
989
def get_stats(self) -> Dict[str, Any]:
    """Summarize row counts per catalog table plus the database location."""
    def _count(sql: str) -> int:
        # Every stat is a single-value COUNT(*) query.
        return self.conn.execute(sql).fetchone()[0]

    return {
        "targets": _count("SELECT COUNT(*) FROM targets"),
        "sources": _count("SELECT COUNT(*) FROM source_definitions"),
        "models": _count("SELECT COUNT(*) FROM model_definitions"),
        "catalog_nodes": _count("SELECT COUNT(*) FROM catalog_nodes"),
        "lineage_edges": _count("SELECT COUNT(*) FROM lineage_edges"),
        "cross_connection_edges": _count(
            "SELECT COUNT(*) FROM lineage_edges WHERE is_cross_connection = TRUE"
        ),
        "db_path": str(self.db_path),
    }
1009
+
1010
def get_federation_summary(self) -> Dict[str, Any]:
    """Describe how sources and models are spread across connections.

    ``federation_paths_exist`` is True exactly when at least one lineage
    edge crosses a connection boundary.
    """
    source_rows = self.conn.execute("""
        SELECT connection_name, COUNT(*) as count
        FROM source_definitions
        GROUP BY connection_name
    """).fetchall()

    model_rows = self.conn.execute("""
        SELECT connection_name, COUNT(*) as count
        FROM model_definitions
        GROUP BY connection_name
    """).fetchall()

    cross_total = self.conn.execute(
        "SELECT COUNT(*) FROM lineage_edges WHERE is_cross_connection = TRUE"
    ).fetchone()[0]

    # Each row is a (connection_name, count) pair, so dict() maps directly.
    return {
        "sources_by_connection": dict(source_rows),
        "models_by_connection": dict(model_rows),
        "cross_connection_edges": cross_total,
        "federation_paths_exist": cross_total > 0,
    }