dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/task/metadata.py ADDED
@@ -0,0 +1,804 @@
+ # =============================================================================
+ # DVT Metadata Task
+ # =============================================================================
+ # Manages metadata for DVT projects - sources and materialized models.
+ #
+ # Commands:
+ #   dvt metadata reset                 # Clear all metadata from store
+ #   dvt metadata snapshot              # Capture metadata for sources + models
+ #   dvt metadata export                # Display metadata in CLI (Rich table)
+ #   dvt metadata export-csv <file>     # Export to CSV file
+ #   dvt metadata export-json <file>    # Export to JSON file
+ #
+ # DVT v0.57.0: Replaces dvt snap with enhanced metadata management
+ # =============================================================================
+
+ import json
+ from pathlib import Path
+ from datetime import datetime
+ from typing import Any, Dict, Tuple
+
+ from dbt.task.base import BaseTask
+ from dbt.flags import get_flags
+
+
+ class MetadataTask(BaseTask):
+     """
+     Task to manage DVT project metadata.
+
+     This task handles:
+     1. Capturing metadata from source definitions (sources.yml)
+     2. Capturing metadata from materialized models
+     3. Exporting metadata to various formats
+     4. Clearing/resetting the metadata store
+     """
+
+     def __init__(self, args):
+         super().__init__(args)
+         self._metadata_store = None
+
+     @property
+     def metadata_store(self):
+         """Lazily load the metadata store."""
+         if self._metadata_store is None:
+             from dbt.compute.metadata import ProjectMetadataStore
+             project_root = Path(get_flags().PROJECT_DIR or ".")
+             self._metadata_store = ProjectMetadataStore(project_root)
+         return self._metadata_store
+
+     def run(self):
+         """Execute the metadata task based on subcommand."""
+         subcommand = getattr(self.args, 'subcommand', 'snapshot')
+
+         if subcommand == 'reset':
+             return self.run_reset()
+         elif subcommand == 'snapshot':
+             return self.run_snapshot()
+         elif subcommand == 'export':
+             return self.run_export()
+         elif subcommand == 'export-csv':
+             return self.run_export_csv()
+         elif subcommand == 'export-json':
+             return self.run_export_json()
+         else:
+             # Default to snapshot
+             return self.run_snapshot()
+
+     # =========================================================================
+     # Reset Subcommand
+     # =========================================================================
+
+     def run_reset(self):
+         """Clear all metadata from the store."""
+         from dbt.compute.metadata import ProjectMetadataStore
+
+         project_dir = getattr(self.args, 'project_dir', None)
+         project_root = Path(project_dir) if project_dir else Path(".")
+
+         try:
+             from rich.console import Console
+             console = Console()
+             use_rich = True
+         except ImportError:
+             use_rich = False
+
+         dvt_dir = project_root / ".dvt"
+         if not dvt_dir.exists():
+             msg = "No .dvt directory found. Nothing to reset."
+             if use_rich:
+                 console.print(f"[yellow]{msg}[/yellow]")
+             else:
+                 print(msg)
+             return True, True
+
+         with ProjectMetadataStore(project_root) as store:
+             store.initialize()
+             store.clear_all_metadata()
+
+         msg = "Metadata store cleared successfully."
+         if use_rich:
+             console.print(f"[green]✓[/green] {msg}")
+         else:
+             print(f"✓ {msg}")
+
+         return True, True
+
+     # =========================================================================
+     # Snapshot Subcommand
+     # =========================================================================
+
+     def run_snapshot(self):
+         """Capture metadata for all sources and materialized models."""
+         from dbt.compute.metadata import ProjectMetadataStore
+         from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
+         from dbt.compute.metadata.registry import TypeRegistry
+
+         project_dir = getattr(self.args, 'project_dir', None)
+         project_root = Path(project_dir) if project_dir else Path(".")
+
+         # Try to use Rich for formatted output
+         try:
+             from rich.console import Console
+             from rich.progress import Progress, SpinnerColumn, TextColumn
+             from rich.table import Table
+             from rich.panel import Panel
+             console = Console()
+             use_rich = True
+         except ImportError:
+             use_rich = False
+
+         # Ensure .dvt directory exists
+         dvt_dir = project_root / ".dvt"
+         if not dvt_dir.exists():
+             dvt_dir.mkdir(parents=True, exist_ok=True)
+             if use_rich:
+                 console.print(f"[cyan]Created {dvt_dir}[/cyan]")
+             else:
+                 print(f"Created {dvt_dir}")
+
+         # Header
+         if use_rich:
+             console.print(Panel.fit(
+                 "[bold cyan]DVT Metadata Snapshot[/bold cyan]\n"
+                 "Capturing metadata for sources and models",
+                 border_style="cyan"
+             ))
+             console.print()
+         else:
+             print("DVT Metadata Snapshot")
+             print("=" * 40)
+             print()
+
+         # Load sources and models
+         sources = self._load_sources(project_root)
+         models = self._load_models(project_root)
+
+         if not sources and not models:
+             msg = "No sources or models found in project."
+             if use_rich:
+                 console.print(f"[yellow]{msg}[/yellow]")
+             else:
+                 print(msg)
+             return True, True
+
+         total_sources = len(sources)
+         total_models = len(models)
+         if use_rich:
+             console.print(f"Found [cyan]{total_sources}[/cyan] source(s) and [cyan]{total_models}[/cyan] model(s)")
+             console.print()
+         else:
+             print(f"Found {total_sources} source(s) and {total_models} model(s)")
+             print()
+
+         # Process sources and models
+         with ProjectMetadataStore(project_root) as store:
+             store.initialize()
+
+             source_tables = 0
+             source_columns = 0
+             model_tables = 0
+             model_columns = 0
+             errors = []
+
+             # Snapshot sources
+             if sources:
+                 if use_rich:
+                     console.print("[bold]Snapping sources...[/bold]")
+                 else:
+                     print("Snapping sources...")
+
+                 for source_name, source_config in sources.items():
+                     try:
+                         t_count, c_count = self._snap_source(store, source_name, source_config)
+                         source_tables += t_count
+                         source_columns += c_count
+                         if use_rich:
+                             console.print(f"  [green]✓[/green] {source_name}: {t_count} tables, {c_count} columns")
+                         else:
+                             print(f"  ✓ {source_name}: {t_count} tables, {c_count} columns")
+                     except Exception as e:
+                         errors.append((source_name, str(e)))
+                         if use_rich:
+                             console.print(f"  [red]✗[/red] {source_name}: {e}")
+                         else:
+                             print(f"  ✗ {source_name}: {e}")
+
+             # Snapshot models
+             if models:
+                 if use_rich:
+                     console.print()
+                     console.print("[bold]Snapping models...[/bold]")
+                 else:
+                     print()
+                     print("Snapping models...")
+
+                 for model_name, model_config in models.items():
+                     try:
+                         t_count, c_count = self._snap_model(store, model_name, model_config)
+                         model_tables += t_count
+                         model_columns += c_count
+                         if t_count > 0:
+                             if use_rich:
+                                 console.print(f"  [green]✓[/green] {model_name}: {c_count} columns")
+                             else:
+                                 print(f"  ✓ {model_name}: {c_count} columns")
+                     except Exception as e:
+                         errors.append((f"model:{model_name}", str(e)))
+                         if use_rich:
+                             console.print(f"  [red]✗[/red] {model_name}: {e}")
+                         else:
+                             print(f"  ✗ {model_name}: {e}")
+
+         # Summary
+         total_tables = source_tables + model_tables
+         total_columns = source_columns + model_columns
+
+         if use_rich:
+             console.print()
+             if errors:
+                 console.print(Panel(
+                     f"[yellow]Completed with {len(errors)} error(s)[/yellow]\n"
+                     f"Tables: {total_tables} | Columns: {total_columns}",
+                     title="Summary",
+                     border_style="yellow"
+                 ))
+             else:
+                 console.print(Panel(
+                     f"[green]Success![/green]\n"
+                     f"Sources: {source_tables} tables, {source_columns} columns\n"
+                     f"Models: {model_tables} tables, {model_columns} columns\n"
+                     f"[dim]Saved to .dvt/metadata_store.duckdb[/dim]",
+                     title="Summary",
+                     border_style="green"
+                 ))
+         else:
+             print()
+             print("=" * 40)
+             if errors:
+                 print(f"Completed with {len(errors)} error(s)")
+             else:
+                 print(f"Success: {total_tables} tables, {total_columns} columns")
+             print("Saved to .dvt/metadata_store.duckdb")
+
+         return len(errors) == 0, True
+
+     # =========================================================================
+     # Export Subcommand (CLI display)
+     # =========================================================================
+
+     def run_export(self):
+         """Display metadata in Rich-formatted CLI output."""
+         from dbt.compute.metadata import ProjectMetadataStore
+
+         project_dir = getattr(self.args, 'project_dir', None)
+         project_root = Path(project_dir) if project_dir else Path(".")
+
+         # Try to use Rich
+         try:
+             from rich.console import Console
+             from rich.table import Table
+             from rich.panel import Panel
+             console = Console()
+             use_rich = True
+         except ImportError:
+             use_rich = False
+
+         dvt_dir = project_root / ".dvt"
+         if not dvt_dir.exists():
+             msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
+             if use_rich:
+                 console.print(f"[yellow]{msg}[/yellow]")
+             else:
+                 print(msg)
+             return False, False
+
+         with ProjectMetadataStore(project_root) as store:
+             store.initialize()
+
+             # Get all sources/tables
+             all_tables = store.get_all_sources()
+
+             if not all_tables:
+                 msg = "No metadata found. Run 'dvt metadata snapshot' first."
+                 if use_rich:
+                     console.print(f"[yellow]{msg}[/yellow]")
+                 else:
+                     print(msg)
+                 return True, True
+
+             if use_rich:
+                 console.print(Panel.fit(
+                     "[bold cyan]DVT Metadata Store[/bold cyan]",
+                     border_style="cyan"
+                 ))
+                 console.print()
+
+                 # Create summary table
+                 table = Table(title="Captured Metadata")
+                 table.add_column("Type", style="cyan")
+                 table.add_column("Source/Model", style="green")
+                 table.add_column("Table", style="white")
+                 table.add_column("Columns", justify="right")
+                 table.add_column("Last Updated", style="dim")
+
+                 for source_name, table_name in all_tables:
+                     metadata = store.get_table_metadata(source_name, table_name)
+                     if metadata:
+                         # Determine type (source or model)
+                         item_type = "Model" if source_name.startswith("model:") else "Source"
+                         display_name = source_name.replace("model:", "") if item_type == "Model" else source_name
+
+                         table.add_row(
+                             item_type,
+                             display_name,
+                             table_name,
+                             str(len(metadata.columns)),
+                             metadata.last_refreshed.strftime("%Y-%m-%d %H:%M") if metadata.last_refreshed else "-"
+                         )
+
+                 console.print(table)
+
+                 # Stats
+                 stats = store.get_stats()
+                 console.print()
+                 console.print(f"[dim]Total: {stats['metadata_tables']} tables, {stats['metadata_columns']} columns[/dim]")
+
+             else:
+                 print("DVT Metadata Store")
+                 print("=" * 60)
+                 print(f"{'Type':<10} {'Source/Model':<20} {'Table':<20} {'Columns':>8}")
+                 print("-" * 60)
+
+                 for source_name, table_name in all_tables:
+                     metadata = store.get_table_metadata(source_name, table_name)
+                     if metadata:
+                         item_type = "Model" if source_name.startswith("model:") else "Source"
+                         display_name = source_name.replace("model:", "") if item_type == "Model" else source_name
+                         print(f"{item_type:<10} {display_name:<20} {table_name:<20} {len(metadata.columns):>8}")
+
+                 print("-" * 60)
+
+         return True, True
+
+     # =========================================================================
+     # Export CSV Subcommand
+     # =========================================================================
+
+     def run_export_csv(self):
+         """Export metadata to a CSV file."""
+         from dbt.compute.metadata import ProjectMetadataStore
+
+         project_dir = getattr(self.args, 'project_dir', None)
+         project_root = Path(project_dir) if project_dir else Path(".")
+         filename = getattr(self.args, 'filename', 'metadata.csv')
+
+         try:
+             from rich.console import Console
+             console = Console()
+             use_rich = True
+         except ImportError:
+             use_rich = False
+
+         dvt_dir = project_root / ".dvt"
+         if not dvt_dir.exists():
+             msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
+             if use_rich:
+                 console.print(f"[yellow]{msg}[/yellow]")
+             else:
+                 print(msg)
+             return False, False
+
+         with ProjectMetadataStore(project_root) as store:
+             store.initialize()
+
+             # Get all metadata for CSV export
+             all_tables = store.get_all_sources()
+
+             if not all_tables:
+                 msg = "No metadata found. Run 'dvt metadata snapshot' first."
+                 if use_rich:
+                     console.print(f"[yellow]{msg}[/yellow]")
+                 else:
+                     print(msg)
+                 return True, True
+
+             # Build CSV content
+             import csv
+             output_path = Path(filename)
+
+             with open(output_path, 'w', newline='') as csvfile:
+                 writer = csv.writer(csvfile)
+                 # Header
+                 writer.writerow([
+                     'type', 'source_name', 'table_name', 'column_name',
+                     'adapter_type', 'spark_type', 'is_nullable', 'is_primary_key',
+                     'ordinal_position', 'last_refreshed'
+                 ])
+
+                 # Data
+                 for source_name, table_name in all_tables:
+                     metadata = store.get_table_metadata(source_name, table_name)
+                     if metadata:
+                         item_type = "model" if source_name.startswith("model:") else "source"
+                         for col in metadata.columns:
+                             writer.writerow([
+                                 item_type,
+                                 source_name,
+                                 table_name,
+                                 col.column_name,
+                                 col.adapter_type,
+                                 col.spark_type,
+                                 col.is_nullable,
+                                 col.is_primary_key,
+                                 col.ordinal_position,
+                                 metadata.last_refreshed.isoformat() if metadata.last_refreshed else ''
+                             ])
+
+             if use_rich:
+                 console.print(f"[green]✓[/green] Exported to [cyan]{output_path}[/cyan]")
+             else:
+                 print(f"✓ Exported to {output_path}")
+
+         return True, True
+
+     # =========================================================================
+     # Export JSON Subcommand
+     # =========================================================================
+
+     def run_export_json(self):
+         """Export metadata to a JSON file."""
+         from dbt.compute.metadata import ProjectMetadataStore
+
+         project_dir = getattr(self.args, 'project_dir', None)
+         project_root = Path(project_dir) if project_dir else Path(".")
+         filename = getattr(self.args, 'filename', 'metadata.json')
+
+         try:
+             from rich.console import Console
+             console = Console()
+             use_rich = True
+         except ImportError:
+             use_rich = False
+
+         dvt_dir = project_root / ".dvt"
+         if not dvt_dir.exists():
+             msg = "No .dvt directory found. Run 'dvt metadata snapshot' first."
+             if use_rich:
+                 console.print(f"[yellow]{msg}[/yellow]")
+             else:
+                 print(msg)
+             return False, False
+
+         with ProjectMetadataStore(project_root) as store:
+             store.initialize()
+
+             all_tables = store.get_all_sources()
+
+             if not all_tables:
+                 msg = "No metadata found. Run 'dvt metadata snapshot' first."
+                 if use_rich:
+                     console.print(f"[yellow]{msg}[/yellow]")
+                 else:
+                     print(msg)
+                 return True, True
+
+             # Build JSON structure
+             metadata_json = {
+                 "version": "1.0",
+                 "exported_at": datetime.now().isoformat(),
+                 "sources": {},
+                 "models": {}
+             }
+
+             for source_name, table_name in all_tables:
+                 metadata = store.get_table_metadata(source_name, table_name)
+                 if metadata:
+                     is_model = source_name.startswith("model:")
+                     target_dict = metadata_json["models"] if is_model else metadata_json["sources"]
+                     clean_name = source_name.replace("model:", "") if is_model else source_name
+
+                     if clean_name not in target_dict:
+                         target_dict[clean_name] = {
+                             "adapter": metadata.adapter_name,
+                             "connection": metadata.connection_name,
+                             "tables": {}
+                         }
+
+                     target_dict[clean_name]["tables"][table_name] = {
+                         "schema": metadata.schema_name,
+                         "last_refreshed": metadata.last_refreshed.isoformat() if metadata.last_refreshed else None,
+                         "columns": [
+                             {
+                                 "name": col.column_name,
+                                 "adapter_type": col.adapter_type,
+                                 "spark_type": col.spark_type,
+                                 "nullable": col.is_nullable,
+                                 "primary_key": col.is_primary_key,
+                                 "position": col.ordinal_position
+                             }
+                             for col in metadata.columns
+                         ]
+                     }
+
+             # Write JSON
+             output_path = Path(filename)
+             with open(output_path, 'w') as f:
+                 json.dump(metadata_json, f, indent=2)
+
+             if use_rich:
+                 console.print(f"[green]✓[/green] Exported to [cyan]{output_path}[/cyan]")
+             else:
+                 print(f"✓ Exported to {output_path}")
+
+         return True, True
+
+     # =========================================================================
+     # Helper Methods
+     # =========================================================================
+
+     def _load_sources(self, project_root: Path) -> Dict[str, Dict[str, Any]]:
+         """Load source definitions from the project."""
+         import yaml
+
+         sources = {}
+         models_dir = project_root / "models"
+         if not models_dir.exists():
+             return sources
+
+         for yml_file in models_dir.rglob("*.yml"):
+             try:
+                 with open(yml_file) as f:
+                     content = yaml.safe_load(f)
+
+                 if content and "sources" in content:
+                     for source in content["sources"]:
+                         source_name = source.get("name")
+                         if source_name:
+                             sources[source_name] = source
+             except Exception:
+                 pass
+
+         return sources
+
+     def _load_models(self, project_root: Path) -> Dict[str, Dict[str, Any]]:
+         """Load model metadata from catalog.json (actual database schema).
+
+         The catalog.json is generated by `dvt docs generate` and contains
+         actual column information from the database, not just what's documented
+         in YAML files.
+         """
+         models = {}
+
+         # Primary source: catalog.json (actual database schema)
+         catalog_path = project_root / "target" / "catalog.json"
+         if catalog_path.exists():
+             try:
+                 with open(catalog_path) as f:
+                     catalog = json.load(f)
+
+                 nodes = catalog.get("nodes", {})
+                 for node_id, node_info in nodes.items():
+                     # Only process models (not seeds, tests, etc.)
+                     if node_id.startswith("model."):
+                         metadata = node_info.get("metadata", {})
+                         columns = node_info.get("columns", {})
+
+                         if columns:
+                             model_name = metadata.get("name")
+                             if model_name:
+                                 models[model_name] = {
+                                     "name": model_name,
+                                     "unique_id": node_id,
+                                     "database": metadata.get("database"),
+                                     "schema": metadata.get("schema"),
+                                     "type": metadata.get("type"),  # TABLE, VIEW
+                                     "columns": [
+                                         {
+                                             "name": col_info.get("name"),
+                                             "data_type": col_info.get("type"),
+                                             "index": col_info.get("index", 0),
+                                         }
+                                         for col_name, col_info in columns.items()
+                                     ],
+                                     "_from_catalog": True,
+                                 }
+             except Exception:
+                 # Fall back to YAML if catalog parsing fails
+                 pass
+
+         # Fallback: YAML definitions (for models not in catalog)
+         if not models:
+             import yaml
+             models_dir = project_root / "models"
+             if models_dir.exists():
+                 for yml_file in models_dir.rglob("*.yml"):
+                     try:
+                         with open(yml_file) as f:
+                             content = yaml.safe_load(f)
+
+                         if content and "models" in content:
+                             for model in content["models"]:
+                                 model_name = model.get("name")
+                                 if model_name and model.get("columns"):
+                                     model["_file_path"] = str(yml_file)
+                                     model["_from_catalog"] = False
+                                     models[model_name] = model
+                     except Exception:
+                         pass
+
+         return models
+
+     def _snap_source(
+         self,
+         store,
+         source_name: str,
+         source_config: Dict[str, Any]
+     ) -> Tuple[int, int]:
+         """Snapshot metadata from a single source."""
+         from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
+         from dbt.compute.metadata.registry import TypeRegistry
+
+         tables_count = 0
+         columns_count = 0
+
+         schema = source_config.get("schema", "public")
+         tables = source_config.get("tables", [])
+         adapter_name = source_config.get("adapter", "postgres")
+
+         for table_config in tables:
+             table_name = table_config.get("name")
+             if not table_name:
+                 continue
+
+             columns_config = table_config.get("columns", [])
+             if not columns_config:
+                 continue
+
+             columns = []
+             for idx, col_config in enumerate(columns_config):
+                 col_name = col_config.get("name")
+                 if not col_name:
+                     continue
+
+                 adapter_type = col_config.get("data_type", "VARCHAR")
+                 type_info = TypeRegistry.get_spark_type(adapter_name, adapter_type)
+                 spark_type = type_info["spark_native_type"] if type_info else "StringType"
+
+                 columns.append(ColumnMetadata(
+                     column_name=col_name,
+                     adapter_type=adapter_type,
+                     spark_type=spark_type,
+                     is_nullable=col_config.get("nullable", True),
+                     is_primary_key=col_config.get("primary_key", False),
+                     ordinal_position=idx + 1,
+                 ))
+
+             if columns:
+                 metadata = TableMetadata(
+                     source_name=source_name,
+                     table_name=table_name,
+                     adapter_name=adapter_name,
+                     connection_name=source_name,
+                     schema_name=schema,
+                     columns=columns,
+                     last_refreshed=datetime.now(),
+                 )
+                 store.save_table_metadata(metadata)
+                 tables_count += 1
+                 columns_count += len(columns)
+
+         return tables_count, columns_count
+
+     def _snap_model(
+         self,
+         store,
+         model_name: str,
+         model_config: Dict[str, Any]
+     ) -> Tuple[int, int]:
+         """Snapshot metadata from a model definition.
+
+         Handles both catalog-based (actual database schema) and YAML-based
+         (documented columns) sources.
+         """
+         from dbt.compute.metadata.store import TableMetadata, ColumnMetadata
+         from dbt.compute.metadata.registry import TypeRegistry
+
+         columns_config = model_config.get("columns", [])
+         if not columns_config:
+             return 0, 0
+
+         # Determine adapter type based on database in catalog
+         from_catalog = model_config.get("_from_catalog", False)
+         database = model_config.get("database", "")
+         schema_name = model_config.get("schema", "default")
+
+         # Infer adapter from database name in catalog
+         if from_catalog:
+             # Use database name to guess adapter (postgres, snowflake, etc.)
+             adapter_name = self._infer_adapter_from_database(database)
+         else:
+             config = model_config.get("config", {})
+             adapter_name = config.get("adapter_type", "postgres")
+
+         columns = []
+         for idx, col_config in enumerate(columns_config):
+             col_name = col_config.get("name")
+             if not col_name:
+                 continue
+
+             # Get adapter type from catalog (actual DB type) or YAML
+             adapter_type = col_config.get("data_type") or col_config.get("type", "STRING")
+
+             # Convert adapter type to Spark type
+             type_info = TypeRegistry.get_spark_type(adapter_name, adapter_type)
+             spark_type = type_info["spark_native_type"] if type_info else "StringType"
+
+             # For catalog-based columns, use the catalog index for position
+             if from_catalog:
+                 ordinal_position = col_config.get("index", idx + 1)
+             else:
+                 ordinal_position = idx + 1
+
+             # Nullable defaults to True; the catalog doesn't provide this info
+             is_nullable = True
+             is_primary_key = False
+
+             # Check YAML tests for not_null and unique (only for YAML-based)
+             if not from_catalog:
+                 tests = col_config.get("tests", []) or col_config.get("data_tests", [])
+                 if tests:
+                     for test in tests:
+                         if test == "not_null" or (isinstance(test, dict) and "not_null" in test):
+                             is_nullable = False
+                         if test == "unique" or (isinstance(test, dict) and "unique" in test):
+                             is_primary_key = True
+
+             columns.append(ColumnMetadata(
+                 column_name=col_name,
+                 adapter_type=adapter_type,
+                 spark_type=spark_type,
+                 is_nullable=is_nullable,
+                 is_primary_key=is_primary_key,
+                 ordinal_position=ordinal_position,
+             ))
+
+         if columns:
+             # Sort by ordinal position for consistent output
+             columns.sort(key=lambda c: c.ordinal_position)
+
+             metadata = TableMetadata(
+                 source_name=f"model:{model_name}",
+                 table_name=model_name,
+                 adapter_name=adapter_name,
+                 connection_name="default",
+                 schema_name=schema_name,
+                 columns=columns,
+                 last_refreshed=datetime.now(),
+             )
+             store.save_table_metadata(metadata)
+             return 1, len(columns)
+
+         return 0, 0
+
+     def _infer_adapter_from_database(self, database: str) -> str:
+         """Infer the adapter type from a database name."""
+         db_lower = database.lower() if database else ""
+
+         # Common database name patterns
+         if "postgres" in db_lower or "pg" in db_lower:
+             return "postgres"
+         elif "snowflake" in db_lower or "sf" in db_lower:
+             return "snowflake"
+         elif "databricks" in db_lower or "spark" in db_lower:
+             return "databricks"
+         elif "redshift" in db_lower:
+             return "redshift"
+         elif "bigquery" in db_lower or "bq" in db_lower:
+             return "bigquery"
+         elif "mysql" in db_lower:
+             return "mysql"
+         elif "sqlserver" in db_lower or "mssql" in db_lower:
+             return "sqlserver"
+         else:
+             # Default to postgres as it's the most common
+             return "postgres"