dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
@@ -0,0 +1,662 @@
1
+ # =============================================================================
2
+ # DVT Profile Serve - Web UI for Profiling Results
3
+ # =============================================================================
4
+ # Serves a beautiful web interface to view profiling results stored in
5
+ # metadata_store.duckdb, similar to PipeRider's report viewer.
6
+ #
7
+ # Usage:
8
+ # dvt profile serve # Start server on http://localhost:8580
9
+ # dvt profile serve --port 9000 # Custom port
10
+ # dvt profile serve --no-browser # Don't auto-open browser
11
+ #
12
+ # Installation:
13
+ # Copy this file to: core/dbt/task/profile_serve.py
14
+ #
15
+ # DVT v0.58.0: New web UI for profiling results
16
+ # =============================================================================
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import threading
22
+ import webbrowser
23
+ from datetime import datetime
24
+ from http.server import HTTPServer, SimpleHTTPRequestHandler
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+ from urllib.parse import parse_qs, urlparse
28
+
29
+ # Try to import Rich for CLI output
30
+ try:
31
+ from rich.console import Console
32
+ from rich.panel import Panel
33
+ from rich import box
34
+ console = Console()
35
+ HAS_RICH = True
36
+ except ImportError:
37
+ HAS_RICH = False
38
+ console = None
39
+
40
+
41
+ class ProfileAPIHandler(SimpleHTTPRequestHandler):
42
+ """HTTP handler for the Profile Viewer API and static files."""
43
+
44
def __init__(self, *args, metadata_store_path: Optional[Path] = None, **kwargs):
    """Remember the metadata store location, then defer to the base handler.

    Args:
        *args: Positional arguments forwarded unchanged to
            ``SimpleHTTPRequestHandler``.
        metadata_store_path: Location of the DuckDB file that
            ``_get_connection`` opens. Keyword-only; defaults to ``None``.
        **kwargs: Keyword arguments forwarded unchanged to the base class.
    """
    # The attribute is assigned before delegating so it already exists once
    # the base class constructor runs.
    # NOTE(review): socketserver request handlers process the request from
    # inside their constructor, which is presumably why the order matters.
    self.metadata_store_path = metadata_store_path
    super().__init__(*args, **kwargs)
47
+
48
def do_GET(self):
    """Dispatch a GET request according to its URL path.

    ``/api/profiles``, ``/api/profile`` and ``/api/summary`` are answered by
    the JSON helpers, ``/`` and ``/index.html`` by the generated viewer page,
    and every other path is delegated to ``SimpleHTTPRequestHandler``.
    """
    url = urlparse(self.path)
    route = url.path

    if route == "/api/profiles":
        self._serve_profiles_list()
        return

    if route == "/api/profile":
        # parse_qs maps each key to a list of values; only the first
        # ``table`` value is used and a missing key yields None.
        requested = parse_qs(url.query).get("table", [None])
        self._serve_profile_detail(requested[0])
        return

    if route == "/api/summary":
        self._serve_summary()
        return

    if route in ("/", "/index.html"):
        self._serve_html()
        return

    # NOTE(review): the inherited handler serves files from disk (by default
    # relative to the process working directory) -- confirm that exposing
    # them through this server is intended.
    super().do_GET()
67
+
68
def _serve_json(self, data: Any, status: int = 200):
    """Write ``data`` to the client as a JSON response.

    Args:
        data: Object to serialise. Values the ``json`` module cannot encode
            natively are passed through ``str``.
        status: HTTP status code of the response (200 unless overridden).
    """
    self.send_response(status)
    response_headers = (
        ("Content-Type", "application/json"),
        ("Access-Control-Allow-Origin", "*"),
    )
    for header_name, header_value in response_headers:
        self.send_header(header_name, header_value)
    self.end_headers()

    # Serialisation happens after the headers are emitted, as before.
    body = json.dumps(data, default=str).encode()
    self.wfile.write(body)
75
+
76
def _serve_html(self):
    """Send the generated viewer page as a ``text/html`` response (HTTP 200)."""
    markup = self._generate_html()

    self.send_response(200)
    self.send_header("Content-Type", "text/html")
    self.end_headers()

    # Encoding is deferred until the headers are out, matching the
    # original ordering of side effects.
    payload = markup.encode()
    self.wfile.write(payload)
83
+
84
def _get_connection(self):
    """Open a read-only DuckDB connection to the metadata store.

    Returns:
        The connection returned by ``duckdb.connect``, or ``None`` when
        ``duckdb`` cannot be imported or the store cannot be opened. Callers
        check for ``None`` and close the connection themselves.
    """
    try:
        # Imported inside the function, so a missing duckdb package surfaces
        # here as a failed connection rather than at module import time.
        import duckdb

        return duckdb.connect(str(self.metadata_store_path), read_only=True)
    except Exception as exc:
        # Best-effort by design: callers translate None into an HTTP 500.
        # Report the cause through the handler's logging hook instead of
        # discarding it, so the failure can be diagnosed.
        self.log_error(
            "Could not open metadata store %s: %s",
            self.metadata_store_path,
            exc,
        )
        return None
91
+
92
def _serve_profiles_list(self):
    """Serve one JSON entry per profiled (source, table, mode) combination.

    Aggregates ``profile_results`` (populated by ``dvt profile run``) and
    responds with ``{"profiles": [...]}``. Each entry carries the column
    count, the largest recorded row count, the latest profiling time, the
    number of rows with a non-empty ``alerts`` value and a ``type`` of
    ``"model"`` or ``"source"``.

    Responses:
        500 when the store cannot be opened. A failing query still answers
        200 with an empty ``profiles`` list plus an ``error`` message, so a
        client reading ``profiles`` keeps working.
    """
    conn = self._get_connection()
    if not conn:
        self._serve_json({"error": "Could not connect to metadata store"}, 500)
        return

    try:
        rows = conn.execute("""
            SELECT
                source_name,
                table_name,
                profile_mode,
                COUNT(DISTINCT column_name) as column_count,
                MAX(row_count) as row_count,
                MAX(profiled_at) as last_profiled,
                SUM(CASE WHEN alerts IS NOT NULL AND alerts != '[]' THEN 1 ELSE 0 END) as alert_count
            FROM profile_results
            GROUP BY source_name, table_name, profile_mode
            ORDER BY source_name, table_name
        """).fetchall()

        profiles = []
        for (
            source_name,
            table_name,
            profile_mode,
            column_count,
            row_count,
            last_profiled,
            alert_count,
        ) in rows:
            # source_name may be NULL; treat it as an empty prefix so a
            # single such row no longer aborts the whole listing.
            is_model = (source_name or "").startswith("model:")
            profiles.append({
                "source_name": source_name,
                "table_name": table_name,
                "profile_mode": profile_mode,
                "column_count": column_count,
                "row_count": row_count,
                "last_profiled": last_profiled,
                "alert_count": alert_count,
                "type": "model" if is_model else "source",
            })

        self._serve_json({"profiles": profiles})
    except Exception as e:
        self._serve_json({"profiles": [], "error": str(e)})
    finally:
        conn.close()
133
+
134
def _serve_profile_detail(self, table_name: str):
    """Serve the column-level profile of one table as JSON.

    Reads every ``profile_results`` row whose ``table_name`` matches, decodes
    the JSON-typed columns (``histogram``, ``top_values``, ``alerts``) and
    merges in ``column_metadata`` details when that lookup succeeds.

    Args:
        table_name: Value of the ``table`` query parameter. A falsy value is
            rejected with HTTP 400.

    Responses:
        400 when ``table_name`` is missing, 500 when the store cannot be
        opened or the main query fails, otherwise 200 with the table-level
        summary and a ``columns`` list (empty when no rows match).
    """
    if not table_name:
        self._serve_json({"error": "table parameter required"}, 400)
        return

    conn = self._get_connection()
    if not conn:
        self._serve_json({"error": "Could not connect to metadata store"}, 500)
        return

    def _decode(raw):
        """Decode one JSON-typed cell; None when it is empty or malformed."""
        if not raw:
            return None
        if not isinstance(raw, str):
            # Already a decoded value; pass it through unchanged.
            return raw
        try:
            return json.loads(raw)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return None

    try:
        # Query profile_results for PipeRider-style metrics
        result = conn.execute("""
            SELECT
                column_name,
                profile_mode,
                row_count,
                null_count,
                null_percent,
                distinct_count,
                distinct_percent,
                min_value,
                max_value,
                mean_value,
                median_value,
                stddev_value,
                p25,
                p50,
                p75,
                min_length,
                max_length,
                avg_length,
                histogram,
                top_values,
                alerts,
                profiled_at,
                duration_ms
            FROM profile_results
            WHERE table_name = ?
            ORDER BY column_name
        """, [table_name]).fetchall()

        columns = []
        row_count = None
        profile_mode = None
        profiled_at = None
        total_alerts = []

        for row in result:
            (
                column_name, mode, rows_profiled,
                null_count, null_percent,
                distinct_count, distinct_percent,
                min_value, max_value, mean_value, median_value, stddev_value,
                p25, p50, p75,
                min_length, max_length, avg_length,
                histogram_raw, top_values_raw, alerts_raw,
                row_profiled_at, duration_ms,
            ) = row

            # Table-level info is taken from rows until one supplies a
            # non-NULL row_count.
            if row_count is None:
                row_count = rows_profiled
                profile_mode = mode
                profiled_at = row_profiled_at

            # Each JSON field is decoded on its own, so one malformed value
            # no longer discards the other two.
            histogram = _decode(histogram_raw)
            top_values = _decode(top_values_raw)
            alerts = _decode(alerts_raw) or []
            # Only list payloads contribute to the table-level alert total.
            if isinstance(alerts, list):
                total_alerts.extend(alerts)

            columns.append({
                "name": column_name,
                "profile_mode": mode,
                "null_count": null_count,
                "null_percent": null_percent,
                "distinct_count": distinct_count,
                "distinct_percent": distinct_percent,
                "min_value": min_value,
                "max_value": max_value,
                "mean_value": mean_value,
                "median_value": median_value,
                "stddev_value": stddev_value,
                "p25": p25,
                "p50": p50,
                "p75": p75,
                "min_length": min_length,
                "max_length": max_length,
                "avg_length": avg_length,
                "histogram": histogram,
                "top_values": top_values,
                "alerts": alerts,
                "duration_ms": duration_ms,
            })

        # Schema metadata is optional enrichment: any failure of this lookup
        # leaves the profile without it instead of failing the request.
        schema_info = {}
        try:
            schema_result = conn.execute("""
                SELECT column_name, adapter_type, spark_type, is_nullable, is_primary_key
                FROM column_metadata
                WHERE table_name = ?
            """, [table_name]).fetchall()
            for sr in schema_result:
                schema_info[sr[0]] = {
                    "adapter_type": sr[1],
                    "spark_type": sr[2],
                    "is_nullable": sr[3],
                    "is_primary_key": sr[4],
                }
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still propagate.
            schema_info = {}

        # Merge schema info into columns
        for col in columns:
            extra = schema_info.get(col["name"])
            if extra is not None:
                col.update(extra)

        self._serve_json({
            "table_name": table_name,
            "profile_mode": profile_mode,
            "row_count": row_count,
            "column_count": len(columns),
            "profiled_at": profiled_at,
            "alert_count": len(total_alerts),
            "alerts": total_alerts,
            "columns": columns,
        })
    except Exception as e:
        self._serve_json({"error": str(e)}, 500)
    finally:
        conn.close()
268
+
269
def _serve_summary(self):
    """Serve store-wide totals computed from ``profile_results`` as JSON.

    The payload contains ``total_tables``, ``total_columns``, ``sources``,
    ``models``, ``total_rows`` and ``alert_count``. A source name starting
    with ``model:`` marks a model; ``sources`` counts the distinct source
    names without that prefix.

    Responses:
        500 when the store cannot be opened or a counting query fails,
        otherwise 200. Alert counting is best-effort and falls back to the
        alerts counted so far if its query or a payload cannot be processed.
    """
    conn = self._get_connection()
    if not conn:
        self._serve_json({"error": "Could not connect to metadata store"}, 500)
        return

    def _scalar(sql):
        """Run a single-value query and return that value."""
        return conn.execute(sql).fetchone()[0]

    try:
        tables = _scalar("""
            SELECT COUNT(DISTINCT table_name) FROM profile_results
        """)

        columns = _scalar("""
            SELECT COUNT(DISTINCT source_name || '.' || table_name || '.' || column_name)
            FROM profile_results
        """)

        # Count real sources directly. Subtracting the number of model
        # tables from the number of distinct source names is wrong whenever
        # only models exist or several models share one source name.
        sources = _scalar("""
            SELECT COUNT(DISTINCT source_name) FROM profile_results
            WHERE source_name NOT LIKE 'model:%'
        """)

        # Models are tables whose source_name starts with 'model:'.
        models = _scalar("""
            SELECT COUNT(DISTINCT table_name) FROM profile_results
            WHERE source_name LIKE 'model:%'
        """)

        # Total rows: the largest recorded row_count of each table, summed.
        total_rows = _scalar("""
            SELECT COALESCE(SUM(row_count), 0) FROM (
                SELECT DISTINCT source_name, table_name, MAX(row_count) as row_count
                FROM profile_results
                GROUP BY source_name, table_name
            )
        """)

        alert_count = 0
        try:
            alert_rows = conn.execute("""
                SELECT alerts FROM profile_results WHERE alerts IS NOT NULL AND alerts != '[]'
            """).fetchall()
        except Exception:
            # Best-effort: a failing alerts query must not hide the other
            # totals. Not a bare ``except`` so interrupts still propagate.
            alert_rows = []

        for (raw,) in alert_rows:
            if not raw:
                continue
            try:
                alerts = json.loads(raw) if isinstance(raw, str) else raw
                alert_count += len(alerts) if alerts else 0
            except (ValueError, TypeError):
                # Malformed JSON or a payload without a length: skip it.
                continue

        self._serve_json({
            "total_tables": tables,
            "total_columns": columns,
            "sources": sources,
            "models": models,
            "total_rows": total_rows,
            "alert_count": alert_count,
        })
    except Exception as e:
        self._serve_json({"error": str(e)}, 500)
    finally:
        conn.close()
335
+
336
+ def _generate_html(self) -> str:
337
+ """Generate the HTML page for the profile viewer."""
338
+ return '''<!DOCTYPE html>
339
+ <html lang="en">
340
+ <head>
341
+ <meta charset="UTF-8">
342
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
343
+ <title>DVT Profile Viewer</title>
344
+ <style>
345
+ :root {
346
+ --primary: #6366f1;
347
+ --primary-dark: #4f46e5;
348
+ --success: #10b981;
349
+ --warning: #f59e0b;
350
+ --error: #ef4444;
351
+ --bg: #0f172a;
352
+ --bg-card: #1e293b;
353
+ --text: #f1f5f9;
354
+ --text-dim: #94a3b8;
355
+ --border: #334155;
356
+ }
357
+ * { box-sizing: border-box; margin: 0; padding: 0; }
358
+ body {
359
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
360
+ background: var(--bg);
361
+ color: var(--text);
362
+ min-height: 100vh;
363
+ }
364
+ .header {
365
+ background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
366
+ padding: 2rem;
367
+ text-align: center;
368
+ }
369
+ .header h1 { font-size: 2rem; margin-bottom: 0.5rem; }
370
+ .header p { color: rgba(255,255,255,0.8); }
371
+ .container { max-width: 1400px; margin: 0 auto; padding: 2rem; }
372
+ .stats-grid {
373
+ display: grid;
374
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
375
+ gap: 1rem;
376
+ margin-bottom: 2rem;
377
+ }
378
+ .stat-card {
379
+ background: var(--bg-card);
380
+ border-radius: 12px;
381
+ padding: 1.5rem;
382
+ border: 1px solid var(--border);
383
+ }
384
+ .stat-card h3 { color: var(--text-dim); font-size: 0.875rem; margin-bottom: 0.5rem; }
385
+ .stat-card .value { font-size: 2rem; font-weight: 700; color: var(--primary); }
386
+ .stat-card .value.alert { color: var(--error); }
387
+ .tables-section { margin-top: 2rem; }
388
+ .tables-section h2 { margin-bottom: 1rem; }
389
+ .table-list { display: grid; gap: 1rem; }
390
+ .table-card {
391
+ background: var(--bg-card);
392
+ border-radius: 12px;
393
+ padding: 1.5rem;
394
+ border: 1px solid var(--border);
395
+ cursor: pointer;
396
+ transition: all 0.2s;
397
+ }
398
+ .table-card:hover { border-color: var(--primary); transform: translateY(-2px); }
399
+ .table-card.selected { border-color: var(--primary); box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.3); }
400
+ .table-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.5rem; }
401
+ .table-name { font-weight: 600; font-size: 1.1rem; }
402
+ .table-badges { display: flex; gap: 0.5rem; }
403
+ .table-type { font-size: 0.75rem; padding: 0.25rem 0.5rem; border-radius: 4px; background: var(--primary); }
404
+ .table-type.source { background: var(--success); }
405
+ .table-type.model { background: var(--warning); }
406
+ .alert-badge { font-size: 0.75rem; padding: 0.25rem 0.5rem; border-radius: 4px; background: var(--error); }
407
+ .table-meta { color: var(--text-dim); font-size: 0.875rem; }
408
+ .table-stats { display: flex; gap: 1rem; margin-top: 0.5rem; color: var(--text-dim); font-size: 0.8rem; }
409
+ .detail-panel {
410
+ position: fixed; top: 0; right: -600px; width: 600px; height: 100vh;
411
+ background: var(--bg-card); border-left: 1px solid var(--border);
412
+ transition: right 0.3s; overflow-y: auto; z-index: 100;
413
+ }
414
+ .detail-panel.open { right: 0; }
415
+ .detail-header { padding: 1.5rem; border-bottom: 1px solid var(--border); display: flex; justify-content: space-between; align-items: center; }
416
+ .detail-header h2 { font-size: 1.25rem; }
417
+ .close-btn { background: none; border: none; color: var(--text); font-size: 1.5rem; cursor: pointer; }
418
+ .detail-content { padding: 1.5rem; }
419
+ .summary-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 1rem; margin-bottom: 1.5rem; }
420
+ .summary-item { background: var(--bg); padding: 1rem; border-radius: 8px; text-align: center; }
421
+ .summary-item .label { color: var(--text-dim); font-size: 0.75rem; margin-bottom: 0.25rem; }
422
+ .summary-item .value { font-size: 1.25rem; font-weight: 600; }
423
+ .alerts-section { background: rgba(239, 68, 68, 0.1); border: 1px solid var(--error); border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem; }
424
+ .alerts-section h3 { color: var(--error); font-size: 0.9rem; margin-bottom: 0.5rem; }
425
+ .alert-item { padding: 0.5rem; background: var(--bg); border-radius: 4px; margin-top: 0.5rem; font-size: 0.85rem; }
426
+ .column-table { width: 100%; border-collapse: collapse; font-size: 0.85rem; }
427
+ .column-table th, .column-table td { padding: 0.75rem; text-align: left; border-bottom: 1px solid var(--border); }
428
+ .column-table th { color: var(--text-dim); font-weight: 500; font-size: 0.7rem; text-transform: uppercase; position: sticky; top: 0; background: var(--bg-card); }
429
+ .type-badge { font-family: monospace; font-size: 0.75rem; background: rgba(99, 102, 241, 0.2); padding: 0.2rem 0.4rem; border-radius: 4px; }
430
+ .metric { font-family: monospace; font-size: 0.8rem; }
431
+ .metric.warning { color: var(--warning); }
432
+ .metric.error { color: var(--error); }
433
+ .progress-bar { width: 100%; height: 6px; background: var(--border); border-radius: 3px; overflow: hidden; }
434
+ .progress-bar .fill { height: 100%; background: var(--primary); }
435
+ .progress-bar .fill.warning { background: var(--warning); }
436
+ .progress-bar .fill.error { background: var(--error); }
437
+ .loading { text-align: center; padding: 3rem; color: var(--text-dim); }
438
+ .error { background: rgba(239, 68, 68, 0.2); border: 1px solid var(--error); padding: 1rem; border-radius: 8px; margin: 1rem 0; }
439
+ @media (max-width: 768px) { .detail-panel { width: 100%; right: -100%; } }
440
+ </style>
441
+ </head>
442
+ <body>
443
+ <div class="header">
444
+ <h1>DVT Profile Viewer</h1>
445
+ <p>PipeRider-style data profiling results</p>
446
+ </div>
447
+
448
+ <div class="container">
449
+ <div class="stats-grid" id="stats">
450
+ <div class="stat-card"><h3>Tables</h3><div class="value" id="stat-tables">-</div></div>
451
+ <div class="stat-card"><h3>Columns</h3><div class="value" id="stat-columns">-</div></div>
452
+ <div class="stat-card"><h3>Total Rows</h3><div class="value" id="stat-rows">-</div></div>
453
+ <div class="stat-card"><h3>Sources</h3><div class="value" id="stat-sources">-</div></div>
454
+ <div class="stat-card"><h3>Models</h3><div class="value" id="stat-models">-</div></div>
455
+ <div class="stat-card"><h3>Alerts</h3><div class="value alert" id="stat-alerts">-</div></div>
456
+ </div>
457
+
458
+ <div class="tables-section">
459
+ <h2>Profiled Tables</h2>
460
+ <div class="table-list" id="table-list"><div class="loading">Loading profiles...</div></div>
461
+ </div>
462
+ </div>
463
+
464
+ <div class="detail-panel" id="detail-panel">
465
+ <div class="detail-header">
466
+ <h2 id="detail-title">Table Details</h2>
467
+ <button class="close-btn" onclick="closeDetail()">&times;</button>
468
+ </div>
469
+ <div class="detail-content" id="detail-content">
470
+ <div class="loading">Select a table to view details</div>
471
+ </div>
472
+ </div>
473
+
474
+ <script>
475
+ function formatNumber(n) { return n != null ? n.toLocaleString() : '-'; }
476
+ function formatPercent(n) { return n != null ? n.toFixed(1) + '%' : '-'; }
477
+
478
+ async function loadSummary() {
479
+ try {
480
+ const resp = await fetch('/api/summary');
481
+ const data = await resp.json();
482
+ document.getElementById('stat-tables').textContent = formatNumber(data.total_tables);
483
+ document.getElementById('stat-columns').textContent = formatNumber(data.total_columns);
484
+ document.getElementById('stat-rows').textContent = formatNumber(data.total_rows);
485
+ document.getElementById('stat-sources').textContent = formatNumber(data.sources);
486
+ document.getElementById('stat-models').textContent = formatNumber(data.models);
487
+ document.getElementById('stat-alerts').textContent = formatNumber(data.alert_count);
488
+ } catch (e) { console.error('Failed to load summary:', e); }
489
+ }
490
+
491
+ async function loadProfiles() {
492
+ const container = document.getElementById('table-list');
493
+ try {
494
+ const resp = await fetch('/api/profiles');
495
+ const data = await resp.json();
496
+ if (data.profiles.length === 0) {
497
+ container.innerHTML = '<div class="loading">No profiles found. Run "dvt profile run" first.</div>';
498
+ return;
499
+ }
500
+ container.innerHTML = data.profiles.map(p => `
501
+ <div class="table-card" onclick="showDetail('${p.table_name}')">
502
+ <div class="table-header">
503
+ <span class="table-name">${p.source_name ? p.source_name + '.' : ''}${p.table_name}</span>
504
+ <div class="table-badges">
505
+ ${p.alert_count > 0 ? `<span class="alert-badge">${p.alert_count} alerts</span>` : ''}
506
+ <span class="table-type ${p.type}">${p.type}</span>
507
+ </div>
508
+ </div>
509
+ <div class="table-stats">
510
+ <span>${formatNumber(p.row_count)} rows</span>
511
+ <span>${p.column_count} columns</span>
512
+ <span>Mode: ${p.profile_mode || 'minimal'}</span>
513
+ </div>
514
+ <div class="table-meta">Profiled: ${p.last_profiled ? new Date(p.last_profiled).toLocaleString() : '-'}</div>
515
+ </div>
516
+ `).join('');
517
+ } catch (e) { container.innerHTML = `<div class="error">Failed to load profiles: ${e.message}</div>`; }
518
+ }
519
+
520
+ async function showDetail(tableName) {
521
+ const panel = document.getElementById('detail-panel');
522
+ const title = document.getElementById('detail-title');
523
+ const content = document.getElementById('detail-content');
524
+ panel.classList.add('open');
525
+ title.textContent = tableName;
526
+ content.innerHTML = '<div class="loading">Loading...</div>';
527
+ document.querySelectorAll('.table-card').forEach(c => c.classList.remove('selected'));
528
+ if (event && event.currentTarget) event.currentTarget.classList.add('selected');
529
+
530
+ try {
531
+ const resp = await fetch(`/api/profile?table=${encodeURIComponent(tableName)}`);
532
+ const data = await resp.json();
533
+ if (data.error) { content.innerHTML = `<div class="error">${data.error}</div>`; return; }
534
+
535
+ let html = `
536
+ <div class="summary-grid">
537
+ <div class="summary-item"><div class="label">Rows</div><div class="value">${formatNumber(data.row_count)}</div></div>
538
+ <div class="summary-item"><div class="label">Columns</div><div class="value">${data.column_count}</div></div>
539
+ <div class="summary-item"><div class="label">Alerts</div><div class="value" style="color:var(--error)">${data.alert_count}</div></div>
540
+ </div>`;
541
+
542
+ if (data.alerts && data.alerts.length > 0) {
543
+ html += `<div class="alerts-section"><h3>Quality Alerts</h3>`;
544
+ data.alerts.forEach(a => {
545
+ html += `<div class="alert-item"><strong>${a.column_name || '-'}:</strong> ${a.message || a.type}</div>`;
546
+ });
547
+ html += `</div>`;
548
+ }
549
+
550
+ html += `<table class="column-table"><thead><tr>
551
+ <th>Column</th><th>Nulls</th><th>Distinct</th><th>Min</th><th>Max</th><th>Type</th>
552
+ </tr></thead><tbody>`;
553
+
554
+ data.columns.forEach(c => {
555
+ const nullClass = c.null_percent > 50 ? 'error' : c.null_percent > 10 ? 'warning' : '';
556
+ html += `<tr>
557
+ <td><strong>${c.name}</strong></td>
558
+ <td>
559
+ <div class="metric ${nullClass}">${formatPercent(c.null_percent)}</div>
560
+ <div class="progress-bar"><div class="fill ${nullClass}" style="width:${c.null_percent || 0}%"></div></div>
561
+ </td>
562
+ <td><span class="metric">${formatNumber(c.distinct_count)}</span></td>
563
+ <td><span class="metric">${c.min_value != null ? c.min_value : '-'}</span></td>
564
+ <td><span class="metric">${c.max_value != null ? c.max_value : '-'}</span></td>
565
+ <td><span class="type-badge">${c.adapter_type || '-'}</span></td>
566
+ </tr>`;
567
+ });
568
+ html += `</tbody></table>`;
569
+ content.innerHTML = html;
570
+ } catch (e) { content.innerHTML = `<div class="error">Failed to load details: ${e.message}</div>`; }
571
+ }
572
+
573
+ function closeDetail() {
574
+ document.getElementById('detail-panel').classList.remove('open');
575
+ document.querySelectorAll('.table-card').forEach(c => c.classList.remove('selected'));
576
+ }
577
+
578
+ loadSummary();
579
+ loadProfiles();
580
+ </script>
581
+ </body>
582
+ </html>'''
583
+
584
def log_message(self, format, *args):
    """Do nothing, so that no log output is produced for this call.

    The ``format``/``*args`` pair is accepted and ignored.
    NOTE(review): presumably this overrides the request handler's standard
    ``log_message`` hook to keep the console quiet; the base class is not
    visible from here.
    """
    return None
587
+
588
+
589
def serve_profile_ui(
    project_dir: Path,
    port: int = 8580,
    host: str = "localhost",
    open_browser: bool = True,
) -> bool:
    """
    Start the profile viewer web server and block until it is interrupted.

    Args:
        project_dir: Path to the DVT project
        port: Port to serve on (default: 8580)
        host: Host to bind to (default: localhost)
        open_browser: Whether to open browser automatically

    Returns:
        False when ``<project_dir>/.dvt/metadata_store.duckdb`` does not
        exist (an error message is printed and nothing is served); True
        after the server has been stopped with Ctrl+C.
    """
    # Find metadata store
    metadata_store_path = project_dir / ".dvt" / "metadata_store.duckdb"

    if not metadata_store_path.exists():
        if HAS_RICH:
            console.print(Panel(
                "[yellow]No metadata store found.[/yellow]\n\n"
                "Run [bold cyan]dvt profile[/bold cyan] first to capture profiling data.",
                title="[bold red]Error[/bold red]",
                border_style="red",
            ))
        else:
            print("Error: No metadata store found.")
            print("Run 'dvt profile' first to capture profiling data.")
        return False

    # HTTPServer instantiates the handler itself with positional arguments,
    # so wrap the class in a factory that also injects the metadata store path.
    def handler(*args, **kwargs):
        return ProfileAPIHandler(*args, metadata_store_path=metadata_store_path, **kwargs)

    # Start server
    server = HTTPServer((host, port), handler)
    url = f"http://{host}:{port}"
    browser_timer = None

    try:
        if HAS_RICH:
            console.print()
            console.print(Panel(
                f"[bold green]Profile Viewer running at:[/bold green]\n\n"
                f"  [bold cyan]{url}[/bold cyan]\n\n"
                f"[dim]Press Ctrl+C to stop[/dim]",
                title="[bold magenta]🔍 DVT Profile Viewer[/bold magenta]",
                border_style="magenta",
                box=box.DOUBLE,
            ))
            console.print()
        else:
            print(f"\nDVT Profile Viewer running at: {url}")
            print("Press Ctrl+C to stop\n")

        # Open browser from a timer thread because serve_forever() below
        # blocks this thread. NOTE(review): the one-second delay is presumably
        # there to let the serving loop start first.
        if open_browser:
            browser_timer = threading.Timer(1.0, webbrowser.open, args=(url,))
            browser_timer.start()

        try:
            server.serve_forever()
        except KeyboardInterrupt:
            if HAS_RICH:
                console.print("\n[yellow]Server stopped.[/yellow]")
            else:
                print("\nServer stopped.")
    finally:
        # Do not open a browser tab for a server that is already gone
        # (cancel() is a no-op once the timer has fired).
        if browser_timer is not None:
            browser_timer.cancel()
        # Release the listening socket so the port can be reused immediately.
        server.server_close()

    return True
656
+
657
+
658
if __name__ == "__main__":
    # Manual test entry point: the optional first command-line argument is the
    # project directory; when it is absent the current directory is used.
    import sys

    project_dir = Path(sys.argv[1] if len(sys.argv) > 1 else ".")
    serve_profile_ui(project_dir)