dataform-dependency-visualizer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataform-dependency-visualizer
3
+ Version: 0.1.0
4
+ Summary: Visualize Dataform table dependencies as interactive SVG diagrams
5
+ Author-email: Thamo <thamo@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/thamo/dataform-dependency-visualizer
8
+ Project-URL: Repository, https://github.com/thamo/dataform-dependency-visualizer
9
+ Keywords: dataform,dependencies,visualization,bigquery,sql
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Topic :: Database
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Dynamic: license-file
25
+
26
+ # Dataform Dependency Visualizer
27
+
28
+ Generate beautiful, interactive SVG diagrams showing dependencies between Dataform tables.
29
+
30
+ ## Features
31
+
32
+ - 📊 **Individual table diagrams** - One SVG per table showing immediate dependencies
33
+ - 🎨 **Color-coded by type** - Tables, views, and operations visually distinct
34
+ - 🔍 **Master index viewer** - Browse all tables in a single interface
35
+ - 📁 **Schema organization** - Organized by schema with collapsible sections
36
+ - ⚡ **Pure Python SVGs** - No Graphviz required
37
+ - 🎯 **Orthogonal routing** - Clean, professional arrow paths
38
+ - 📝 **Smart text wrapping** - Long table names split across lines
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install dataform-dependency-visualizer
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ### 1. Generate dependency report
49
+
50
+ In your Dataform project directory:
51
+
52
+ ```bash
53
+ dataform compile --json > dependencies_report.txt
54
+ ```
55
+
56
+ ### 2. Generate SVG diagrams
57
+
58
+ ```bash
59
+ # Generate for specific schema
60
+ dataform-deps generate dashboard_wwim
61
+
62
+ # Generate for all schemas (excluding refined_*)
63
+ dataform-deps generate-all
64
+
65
+ # Generate master index
66
+ dataform-deps index
67
+ ```
68
+
69
+ ### 3. View diagrams
70
+
71
+ Open `output/dependencies_master_index.html` in your browser to browse all tables and their dependencies.
72
+
73
+ ## Usage
74
+
75
+ ### Command Line
76
+
77
+ **Generate diagrams for a specific schema:**
78
+
79
+ ```bash
80
+ dataform-deps generate SCHEMA_NAME
81
+ ```
82
+
83
+ Example:
84
+ ```bash
85
+ dataform-deps generate dashboard_wwim
86
+ ```
87
+
88
+ **Generate for all schemas:**
89
+
90
+ ```bash
91
+ dataform-deps generate-all
92
+ ```
93
+
94
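+ By default, schemas starting with `refined_` are excluded. To replace that default with your own exclusion patterns (schemas matching `refined_*` are then included unless you list them), pass `--exclude`:
95
+
96
+ ```bash
97
+ dataform-deps generate-all --exclude "staging_*"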
98
+ ```
99
+
100
+ **Generate master index:**
101
+
102
+ ```bash
103
+ dataform-deps index
104
+ ```
105
+
106
+ Creates `output/dependencies_master_index.html` with links to all tables.
107
+
108
+ **Check prerequisites:**
109
+
110
+ ```bash
111
+ dataform-deps check
112
+ ```
113
+
114
+ Verifies that:
115
+ - You're in a Dataform project directory
116
+ - `dependencies_report.txt` exists
117
+ - The report contains valid dependency data
118
+
119
+ ## Example Output
120
+
121
+ ### Individual Table Diagram
122
+
123
+ Each table gets its own SVG showing:
124
+ - **Dependencies (upstream)** - Tables this table depends on
125
+ - **Dependents (downstream)** - Tables that depend on this table
126
+ - **Color coding** - Tables (blue), views (green), operations (orange)
127
+
128
+ ### Master Index
129
+
130
+ The master index organizes all tables by schema with:
131
+ - Clickable table names that open their SVG
132
+ - Expandable/collapsible schemas
133
+ - Type badges (table/view/incremental)
134
+ - Search functionality
135
+
136
+ ## Requirements
137
+
138
+ - Python 3.8+
139
+ - Dataform project with compiled dependencies
140
+
141
+ ## How It Works
142
+
143
+ 1. Parse `dependencies_report.txt` generated by Dataform
144
+ 2. Extract table dependencies and metadata
145
+ 3. Generate SVG diagrams with orthogonal routing
146
+ 4. Create master index HTML for easy navigation
147
+
148
+ ## Project Structure
149
+
150
+ ```
151
+ output/
152
+ ├── dependencies_master_index.html # Main entry point
153
+ └── dependencies/
154
+ ├── schema1_table1.svg
155
+ ├── schema1_table2.svg
156
+ └── ...
157
+ ```
158
+
159
+ ## License
160
+
161
+ MIT License - see LICENSE file for details.
162
+
163
+ ## Contributing
164
+
165
+ Contributions welcome! Please open an issue or PR on GitHub.
166
+
167
+ ## Links
168
+
169
+ - [GitHub Repository](https://github.com/thamo/dataform-dependency-visualizer)
170
+ - [Issue Tracker](https://github.com/thamo/dataform-dependency-visualizer/issues)
@@ -0,0 +1,13 @@
1
+ dataform_dependency_visualizer-0.1.0.dist-info/licenses/LICENSE,sha256=ixWsdND9GXUi7Eb1toJqBGEpQox6S5UWDFlMUrgwTLY,1089
2
+ dataform_viz/__init__.py,sha256=tspWZ_DIMIQ7-SbhaHBnui7wmJgnDEn1q_OWHJcwolQ,379
3
+ dataform_viz/cli.py,sha256=BQ8UKFqpF21WyVGyJmmjqtLmNWxeDte-ysithtUeTHA,4577
4
+ dataform_viz/dataform_check.py,sha256=A40KXcbIpHQTB1kgiLHGMXMV878Pj3OLMEYGSbx9VUI,6684
5
+ dataform_viz/master_index.py,sha256=KT8VbjVVi5MJR0_l6a6Y4dlR46_-iCihS-Mixgd6XWk,10735
6
+ dataform_viz/parser.py,sha256=qLjif5TwKoNqXbXudPSIhNmADfjGrjVWZUK9ZpBbwBg,2133
7
+ dataform_viz/svg_generator.py,sha256=BlgzyEQ6iBUHXdrAFiE5JN_UAHwJd9Pg152T5-roNho,15816
8
+ dataform_viz/visualizer.py,sha256=n3DVfBOYebE66Nun8rdU9gVsqmoUAr-jk2JT9ygw5T4,4645
9
+ dataform_dependency_visualizer-0.1.0.dist-info/METADATA,sha256=_ErrVSh7LByFIZpAcg2um_c9r8ZZeCG7cacDyfuT8_A,4509
10
+ dataform_dependency_visualizer-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ dataform_dependency_visualizer-0.1.0.dist-info/entry_points.txt,sha256=M1Ue84hbvcL7duLuI9ieyX0d0hSXnzOpsiaskhCt-1Y,56
12
+ dataform_dependency_visualizer-0.1.0.dist-info/top_level.txt,sha256=EIiWhSvCyeLRkhNmH1r-NChI-pZ0NbECn4Ya0YsF91U,13
13
+ dataform_dependency_visualizer-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dataform-deps = dataform_viz.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 [Your Name]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ dataform_viz
@@ -0,0 +1,14 @@
1
"""
Dataform Dependency Visualizer

Generate interactive SVG diagrams showing table dependencies in Dataform projects.
"""

# Package metadata. The author fields previously held unfilled template
# placeholders; they now match the distribution METADATA (Author-email).
__version__ = "0.1.0"
__author__ = "Thamo"
__email__ = "thamo@example.com"

# Re-export the public API so it can be imported from the package root.
from .visualizer import DependencyVisualizer
from .parser import parse_dependencies_report

__all__ = ["DependencyVisualizer", "parse_dependencies_report"]
dataform_viz/cli.py ADDED
@@ -0,0 +1,159 @@
1
+ """
2
+ Command-line interface for dataform-dependency-visualizer
3
+ """
4
+ import sys
5
+ import argparse
6
+ from pathlib import Path
7
+ from .visualizer import DependencyVisualizer
8
+
9
+
10
def cmd_generate(args):
    """Generate SVG diagrams for a single schema.

    Args:
        args: Parsed CLI namespace providing ``report`` (path of the
            dependencies report), ``schema`` (schema to render) and
            ``output`` (output directory).

    Returns:
        int: ``0`` on success, ``1`` if any step raised (the message is
        printed to stderr).
    """
    try:
        # Construct the visualizer inside the ``try`` so a failure while
        # setting it up is reported like every other error instead of
        # escaping as a traceback.
        viz = DependencyVisualizer(args.report)
        count = viz.generate_schema_svgs(
            args.schema,
            output_dir=args.output
        )
        print(f"✓ Generated {count} SVG diagrams for {args.schema}")
        print(f"  Output: {args.output}/dependencies_{args.schema}/")
        return 0
    except Exception as e:
        # Top-level CLI boundary: turn any failure into exit status 1.
        print(f"✗ Error: {e}", file=sys.stderr)
        return 1
25
+
26
+
27
def cmd_generate_all(args):
    """Generate SVG diagrams for every schema in the report.

    Args:
        args: Parsed CLI namespace providing ``report``, ``output`` and
            ``exclude`` (list of schema patterns, or ``None``).

    Returns:
        int: ``0`` on success, ``1`` if any step raised (the message is
        printed to stderr).
    """
    try:
        # Construct the visualizer inside the ``try`` so a failure while
        # setting it up is reported like every other error instead of
        # escaping as a traceback.
        viz = DependencyVisualizer(args.report)
        results = viz.generate_all_schemas(
            output_dir=args.output,
            # No (or an empty) --exclude falls back to skipping refined_*.
            exclude_patterns=args.exclude or ['refined_*']
        )

        total = sum(results.values())
        print(f"\n✓ Generated {total} SVG diagrams across {len(results)} schemas:")
        for schema, count in sorted(results.items()):
            print(f"  - {schema}: {count} tables")
        print(f"\nOutput directory: {args.output}/")
        return 0
    except Exception as e:
        # Top-level CLI boundary: turn any failure into exit status 1.
        print(f"✗ Error: {e}", file=sys.stderr)
        return 1
46
+
47
+
48
def cmd_index(args):
    """Generate the master index page and optionally open it in a browser.

    Args:
        args: Parsed CLI namespace providing ``report``, ``output`` and the
            boolean ``open``.

    Returns:
        int: ``0`` on success, ``1`` if any step raised (the message is
        printed to stderr).
    """
    try:
        # Construct the visualizer inside the ``try`` so a failure while
        # setting it up is reported like every other error instead of
        # escaping as a traceback.
        viz = DependencyVisualizer(args.report)
        index_file = viz.generate_master_index(output_dir=args.output)
        print(f"✓ Master index created: {index_file}")

        if args.open:
            # ``start`` only exists in the Windows shell (and a list combined
            # with shell=True drops the path on POSIX). webbrowser picks the
            # platform's default browser everywhere.
            import webbrowser
            webbrowser.open(Path(index_file).resolve().as_uri())

        return 0
    except Exception as e:
        # Top-level CLI boundary: turn any failure into exit status 1.
        print(f"✗ Error: {e}", file=sys.stderr)
        return 1
64
+
65
+
66
def cmd_setup(args):
    """Run the full pipeline: prerequisite check, all schemas, master index.

    Args:
        args: Parsed CLI namespace providing ``report``, ``output`` and
            ``exclude``.

    Returns:
        int: ``0`` when every stage succeeds, ``1`` as soon as one fails.
    """
    try:
        from .dataform_check import check_prerequisites
    except ImportError as e:
        # Report a missing/broken checker like any other CLI failure rather
        # than letting the ImportError escape as a traceback.
        print(f"✗ Error: prerequisites check unavailable: {e}", file=sys.stderr)
        return 1

    print("=" * 60)
    print("DATAFORM DEPENDENCIES VISUALIZATION SETUP")
    print("=" * 60)

    if not check_prerequisites():
        print("\n⚠ Prerequisites check failed")
        return 1

    # Stage 1: render every schema, reusing the generate-all command.
    args_all = argparse.Namespace(
        report=args.report,
        output=args.output,
        exclude=args.exclude
    )
    if cmd_generate_all(args_all) != 0:
        return 1

    # Stage 2: build the master index and open it in the browser.
    args_idx = argparse.Namespace(
        report=args.report,
        output=args.output,
        open=True
    )
    return cmd_index(args_idx)
94
+
95
+
96
def main():
    """Main CLI entry point.

    Builds the ``dataform-deps`` argument parser, parses ``sys.argv`` and
    dispatches to the handler registered for the chosen sub-command.

    ``--report`` and ``--output`` are accepted both before and after the
    sub-command name.

    Returns:
        int: The handler's exit status, or ``1`` (after printing the help
        text) when no sub-command was given.
    """
    parser = argparse.ArgumentParser(
        prog='dataform-deps',
        description='Generate interactive SVG diagrams for Dataform table dependencies'
    )

    global_options = (
        (
            '--report',
            'dependencies_report.txt',
            'Path to dependencies report file (default: dependencies_report.txt)',
        ),
        (
            '--output',
            'output',
            'Output directory (default: output)',
        ),
    )

    # The same options are registered twice: on the top-level parser (which
    # owns the real defaults) and on a parent parser shared by every
    # sub-command. The sub-command copies default to SUPPRESS so they only
    # set a value when explicitly given and never overwrite one supplied
    # before the sub-command name.
    common = argparse.ArgumentParser(add_help=False)
    for flag, default, help_text in global_options:
        parser.add_argument(flag, default=default, help=help_text)
        common.add_argument(flag, default=argparse.SUPPRESS, help=help_text)

    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # Generate command
    gen_parser = subparsers.add_parser(
        'generate', parents=[common], help='Generate SVGs for a schema'
    )
    gen_parser.add_argument('schema', help='Schema name to generate')
    gen_parser.set_defaults(func=cmd_generate)

    # Generate-all command
    gen_all_parser = subparsers.add_parser(
        'generate-all', parents=[common], help='Generate SVGs for all schemas'
    )
    gen_all_parser.add_argument(
        '--exclude',
        nargs='+',
        help='Schema patterns to exclude (default: refined_*)'
    )
    gen_all_parser.set_defaults(func=cmd_generate_all)

    # Index command
    idx_parser = subparsers.add_parser(
        'index', parents=[common], help='Generate master index'
    )
    idx_parser.add_argument(
        '--open',
        action='store_true',
        help='Open index in browser'
    )
    idx_parser.set_defaults(func=cmd_index)

    # Setup command
    setup_parser = subparsers.add_parser(
        'setup', parents=[common], help='Full setup pipeline'
    )
    setup_parser.add_argument(
        '--exclude',
        nargs='+',
        help='Schema patterns to exclude (default: refined_*)'
    )
    setup_parser.set_defaults(func=cmd_setup)

    args = parser.parse_args()

    # Sub-commands are optional for argparse, so a bare invocation lands here.
    if not args.command:
        parser.print_help()
        return 1

    return args.func(args)
156
+
157
+
158
# Direct execution: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
@@ -0,0 +1,175 @@
1
+ import subprocess
2
+ import json
3
+ import sys
4
+ import os
5
+
6
def _find_dataform_command():
    """Return the shell command string used to invoke the Dataform CLI."""
    # Prefer the project-local binary. npm installs it as ``dataform.cmd`` on
    # Windows and as plain ``dataform`` elsewhere.
    bin_name = "dataform.cmd" if os.name == "nt" else "dataform"
    local_bin = os.path.join(os.getcwd(), "node_modules", ".bin", bin_name)
    if os.path.exists(local_bin):
        return f'"{local_bin}"'  # quoted because the command runs via the shell

    # Otherwise go through npx; if it is not on PATH, try the default
    # Windows install locations before giving up and using the bare name.
    from shutil import which
    npx_cmd = "npx"
    if which("npx") is None:
        for candidate in (
            r"C:\Program Files\nodejs\npx.cmd",
            r"C:\Program Files (x86)\nodejs\npx.cmd",
        ):
            if os.path.exists(candidate):
                npx_cmd = f'"{candidate}"'
                break
    return f"{npx_cmd} dataform"


def _extract_compiled_graph(output):
    """Return the first JSON object in *output* that has a "tables" key, or None."""
    decoder = json.JSONDecoder()
    pos = output.find('{')
    while pos != -1:
        try:
            # raw_decode stops at the end of the object, so log lines that
            # follow the compilation result do not break parsing.
            obj, end = decoder.raw_decode(output, pos)
        except json.JSONDecodeError:
            pos = output.find('{', pos + 1)
            continue
        if isinstance(obj, dict) and "tables" in obj:
            return obj
        # A complete object without "tables" (e.g. a log record): skip it.
        pos = output.find('{', end)
    return None


def get_dataform_graph(compile_output=None):
    """Return the compiled Dataform graph as a dict, or None on failure.

    Args:
        compile_output: Optional text previously captured from
            ``dataform compile --json``. When omitted, the command is run in
            the current working directory and its stdout is used.

    Returns:
        dict | None: The decoded JSON object that contains the ``"tables"``
        key, or ``None`` if the command failed, produced no output, or no
        such object could be decoded. A diagnostic is printed in each
        failure case.
    """
    if compile_output is None:
        print("Compiling Dataform graph (this may take a moment)...")
        cmd = f"{_find_dataform_command()} compile --json"
        try:
            # shell=True is often required on Windows to find npx.cmd
            result = subprocess.run(cmd,
                                    capture_output=True,
                                    text=True,
                                    encoding='utf-8',
                                    errors='ignore',
                                    shell=True,
                                    check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error running dataform: {e.stderr}")
            return None
        except Exception as e:
            print(f"Error processing dataform output: {e}")
            return None
        compile_output = result.stdout

    if not compile_output:
        print("No output from dataform compile")
        return None

    # Dataform may print log records ({"level":"INFO",...}) before the
    # compilation result, so the result has to be located inside the text.
    if '"tables"' not in compile_output:
        print("Could not find 'tables' key in dataform output")
        return None

    graph = _extract_compiled_graph(compile_output)
    if graph is None:
        print("Error decoding dataform output: no JSON object with a 'tables' key could be parsed")
        print("First 500 chars of attempted parse:", compile_output[:500])
        return None
    return graph
82
+
83
def normalize_name(target):
    """Return a consistent ``schema.name`` string for a Dataform target.

    Args:
        target: Either a target mapping with optional ``schema`` and ``name``
            keys, or an already-resolved name string (the plain-string
            dependency form), or a falsy value.

    Returns:
        str: ``"UNKNOWN"`` for a falsy target, the string itself for a string
        target, otherwise ``"<schema>.<name>"`` with missing parts rendered
        as empty strings.
    """
    if not target:
        return "UNKNOWN"
    if isinstance(target, str):
        # Already a resolved name; calling .get() on it would raise.
        return target
    return f"{target.get('schema', '')}.{target.get('name', '')}"
89
+
90
def _dependency_names(table):
    """Return the dependency names of one compiled table entry as strings."""
    # Prefer 'dependencyTargets' (target objects); fall back to 'dependencies',
    # which may already be plain name strings. Either key may be absent or null.
    deps = table.get("dependencyTargets") or table.get("dependencies") or []
    return [d if isinstance(d, str) else normalize_name(d) for d in deps]


def main():
    """Print dependencies and dependents for the tables of the compiled graph.

    Obtains the graph from ``get_dataform_graph()``, indexes every entry of
    its ``"tables"`` list by ``schema.name``, derives the reverse (dependent)
    edges, and prints one section per table. If a command-line argument is
    given, only tables whose name contains it (case-insensitively) are shown.

    Returns:
        None. Returns early, printing nothing further, when no graph could
        be obtained.
    """
    graph = get_dataform_graph()
    if not graph:
        return

    tables = graph.get("tables") or []

    # First pass: index all tables by their full name.
    table_lookup = {}
    for t in tables:
        full_name = normalize_name(t.get("target", {}))  # e.g. "dataset.table"
        table_lookup[full_name] = {
            "type": t.get("type"),
            "dependencies": _dependency_names(t),
            "dependents": [],
        }

    # Second pass: record each table as a dependent of every dependency that
    # is itself a known table.
    for t in tables:
        this_full_name = normalize_name(t.get("target", {}))
        for dep_full_name in _dependency_names(t):
            if dep_full_name in table_lookup:
                table_lookup[dep_full_name]["dependents"].append(this_full_name)

    # Optional case-insensitive substring filter from the command line.
    search_term = ""
    if len(sys.argv) > 1:
        search_term = sys.argv[1].lower()

    found_count = 0
    print("\n--- Dataform Dependency Analysis ---")

    for name, info in table_lookup.items():
        if search_term and search_term not in name.lower():
            continue
        found_count += 1
        print(f"\nTable: {name} ({info['type']})")

        deps = info['dependencies']
        print(f"  Dependencies ({len(deps)}):")
        for d in deps:
            print(f"    <- {d}")

        depts = info['dependents']
        print(f"  Dependents ({len(depts)}):")
        for d in depts:
            print(f"    -> {d}")

    if found_count == 0 and search_term:
        print(f"No tables found matching '{search_term}'")
    elif not search_term:
        print("\nTip: Pass a table name as an argument to filter results.")
        print("Example: python utility_check_dependencies.py my_table_name")
170
+
171
# Script entry point: make sure the output directory exists, then run.
if __name__ == "__main__":
    os.makedirs('output', exist_ok=True)
    main()