dbt_cube_sync-0.1.0a1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_cube_sync/__init__.py +5 -0
- dbt_cube_sync/cli.py +135 -0
- dbt_cube_sync/config.py +121 -0
- dbt_cube_sync/connectors/__init__.py +1 -0
- dbt_cube_sync/connectors/base.py +95 -0
- dbt_cube_sync/connectors/powerbi.py +34 -0
- dbt_cube_sync/connectors/superset.py +556 -0
- dbt_cube_sync/connectors/tableau.py +34 -0
- dbt_cube_sync/core/__init__.py +1 -0
- dbt_cube_sync/core/cube_generator.py +188 -0
- dbt_cube_sync/core/dbt_parser.py +178 -0
- dbt_cube_sync/core/models.py +66 -0
- dbt_cube_sync-0.1.0a1.dist-info/METADATA +230 -0
- dbt_cube_sync-0.1.0a1.dist-info/RECORD +16 -0
- dbt_cube_sync-0.1.0a1.dist-info/WHEEL +4 -0
- dbt_cube_sync-0.1.0a1.dist-info/entry_points.txt +3 -0
dbt_cube_sync/core/cube_generator.py
@@ -0,0 +1,188 @@
+"""
+Cube.js schema generator - creates Cube.js files from dbt models
+"""
+import os
+import re
+from pathlib import Path
+from typing import List, Dict, Any
+from jinja2 import Environment, FileSystemLoader, Template
+
+from .models import DbtModel, CubeSchema, CubeDimension, CubeMeasure
+from .dbt_parser import DbtParser
+
+
+class CubeGenerator:
+    """Generates Cube.js schema files from dbt models"""
+
+    def __init__(self, template_dir: str, output_dir: str):
+        """
+        Initialize the generator
+
+        Args:
+            template_dir: Directory containing Jinja2 templates
+            output_dir: Directory to write generated Cube.js files
+        """
+        self.template_dir = Path(template_dir)
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Initialize Jinja2 environment
+        self.env = Environment(loader=FileSystemLoader(str(self.template_dir)))
+
+    def generate_cube_files(self, models: List[DbtModel]) -> List[str]:
+        """
+        Generate Cube.js files for all models
+
+        Args:
+            models: List of DbtModel instances
+
+        Returns:
+            List of generated file paths
+        """
+        generated_files = []
+
+        for model in models:
+            try:
+                cube_schema = self._convert_model_to_cube(model)
+                file_path = self._write_cube_file(cube_schema)
+                generated_files.append(str(file_path))
+                print(f"✓ Generated: {file_path.name}")
+            except Exception as e:
+                print(f"✗ Error generating cube for {model.name}: {str(e)}")
+
+        return generated_files
+
+    def _convert_model_to_cube(self, model: DbtModel) -> CubeSchema:
+        """Convert a dbt model to a Cube.js schema"""
+
+        # Generate cube name (PascalCase)
+        cube_name = self._to_pascal_case(model.name)
+
+        # Generate SQL reference
+        sql = f"SELECT * FROM {model.schema_name}.{model.name}"
+
+        # Convert columns to dimensions
+        dimensions = []
+        for col_name, col_data in model.columns.items():
+            cube_type = DbtParser.map_data_type_to_cube_type(col_data.data_type or '')
+
+            dimension = CubeDimension(
+                name=col_name,
+                sql=col_name,
+                type=cube_type,
+                title=col_data.description or col_name.replace('_', ' ').title(),
+                description=col_data.description or col_name.replace('_', ' ').title()
+            )
+            dimensions.append(dimension)
+
+        # Convert explicitly defined metrics to measures
+        measures = []
+        for metric_name, metric_data in model.metrics.items():
+            cube_type = DbtParser.map_dbt_type_to_cube_type(metric_data.type)
+
+            # Generate SQL expression
+            if metric_data.sql:
+                sql_expr = metric_data.sql
+            elif metric_data.type == 'count':
+                sql_expr = "*"
+            else:
+                # Default to the metric name if no SQL provided
+                sql_expr = metric_name
+
+            measure = CubeMeasure(
+                name=metric_name,
+                type=cube_type,
+                sql=sql_expr,
+                title=metric_data.title or metric_name.replace('_', ' ').title(),
+                description=metric_data.description or metric_name.replace('_', ' ').title()
+            )
+            measures.append(measure)
+
+        return CubeSchema(
+            cube_name=cube_name,
+            sql=sql,
+            dimensions=dimensions,
+            measures=measures
+        )
+
+    def _write_cube_file(self, cube_schema: CubeSchema) -> Path:
+        """Write a Cube.js schema to file"""
+
+        # Try to use template if available
+        template_path = self.template_dir / 'cube_template.js'
+        if template_path.exists():
+            template = self.env.get_template('cube_template.js')
+            content = template.render(
+                cube_name=cube_schema.cube_name,
+                sql=cube_schema.sql,
+                dimensions=cube_schema.dimensions,
+                measures=cube_schema.measures
+            )
+        else:
+            # Fallback to hardcoded template
+            content = self._generate_cube_content(cube_schema)
+
+        # Write to file
+        file_path = self.output_dir / f"{cube_schema.cube_name}.js"
+        with open(file_path, 'w') as f:
+            f.write(content)
+
+        return file_path
+
+    def _generate_cube_content(self, cube_schema: CubeSchema) -> str:
+        """Generate Cube.js content using hardcoded template"""
+
+        # Generate dimensions
+        dimensions_content = []
+        for dim in cube_schema.dimensions:
+            dim_content = f"""    {dim.name}: {{
+      sql: `{dim.sql}`,
+      type: `{dim.type}`,
+      title: '{dim.title}'
+    }}"""
+            dimensions_content.append(dim_content)
+
+        # Generate measures
+        measures_content = []
+        for measure in cube_schema.measures:
+            measure_content = f"""    {measure.name}: {{
+      type: `{measure.type}`,
+      sql: `{measure.sql}`,
+      title: '{measure.title}'
+    }}"""
+            measures_content.append(measure_content)
+
+        # Combine into full cube definition
+        dimensions_joined = ',\n\n'.join(dimensions_content)
+        measures_joined = ',\n\n'.join(measures_content)
+
+        content = f"""cube(`{cube_schema.cube_name}`, {{
+  sql: `{cube_schema.sql}`,
+
+  dimensions: {{
+{dimensions_joined}
+  }},
+
+  measures: {{
+{measures_joined}
+  }}
+}});
+"""
+
+        return content
+
+    @staticmethod
+    def _to_pascal_case(text: str) -> str:
+        """Convert text to PascalCase"""
+        # Remove non-alphanumeric characters and split
+        words = re.sub(r'[^a-zA-Z0-9]', ' ', text).split()
+        # Capitalize first letter of each word and join
+        return ''.join(word.capitalize() for word in words if word)
+
+    @staticmethod
+    def _to_snake_case(text: str) -> str:
+        """Convert text to snake_case"""
+        # Replace non-alphanumeric with underscores and lowercase
+        result = re.sub(r'[^a-zA-Z0-9]', '_', text).lower()
+        # Remove multiple underscores
+        return re.sub(r'_+', '_', result).strip('_')
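
Read together with `DbtParser` (next hunk), `CubeGenerator` is one half of a two-step pipeline: parse the manifest, then render one `.js` file per model. Below is a minimal sketch of driving the two classes directly, assuming a compiled dbt project; the paths are illustrative, and the real wiring lives in `dbt_cube_sync/cli.py`, which is not shown in this diff:

```python
# Minimal sketch: manifest -> Cube.js files (paths are illustrative).
from dbt_cube_sync.core.dbt_parser import DbtParser
from dbt_cube_sync.core.cube_generator import CubeGenerator

parser = DbtParser(
    manifest_path="target/manifest.json",
    catalog_path="target/catalog.json",  # optional; supplies column data types
)
models = parser.parse_models()  # keeps only models with both columns and metrics

generator = CubeGenerator(template_dir="templates", output_dir="cube_output")
generated = generator.generate_cube_files(models)
# e.g. a model named fct_enrollments lands in cube_output/FctEnrollments.js
print(f"Generated {len(generated)} cube file(s)")
```

If `templates/cube_template.js` does not exist, `_write_cube_file` falls back to the hardcoded layout assembled in `_generate_cube_content`.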
dbt_cube_sync/core/dbt_parser.py
@@ -0,0 +1,178 @@
+"""
+dbt manifest parser - extracts models, metrics, and column information
+"""
+import json
+import os
+from typing import Dict, List
+from pathlib import Path
+
+from .models import DbtModel, DbtColumn, DbtMetric
+
+
+class DbtParser:
+    """Parses dbt manifest.json to extract model and metric information"""
+
+    def __init__(self, manifest_path: str, catalog_path: str = None):
+        """
+        Initialize the parser
+
+        Args:
+            manifest_path: Path to dbt manifest.json file
+            catalog_path: Optional path to dbt catalog.json for column types
+        """
+        self.manifest_path = manifest_path
+        self.catalog_path = catalog_path
+        self.manifest = self._load_manifest()
+        self.catalog = self._load_catalog() if catalog_path else None
+
+    def _load_manifest(self) -> dict:
+        """Load the dbt manifest.json file"""
+        if not os.path.exists(self.manifest_path):
+            raise FileNotFoundError(f"Manifest file not found: {self.manifest_path}")
+
+        with open(self.manifest_path, 'r') as f:
+            return json.load(f)
+
+    def _load_catalog(self) -> dict:
+        """Load the dbt catalog.json file if available"""
+        if not self.catalog_path or not os.path.exists(self.catalog_path):
+            return None
+
+        try:
+            with open(self.catalog_path, 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            print(f"Warning: Could not load catalog file {self.catalog_path}: {e}")
+            return None
+
+    def parse_models(self) -> List[DbtModel]:
+        """
+        Extract models with metrics and columns from manifest
+
+        Returns:
+            List of DbtModel instances
+        """
+        models = []
+        nodes = self.manifest.get('nodes', {})
+
+        for node_id, node_data in nodes.items():
+            # Only process models
+            if node_data.get('resource_type') != 'model':
+                continue
+
+            model = self._parse_model(node_id, node_data)
+            if model and model.columns and model.metrics:  # Only include models with BOTH columns AND metrics
+                models.append(model)
+
+        return models
+
+    def _parse_model(self, node_id: str, node_data: dict) -> DbtModel:
+        """Parse a single model from the manifest"""
+        model_name = node_data.get('name', '')
+        model_schema = node_data.get('schema', '')
+        model_database = node_data.get('database', '')
+
+        # Parse columns
+        columns = self._parse_columns(node_id, node_data)
+
+        # Parse metrics from config.meta.metrics
+        metrics = self._parse_metrics(node_data)
+
+        return DbtModel(
+            name=model_name,
+            database=model_database,
+            schema_name=model_schema,
+            node_id=node_id,
+            columns=columns,
+            metrics=metrics
+        )
+
+    def _parse_columns(self, node_id: str, node_data: dict) -> Dict[str, DbtColumn]:
+        """Parse columns for a model, enhanced with catalog data if available"""
+        columns = {}
+        manifest_columns = node_data.get('columns', {})
+
+        # Get catalog columns for type information
+        catalog_columns = {}
+        if self.catalog and node_id in self.catalog.get('nodes', {}):
+            catalog_columns = self.catalog['nodes'][node_id].get('columns', {})
+
+        # If manifest has columns, use them with catalog type info
+        if manifest_columns:
+            for col_name, col_data in manifest_columns.items():
+                data_type = None
+
+                # Try to get data type from catalog
+                if col_name in catalog_columns:
+                    data_type = catalog_columns[col_name].get('type', '')
+
+                columns[col_name] = DbtColumn(
+                    name=col_name,
+                    data_type=data_type,
+                    description=col_data.get('description'),
+                    meta=col_data.get('meta', {})
+                )
+        else:
+            # If no manifest columns, use all catalog columns
+            for col_name, col_data in catalog_columns.items():
+                columns[col_name] = DbtColumn(
+                    name=col_name,
+                    data_type=col_data.get('type', ''),
+                    description=f"Column from catalog: {col_name}",
+                    meta={}
+                )
+
+        return columns
+
+    def _parse_metrics(self, node_data: dict) -> Dict[str, DbtMetric]:
+        """Parse metrics from model configuration"""
+        metrics = {}
+
+        # Look for metrics in config.meta.metrics
+        config = node_data.get('config', {})
+        meta = config.get('meta', {})
+        metrics_data = meta.get('metrics', {})
+
+        for metric_name, metric_config in metrics_data.items():
+            if isinstance(metric_config, dict):
+                metrics[metric_name] = DbtMetric(
+                    name=metric_name,
+                    type=metric_config.get('type', 'sum'),
+                    sql=metric_config.get('sql'),
+                    title=metric_config.get('title', metric_name.replace('_', ' ').title()),
+                    description=metric_config.get('description', metric_name.replace('_', ' ').title())
+                )
+
+        return metrics
+
+    @staticmethod
+    def map_dbt_type_to_cube_type(dbt_type: str) -> str:
+        """Map dbt metric types to Cube.js measure types"""
+        type_mapping = {
+            'sum': 'sum',
+            'average': 'avg',
+            'avg': 'avg',
+            'count': 'count',
+            'count_distinct': 'countDistinct',
+            'min': 'min',
+            'max': 'max',
+            'number': 'number',
+        }
+        return type_mapping.get(dbt_type.lower(), 'sum')
+
+    @staticmethod
+    def map_data_type_to_cube_type(data_type: str) -> str:
+        """Map SQL/dbt data types to Cube.js dimension types"""
+        if not data_type:
+            return 'string'
+
+        data_type = data_type.lower()
+
+        if any(t in data_type for t in ['int', 'bigint', 'decimal', 'numeric', 'float', 'double']):
+            return 'number'
+        elif any(t in data_type for t in ['timestamp', 'datetime', 'date']):
+            return 'time'
+        elif any(t in data_type for t in ['bool']):
+            return 'boolean'
+        else:
+            return 'string'
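
The parser reads metrics from the `config.meta.metrics` block of each model node, and the two type mappers are static methods with explicit fallbacks (`'sum'` for measures, `'string'` for dimensions). A small sketch of both behaviours; the node fragment is hypothetical, shaped after what `_parse_metrics` reads, and `__new__` is used only to call the helper without loading a manifest from disk:

```python
from dbt_cube_sync.core.dbt_parser import DbtParser

# Hypothetical manifest node fragment, shaped after what _parse_metrics reads:
# node_data["config"]["meta"]["metrics"] maps metric name -> {type, sql, ...}.
node_data = {
    "resource_type": "model",
    "config": {
        "meta": {
            "metrics": {
                "total_enrollments": {"type": "count"},
                "avg_gpa": {"type": "average", "sql": "gpa"},
            }
        }
    },
}

parser = DbtParser.__new__(DbtParser)  # skip __init__: no manifest file needed here
metrics = parser._parse_metrics(node_data)
assert metrics["avg_gpa"].sql == "gpa"
assert metrics["avg_gpa"].type == "average"  # raw dbt type; mapped to 'avg' later

# Static mappers and their fallbacks:
assert DbtParser.map_dbt_type_to_cube_type("average") == "avg"
assert DbtParser.map_dbt_type_to_cube_type("median") == "sum"       # unknown -> 'sum'
assert DbtParser.map_data_type_to_cube_type("timestamp(6)") == "time"
assert DbtParser.map_data_type_to_cube_type("") == "string"         # empty -> 'string'
```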
dbt_cube_sync/core/models.py
@@ -0,0 +1,66 @@
+"""
+Pydantic models for data structures
+"""
+from typing import Dict, List, Optional, Any
+from pydantic import BaseModel
+
+
+class DbtColumn(BaseModel):
+    """Represents a dbt model column"""
+    name: str
+    data_type: Optional[str] = None
+    description: Optional[str] = None
+    meta: Optional[Dict[str, Any]] = None
+
+
+class DbtMetric(BaseModel):
+    """Represents a dbt metric"""
+    name: str
+    type: str
+    sql: Optional[str] = None
+    title: Optional[str] = None
+    description: Optional[str] = None
+
+
+class DbtModel(BaseModel):
+    """Represents a parsed dbt model"""
+    name: str
+    database: str
+    schema_name: str  # Renamed to avoid shadowing BaseModel.schema
+    node_id: str
+    columns: Dict[str, DbtColumn]
+    metrics: Dict[str, DbtMetric]
+
+
+class CubeDimension(BaseModel):
+    """Represents a Cube.js dimension"""
+    name: str
+    sql: str
+    type: str
+    title: Optional[str] = None
+    description: Optional[str] = None
+
+
+class CubeMeasure(BaseModel):
+    """Represents a Cube.js measure"""
+    name: str
+    type: str
+    sql: Optional[str] = None
+    title: Optional[str] = None
+    description: Optional[str] = None
+
+
+class CubeSchema(BaseModel):
+    """Represents a complete Cube.js schema"""
+    cube_name: str
+    sql: str
+    dimensions: List[CubeDimension]
+    measures: List[CubeMeasure]
+
+
+class SyncResult(BaseModel):
+    """Represents the result of a sync operation"""
+    file_or_dataset: str
+    status: str  # 'success' or 'failed'
+    message: Optional[str] = None
+    error: Optional[str] = None
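
These are plain pydantic models (the METADATA below pins `pydantic >=2.5,<3`, i.e. v2), so construction validates field types and `model_dump()` serializes. A minimal sketch with hypothetical values; note the deliberate `schema_name` field, since `schema` would shadow an attribute on pydantic's `BaseModel`:

```python
from dbt_cube_sync.core.models import DbtColumn, DbtMetric, DbtModel

model = DbtModel(
    name="fct_enrollments",   # hypothetical model
    database="warehouse",
    schema_name="analytics",  # not `schema` -- see the comment in the class above
    node_id="model.example.fct_enrollments",
    columns={"student_id": DbtColumn(name="student_id", data_type="integer")},
    metrics={"total_enrollments": DbtMetric(name="total_enrollments", type="count")},
)
assert model.model_dump()["schema_name"] == "analytics"  # pydantic v2 API
```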
dbt_cube_sync-0.1.0a1.dist-info/METADATA
@@ -0,0 +1,230 @@
+Metadata-Version: 2.4
+Name: dbt-cube-sync
+Version: 0.1.0a1
+Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
+Author: Ponder
+Requires-Python: >=3.9,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Requires-Dist: click (>=8.1.7,<9.0.0)
+Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
+Requires-Dist: pydantic (>=2.5.0,<3.0.0)
+Requires-Dist: pyyaml (>=6.0,<7.0)
+Requires-Dist: requests (>=2.31.0,<3.0.0)
+Description-Content-Type: text/markdown
+
+# dbt-cube-sync
+
+A powerful synchronization tool that creates a seamless pipeline from dbt models to Cube.js schemas and BI tools (Superset, Tableau, PowerBI).
+
+## Features
+
+- 🔄 **dbt → Cube.js**: Auto-generate Cube.js schemas from dbt models with metrics
+- 📊 **Cube.js → BI Tools**: Sync schemas to multiple BI platforms
+- 🏗️ **Extensible Architecture**: Plugin-based connector system for easy BI tool integration
+- 🐳 **Docker Support**: Containerized execution with orchestration support
+- 🎯 **CLI Interface**: Simple command-line tools for automation
+
+## Supported BI Tools
+
+- ✅ **Apache Superset** - Full implementation
+- 🚧 **Tableau** - Placeholder (coming soon)
+- 🚧 **PowerBI** - Placeholder (coming soon)
+
+## Installation
+
+### Using Poetry (Development)
+
+```bash
+cd dbt-cube-sync
+poetry install
+poetry run dbt-cube-sync --help
+```
+
+### Using Docker
+
+```bash
+docker build -t dbt-cube-sync .
+docker run --rm dbt-cube-sync --help
+```
+
+## Quick Start
+
+### 1. Create Configuration File
+
+```bash
+# Create sample config
+dbt-cube-sync create-config sync-config.yaml
+
+# Edit the config file with your BI tool credentials
+```
+
+### 2. Generate Cube.js Schemas
+
+```bash
+# Generate from dbt manifest
+dbt-cube-sync generate-cubes \
+  --dbt-manifest ./DbtEducationalDataProject/target/manifest.json \
+  --output-dir ./cube/conf/cube_output
+```
+
+### 3. Sync to BI Tool
+
+```bash
+# Sync to Superset
+dbt-cube-sync sync-bi superset \
+  --cube-dir ./cube/conf/cube_output \
+  --config-file ./sync-config.yaml
+```
+
+### 4. Full Pipeline
+
+```bash
+# Complete dbt → Cube.js → Superset pipeline
+dbt-cube-sync full-sync \
+  --dbt-manifest ./DbtEducationalDataProject/target/manifest.json \
+  --cube-dir ./cube/conf/cube_output \
+  --bi-connector superset \
+  --config-file ./sync-config.yaml
+```
+
+## Configuration
+
+### Sample Configuration (`sync-config.yaml`)
+
+```yaml
+connectors:
+  superset:
+    type: superset
+    url: http://localhost:8088
+    username: admin
+    password: admin
+    database_name: Cube
+
+  tableau:
+    type: tableau
+    url: https://your-tableau-server.com
+    username: your-username
+    password: your-password
+
+  powerbi:
+    type: powerbi
+    # PowerBI specific configuration
+```
+
+## CLI Commands
+
+### `generate-cubes`
+Generate Cube.js schema files from dbt models.
+
+**Options:**
+- `--dbt-manifest` / `-m`: Path to dbt manifest.json file
+- `--output-dir` / `-o`: Output directory for Cube.js files
+- `--template-dir` / `-t`: Directory containing Cube.js templates
+
+### `sync-bi`
+Sync Cube.js schemas to BI tool datasets.
+
+**Arguments:**
+- `connector`: BI tool type (`superset`, `tableau`, `powerbi`)
+
+**Options:**
+- `--cube-dir` / `-c`: Directory containing Cube.js files
+- `--config-file` / `-f`: Configuration file for BI tool connection
+
+### `full-sync`
+Complete pipeline: dbt models → Cube.js schemas → BI tool datasets.
+
+**Options:**
+- `--dbt-manifest` / `-m`: Path to dbt manifest.json file
+- `--cube-dir` / `-c`: Directory for Cube.js files
+- `--template-dir` / `-t`: Directory containing Cube.js templates
+- `--bi-connector` / `-b`: BI tool to sync to
+- `--config-file` / `-f`: Configuration file for BI tool connection
+
+## Architecture
+
+```
+dbt models (with metrics)
+    ↓
+dbt-cube-sync generate-cubes
+    ↓
+Cube.js schemas
+    ↓
+dbt-cube-sync sync-bi [connector]
+    ↓
+BI Tool Datasets (Superset/Tableau/PowerBI)
+```
+
+### Project Structure
+
+```
+dbt-cube-sync/
+├── dbt_cube_sync/
+│   ├── cli.py                # CLI interface
+│   ├── config.py             # Configuration management
+│   ├── core/
+│   │   ├── dbt_parser.py     # dbt manifest parser
+│   │   ├── cube_generator.py # Cube.js generator
+│   │   └── models.py         # Pydantic data models
+│   └── connectors/
+│       ├── base.py           # Abstract base connector
+│       ├── superset.py       # Superset implementation
+│       ├── tableau.py        # Tableau placeholder
+│       └── powerbi.py        # PowerBI placeholder
+├── Dockerfile                # Container definition
+├── pyproject.toml            # Poetry configuration
+└── README.md
+```
+
+## Adding New BI Connectors
+
+1. Create a new connector class inheriting from `BaseConnector`
+2. Implement the required abstract methods
+3. Register the connector using `ConnectorRegistry.register()`
+
+Example:
+```python
+from .base import BaseConnector, ConnectorRegistry
+
+class MyBIConnector(BaseConnector):
+    def _validate_config(self):
+        # Validation logic
+        pass
+
+    def connect(self):
+        # Connection logic
+        pass
+
+    def sync_cube_schemas(self, cube_dir):
+        # Sync implementation
+        pass
+
+# Register the connector
+ConnectorRegistry.register('mybi', MyBIConnector)
+```
+
+## Docker Integration
+
+The tool is designed to work in containerized environments with proper dependency orchestration:
+
+1. **dbt docs**: Runs `dbt build` then serves documentation
+2. **dbt-cube-sync**: Runs sync pipeline after dbt and Cube.js are ready
+3. **BI Tools**: Receive synced datasets after sync completes
+
+## Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Implement your changes
+4. Add tests if applicable
+5. Submit a pull request
+
+## License
+
+MIT License - see LICENSE file for details.
dbt_cube_sync-0.1.0a1.dist-info/RECORD
@@ -0,0 +1,16 @@
+dbt_cube_sync/__init__.py,sha256=aifkfgUDRPL5v0LZzceH2LXu66YDkJjdpvKwXsdikbI,113
+dbt_cube_sync/cli.py,sha256=hcZzKqmSFEvEXoDWbbluna5mqdWVC6Cej6imWob0ml4,4983
+dbt_cube_sync/config.py,sha256=qhGE7CxTmh0RhPizgd3x3Yj-3L2LoC00UQIDT0q9FlQ,3858
+dbt_cube_sync/connectors/__init__.py,sha256=NG6tYZ3CYD5bG_MfNLZrUM8YoBEKArG8-AOmJ8pwvQI,52
+dbt_cube_sync/connectors/base.py,sha256=JLzerxJdt34z0kWuyieL6UQhf5_dUYPGmwkiRWBuSPY,2802
+dbt_cube_sync/connectors/powerbi.py,sha256=2Y8fTfh_6Q_Myma1ymipPh1U3HsfQKcktVequXXnIXI,1275
+dbt_cube_sync/connectors/superset.py,sha256=7-tWo5yBK-kF9mRH9jLJXJtfjVkM44LMKn7qreWy9GM,20846
+dbt_cube_sync/connectors/tableau.py,sha256=jKve1zErzTbgPOtmPB92ZwZl4I6uEySedM51JiwlGrE,1261
+dbt_cube_sync/core/__init__.py,sha256=kgsawtU5dqEvnHz6dU8qwJbH3rtIV7QlK2MhtYVDCaY,46
+dbt_cube_sync/core/cube_generator.py,sha256=JtFEqlr9_ShS0e9LxjE0I1HlU13x7MX5sQ3O1G4r9ns,6444
+dbt_cube_sync/core/dbt_parser.py,sha256=Woed3RCuluVFaRZgjG1m0m-v2c4_Zb721lQq5hLKXqM,6521
+dbt_cube_sync/core/models.py,sha256=PvfE4A_1K_fSutO_KdzseBuRp-ZQtyKCW3Qonzu0NuU,1543
+dbt_cube_sync-0.1.0a1.dist-info/METADATA,sha256=Aou7v6ErpetsOzTHcRK_4R5yFs0lX-Jc260_z3WfJ7A,6116
+dbt_cube_sync-0.1.0a1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+dbt_cube_sync-0.1.0a1.dist-info/entry_points.txt,sha256=iEAB_nZ1AoSeFwSHPY2tr02xmTHLVFKp5CJeFh0AfCw,56
+dbt_cube_sync-0.1.0a1.dist-info/RECORD,,
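
Each row above follows the wheel RECORD format from the packaging specs: `path,sha256=<digest>,<size>`, where the digest is the urlsafe base64 encoding of the SHA-256 hash with `=` padding stripped (the RECORD entry itself carries no hash). A sketch of recomputing one entry from an unpacked wheel; the path is taken from the listing above:

```python
import base64
import hashlib
from pathlib import Path

def record_digest(path: Path) -> str:
    """SHA-256 of the file, urlsafe-base64 encoded with '=' padding stripped."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# Run from the root of the unpacked wheel:
print(record_digest(Path("dbt_cube_sync/core/models.py")))
# should print: sha256=PvfE4A_1K_fSutO_KdzseBuRp-ZQtyKCW3Qonzu0NuU
```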