dbt-cube-sync 0.1.0a3__tar.gz → 0.1.0a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dbt-cube-sync might be problematic.
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/PKG-INFO +1 -1
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/connectors/superset.py +41 -23
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/core/cube_generator.py +104 -5
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/core/dbt_parser.py +42 -3
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/core/models.py +40 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/pyproject.toml +1 -1
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/README.md +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/cli.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/config.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/connectors/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/connectors/base.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/connectors/powerbi.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/connectors/tableau.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a5}/dbt_cube_sync/core/__init__.py +0 -0
dbt_cube_sync/connectors/superset.py

@@ -337,12 +337,26 @@ class SupersetConnector(BaseConnector):
             'count_distinct': 'COUNT(DISTINCT'
         }
 
+        # Remove Cube.js ${} syntax and convert to plain SQL column references
+        cleaned_expression = self._clean_cube_expression(sql_expression)
+
         agg_func = agg_mapping.get(agg_type, 'SUM')
 
         if agg_type == 'count_distinct':
-            return f"{agg_func} {sql_expression})"
+            return f"{agg_func} {cleaned_expression})"
         else:
-            return f"{agg_func}({sql_expression})"
+            return f"{agg_func}({cleaned_expression})"
+
+    def _clean_cube_expression(self, expression: str) -> str:
+        """Convert Cube.js expressions to SQL column references for Superset"""
+        import re
+
+        # Remove ${} syntax - convert ${column_name} to column_name
+        cleaned = re.sub(r'\$\{([^}]+)\}', r'\1', expression)
+
+        # Handle more complex expressions like arithmetic
+        # Keep parentheses and operators but clean column references
+        return cleaned
 
     def _create_or_update_dataset(self, schema_info: Dict[str, Any]) -> int:
         """Create a new dataset or update existing one"""
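The cleanup regex above converts ${column} references into bare column names. A minimal standalone sketch (the regex is taken from the diff; the function name and sample inputs are illustrative only):

    import re

    def clean_cube_expression(expression: str) -> str:
        # ${column_name} -> column_name, mirroring _clean_cube_expression
        return re.sub(r'\$\{([^}]+)\}', r'\1', expression)

    print(clean_cube_expression("${amount}"))               # amount
    print(clean_cube_expression("${price} * ${quantity}"))  # price * quantity

Note that for count_distinct the mapped string already carries the opening parenthesis, so 'COUNT(DISTINCT' plus 'amount)' composes to COUNT(DISTINCT amount).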
@@ -506,37 +520,41 @@ class SupersetConnector(BaseConnector):
 
     def _update_metrics(self, existing_metrics: List[dict], measures: List[dict]) -> List[dict]:
         """Update metrics with new measures"""
-        # Clean existing metrics
+        # Clean existing metrics and create a lookup by name
         updated_metrics = []
+        existing_metric_names = {}
+
         for metric in existing_metrics:
             clean_metric = {k: v for k, v in metric.items()
                             if k not in ['created_on', 'changed_on', 'uuid']}
+            existing_metric_names[metric.get('metric_name')] = len(updated_metrics)
             updated_metrics.append(clean_metric)
 
-        # Add
-        existing_metric_names = {m.get('metric_name') for m in existing_metrics}
-        added_count = 0
-
+        # Add or update metrics
         for measure in measures:
             metric_name = measure['metric_name']
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            new_metric = {
+                'metric_name': metric_name,
+                'verbose_name': measure['verbose_name'],
+                'expression': measure['expression'],
+                'description': measure['description'],
+                'metric_type': 'simple',
+                'currency': None,
+                'd3format': None,
+                'extra': None,
+                'warning_text': None
+            }
+
+            if metric_name in existing_metric_names:
+                # Update existing metric
+                index = existing_metric_names[metric_name]
+                updated_metrics[index].update(new_metric)
+                print(f"  ✓ Updated '{metric_name}': {measure['expression']}")
             else:
-
+                # Add new metric
+                updated_metrics.append(new_metric)
+                print(f"  ✓ Added '{metric_name}': {measure['expression']}")
 
         return updated_metrics
 
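The rework replaces the old skip-if-exists set with a name-to-index map, so existing Superset metrics are now updated in place. A short sketch of that behavior with hypothetical metric dicts:

    existing = [{'metric_name': 'revenue', 'expression': 'SUM(old_col)', 'uuid': 'abc'}]
    measures = [{'metric_name': 'revenue', 'expression': 'SUM(amount)'}]

    updated, index_by_name = [], {}
    for m in existing:
        # Strip server-managed keys, as the diff does
        clean = {k: v for k, v in m.items() if k not in ('created_on', 'changed_on', 'uuid')}
        index_by_name[m['metric_name']] = len(updated)
        updated.append(clean)

    for meas in measures:
        new_metric = {'metric_name': meas['metric_name'], 'expression': meas['expression']}
        if meas['metric_name'] in index_by_name:
            updated[index_by_name[meas['metric_name']]].update(new_metric)  # update in place
        else:
            updated.append(new_metric)                                      # add new

    print(updated)  # [{'metric_name': 'revenue', 'expression': 'SUM(amount)'}]

Previously 'revenue' would have been left untouched because its name was already present; now its expression is overwritten.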
dbt_cube_sync/core/cube_generator.py

@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import List, Dict, Any
 from jinja2 import Environment, FileSystemLoader, Template
 
-from .models import DbtModel, CubeSchema, CubeDimension, CubeMeasure
+from .models import DbtModel, CubeSchema, CubeDimension, CubeMeasure, CubePreAggregation, CubeRefreshKey
 from .dbt_parser import DbtParser
 
 
@@ -98,11 +98,36 @@ class CubeGenerator:
             )
             measures.append(measure)
 
+        # Convert pre-aggregations
+        pre_aggregations = []
+        for pre_agg_name, pre_agg_data in model.pre_aggregations.items():
+            # Convert refresh_key if present
+            refresh_key = None
+            if pre_agg_data.refresh_key:
+                refresh_key = CubeRefreshKey(
+                    every=pre_agg_data.refresh_key.every,
+                    sql=pre_agg_data.refresh_key.sql,
+                    incremental=pre_agg_data.refresh_key.incremental,
+                    update_window=pre_agg_data.refresh_key.update_window
+                )
+
+            pre_aggregation = CubePreAggregation(
+                name=pre_agg_name,
+                type=pre_agg_data.type,
+                measures=pre_agg_data.measures,
+                dimensions=pre_agg_data.dimensions,
+                time_dimension=pre_agg_data.time_dimension,
+                granularity=pre_agg_data.granularity,
+                refresh_key=refresh_key
+            )
+            pre_aggregations.append(pre_aggregation)
+
         return CubeSchema(
             cube_name=cube_name,
             sql=sql,
             dimensions=dimensions,
-            measures=measures
+            measures=measures,
+            pre_aggregations=pre_aggregations
         )
 
     def _write_cube_file(self, cube_schema: CubeSchema) -> Path:
@@ -116,7 +141,8 @@ class CubeGenerator:
                 cube_name=cube_schema.cube_name,
                 sql=cube_schema.sql,
                 dimensions=cube_schema.dimensions,
-                measures=cube_schema.measures
+                measures=cube_schema.measures,
+                pre_aggregations=cube_schema.pre_aggregations
             )
         else:
             # Fallback to hardcoded template
@@ -131,7 +157,12 @@ class CubeGenerator:
 
     def _generate_cube_content(self, cube_schema: CubeSchema) -> str:
         """Generate Cube.js content using hardcoded template"""
-
+
+        # Extract table name from SQL for refresh_key replacement
+        import re
+        table_name_match = re.search(r'FROM\s+([^\s,;]+)', cube_schema.sql, re.IGNORECASE)
+        table_name = table_name_match.group(1) if table_name_match else None
+
         # Generate dimensions
         dimensions_content = []
         for dim in cube_schema.dimensions:
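A quick check of the table-name extraction added above (the sample SQL is hypothetical; the pattern only matches the first FROM and does not handle quoted identifiers):

    import re

    sql = "SELECT * FROM analytics.orders o JOIN analytics.customers c ON o.customer_id = c.id"
    match = re.search(r'FROM\s+([^\s,;]+)', sql, re.IGNORECASE)
    print(match.group(1) if match else None)  # analytics.orders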
@@ -152,11 +183,79 @@ class CubeGenerator:
   }}"""
             measures_content.append(measure_content)
 
+        # Generate pre-aggregations
+        pre_aggregations_content = []
+        for pre_agg in cube_schema.pre_aggregations:
+            pre_agg_parts = [f"      type: `{pre_agg.type}`"]
+
+            if pre_agg.measures:
+                measures_list = ', '.join([f'CUBE.{measure}' for measure in pre_agg.measures])
+                pre_agg_parts.append(f"      measures: [{measures_list}]")
+
+            if pre_agg.dimensions:
+                dims_list = ', '.join([f'CUBE.{dim}' for dim in pre_agg.dimensions])
+                pre_agg_parts.append(f"      dimensions: [{dims_list}]")
+
+            if pre_agg.time_dimension:
+                pre_agg_parts.append(f"      time_dimension: CUBE.{pre_agg.time_dimension}")
+
+            if pre_agg.granularity:
+                pre_agg_parts.append(f"      granularity: `{pre_agg.granularity}`")
+
+            if pre_agg.refresh_key:
+                refresh_key_parts = []
+                if pre_agg.refresh_key.every:
+                    refresh_key_parts.append(f"        every: `{pre_agg.refresh_key.every}`")
+                if pre_agg.refresh_key.sql:
+                    # Replace ${CUBE} and ${this} with actual table name
+                    refresh_sql = pre_agg.refresh_key.sql
+                    if table_name:
+                        refresh_sql = refresh_sql.replace('${CUBE}', table_name)
+                        refresh_sql = refresh_sql.replace('${this}', table_name)
+                    refresh_key_parts.append(f"        sql: `{refresh_sql}`")
+                if pre_agg.refresh_key.incremental is not None:
+                    refresh_key_parts.append(f"        incremental: {str(pre_agg.refresh_key.incremental).lower()}")
+                if pre_agg.refresh_key.update_window:
+                    refresh_key_parts.append(f"        update_window: `{pre_agg.refresh_key.update_window}`")
+
+                if refresh_key_parts:
+                    refresh_key_content = ',\n'.join(refresh_key_parts)
+                    pre_agg_parts.append(f"      refresh_key: {{\n{refresh_key_content}\n      }}")
+
+            parts_joined = ',\n'.join(pre_agg_parts)
+            pre_agg_content = f"""    {pre_agg.name}: {{
+{parts_joined}
+    }}"""
+            pre_aggregations_content.append(pre_agg_content)
+
         # Combine into full cube definition
         dimensions_joined = ',\n\n'.join(dimensions_content)
         measures_joined = ',\n\n'.join(measures_content)
 
-        content = f"""cube(`{cube_schema.cube_name}`, {{
+        # Ensure we have measures (required for a useful Cube.js schema)
+        if not measures_content:
+            raise ValueError(f"Cube {cube_schema.cube_name} has no measures defined. Measures are required for Cube.js schemas.")
+
+        if pre_aggregations_content:
+            pre_aggregations_joined = ',\n\n'.join(pre_aggregations_content)
+            content = f"""cube(`{cube_schema.cube_name}`, {{
+  sql: `{cube_schema.sql}`,
+
+  dimensions: {{
+{dimensions_joined}
+  }},
+
+  measures: {{
+{measures_joined}
+  }},
+
+  pre_aggregations: {{
+{pre_aggregations_joined}
+  }}
+}});
+"""
+        else:
+            content = f"""cube(`{cube_schema.cube_name}`, {{
   sql: `{cube_schema.sql}`,
 
   dimensions: {{
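Assuming the indentation reconstructed above, a rollup configured with one measure, a daily time dimension, and an hourly refresh key would render roughly as follows (names and exact spacing are illustrative):

    pre_aggregations: {
      orders_rollup: {
        type: `rollup`,
        measures: [CUBE.total_amount],
        time_dimension: CUBE.created_at,
        granularity: `day`,
        refresh_key: {
          every: `1 hour`
        }
      }
    }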
dbt_cube_sync/core/dbt_parser.py

@@ -6,7 +6,7 @@ import os
 from typing import Dict, List
 from pathlib import Path
 
-from .models import DbtModel, DbtColumn, DbtMetric
+from .models import DbtModel, DbtColumn, DbtMetric, DbtPreAggregation, DbtRefreshKey
 
 
 class DbtParser:
@@ -61,7 +61,8 @@ class DbtParser:
                 continue
 
             model = self._parse_model(node_id, node_data)
-            if model and model.columns:
+            # Include models that have columns AND metrics (measures are required for useful Cube.js schemas)
+            if model and model.columns and model.metrics:
                 models.append(model)
 
         return models
@@ -78,13 +79,17 @@ class DbtParser:
         # Parse metrics from config.meta.metrics
         metrics = self._parse_metrics(node_data)
 
+        # Parse pre-aggregations from config.meta.pre_aggregations
+        pre_aggregations = self._parse_pre_aggregations(node_data)
+
         return DbtModel(
             name=model_name,
             database=model_database,
             schema_name=model_schema,
             node_id=node_id,
             columns=columns,
-            metrics=metrics
+            metrics=metrics,
+            pre_aggregations=pre_aggregations
         )
 
     def _parse_columns(self, node_id: str, node_data: dict) -> Dict[str, DbtColumn]:
@@ -145,6 +150,40 @@ class DbtParser:
 
         return metrics
 
+    def _parse_pre_aggregations(self, node_data: dict) -> Dict[str, DbtPreAggregation]:
+        """Parse pre-aggregations from model configuration"""
+        pre_aggregations = {}
+
+        # Look for pre-aggregations in config.meta.pre_aggregations
+        config = node_data.get('config', {})
+        meta = config.get('meta', {})
+        pre_aggs_data = meta.get('pre_aggregations', {})
+
+        for pre_agg_name, pre_agg_config in pre_aggs_data.items():
+            if isinstance(pre_agg_config, dict):
+                # Parse refresh_key if present
+                refresh_key = None
+                refresh_key_config = pre_agg_config.get('refresh_key')
+                if refresh_key_config and isinstance(refresh_key_config, dict):
+                    refresh_key = DbtRefreshKey(
+                        every=refresh_key_config.get('every'),
+                        sql=refresh_key_config.get('sql'),
+                        incremental=refresh_key_config.get('incremental'),
+                        update_window=refresh_key_config.get('update_window')
+                    )
+
+                pre_aggregations[pre_agg_name] = DbtPreAggregation(
+                    name=pre_agg_name,
+                    type=pre_agg_config.get('type', 'rollup'),
+                    measures=pre_agg_config.get('measures', []),
+                    dimensions=pre_agg_config.get('dimensions', []),
+                    time_dimension=pre_agg_config.get('time_dimension'),
+                    granularity=pre_agg_config.get('granularity'),
+                    refresh_key=refresh_key
+                )
+
+        return pre_aggregations
+
     @staticmethod
     def map_dbt_type_to_cube_type(dbt_type: str) -> str:
         """Map dbt metric types to Cube.js measure types"""
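The new _parse_pre_aggregations reads from config.meta.pre_aggregations in the dbt manifest. A hypothetical manifest node showing the shape it consumes (in a dbt project this would originate from the model's meta: block in schema.yml):

    node_data = {
        'config': {
            'meta': {
                'pre_aggregations': {
                    'orders_rollup': {
                        'type': 'rollup',
                        'measures': ['total_amount'],
                        'dimensions': ['status'],
                        'time_dimension': 'created_at',
                        'granularity': 'day',
                        'refresh_key': {'every': '1 hour', 'incremental': True},
                    }
                }
            }
        }
    }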
dbt_cube_sync/core/models.py

@@ -22,6 +22,25 @@ class DbtMetric(BaseModel):
     description: Optional[str] = None
 
 
+class DbtRefreshKey(BaseModel):
+    """Represents a refresh_key configuration for pre-aggregations"""
+    every: Optional[str] = None
+    sql: Optional[str] = None
+    incremental: Optional[bool] = None
+    update_window: Optional[str] = None
+
+
+class DbtPreAggregation(BaseModel):
+    """Represents a dbt pre-aggregation configuration"""
+    name: str
+    type: str = "rollup"
+    measures: Optional[List[str]] = None
+    dimensions: Optional[List[str]] = None
+    time_dimension: Optional[str] = None
+    granularity: Optional[str] = None
+    refresh_key: Optional[DbtRefreshKey] = None
+
+
 class DbtModel(BaseModel):
     """Represents a parsed dbt model"""
     name: str
@@ -30,6 +49,7 @@ class DbtModel(BaseModel):
     node_id: str
     columns: Dict[str, DbtColumn]
     metrics: Dict[str, DbtMetric]
+    pre_aggregations: Dict[str, DbtPreAggregation] = {}
 
 
 class CubeDimension(BaseModel):
@@ -50,12 +70,32 @@ class CubeMeasure(BaseModel):
     description: Optional[str] = None
 
 
+class CubeRefreshKey(BaseModel):
+    """Represents a Cube.js refresh_key configuration"""
+    every: Optional[str] = None
+    sql: Optional[str] = None
+    incremental: Optional[bool] = None
+    update_window: Optional[str] = None
+
+
+class CubePreAggregation(BaseModel):
+    """Represents a Cube.js pre-aggregation"""
+    name: str
+    type: str = "rollup"
+    measures: Optional[List[str]] = None
+    dimensions: Optional[List[str]] = None
+    time_dimension: Optional[str] = None
+    granularity: Optional[str] = None
+    refresh_key: Optional[CubeRefreshKey] = None
+
+
 class CubeSchema(BaseModel):
     """Represents a complete Cube.js schema"""
     cube_name: str
     sql: str
     dimensions: List[CubeDimension]
     measures: List[CubeMeasure]
+    pre_aggregations: List[CubePreAggregation] = []
 
 
 class SyncResult(BaseModel):
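A quick sketch exercising the new Pydantic models (field names are from the diff; the import path dbt_cube_sync.core.models is assumed from the file listing):

    from dbt_cube_sync.core.models import CubePreAggregation, CubeRefreshKey, CubeSchema

    pre_agg = CubePreAggregation(
        name='orders_rollup',
        measures=['total_amount'],
        time_dimension='created_at',
        granularity='day',
        refresh_key=CubeRefreshKey(every='1 hour'),
    )
    schema = CubeSchema(
        cube_name='Orders',
        sql='SELECT * FROM analytics.orders',
        dimensions=[],
        measures=[],  # empty only for brevity; _generate_cube_content now raises ValueError when no measures exist
        pre_aggregations=[pre_agg],
    )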