dbt-cube-sync 0.1.0a4__tar.gz → 0.1.0a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dbt-cube-sync might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dbt-cube-sync
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a5
4
4
  Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
5
5
  Author: Ponder
6
6
  Requires-Python: >=3.9,<4.0
@@ -7,7 +7,7 @@ from pathlib import Path
7
7
  from typing import List, Dict, Any
8
8
  from jinja2 import Environment, FileSystemLoader, Template
9
9
 
10
- from .models import DbtModel, CubeSchema, CubeDimension, CubeMeasure
10
+ from .models import DbtModel, CubeSchema, CubeDimension, CubeMeasure, CubePreAggregation, CubeRefreshKey
11
11
  from .dbt_parser import DbtParser
12
12
 
13
13
 
@@ -98,11 +98,36 @@ class CubeGenerator:
98
98
  )
99
99
  measures.append(measure)
100
100
 
101
+ # Convert pre-aggregations
102
+ pre_aggregations = []
103
+ for pre_agg_name, pre_agg_data in model.pre_aggregations.items():
104
+ # Convert refresh_key if present
105
+ refresh_key = None
106
+ if pre_agg_data.refresh_key:
107
+ refresh_key = CubeRefreshKey(
108
+ every=pre_agg_data.refresh_key.every,
109
+ sql=pre_agg_data.refresh_key.sql,
110
+ incremental=pre_agg_data.refresh_key.incremental,
111
+ update_window=pre_agg_data.refresh_key.update_window
112
+ )
113
+
114
+ pre_aggregation = CubePreAggregation(
115
+ name=pre_agg_name,
116
+ type=pre_agg_data.type,
117
+ measures=pre_agg_data.measures,
118
+ dimensions=pre_agg_data.dimensions,
119
+ time_dimension=pre_agg_data.time_dimension,
120
+ granularity=pre_agg_data.granularity,
121
+ refresh_key=refresh_key
122
+ )
123
+ pre_aggregations.append(pre_aggregation)
124
+
101
125
  return CubeSchema(
102
126
  cube_name=cube_name,
103
127
  sql=sql,
104
128
  dimensions=dimensions,
105
- measures=measures
129
+ measures=measures,
130
+ pre_aggregations=pre_aggregations
106
131
  )
107
132
 
108
133
  def _write_cube_file(self, cube_schema: CubeSchema) -> Path:
@@ -116,7 +141,8 @@ class CubeGenerator:
116
141
  cube_name=cube_schema.cube_name,
117
142
  sql=cube_schema.sql,
118
143
  dimensions=cube_schema.dimensions,
119
- measures=cube_schema.measures
144
+ measures=cube_schema.measures,
145
+ pre_aggregations=cube_schema.pre_aggregations
120
146
  )
121
147
  else:
122
148
  # Fallback to hardcoded template
@@ -131,7 +157,12 @@ class CubeGenerator:
131
157
 
132
158
  def _generate_cube_content(self, cube_schema: CubeSchema) -> str:
133
159
  """Generate Cube.js content using hardcoded template"""
134
-
160
+
161
+ # Extract table name from SQL for refresh_key replacement
162
+ import re
163
+ table_name_match = re.search(r'FROM\s+([^\s,;]+)', cube_schema.sql, re.IGNORECASE)
164
+ table_name = table_name_match.group(1) if table_name_match else None
165
+
135
166
  # Generate dimensions
136
167
  dimensions_content = []
137
168
  for dim in cube_schema.dimensions:
@@ -152,11 +183,79 @@ class CubeGenerator:
152
183
  }}"""
153
184
  measures_content.append(measure_content)
154
185
 
186
+ # Generate pre-aggregations
187
+ pre_aggregations_content = []
188
+ for pre_agg in cube_schema.pre_aggregations:
189
+ pre_agg_parts = [f" type: `{pre_agg.type}`"]
190
+
191
+ if pre_agg.measures:
192
+ measures_list = ', '.join([f'CUBE.{measure}' for measure in pre_agg.measures])
193
+ pre_agg_parts.append(f" measures: [{measures_list}]")
194
+
195
+ if pre_agg.dimensions:
196
+ dims_list = ', '.join([f'CUBE.{dim}' for dim in pre_agg.dimensions])
197
+ pre_agg_parts.append(f" dimensions: [{dims_list}]")
198
+
199
+ if pre_agg.time_dimension:
200
+ pre_agg_parts.append(f" time_dimension: CUBE.{pre_agg.time_dimension}")
201
+
202
+ if pre_agg.granularity:
203
+ pre_agg_parts.append(f" granularity: `{pre_agg.granularity}`")
204
+
205
+ if pre_agg.refresh_key:
206
+ refresh_key_parts = []
207
+ if pre_agg.refresh_key.every:
208
+ refresh_key_parts.append(f" every: `{pre_agg.refresh_key.every}`")
209
+ if pre_agg.refresh_key.sql:
210
+ # Replace ${CUBE} and ${this} with actual table name
211
+ refresh_sql = pre_agg.refresh_key.sql
212
+ if table_name:
213
+ refresh_sql = refresh_sql.replace('${CUBE}', table_name)
214
+ refresh_sql = refresh_sql.replace('${this}', table_name)
215
+ refresh_key_parts.append(f" sql: `{refresh_sql}`")
216
+ if pre_agg.refresh_key.incremental is not None:
217
+ refresh_key_parts.append(f" incremental: {str(pre_agg.refresh_key.incremental).lower()}")
218
+ if pre_agg.refresh_key.update_window:
219
+ refresh_key_parts.append(f" update_window: `{pre_agg.refresh_key.update_window}`")
220
+
221
+ if refresh_key_parts:
222
+ refresh_key_content = ',\n'.join(refresh_key_parts)
223
+ pre_agg_parts.append(f" refresh_key: {{\n{refresh_key_content}\n }}")
224
+
225
+ parts_joined = ',\n'.join(pre_agg_parts)
226
+ pre_agg_content = f""" {pre_agg.name}: {{
227
+ {parts_joined}
228
+ }}"""
229
+ pre_aggregations_content.append(pre_agg_content)
230
+
155
231
  # Combine into full cube definition
156
232
  dimensions_joined = ',\n\n'.join(dimensions_content)
157
233
  measures_joined = ',\n\n'.join(measures_content)
158
234
 
159
- content = f"""cube(`{cube_schema.cube_name}`, {{
235
+ # Ensure we have measures (required for a useful Cube.js schema)
236
+ if not measures_content:
237
+ raise ValueError(f"Cube {cube_schema.cube_name} has no measures defined. Measures are required for Cube.js schemas.")
238
+
239
+ if pre_aggregations_content:
240
+ pre_aggregations_joined = ',\n\n'.join(pre_aggregations_content)
241
+ content = f"""cube(`{cube_schema.cube_name}`, {{
242
+ sql: `{cube_schema.sql}`,
243
+
244
+ dimensions: {{
245
+ {dimensions_joined}
246
+ }},
247
+
248
+ measures: {{
249
+ {measures_joined}
250
+ }},
251
+
252
+ pre_aggregations: {{
253
+ {pre_aggregations_joined}
254
+ }}
255
+ }});
256
+ """
257
+ else:
258
+ content = f"""cube(`{cube_schema.cube_name}`, {{
160
259
  sql: `{cube_schema.sql}`,
161
260
 
162
261
  dimensions: {{
@@ -6,7 +6,7 @@ import os
6
6
  from typing import Dict, List
7
7
  from pathlib import Path
8
8
 
9
- from .models import DbtModel, DbtColumn, DbtMetric
9
+ from .models import DbtModel, DbtColumn, DbtMetric, DbtPreAggregation, DbtRefreshKey
10
10
 
11
11
 
12
12
  class DbtParser:
@@ -61,7 +61,8 @@ class DbtParser:
61
61
  continue
62
62
 
63
63
  model = self._parse_model(node_id, node_data)
64
- if model and model.columns and model.metrics: # Only include models with BOTH columns AND metrics
64
+ # Include models that have columns AND metrics (measures are required for useful Cube.js schemas)
65
+ if model and model.columns and model.metrics:
65
66
  models.append(model)
66
67
 
67
68
  return models
@@ -78,13 +79,17 @@ class DbtParser:
78
79
  # Parse metrics from config.meta.metrics
79
80
  metrics = self._parse_metrics(node_data)
80
81
 
82
+ # Parse pre-aggregations from config.meta.pre_aggregations
83
+ pre_aggregations = self._parse_pre_aggregations(node_data)
84
+
81
85
  return DbtModel(
82
86
  name=model_name,
83
87
  database=model_database,
84
88
  schema_name=model_schema,
85
89
  node_id=node_id,
86
90
  columns=columns,
87
- metrics=metrics
91
+ metrics=metrics,
92
+ pre_aggregations=pre_aggregations
88
93
  )
89
94
 
90
95
  def _parse_columns(self, node_id: str, node_data: dict) -> Dict[str, DbtColumn]:
@@ -145,6 +150,40 @@ class DbtParser:
145
150
 
146
151
  return metrics
147
152
 
153
+ def _parse_pre_aggregations(self, node_data: dict) -> Dict[str, DbtPreAggregation]:
154
+ """Parse pre-aggregations from model configuration"""
155
+ pre_aggregations = {}
156
+
157
+ # Look for pre-aggregations in config.meta.pre_aggregations
158
+ config = node_data.get('config', {})
159
+ meta = config.get('meta', {})
160
+ pre_aggs_data = meta.get('pre_aggregations', {})
161
+
162
+ for pre_agg_name, pre_agg_config in pre_aggs_data.items():
163
+ if isinstance(pre_agg_config, dict):
164
+ # Parse refresh_key if present
165
+ refresh_key = None
166
+ refresh_key_config = pre_agg_config.get('refresh_key')
167
+ if refresh_key_config and isinstance(refresh_key_config, dict):
168
+ refresh_key = DbtRefreshKey(
169
+ every=refresh_key_config.get('every'),
170
+ sql=refresh_key_config.get('sql'),
171
+ incremental=refresh_key_config.get('incremental'),
172
+ update_window=refresh_key_config.get('update_window')
173
+ )
174
+
175
+ pre_aggregations[pre_agg_name] = DbtPreAggregation(
176
+ name=pre_agg_name,
177
+ type=pre_agg_config.get('type', 'rollup'),
178
+ measures=pre_agg_config.get('measures', []),
179
+ dimensions=pre_agg_config.get('dimensions', []),
180
+ time_dimension=pre_agg_config.get('time_dimension'),
181
+ granularity=pre_agg_config.get('granularity'),
182
+ refresh_key=refresh_key
183
+ )
184
+
185
+ return pre_aggregations
186
+
148
187
  @staticmethod
149
188
  def map_dbt_type_to_cube_type(dbt_type: str) -> str:
150
189
  """Map dbt metric types to Cube.js measure types"""
@@ -22,6 +22,25 @@ class DbtMetric(BaseModel):
22
22
  description: Optional[str] = None
23
23
 
24
24
 
25
+ class DbtRefreshKey(BaseModel):
26
+ """Represents a refresh_key configuration for pre-aggregations"""
27
+ every: Optional[str] = None
28
+ sql: Optional[str] = None
29
+ incremental: Optional[bool] = None
30
+ update_window: Optional[str] = None
31
+
32
+
33
+ class DbtPreAggregation(BaseModel):
34
+ """Represents a dbt pre-aggregation configuration"""
35
+ name: str
36
+ type: str = "rollup"
37
+ measures: Optional[List[str]] = None
38
+ dimensions: Optional[List[str]] = None
39
+ time_dimension: Optional[str] = None
40
+ granularity: Optional[str] = None
41
+ refresh_key: Optional[DbtRefreshKey] = None
42
+
43
+
25
44
  class DbtModel(BaseModel):
26
45
  """Represents a parsed dbt model"""
27
46
  name: str
@@ -30,6 +49,7 @@ class DbtModel(BaseModel):
30
49
  node_id: str
31
50
  columns: Dict[str, DbtColumn]
32
51
  metrics: Dict[str, DbtMetric]
52
+ pre_aggregations: Dict[str, DbtPreAggregation] = {}
33
53
 
34
54
 
35
55
  class CubeDimension(BaseModel):
@@ -50,12 +70,32 @@ class CubeMeasure(BaseModel):
50
70
  description: Optional[str] = None
51
71
 
52
72
 
73
+ class CubeRefreshKey(BaseModel):
74
+ """Represents a Cube.js refresh_key configuration"""
75
+ every: Optional[str] = None
76
+ sql: Optional[str] = None
77
+ incremental: Optional[bool] = None
78
+ update_window: Optional[str] = None
79
+
80
+
81
+ class CubePreAggregation(BaseModel):
82
+ """Represents a Cube.js pre-aggregation"""
83
+ name: str
84
+ type: str = "rollup"
85
+ measures: Optional[List[str]] = None
86
+ dimensions: Optional[List[str]] = None
87
+ time_dimension: Optional[str] = None
88
+ granularity: Optional[str] = None
89
+ refresh_key: Optional[CubeRefreshKey] = None
90
+
91
+
53
92
  class CubeSchema(BaseModel):
54
93
  """Represents a complete Cube.js schema"""
55
94
  cube_name: str
56
95
  sql: str
57
96
  dimensions: List[CubeDimension]
58
97
  measures: List[CubeMeasure]
98
+ pre_aggregations: List[CubePreAggregation] = []
59
99
 
60
100
 
61
101
  class SyncResult(BaseModel):
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dbt-cube-sync"
3
- version = "0.1.0a4"
3
+ version = "0.1.0a5"
4
4
  description = "Synchronization tool for dbt models to Cube.js schemas and BI tools"
5
5
  authors = ["Ponder"]
6
6
  readme = "README.md"