dbt-cube-sync 0.1.0a2.tar.gz → 0.1.0a4.tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of dbt-cube-sync has been flagged as possibly problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dbt-cube-sync
-Version: 0.1.0a2
+Version: 0.1.0a4
 Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
 Author: Ponder
 Requires-Python: >=3.9,<4.0
@@ -191,19 +191,7 @@ class SupersetConnector(BaseConnector):
 
         cube_name = cube_name_match.group(1)
 
-        # Use public schema and cube name for Superset dataset
-        schema_name = "public"
-        table_name = cube_name  # Use cube name (not database table name)
-
-        # Extract actual database table for reference (but don't use it for dataset)
-        sql_match = re.search(r'sql:\s*[`"\']\s*SELECT\s+.*FROM\s+(\w+\.\w+)', content, re.IGNORECASE)
-        actual_db_table = None
-        if sql_match:
-            actual_db_table = sql_match.group(1)
-
         print(f" Cube: {cube_name}")
-        print(f" Schema: {schema_name}")
-        print(f" Table: {table_name}")
 
         # Parse dimensions
         dimensions = self._parse_dimensions(content)
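For context, the removed regex tried to pull the backing database table out of the cube's sql: member. A minimal standalone sketch of what that pattern matched, using a hypothetical cube definition (not taken from the package):

    import re

    # Hypothetical Cube.js schema of the shape the old regex targeted
    content = '''
    cube(`CoursePerformanceSummary`, {
      sql: `SELECT * FROM analytics.course_performance_summary`,
    });
    '''

    sql_match = re.search(r'sql:\s*[`"\']\s*SELECT\s+.*FROM\s+(\w+\.\w+)', content, re.IGNORECASE)
    print(sql_match.group(1) if sql_match else None)
    # -> analytics.course_performance_summary

As of 0.1.0a4 this lookup is gone entirely: the Superset dataset is keyed purely on the cube name.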
@@ -213,9 +201,8 @@ class SupersetConnector(BaseConnector):
 
         return {
             'cube_name': cube_name,
-            'schema': schema_name,
-            'table_name': table_name,  # This is now the cube name for dataset creation
-            'actual_db_table': actual_db_table,  # This is the real DB table
+            'schema': 'public',  # Always use public schema for Cube.js
+            'table_name': cube_name,  # Use cube name as table name (e.g., CoursePerformanceSummary)
             'dimensions': dimensions,
             'measures': measures
         }
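Under the new code path the parse result is fully determined by the cube definition itself. A hypothetical return value for a cube named CoursePerformanceSummary:

    # Hypothetical parse result under the 0.1.0a4 code path
    schema_info = {
        'cube_name': 'CoursePerformanceSummary',
        'schema': 'public',                        # now hard-coded
        'table_name': 'CoursePerformanceSummary',  # the cube name, not the DB table
        'dimensions': [...],  # output of _parse_dimensions(content)
        'measures': [...],    # output of the measure parser
    }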
@@ -350,12 +337,26 @@ class SupersetConnector(BaseConnector):
             'count_distinct': 'COUNT(DISTINCT'
         }
 
+        # Remove Cube.js ${} syntax and convert to plain SQL column references
+        cleaned_expression = self._clean_cube_expression(sql_expression)
+
         agg_func = agg_mapping.get(agg_type, 'SUM')
 
         if agg_type == 'count_distinct':
-            return f"{agg_func} {sql_expression})"
+            return f"{agg_func} {cleaned_expression})"
         else:
-            return f"{agg_func}({sql_expression})"
+            return f"{agg_func}({cleaned_expression})"
+
+    def _clean_cube_expression(self, expression: str) -> str:
+        """Convert Cube.js expressions to SQL column references for Superset"""
+        import re
+
+        # Remove ${} syntax - convert ${column_name} to column_name
+        cleaned = re.sub(r'\$\{([^}]+)\}', r'\1', expression)
+
+        # Handle more complex expressions like arithmetic
+        # Keep parentheses and operators but clean column references
+        return cleaned
 
     def _create_or_update_dataset(self, schema_info: Dict[str, Any]) -> int:
         """Create a new dataset or update existing one"""
@@ -409,27 +410,14 @@ class SupersetConnector(BaseConnector):
         """Create a new dataset in Superset"""
         dataset_url = f"{self.base_url}/api/v1/dataset/"
 
-        # If we have actual DB table info, use custom SQL, otherwise use table reference
-        if schema_info.get('actual_db_table'):
-            # Create a custom SQL dataset that references the actual table but is named with cube name
-            sql_query = f"SELECT * FROM {schema_info['actual_db_table']}"
-            payload = {
-                "database": self.database_id,
-                "schema": schema_info['schema'],  # "public"
-                "table_name": schema_info['table_name'],  # cube name like "CoursePerformanceSummary"
-                "sql": sql_query,
-                "normalize_columns": False,
-                "always_filter_main_dttm": False
-            }
-        else:
-            # Fallback to direct table reference
-            payload = {
-                "database": self.database_id,
-                "schema": schema_info['schema'],
-                "table_name": schema_info['table_name'],
-                "normalize_columns": False,
-                "always_filter_main_dttm": False
-            }
+        # Create a simple table dataset (Cube.js will handle the actual data source)
+        payload = {
+            "database": self.database_id,
+            "schema": schema_info['schema'],  # "public"
+            "table_name": schema_info['table_name'],  # cube name like "CoursePerformanceSummary"
+            "normalize_columns": False,
+            "always_filter_main_dttm": False
+        }
 
         print(f"\n📊 Creating new dataset: {schema_info['table_name']}")
         response = self.session.post(dataset_url, json=payload)
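For reference, a self-contained sketch of the same request against Superset's dataset REST endpoint; the base URL and database id are placeholders, and a real session would already carry Superset auth headers:

    import requests

    base_url = "https://superset.example.com"  # placeholder
    session = requests.Session()               # assume already authenticated

    payload = {
        "database": 1,                             # Superset database id
        "schema": "public",
        "table_name": "CoursePerformanceSummary",  # cube name
        "normalize_columns": False,
        "always_filter_main_dttm": False,
    }

    response = session.post(f"{base_url}/api/v1/dataset/", json=payload)
    response.raise_for_status()
    print(response.json().get("id"))  # id of the newly created dataset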
@@ -532,37 +520,41 @@ class SupersetConnector(BaseConnector):
 
     def _update_metrics(self, existing_metrics: List[dict], measures: List[dict]) -> List[dict]:
         """Update metrics with new measures"""
-        # Clean existing metrics
+        # Clean existing metrics and create a lookup by name
         updated_metrics = []
+        existing_metric_names = {}
+
         for metric in existing_metrics:
             clean_metric = {k: v for k, v in metric.items()
                             if k not in ['created_on', 'changed_on', 'uuid']}
+            existing_metric_names[metric.get('metric_name')] = len(updated_metrics)
             updated_metrics.append(clean_metric)
 
-        # Add new metrics
-        existing_metric_names = {m.get('metric_name') for m in existing_metrics}
-        added_count = 0
-
+        # Add or update metrics
         for measure in measures:
             metric_name = measure['metric_name']
 
-            if metric_name not in existing_metric_names:
-                new_metric = {
-                    'metric_name': metric_name,
-                    'verbose_name': measure['verbose_name'],
-                    'expression': measure['expression'],
-                    'description': measure['description'],
-                    'metric_type': 'simple',
-                    'currency': None,
-                    'd3format': None,
-                    'extra': None,
-                    'warning_text': None
-                }
-                updated_metrics.append(new_metric)
-                print(f" ✓ Prepared '{metric_name}': {measure['expression']}")
-                added_count += 1
+            new_metric = {
+                'metric_name': metric_name,
+                'verbose_name': measure['verbose_name'],
+                'expression': measure['expression'],
+                'description': measure['description'],
+                'metric_type': 'simple',
+                'currency': None,
+                'd3format': None,
+                'extra': None,
+                'warning_text': None
+            }
+
+            if metric_name in existing_metric_names:
+                # Update existing metric
+                index = existing_metric_names[metric_name]
+                updated_metrics[index].update(new_metric)
+                print(f" ✓ Updated '{metric_name}': {measure['expression']}")
             else:
-                print(f" ⊘ Skipping '{metric_name}' (already exists)")
+                # Add new metric
+                updated_metrics.append(new_metric)
+                print(f" ✓ Added '{metric_name}': {measure['expression']}")
 
         return updated_metrics
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbt-cube-sync"
-version = "0.1.0a2"
+version = "0.1.0a4"
 description = "Synchronization tool for dbt models to Cube.js schemas and BI tools"
 authors = ["Ponder"]
 readme = "README.md"