dbt-cube-sync 0.1.0a8.tar.gz → 0.1.0a10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of dbt-cube-sync has been flagged as potentially problematic.
Files changed (18)
  1. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/PKG-INFO +1 -1
  2. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/superset.py +9 -3
  3. dbt_cube_sync-0.1.0a10/dbt_cube_sync/core/db_inspector.py +149 -0
  4. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/pyproject.toml +1 -1
  5. dbt_cube_sync-0.1.0a8/dbt_cube_sync/core/db_inspector.py +0 -97
  6. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/README.md +0 -0
  7. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/__init__.py +0 -0
  8. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/cli.py +0 -0
  9. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/config.py +0 -0
  10. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/__init__.py +0 -0
  11. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/base.py +0 -0
  12. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/powerbi.py +0 -0
  13. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/tableau.py +0 -0
  14. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/__init__.py +0 -0
  15. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/cube_generator.py +0 -0
  16. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/dbt_parser.py +0 -0
  17. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/models.py +0 -0
  18. {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/state_manager.py +0 -0

--- dbt_cube_sync-0.1.0a8/PKG-INFO
+++ dbt_cube_sync-0.1.0a10/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dbt-cube-sync
-Version: 0.1.0a8
+Version: 0.1.0a10
 Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
 Author: Ponder
 Requires-Python: >=3.9,<4.0

--- dbt_cube_sync-0.1.0a8/dbt_cube_sync/connectors/superset.py
+++ dbt_cube_sync-0.1.0a10/dbt_cube_sync/connectors/superset.py
@@ -60,13 +60,19 @@ class SupersetConnector(BaseConnector):
         """Authenticate and get JWT token"""
         login_url = f"{self.base_url}/api/v1/security/login"
         payload = {
-            "username": self.config['username'],
             "password": self.config['password'],
             "provider": "db",
-            "refresh": True
+            "refresh": "true",
+            "username": self.config['username']
         }
-
+
         response = self.session.post(login_url, json=payload)
+        if response.status_code == 401:
+            raise Exception(
+                f"Superset authentication failed (401). "
+                f"Check username/password and ensure provider='{payload['provider']}' is correct. "
+                f"Response: {response.text}"
+            )
         response.raise_for_status()

         data = response.json()
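
For context, the connector targets Superset's /api/v1/security/login endpoint. A minimal standalone sketch of the same flow (the URL and credentials are placeholders; the response shape assumes Superset's documented login API):

import requests

SUPERSET_URL = "http://localhost:8088"  # placeholder

def superset_login(username: str, password: str) -> str:
    """Fetch a JWT access token from Superset's security API."""
    payload = {
        "username": username,
        "password": password,
        "provider": "db",     # database-backed authentication
        "refresh": "true",    # mirrors the payload in the diff above
    }
    resp = requests.post(f"{SUPERSET_URL}/api/v1/security/login", json=payload)
    if resp.status_code == 401:
        raise RuntimeError(f"Superset authentication failed (401): {resp.text}")
    resp.raise_for_status()
    return resp.json()["access_token"]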

--- /dev/null
+++ dbt_cube_sync-0.1.0a10/dbt_cube_sync/core/db_inspector.py
@@ -0,0 +1,149 @@
+"""
+Database inspector - fetches column types using SQLAlchemy or direct SQL.
+
+Uses Redshift-specific queries for Redshift databases (which don't support
+standard PostgreSQL reflection), and SQLAlchemy reflection for other databases.
+"""
+from typing import Dict, Optional
+from sqlalchemy import create_engine, MetaData, Table, text
+from sqlalchemy.engine import Engine
+
+
+class DatabaseInspector:
+    """Inspects database schema to extract column type information."""
+
+    def __init__(self, sqlalchemy_uri: str):
+        """
+        Initialize the database inspector.
+
+        Args:
+            sqlalchemy_uri: SQLAlchemy connection URI (e.g., postgresql://user:pass@host:port/db)
+        """
+        self.is_redshift = 'redshift' in sqlalchemy_uri.lower()
+
+        if self.is_redshift:
+            self.engine: Engine = create_engine(
+                sqlalchemy_uri,
+                connect_args={'sslmode': 'prefer'}
+            )
+        else:
+            self.engine: Engine = create_engine(sqlalchemy_uri)
+
+        self.metadata = MetaData()
+        self._table_cache: Dict[str, Dict[str, str]] = {}
+
+    def _get_redshift_columns(self, schema: str, table_name: str) -> Dict[str, str]:
+        """
+        Get column types from Redshift using a LIMIT 0 query (fastest method).
+
+        Executes SELECT * FROM table LIMIT 0 and reads column types from the cursor
+        description. This is very fast because it doesn't scan any data - just returns metadata.
+        """
+        columns = {}
+
+        # LIMIT 0 query - returns no rows but gives us column metadata
+        query = text(f'SELECT * FROM "{schema}"."{table_name}" LIMIT 0')
+
+        # Redshift type OID to name mapping (common types)
+        redshift_type_map = {
+            16: 'boolean',
+            20: 'bigint',
+            21: 'smallint',
+            23: 'integer',
+            25: 'text',
+            700: 'real',
+            701: 'double precision',
+            1042: 'char',
+            1043: 'varchar',
+            1082: 'date',
+            1083: 'time',
+            1114: 'timestamp',
+            1184: 'timestamptz',
+            1700: 'numeric',
+            2950: 'uuid',
+        }
+
+        try:
+            with self.engine.connect() as conn:
+                result = conn.execute(query)
+                # Get column info from cursor description
+                # Format: (name, type_code, display_size, internal_size, precision, scale, null_ok)
+                if result.cursor and result.cursor.description:
+                    for col_desc in result.cursor.description:
+                        col_name = col_desc[0]
+                        type_code = col_desc[1]
+                        # Map type code to type name, fallback to 'varchar' if unknown
+                        col_type = redshift_type_map.get(type_code, 'varchar')
+                        columns[col_name] = col_type
+
+        except Exception as e:
+            print(f"Warning: Could not inspect Redshift table {schema}.{table_name}: {e}")
+
+        return columns
+
+    def get_table_columns(self, schema: str, table_name: str) -> Dict[str, str]:
+        """
+        Get column names and their data types for a specific table.
+
+        Uses Redshift-specific queries for Redshift, SQLAlchemy reflection for others.
+
+        Args:
+            schema: Database schema name
+            table_name: Table name
+
+        Returns:
+            Dictionary mapping column names to data types
+        """
+        cache_key = f"{schema}.{table_name}"
+
+        # Check cache first
+        if cache_key in self._table_cache:
+            return self._table_cache[cache_key]
+
+        columns = {}
+
+        if self.is_redshift:
+            # Use Redshift-specific query
+            columns = self._get_redshift_columns(schema, table_name)
+        else:
+            # Use standard SQLAlchemy reflection
+            try:
+                table = Table(
+                    table_name,
+                    self.metadata,
+                    autoload_with=self.engine,
+                    schema=schema
+                )
+                for column in table.columns:
+                    columns[column.name] = str(column.type)
+
+            except Exception as e:
+                print(f"Warning: Could not inspect table {schema}.{table_name}: {e}")
+
+        self._table_cache[cache_key] = columns
+        return columns
+
+    def reflect_multiple_tables(
+        self, tables: list[tuple[str, str]]
+    ) -> Dict[str, Dict[str, str]]:
+        """
+        Reflect multiple tables in bulk for performance optimization.
+
+        Args:
+            tables: List of (schema, table_name) tuples
+
+        Returns:
+            Dict mapping "schema.table_name" -> {column_name: column_type}
+        """
+        results = {}
+
+        for schema, table_name in tables:
+            cache_key = f"{schema}.{table_name}"
+            results[cache_key] = self.get_table_columns(schema, table_name)
+
+        return results
+
+    def close(self):
+        """Close the database connection and clear cache."""
+        self._table_cache.clear()
+        self.engine.dispose()
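
A quick usage sketch for the new module, assuming a reachable database (the URI and table names below are placeholders):

from dbt_cube_sync.core.db_inspector import DatabaseInspector

inspector = DatabaseInspector("postgresql://user:pass@localhost:5432/analytics")
try:
    # Single table: {column_name: column_type}, served from cache on repeat calls
    cols = inspector.get_table_columns("public", "orders")
    # Bulk: {"schema.table": {column_name: column_type}}
    all_cols = inspector.reflect_multiple_tables(
        [("public", "orders"), ("public", "customers")]
    )
finally:
    inspector.close()

Because get_table_columns caches results under a "schema.table" key, reflect_multiple_tables costs at most one inspection per distinct table.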

--- dbt_cube_sync-0.1.0a8/pyproject.toml
+++ dbt_cube_sync-0.1.0a10/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dbt-cube-sync"
-version = "0.1.0a8"
+version = "0.1.0a10"
 description = "Synchronization tool for dbt models to Cube.js schemas and BI tools"
 authors = ["Ponder"]
 readme = "README.md"

--- dbt_cube_sync-0.1.0a8/dbt_cube_sync/core/db_inspector.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-Database inspector - fetches column types using SQLAlchemy MetaData reflection.
-
-Uses SQLAlchemy's Table(..., autoload_with=engine) for portable, database-agnostic
-column type extraction. This approach works consistently across PostgreSQL, MySQL,
-Snowflake, BigQuery, Redshift, and other databases.
-"""
-from typing import Dict, Optional
-from sqlalchemy import create_engine, MetaData, Table
-from sqlalchemy.engine import Engine
-
-
-class DatabaseInspector:
-    """Inspects database schema to extract column type information using SQLAlchemy reflection."""
-
-    def __init__(self, sqlalchemy_uri: str):
-        """
-        Initialize the database inspector.
-
-        Args:
-            sqlalchemy_uri: SQLAlchemy connection URI (e.g., postgresql://user:pass@host:port/db)
-        """
-        # Add connect_args for Redshift compatibility
-        if 'redshift' in sqlalchemy_uri.lower():
-            self.engine: Engine = create_engine(
-                sqlalchemy_uri,
-                connect_args={'sslmode': 'prefer'}
-            )
-        else:
-            self.engine: Engine = create_engine(sqlalchemy_uri)
-
-        self.metadata = MetaData()
-        self._table_cache: Dict[str, Table] = {}
-
-    def get_table_columns(self, schema: str, table_name: str) -> Dict[str, str]:
-        """
-        Get column names and their data types for a specific table.
-
-        Uses SQLAlchemy MetaData reflection for portable column extraction.
-
-        Args:
-            schema: Database schema name
-            table_name: Table name
-
-        Returns:
-            Dictionary mapping column names to data types
-        """
-        columns = {}
-        cache_key = f"{schema}.{table_name}"
-
-        try:
-            # Check cache first
-            if cache_key in self._table_cache:
-                table = self._table_cache[cache_key]
-            else:
-                # Reflect table using SQLAlchemy MetaData
-                table = Table(
-                    table_name,
-                    self.metadata,
-                    autoload_with=self.engine,
-                    schema=schema
-                )
-                self._table_cache[cache_key] = table
-
-            # Extract column types
-            for column in table.columns:
-                columns[column.name] = str(column.type)
-
-        except Exception as e:
-            print(f"Warning: Could not inspect table {schema}.{table_name}: {e}")
-
-        return columns
-
-    def reflect_multiple_tables(
-        self, tables: list[tuple[str, str]]
-    ) -> Dict[str, Dict[str, str]]:
-        """
-        Reflect multiple tables in bulk for performance optimization.
-
-        Args:
-            tables: List of (schema, table_name) tuples
-
-        Returns:
-            Dict mapping "schema.table_name" -> {column_name: column_type}
-        """
-        results = {}
-
-        for schema, table_name in tables:
-            cache_key = f"{schema}.{table_name}"
-            results[cache_key] = self.get_table_columns(schema, table_name)
-
-        return results
-
-    def close(self):
-        """Close the database connection and clear cache."""
-        self._table_cache.clear()
-        self.engine.dispose()
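
For comparison, the reflection approach the removed module relied on looks like this in isolation (URI and table name are placeholders). This path still runs for non-Redshift databases in 0.1.0a10, while Redshift now goes through the LIMIT 0 cursor-description path shown above:

from sqlalchemy import create_engine, MetaData, Table

engine = create_engine("postgresql://user:pass@localhost:5432/analytics")  # placeholder
metadata = MetaData()

# autoload_with issues the dialect's introspection queries and populates the Table object
orders = Table("orders", metadata, autoload_with=engine, schema="public")
for column in orders.columns:
    print(column.name, str(column.type))

engine.dispose()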