dbt-cube-sync 0.1.0a8__tar.gz → 0.1.0a10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dbt-cube-sync might be problematic. See the registry's advisory page for more details.
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/PKG-INFO +1 -1
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/superset.py +9 -3
- dbt_cube_sync-0.1.0a10/dbt_cube_sync/core/db_inspector.py +149 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/pyproject.toml +1 -1
- dbt_cube_sync-0.1.0a8/dbt_cube_sync/core/db_inspector.py +0 -97
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/README.md +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/cli.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/config.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/base.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/powerbi.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/connectors/tableau.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/cube_generator.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/dbt_parser.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/models.py +0 -0
- {dbt_cube_sync-0.1.0a8 → dbt_cube_sync-0.1.0a10}/dbt_cube_sync/core/state_manager.py +0 -0
|
@@ -60,13 +60,19 @@ class SupersetConnector(BaseConnector):
|
|
|
60
60
|
"""Authenticate and get JWT token"""
|
|
61
61
|
login_url = f"{self.base_url}/api/v1/security/login"
|
|
62
62
|
payload = {
|
|
63
|
-
"username": self.config['username'],
|
|
64
63
|
"password": self.config['password'],
|
|
65
64
|
"provider": "db",
|
|
66
|
-
"refresh":
|
|
65
|
+
"refresh": "true",
|
|
66
|
+
"username": self.config['username']
|
|
67
67
|
}
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
response = self.session.post(login_url, json=payload)
|
|
70
|
+
if response.status_code == 401:
|
|
71
|
+
raise Exception(
|
|
72
|
+
f"Superset authentication failed (401). "
|
|
73
|
+
f"Check username/password and ensure provider='{payload['provider']}' is correct. "
|
|
74
|
+
f"Response: {response.text}"
|
|
75
|
+
)
|
|
70
76
|
response.raise_for_status()
|
|
71
77
|
|
|
72
78
|
data = response.json()
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database inspector - fetches column types using SQLAlchemy or direct SQL.
|
|
3
|
+
|
|
4
|
+
Uses Redshift-specific queries for Redshift databases (which don't support
|
|
5
|
+
standard PostgreSQL reflection), and SQLAlchemy reflection for other databases.
|
|
6
|
+
"""
|
|
7
|
+
from typing import Dict, Optional
|
|
8
|
+
from sqlalchemy import create_engine, MetaData, Table, text
|
|
9
|
+
from sqlalchemy.engine import Engine
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DatabaseInspector:
    """Inspects database schema to extract column type information.

    Uses a Redshift-specific ``LIMIT 0`` probe for Redshift (whose catalogs
    don't work with standard SQLAlchemy reflection) and ordinary SQLAlchemy
    reflection for every other database. Results are cached per table.
    """

    # PostgreSQL/Redshift wire-protocol type OIDs -> human-readable type names.
    # Built once at class level instead of being reconstructed on every call.
    _REDSHIFT_TYPE_MAP: Dict[int, str] = {
        16: 'boolean',
        20: 'bigint',
        21: 'smallint',
        23: 'integer',
        25: 'text',
        700: 'real',
        701: 'double precision',
        1042: 'char',
        1043: 'varchar',
        1082: 'date',
        1083: 'time',
        1114: 'timestamp',
        1184: 'timestamptz',
        1700: 'numeric',
        2950: 'uuid',
    }

    def __init__(self, sqlalchemy_uri: str):
        """
        Initialize the database inspector.

        Args:
            sqlalchemy_uri: SQLAlchemy connection URI (e.g., postgresql://user:pass@host:port/db)
        """
        # Detected from the URI scheme/host; selects the inspection strategy.
        self.is_redshift = 'redshift' in sqlalchemy_uri.lower()

        if self.is_redshift:
            # Redshift endpoints commonly negotiate SSL; 'prefer' keeps the
            # connection working whether or not SSL is enforced.
            self.engine: Engine = create_engine(
                sqlalchemy_uri,
                connect_args={'sslmode': 'prefer'}
            )
        else:
            self.engine: Engine = create_engine(sqlalchemy_uri)

        self.metadata = MetaData()
        # Cache of "schema.table" -> {column_name: column_type}.
        self._table_cache: Dict[str, Dict[str, str]] = {}

    @staticmethod
    def _quote_ident(name: str) -> str:
        """Double-quote an SQL identifier, escaping embedded double quotes.

        Per the SQL standard, a literal ``"`` inside a quoted identifier is
        written as ``""``. Without this, a schema/table name containing a
        quote would break (or inject into) the generated query.
        """
        return '"' + name.replace('"', '""') + '"'

    def _get_redshift_columns(self, schema: str, table_name: str) -> Dict[str, str]:
        """
        Get column types from Redshift using LIMIT 0 query (fastest method).

        Executes SELECT * FROM table LIMIT 0 and reads column types from the
        DB-API cursor description. This is very fast because it doesn't scan
        any data - just returns metadata.

        Args:
            schema: Database schema name
            table_name: Table name

        Returns:
            Dictionary mapping column names to data types (empty on failure)
        """
        columns: Dict[str, str] = {}

        # LIMIT 0 returns no rows but still populates cursor metadata,
        # so the cost is independent of table size.
        query = text(
            f'SELECT * FROM {self._quote_ident(schema)}.'
            f'{self._quote_ident(table_name)} LIMIT 0'
        )

        try:
            with self.engine.connect() as conn:
                result = conn.execute(query)
                # DB-API 2.0 cursor.description tuples:
                # (name, type_code, display_size, internal_size, precision, scale, null_ok)
                if result.cursor and result.cursor.description:
                    for col_desc in result.cursor.description:
                        col_name = col_desc[0]
                        type_code = col_desc[1]
                        # Map type OID to a name; unknown OIDs fall back to 'varchar'.
                        columns[col_name] = self._REDSHIFT_TYPE_MAP.get(type_code, 'varchar')

        except Exception as e:
            # Best-effort: an unreadable/missing table yields an empty mapping.
            print(f"Warning: Could not inspect Redshift table {schema}.{table_name}: {e}")

        return columns

    def get_table_columns(self, schema: str, table_name: str) -> Dict[str, str]:
        """
        Get column names and their data types for a specific table.

        Uses Redshift-specific queries for Redshift, SQLAlchemy reflection for others.
        Results (including empty results from failed inspections) are cached.

        Args:
            schema: Database schema name
            table_name: Table name

        Returns:
            Dictionary mapping column names to data types
        """
        cache_key = f"{schema}.{table_name}"

        # Check cache first
        if cache_key in self._table_cache:
            return self._table_cache[cache_key]

        columns: Dict[str, str] = {}

        if self.is_redshift:
            # Use Redshift-specific query
            columns = self._get_redshift_columns(schema, table_name)
        else:
            # Use standard SQLAlchemy reflection
            try:
                table = Table(
                    table_name,
                    self.metadata,
                    autoload_with=self.engine,
                    schema=schema
                )
                for column in table.columns:
                    columns[column.name] = str(column.type)

            except Exception as e:
                print(f"Warning: Could not inspect table {schema}.{table_name}: {e}")

        # NOTE: failures are cached as {} too, so a transient error is not
        # retried within this inspector's lifetime.
        self._table_cache[cache_key] = columns
        return columns

    def reflect_multiple_tables(
        self, tables: list[tuple[str, str]]
    ) -> Dict[str, Dict[str, str]]:
        """
        Reflect multiple tables in bulk for performance optimization.

        Args:
            tables: List of (schema, table_name) tuples

        Returns:
            Dict mapping "schema.table_name" -> {column_name: column_type}
        """
        results: Dict[str, Dict[str, str]] = {}

        for schema, table_name in tables:
            cache_key = f"{schema}.{table_name}"
            results[cache_key] = self.get_table_columns(schema, table_name)

        return results

    def close(self):
        """Close the database connection and clear cache."""
        self._table_cache.clear()
        self.engine.dispose()
@@ -1,97 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Database inspector - fetches column types using SQLAlchemy MetaData reflection.
|
|
3
|
-
|
|
4
|
-
Uses SQLAlchemy's Table(..., autoload_with=engine) for portable, database-agnostic
|
|
5
|
-
column type extraction. This approach works consistently across PostgreSQL, MySQL,
|
|
6
|
-
Snowflake, BigQuery, Redshift, and other databases.
|
|
7
|
-
"""
|
|
8
|
-
from typing import Dict, Optional
|
|
9
|
-
from sqlalchemy import create_engine, MetaData, Table
|
|
10
|
-
from sqlalchemy.engine import Engine
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class DatabaseInspector:
|
|
14
|
-
"""Inspects database schema to extract column type information using SQLAlchemy reflection."""
|
|
15
|
-
|
|
16
|
-
def __init__(self, sqlalchemy_uri: str):
|
|
17
|
-
"""
|
|
18
|
-
Initialize the database inspector.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
sqlalchemy_uri: SQLAlchemy connection URI (e.g., postgresql://user:pass@host:port/db)
|
|
22
|
-
"""
|
|
23
|
-
# Add connect_args for Redshift compatibility
|
|
24
|
-
if 'redshift' in sqlalchemy_uri.lower():
|
|
25
|
-
self.engine: Engine = create_engine(
|
|
26
|
-
sqlalchemy_uri,
|
|
27
|
-
connect_args={'sslmode': 'prefer'}
|
|
28
|
-
)
|
|
29
|
-
else:
|
|
30
|
-
self.engine: Engine = create_engine(sqlalchemy_uri)
|
|
31
|
-
|
|
32
|
-
self.metadata = MetaData()
|
|
33
|
-
self._table_cache: Dict[str, Table] = {}
|
|
34
|
-
|
|
35
|
-
def get_table_columns(self, schema: str, table_name: str) -> Dict[str, str]:
|
|
36
|
-
"""
|
|
37
|
-
Get column names and their data types for a specific table.
|
|
38
|
-
|
|
39
|
-
Uses SQLAlchemy MetaData reflection for portable column extraction.
|
|
40
|
-
|
|
41
|
-
Args:
|
|
42
|
-
schema: Database schema name
|
|
43
|
-
table_name: Table name
|
|
44
|
-
|
|
45
|
-
Returns:
|
|
46
|
-
Dictionary mapping column names to data types
|
|
47
|
-
"""
|
|
48
|
-
columns = {}
|
|
49
|
-
cache_key = f"{schema}.{table_name}"
|
|
50
|
-
|
|
51
|
-
try:
|
|
52
|
-
# Check cache first
|
|
53
|
-
if cache_key in self._table_cache:
|
|
54
|
-
table = self._table_cache[cache_key]
|
|
55
|
-
else:
|
|
56
|
-
# Reflect table using SQLAlchemy MetaData
|
|
57
|
-
table = Table(
|
|
58
|
-
table_name,
|
|
59
|
-
self.metadata,
|
|
60
|
-
autoload_with=self.engine,
|
|
61
|
-
schema=schema
|
|
62
|
-
)
|
|
63
|
-
self._table_cache[cache_key] = table
|
|
64
|
-
|
|
65
|
-
# Extract column types
|
|
66
|
-
for column in table.columns:
|
|
67
|
-
columns[column.name] = str(column.type)
|
|
68
|
-
|
|
69
|
-
except Exception as e:
|
|
70
|
-
print(f"Warning: Could not inspect table {schema}.{table_name}: {e}")
|
|
71
|
-
|
|
72
|
-
return columns
|
|
73
|
-
|
|
74
|
-
def reflect_multiple_tables(
|
|
75
|
-
self, tables: list[tuple[str, str]]
|
|
76
|
-
) -> Dict[str, Dict[str, str]]:
|
|
77
|
-
"""
|
|
78
|
-
Reflect multiple tables in bulk for performance optimization.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
tables: List of (schema, table_name) tuples
|
|
82
|
-
|
|
83
|
-
Returns:
|
|
84
|
-
Dict mapping "schema.table_name" -> {column_name: column_type}
|
|
85
|
-
"""
|
|
86
|
-
results = {}
|
|
87
|
-
|
|
88
|
-
for schema, table_name in tables:
|
|
89
|
-
cache_key = f"{schema}.{table_name}"
|
|
90
|
-
results[cache_key] = self.get_table_columns(schema, table_name)
|
|
91
|
-
|
|
92
|
-
return results
|
|
93
|
-
|
|
94
|
-
def close(self):
|
|
95
|
-
"""Close the database connection and clear cache."""
|
|
96
|
-
self._table_cache.clear()
|
|
97
|
-
self.engine.dispose()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|