dbt-cube-sync 0.1.0a3__tar.gz → 0.1.0a15__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- dbt_cube_sync-0.1.0a15/PKG-INFO +364 -0
- dbt_cube_sync-0.1.0a15/README.md +343 -0
- dbt_cube_sync-0.1.0a15/dbt_cube_sync/cli.py +619 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/connectors/superset.py +96 -57
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/core/cube_generator.py +118 -15
- dbt_cube_sync-0.1.0a15/dbt_cube_sync/core/db_inspector.py +149 -0
- dbt_cube_sync-0.1.0a15/dbt_cube_sync/core/dbt_parser.py +353 -0
- dbt_cube_sync-0.1.0a15/dbt_cube_sync/core/models.py +125 -0
- dbt_cube_sync-0.1.0a15/dbt_cube_sync/core/state_manager.py +343 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/pyproject.toml +2 -1
- dbt_cube_sync-0.1.0a3/PKG-INFO +0 -230
- dbt_cube_sync-0.1.0a3/README.md +0 -210
- dbt_cube_sync-0.1.0a3/dbt_cube_sync/cli.py +0 -135
- dbt_cube_sync-0.1.0a3/dbt_cube_sync/core/dbt_parser.py +0 -178
- dbt_cube_sync-0.1.0a3/dbt_cube_sync/core/models.py +0 -66
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/config.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/connectors/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/connectors/base.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/connectors/powerbi.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/connectors/tableau.py +0 -0
- {dbt_cube_sync-0.1.0a3 → dbt_cube_sync-0.1.0a15}/dbt_cube_sync/core/__init__.py +0 -0

+++ dbt_cube_sync-0.1.0a15/PKG-INFO
@@ -0,0 +1,364 @@
Metadata-Version: 2.4
Name: dbt-cube-sync
Version: 0.1.0a15
Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
Author: Ponder
Requires-Python: >=3.9,<4.0
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Dist: click (>=8.1.7,<9.0.0)
Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
Requires-Dist: pydantic (>=2.5.0,<3.0.0)
Requires-Dist: pyyaml (>=6.0,<7.0)
Requires-Dist: requests (>=2.31.0,<3.0.0)
Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
Description-Content-Type: text/markdown

# dbt-cube-sync

A synchronization tool that builds a pipeline from dbt models to Cube.js schemas and on to BI tools (Superset, Tableau, PowerBI).

## Features

- 🔄 **dbt → Cube.js**: Auto-generate Cube.js schemas from dbt models with metrics
- 🗃️ **Flexible Data Type Source**: Get column types from the dbt catalog or directly from the database via SQLAlchemy
- 🎯 **Model Filtering**: Process specific models instead of all models
- 📊 **Cube.js → BI Tools**: Sync schemas to multiple BI platforms
- 🏗️ **Extensible Architecture**: Plugin-based connector system for easy BI tool integration
- 🐳 **Docker Support**: Containerized execution with orchestration support
- 🎯 **CLI Interface**: Simple command-line tools for automation

## Supported BI Tools

- ✅ **Apache Superset** - Full implementation
- 🚧 **Tableau** - Placeholder (coming soon)
- 🚧 **PowerBI** - Placeholder (coming soon)

## Installation

### Using Poetry (Development)

```bash
cd dbt-cube-sync
poetry install
poetry run dbt-cube-sync --help
```

### Database Drivers (for the SQLAlchemy URI feature)

To use the `--sqlalchemy-uri` option, which fetches column types directly from your database, install the appropriate database driver:

```bash
# PostgreSQL
poetry add psycopg2-binary

# MySQL
poetry add pymysql

# Snowflake
poetry add snowflake-sqlalchemy

# BigQuery
poetry add sqlalchemy-bigquery

# Redshift
poetry add sqlalchemy-redshift
```
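
For reference, fetching column types through a SQLAlchemy URI generally comes down to SQLAlchemy's inspection API. The snippet below is a minimal sketch of that approach (the URI and table name are placeholders; this is not necessarily the package's `db_inspector` implementation):

```python
from sqlalchemy import create_engine, inspect

# Placeholder URI and table name -- substitute your own.
engine = create_engine("postgresql://user:password@localhost:5432/mydb")
inspector = inspect(engine)

# get_columns() returns one dict per column, including a type object and nullability.
for column in inspector.get_columns("orders", schema="public"):
    print(column["name"], column["type"], column["nullable"])
```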

### Using Docker

```bash
docker build -t dbt-cube-sync .
docker run --rm dbt-cube-sync --help
```

## Quick Start

### 1. Generate Cube.js Schemas from dbt

**Option A: Using catalog file (traditional method)**
```bash
dbt-cube-sync dbt-to-cube \
  --manifest ./target/manifest.json \
  --catalog ./target/catalog.json \
  --output ./cube_output
```

**Option B: Using database connection (no catalog needed)**
```bash
dbt-cube-sync dbt-to-cube \
  --manifest ./target/manifest.json \
  --sqlalchemy-uri postgresql://user:password@localhost:5432/mydb \
  --output ./cube_output
```

**Option C: Filter specific models**
```bash
dbt-cube-sync dbt-to-cube \
  --manifest ./target/manifest.json \
  --sqlalchemy-uri postgresql://user:password@localhost:5432/mydb \
  --models orders,customers,products \
  --output ./cube_output
```
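
Conceptually, `--models` filtering works because dbt's `manifest.json` keys every model under `nodes`, each with a `resource_type` of `"model"` and a `name`. Selecting a subset is then a matter of filtering those entries, roughly as sketched here (paths and names are illustrative; this is not the tool's actual parser):

```python
import json

# Illustrative manifest path and model names.
with open("./target/manifest.json") as f:
    manifest = json.load(f)

wanted = {"orders", "customers", "products"}
selected = {
    unique_id: node
    for unique_id, node in manifest["nodes"].items()
    if node["resource_type"] == "model" and node["name"] in wanted
}
print(sorted(selected))  # e.g. ["model.my_project.customers", ...]
```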

### 2. Sync to BI Tool (Optional)

```bash
# Sync to Superset
dbt-cube-sync cube-to-bi superset \
  --cube-files ./cube_output \
  --url http://localhost:8088 \
  --username admin \
  --password admin \
  --cube-connection-name Cube
```

## Configuration

### Sample Configuration (`sync-config.yaml`)

```yaml
connectors:
  superset:
    type: superset
    url: http://localhost:8088
    username: admin
    password: admin
    database_name: Cube

  tableau:
    type: tableau
    url: https://your-tableau-server.com
    username: your-username
    password: your-password

  powerbi:
    type: powerbi
    # PowerBI specific configuration
```
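
Since the package depends on pyyaml and pydantic, a file like this would typically be loaded and validated along the following lines. This is a generic sketch with assumed field names, not the package's actual `config.py`:

```python
from typing import Dict, Optional

import yaml
from pydantic import BaseModel


class ConnectorConfig(BaseModel):
    # Field names mirror the sample YAML above; optional ones may be absent.
    type: str
    url: Optional[str] = None
    username: Optional[str] = None
    password: Optional[str] = None
    database_name: Optional[str] = None


class SyncConfig(BaseModel):
    connectors: Dict[str, ConnectorConfig]


with open("sync-config.yaml") as f:
    config = SyncConfig(**yaml.safe_load(f))

print(config.connectors["superset"].url)
```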

## CLI Commands

### Quick Reference

| Command | Description |
|---------|-------------|
| `sync-all` | Incremental end-to-end sync: dbt → Cube.js → Superset → RAG |
| `dbt-to-cube` | Generate Cube.js schemas from dbt models (with incremental support) |
| `cube-to-bi` | Sync Cube.js schemas to BI tools (Superset, Tableau, PowerBI) |

---

### `sync-all` (Recommended)

Runs the complete pipeline incrementally, with state tracking.

```bash
# Basic incremental sync (Cube.js only)
dbt-cube-sync sync-all -m manifest.json -c catalog.json -o ./cube_output

# Full pipeline: dbt → Cube.js → Superset
dbt-cube-sync sync-all -m manifest.json -c catalog.json -o ./cube_output \
  --superset-url http://localhost:8088 \
  --superset-username admin \
  --superset-password admin

# Full pipeline: dbt → Cube.js → Superset → RAG embeddings
dbt-cube-sync sync-all -m manifest.json -c catalog.json -o ./cube_output \
  --superset-url http://localhost:8088 \
  --superset-username admin \
  --superset-password admin \
  --rag-api-url http://localhost:8000

# Force full rebuild (ignore state)
dbt-cube-sync sync-all -m manifest.json -c catalog.json -o ./cube_output --force-full-sync
```

**Options:**

| Option | Required | Description |
|--------|----------|-------------|
| `--manifest, -m` | Yes | Path to dbt manifest.json |
| `--catalog, -c` | No* | Path to dbt catalog.json |
| `--sqlalchemy-uri, -s` | No* | Database URI for column types |
| `--output, -o` | Yes | Output directory for Cube.js files |
| `--state-path` | No | State file path (default: `.dbt-cube-sync-state.json`) |
| `--force-full-sync` | No | Force full rebuild, ignore state |
| `--superset-url` | No | Superset URL |
| `--superset-username` | No | Superset username |
| `--superset-password` | No | Superset password |
| `--cube-connection-name` | No | Cube database name in Superset (default: `Cube`) |
| `--rag-api-url` | No | RAG API URL for embedding updates |

\*Either `--catalog` or `--sqlalchemy-uri` is required.

**How Incremental Sync Works** (see the sketch after this list):

1. Reads the state file (`.dbt-cube-sync-state.json`) with model checksums
2. Compares it against the current manifest to detect changes
3. Only processes **added** or **modified** models
4. Deletes Cube.js files for **removed** models
5. Updates the state file with new checksums
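
In outline, that change detection is a comparison between the stored checksums and the content checksums dbt records for each model node in the manifest. A simplified sketch of the idea (the state layout matches the example in the State File section below; this is not the package's `state_manager.py`):

```python
import json


def detect_changes(state_path: str, manifest_path: str):
    """Classify models as added, modified, or removed by comparing checksums."""
    with open(state_path) as f:
        previous = json.load(f).get("models", {})
    with open(manifest_path) as f:
        nodes = json.load(f)["nodes"]

    # dbt stores a content checksum per model node in the manifest.
    current = {
        uid: node["checksum"]["checksum"]
        for uid, node in nodes.items()
        if node["resource_type"] == "model"
    }

    added = [uid for uid in current if uid not in previous]
    removed = [uid for uid in previous if uid not in current]
    modified = [
        uid for uid, checksum in current.items()
        if uid in previous and previous[uid]["checksum"] != checksum
    ]
    return added, modified, removed
```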

---

### `dbt-to-cube`

Generate Cube.js schema files from dbt models with incremental support.

**Options:**
- `--manifest` / `-m`: Path to dbt manifest.json file (required)
- `--catalog` / `-c`: Path to dbt catalog.json file
- `--sqlalchemy-uri` / `-s`: SQLAlchemy database URI for fetching column types
- `--models`: Comma-separated list of model names to process
- `--output` / `-o`: Output directory for Cube.js files (required)
- `--template-dir` / `-t`: Directory containing Cube.js templates (default: `./cube/templates`)
- `--state-path`: State file for incremental sync (default: `.dbt-cube-sync-state.json`)
- `--force-full-sync`: Force full regeneration, ignore cached state
- `--no-state`: Disable state tracking (legacy behavior)

**Examples:**
```bash
# Incremental sync (default)
dbt-cube-sync dbt-to-cube -m manifest.json -c catalog.json -o output/

# Force full rebuild
dbt-cube-sync dbt-to-cube -m manifest.json -c catalog.json -o output/ --force-full-sync

# Using database connection (no catalog needed)
dbt-cube-sync dbt-to-cube -m manifest.json -s postgresql://user:pass@localhost/db -o output/

# Filter specific models
dbt-cube-sync dbt-to-cube -m manifest.json -c catalog.json -o output/ --models users,orders
```

---

### `cube-to-bi`

Sync Cube.js schemas to BI tool datasets.

**Arguments:**
- `bi_tool`: BI tool type (`superset`, `tableau`, `powerbi`)

**Options:**
- `--cube-files` / `-c`: Directory containing Cube.js files (required)
- `--url` / `-u`: BI tool URL (required)
- `--username` / `-n`: BI tool username (required)
- `--password` / `-p`: BI tool password (required)
- `--cube-connection-name` / `-d`: Name of the Cube database connection in the BI tool (default: `Cube`)

**Example:**
```bash
dbt-cube-sync cube-to-bi superset -c cube_output/ -u http://localhost:8088 -n admin -p admin -d Cube
```
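
For context, dataset syncing of this kind is normally done through Superset's REST API: log in for a JWT, look up the database connection by name, then create a dataset per cube. The sketch below shows that general flow with the standard Superset endpoints; it is an assumption about the approach, not the connector's actual code, and the `"public"` schema and table name are placeholders:

```python
import requests

BASE = "http://localhost:8088/api/v1"

# 1. Authenticate and obtain a JWT access token.
token = requests.post(
    f"{BASE}/security/login",
    json={"username": "admin", "password": "admin", "provider": "db", "refresh": True},
).json()["access_token"]
headers = {"Authorization": f"Bearer {token}"}

# 2. Find the database connection named "Cube".
dbs = requests.get(f"{BASE}/database/", headers=headers).json()["result"]
cube_db_id = next(db["id"] for db in dbs if db["database_name"] == "Cube")

# 3. Register one dataset per table exposed by Cube.js.
requests.post(
    f"{BASE}/dataset/",
    headers=headers,
    json={"database": cube_db_id, "schema": "public", "table_name": "orders"},
)
```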

---

## State File

The state file (`.dbt-cube-sync-state.json`) tracks:

```json
{
  "version": "1.0",
  "last_sync_timestamp": "2024-01-15T10:30:00Z",
  "manifest_path": "/path/to/manifest.json",
  "models": {
    "model.project.users": {
      "checksum": "abc123...",
      "has_metrics": true,
      "last_generated": "2024-01-15T10:30:00Z",
      "output_file": "./cube_output/Users.js"
    }
  }
}
```

Delete this file to force a full rebuild, or use `--force-full-sync`.

## Architecture

```
dbt models (with metrics)
        ↓
dbt-cube-sync dbt-to-cube
        ↓
Cube.js schemas
        ↓
dbt-cube-sync cube-to-bi [bi_tool]
        ↓
BI Tool Datasets (Superset/Tableau/PowerBI)
```

### Project Structure

```
dbt-cube-sync/
├── dbt_cube_sync/
│   ├── cli.py                 # CLI interface
│   ├── config.py              # Configuration management
│   ├── core/
│   │   ├── dbt_parser.py      # dbt manifest parser
│   │   ├── db_inspector.py    # Database column type inspector (SQLAlchemy)
│   │   ├── cube_generator.py  # Cube.js generator
│   │   ├── state_manager.py   # Incremental sync state tracking
│   │   └── models.py          # Pydantic data models
│   └── connectors/
│       ├── base.py            # Abstract base connector
│       ├── superset.py        # Superset implementation
│       ├── tableau.py         # Tableau placeholder
│       └── powerbi.py         # PowerBI placeholder
├── Dockerfile                 # Container definition
├── pyproject.toml             # Poetry configuration
└── README.md
```

## Adding New BI Connectors

1. Create a new connector class inheriting from `BaseConnector`
2. Implement the required abstract methods
3. Register the connector using `ConnectorRegistry.register()`

Example:
```python
from .base import BaseConnector, ConnectorRegistry

class MyBIConnector(BaseConnector):
    def _validate_config(self):
        # Validation logic
        pass

    def connect(self):
        # Connection logic
        pass

    def sync_cube_schemas(self, cube_dir):
        # Sync implementation
        pass

# Register the connector
ConnectorRegistry.register('mybi', MyBIConnector)
```
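
The registry is what lets `cube-to-bi` turn the `bi_tool` argument into a connector class at runtime. If you are building a similar plugin system, a minimal dict-based registry looks roughly like this (an illustrative sketch only; the package's actual `ConnectorRegistry` may differ):

```python
from typing import Dict, Type


class ConnectorRegistry:
    """Illustrative registry mapping a BI tool name to a connector class."""

    _connectors: Dict[str, Type] = {}

    @classmethod
    def register(cls, name: str, connector_cls: Type) -> None:
        cls._connectors[name.lower()] = connector_cls

    @classmethod
    def get(cls, name: str) -> Type:
        try:
            return cls._connectors[name.lower()]
        except KeyError:
            raise ValueError(f"Unknown BI tool: {name}") from None


# With this in place, a CLI can resolve the tool dynamically, e.g.:
# connector_cls = ConnectorRegistry.get("superset")
```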

## Docker Integration

The tool is designed to work in containerized environments with proper dependency orchestration:

1. **dbt docs**: Runs `dbt build`, then serves documentation
2. **dbt-cube-sync**: Runs the sync pipeline after dbt and Cube.js are ready
3. **BI Tools**: Receive synced datasets after the sync completes

## Contributing

1. Fork the repository
2. Create a feature branch
3. Implement your changes
4. Add tests if applicable
5. Submit a pull request

## License

MIT License - see LICENSE file for details.

+++ dbt_cube_sync-0.1.0a15/README.md
@@ -0,0 +1,343 @@
(The added README.md is identical to the project description embedded in PKG-INFO above.)