dbt-js 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +365 -325
- package/bin/dbt-js.js +4 -4
- package/package.json +53 -53
- package/src/api.js +271 -257
- package/src/batches.js +129 -120
- package/src/cli.js +178 -175
- package/src/config.js +139 -68
- package/src/dag.js +67 -67
- package/src/db.js +194 -182
- package/src/materialize.js +197 -197
- package/src/project.js +139 -107
- package/src/render.js +65 -62
- package/src/seed.js +68 -68
- package/src/tests.js +49 -49
package/bin/dbt-js.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { main } from '../src/cli.js';
|
|
3
|
-
|
|
4
|
-
main();
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { main } from '../src/cli.js';
|
|
3
|
+
|
|
4
|
+
main();
|
package/package.json
CHANGED
|
@@ -1,53 +1,53 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "dbt-js",
|
|
3
|
-
"version": "0.1.1",
|
|
4
|
-
"description": "A minimalist, dbt-like SQL transformation tool for Node.js — compile SQL models, build a dependency DAG, and materialize them on any supported SQL database.",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "./src/api.js",
|
|
7
|
-
"exports": {
|
|
8
|
-
".": "./src/api.js",
|
|
9
|
-
"./package.json": "./package.json"
|
|
10
|
-
},
|
|
11
|
-
"bin": {
|
|
12
|
-
"dbt-js": "bin/dbt-js.js"
|
|
13
|
-
},
|
|
14
|
-
"license": "MIT",
|
|
15
|
-
"author": "Shahzad Hamza",
|
|
16
|
-
"repository": {
|
|
17
|
-
"type": "git",
|
|
18
|
-
"url": "git+https://github.com/shahzadhamza/dbt-js.git"
|
|
19
|
-
},
|
|
20
|
-
"homepage": "https://github.com/shahzadhamza/dbt-js.git#readme",
|
|
21
|
-
"bugs": {
|
|
22
|
-
"url": "https://github.com/shahzadhamza/dbt-js/issues"
|
|
23
|
-
},
|
|
24
|
-
"keywords": [
|
|
25
|
-
"dbt",
|
|
26
|
-
"sql",
|
|
27
|
-
"postgres",
|
|
28
|
-
"mysql",
|
|
29
|
-
"sqlite",
|
|
30
|
-
"duckdb",
|
|
31
|
-
"etl",
|
|
32
|
-
"elt",
|
|
33
|
-
"data-transformation",
|
|
34
|
-
"dag",
|
|
35
|
-
"analytics"
|
|
36
|
-
],
|
|
37
|
-
"files": [
|
|
38
|
-
"bin",
|
|
39
|
-
"src",
|
|
40
|
-
"README.md",
|
|
41
|
-
"LICENSE"
|
|
42
|
-
],
|
|
43
|
-
"engines": {
|
|
44
|
-
"node": ">=20"
|
|
45
|
-
},
|
|
46
|
-
"dependencies": {
|
|
47
|
-
"@duckdb/node-api": "1.5.3-r.3",
|
|
48
|
-
"better-sqlite3": "~12.9.0",
|
|
49
|
-
"csv-parse": "^5.6.0",
|
|
50
|
-
"mysql2": "^3.11.0",
|
|
51
|
-
"pg": "^8.13.0"
|
|
52
|
-
}
|
|
53
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "dbt-js",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "A minimalist, dbt-like SQL transformation tool for Node.js — compile SQL models, build a dependency DAG, and materialize them on any supported SQL database.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./src/api.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/api.js",
|
|
9
|
+
"./package.json": "./package.json"
|
|
10
|
+
},
|
|
11
|
+
"bin": {
|
|
12
|
+
"dbt-js": "bin/dbt-js.js"
|
|
13
|
+
},
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"author": "Shahzad Hamza",
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "git+https://github.com/shahzadhamza/dbt-js.git"
|
|
19
|
+
},
|
|
20
|
+
"homepage": "https://github.com/shahzadhamza/dbt-js.git#readme",
|
|
21
|
+
"bugs": {
|
|
22
|
+
"url": "https://github.com/shahzadhamza/dbt-js/issues"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"dbt",
|
|
26
|
+
"sql",
|
|
27
|
+
"postgres",
|
|
28
|
+
"mysql",
|
|
29
|
+
"sqlite",
|
|
30
|
+
"duckdb",
|
|
31
|
+
"etl",
|
|
32
|
+
"elt",
|
|
33
|
+
"data-transformation",
|
|
34
|
+
"dag",
|
|
35
|
+
"analytics"
|
|
36
|
+
],
|
|
37
|
+
"files": [
|
|
38
|
+
"bin",
|
|
39
|
+
"src",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"engines": {
|
|
44
|
+
"node": ">=20"
|
|
45
|
+
},
|
|
46
|
+
"dependencies": {
|
|
47
|
+
"@duckdb/node-api": "1.5.3-r.3",
|
|
48
|
+
"better-sqlite3": "~12.9.0",
|
|
49
|
+
"csv-parse": "^5.6.0",
|
|
50
|
+
"mysql2": "^3.11.0",
|
|
51
|
+
"pg": "^8.13.0"
|
|
52
|
+
}
|
|
53
|
+
}
|
package/src/api.js
CHANGED
|
@@ -1,257 +1,271 @@
|
|
|
1
|
-
// Programmatic API — what `import 'dbt-js'` gives you. Every function takes a
|
|
2
|
-
// projectDir (default process.cwd()), opens its own connection, and closes it
|
|
3
|
-
// before returning. Loading/config errors throw; model and test failures are
|
|
4
|
-
// returned as ok: false. Nothing here writes to the console or exits the
|
|
5
|
-
// process — pass onEvent to observe progress.
|
|
6
|
-
//
|
|
7
|
-
// Instead of project files you can pass the project inline:
|
|
8
|
-
// config — the contents of dbtjs.config.json as an object (file not read)
|
|
9
|
-
// models — a { name: rawSql } map replacing models/*.sql (same format,
|
|
10
|
-
// /* config: {...} */ comment included)
|
|
11
|
-
// projectDir then only anchors relative duckdb paths and locates seeds/.
|
|
12
|
-
|
|
13
|
-
import { loadConfig, validateConfig } from './config.js';
|
|
14
|
-
import { loadProject } from './project.js';
|
|
15
|
-
import { buildDag, expandSelection } from './dag.js';
|
|
16
|
-
import { connect, ensureSchema } from './db.js';
|
|
17
|
-
import { runModel } from './materialize.js';
|
|
18
|
-
import { buildTests, runTest } from './tests.js';
|
|
19
|
-
import { loadSeed } from './seed.js';
|
|
20
|
-
import { render } from './render.js';
|
|
21
|
-
import { computeBatches } from './batches.js';
|
|
22
|
-
|
|
23
|
-
function loadAll({ projectDir = process.cwd(), vars, config, models: inlineModels } = {}) {
|
|
24
|
-
const cfg = config
|
|
25
|
-
? validateConfig(structuredClone(config), projectDir) // clone: validation mutates (defaults, env interp, path resolve)
|
|
26
|
-
: loadConfig(projectDir);
|
|
27
|
-
if (vars) cfg.vars = { ...cfg.vars, ...vars };
|
|
28
|
-
const { models, seeds } = loadProject(projectDir, { models: inlineModels });
|
|
29
|
-
const { nodes, order } = buildDag(models, seeds);
|
|
30
|
-
return { cfg, models, seeds, nodes, order, projectDir };
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
async function withClient(cfg, projectDir, fn) {
|
|
34
|
-
const client = await connect(cfg.connection, { projectDir, schema: cfg.schema });
|
|
35
|
-
try {
|
|
36
|
-
return await fn(client);
|
|
37
|
-
} finally {
|
|
38
|
-
await client.end();
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
// → { ok, models: [{ name, status: 'ok'|'fail'|'skip', materialized, action?,
|
|
43
|
-
// rowCount?, batchCount?, failedBatches?, durationMs?, error? }] }
|
|
44
|
-
export async function run(opts = {}) {
|
|
45
|
-
const { select, fullRefresh = false, eventTimeStart, eventTimeEnd, onEvent } = opts;
|
|
46
|
-
if (eventTimeEnd && !eventTimeStart) throw new Error('eventTimeEnd requires eventTimeStart');
|
|
47
|
-
const { cfg, nodes, order, projectDir } = loadAll(opts);
|
|
48
|
-
const selected = expandSelection(select, nodes, order).filter(
|
|
49
|
-
(n) => nodes.get(n).type === 'model'
|
|
50
|
-
);
|
|
51
|
-
if (!selected.length) throw new Error('Nothing to run for this selection');
|
|
52
|
-
|
|
53
|
-
return withClient(cfg, projectDir, async (client) => {
|
|
54
|
-
await ensureSchema(client, cfg.schema);
|
|
55
|
-
const models = [];
|
|
56
|
-
const bad = new Set(); // failed or skipped — either blocks downstream
|
|
57
|
-
for (const [i, name] of selected.entries()) {
|
|
58
|
-
const node = nodes.get(name);
|
|
59
|
-
const base = {
|
|
60
|
-
type: 'model',
|
|
61
|
-
name,
|
|
62
|
-
materialized: node.config.materialized,
|
|
63
|
-
index: i + 1,
|
|
64
|
-
total: selected.length,
|
|
65
|
-
};
|
|
66
|
-
if (node.deps.some((d) => bad.has(d))) {
|
|
67
|
-
const rec = { ...base, status: 'skip' };
|
|
68
|
-
bad.add(name);
|
|
69
|
-
models.push(rec);
|
|
70
|
-
onEvent?.(rec);
|
|
71
|
-
continue;
|
|
72
|
-
}
|
|
73
|
-
const start = Date.now();
|
|
74
|
-
let rec;
|
|
75
|
-
try {
|
|
76
|
-
const result = await runModel(client, node, cfg, {
|
|
77
|
-
fullRefresh,
|
|
78
|
-
vars: cfg.vars,
|
|
79
|
-
eventTimeStart,
|
|
80
|
-
eventTimeEnd,
|
|
81
|
-
onBatch: (b) => onEvent?.({ type: 'batch', model: name, ...b }),
|
|
82
|
-
});
|
|
83
|
-
const failedBatches = result.failedBatches ?? [];
|
|
84
|
-
rec = {
|
|
85
|
-
...base,
|
|
86
|
-
status: failedBatches.length ? 'fail' : 'ok',
|
|
87
|
-
action: result.action,
|
|
88
|
-
rowCount: result.rowCount,
|
|
89
|
-
batchCount: result.batchCount,
|
|
90
|
-
failedBatches,
|
|
91
|
-
durationMs: Date.now() - start,
|
|
92
|
-
};
|
|
93
|
-
if (rec.status === 'fail') {
|
|
94
|
-
rec.error = `${failedBatches.length} of ${result.batchCount} batches failed`;
|
|
95
|
-
bad.add(name);
|
|
96
|
-
}
|
|
97
|
-
} catch (e) {
|
|
98
|
-
rec = { ...base, status: 'fail', error: e.message, durationMs: Date.now() - start };
|
|
99
|
-
bad.add(name);
|
|
100
|
-
}
|
|
101
|
-
models.push(rec);
|
|
102
|
-
onEvent?.(rec);
|
|
103
|
-
}
|
|
104
|
-
return { ok: models.every((m) => m.status === 'ok'), models };
|
|
105
|
-
});
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// → { ok, tests: [{ id, model, pass, violations, sample }] }
|
|
109
|
-
export async function test(opts = {}) {
|
|
110
|
-
const { select, onEvent } = opts;
|
|
111
|
-
const { cfg, nodes, order, projectDir } = loadAll(opts);
|
|
112
|
-
const selected = new Set(expandSelection(select, nodes, order));
|
|
113
|
-
const models = order
|
|
114
|
-
.filter((n) => selected.has(n) && nodes.get(n).type === 'model')
|
|
115
|
-
.map((n) => nodes.get(n));
|
|
116
|
-
const tests = buildTests(models, cfg.schema);
|
|
117
|
-
if (!tests.length) return { ok: true, tests: [] };
|
|
118
|
-
|
|
119
|
-
return withClient(cfg, projectDir, async (client) => {
|
|
120
|
-
const results = [];
|
|
121
|
-
for (const t of tests) {
|
|
122
|
-
const r = await runTest(client, t);
|
|
123
|
-
const rec = {
|
|
124
|
-
type: 'test',
|
|
125
|
-
id: t.id,
|
|
126
|
-
model: t.model,
|
|
127
|
-
pass: r.pass,
|
|
128
|
-
violations: r.violations ?? 0,
|
|
129
|
-
sample: r.sample ?? [],
|
|
130
|
-
};
|
|
131
|
-
results.push(rec);
|
|
132
|
-
onEvent?.(rec);
|
|
133
|
-
}
|
|
134
|
-
return { ok: results.every((r) => r.pass), tests: results };
|
|
135
|
-
});
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
// → { ok: true, seeds: [{ name, rowCount, durationMs }] } — a failing seed throws
|
|
139
|
-
export async function seed(opts = {}) {
|
|
140
|
-
const { select, onEvent } = opts;
|
|
141
|
-
const { cfg, seeds, projectDir } = loadAll(opts);
|
|
142
|
-
const wanted = select ? new Set(String(select).split(',').map((s) => s.trim())) : null;
|
|
143
|
-
const selected = wanted ? seeds.filter((s) => wanted.has(s.name)) : seeds;
|
|
144
|
-
if (!selected.length) throw new Error('No seeds match this selection');
|
|
145
|
-
|
|
146
|
-
return withClient(cfg, projectDir, async (client) => {
|
|
147
|
-
await ensureSchema(client, cfg.schema);
|
|
148
|
-
const results = [];
|
|
149
|
-
for (const [i, s] of selected.entries()) {
|
|
150
|
-
const start = Date.now();
|
|
151
|
-
const { rowCount } = await loadSeed(client, s, cfg);
|
|
152
|
-
const rec = {
|
|
153
|
-
type: 'seed',
|
|
154
|
-
name: s.name,
|
|
155
|
-
index: i + 1,
|
|
156
|
-
total: selected.length,
|
|
157
|
-
rowCount,
|
|
158
|
-
durationMs: Date.now() - start,
|
|
159
|
-
};
|
|
160
|
-
results.push(rec);
|
|
161
|
-
onEvent?.(rec);
|
|
162
|
-
}
|
|
163
|
-
return { ok: true, seeds: results };
|
|
164
|
-
});
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
// → [{ name, materialized, sql }] — no DB connection needed
|
|
168
|
-
export async function compile(opts = {}) {
|
|
169
|
-
const { select } = opts;
|
|
170
|
-
const { cfg, nodes, order } = loadAll(opts);
|
|
171
|
-
const selected = expandSelection(select, nodes, order).filter(
|
|
172
|
-
(n) => nodes.get(n).type === 'model'
|
|
173
|
-
);
|
|
174
|
-
return selected.map((name) => {
|
|
175
|
-
const node = nodes.get(name);
|
|
176
|
-
let batchCtx = {};
|
|
177
|
-
if (node.config.strategy === 'microbatch') {
|
|
178
|
-
// show the current normal-run window as one span, so the output is runnable SQL
|
|
179
|
-
const b = computeBatches({
|
|
180
|
-
begin: node.config.begin,
|
|
181
|
-
batchSize: node.config.batch_size,
|
|
182
|
-
lookback: node.config.lookback,
|
|
183
|
-
firstBuild: false,
|
|
184
|
-
timezone: node.config.timezone,
|
|
185
|
-
});
|
|
186
|
-
batchCtx = { batchStart: b[0].start, batchEnd: b[b.length - 1].end };
|
|
187
|
-
}
|
|
188
|
-
const ctx = {
|
|
189
|
-
name,
|
|
190
|
-
schema: cfg.schema,
|
|
191
|
-
vars: cfg.vars,
|
|
192
|
-
isIncremental: false, // compile is offline; run decides this against the live DB
|
|
193
|
-
sources: cfg.sources,
|
|
194
|
-
timezone: node.config.timezone,
|
|
195
|
-
};
|
|
196
|
-
const { sql } = render(node.rawSql, { ...ctx, ...batchCtx });
|
|
197
|
-
// hooks render without batch context — batch_start/batch_end are body-only
|
|
198
|
-
const preHookSql = node.config.pre_hook.map((h) => render(h, ctx).sql);
|
|
199
|
-
const postHookSql = node.config.post_hook.map((h) => render(h, ctx).sql);
|
|
200
|
-
return { name, materialized: node.config.materialized, sql, preHookSql, postHookSql };
|
|
201
|
-
});
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// → [{ name, kind, deps }] in execution order — no DB connection needed
|
|
205
|
-
export async function ls(opts = {}) {
|
|
206
|
-
const { nodes, order } = loadAll(opts);
|
|
207
|
-
return order.map((name) => {
|
|
208
|
-
const node = nodes.get(name);
|
|
209
|
-
return {
|
|
210
|
-
name,
|
|
211
|
-
kind: node.type === 'seed' ? 'seed' : node.config.materialized,
|
|
212
|
-
deps: node.deps,
|
|
213
|
-
};
|
|
214
|
-
});
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
// → { rows, rowCount } — one arbitrary statement against the project's warehouse.
|
|
218
|
-
// Bypasses loadAll so it works on projects with zero models. readOnly (default)
|
|
219
|
-
// opens DuckDB with access_mode READ_ONLY / sets the Postgres session read-only.
|
|
220
|
-
export async function query(opts = {}) {
|
|
221
|
-
const { sql, params, readOnly = true, projectDir = process.cwd(), config } = opts;
|
|
222
|
-
if (typeof sql !== 'string' || !sql.trim()) throw new Error('sql is required');
|
|
223
|
-
const cfg = config
|
|
224
|
-
? validateConfig(structuredClone(config), projectDir)
|
|
225
|
-
: loadConfig(projectDir);
|
|
226
|
-
const client = await connect(cfg.connection, { projectDir, readOnly, schema: cfg.schema });
|
|
227
|
-
try {
|
|
228
|
-
return await client.query(sql, params);
|
|
229
|
-
} finally {
|
|
230
|
-
await client.end();
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// → { schema, modelCount, seedCount, target, database, version }
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
1
|
+
// Programmatic API — what `import 'dbt-js'` gives you. Every function takes a
|
|
2
|
+
// projectDir (default process.cwd()), opens its own connection, and closes it
|
|
3
|
+
// before returning. Loading/config errors throw; model and test failures are
|
|
4
|
+
// returned as ok: false. Nothing here writes to the console or exits the
|
|
5
|
+
// process — pass onEvent to observe progress.
|
|
6
|
+
//
|
|
7
|
+
// Instead of project files you can pass the project inline:
|
|
8
|
+
// config — the contents of dbtjs.config.json as an object (file not read)
|
|
9
|
+
// models — a { name: rawSql } map replacing models/*.sql (same format,
|
|
10
|
+
// /* config: {...} */ comment included)
|
|
11
|
+
// projectDir then only anchors relative duckdb paths and locates seeds/.
|
|
12
|
+
|
|
13
|
+
import { loadConfig, validateConfig } from './config.js';
|
|
14
|
+
import { loadProject } from './project.js';
|
|
15
|
+
import { buildDag, expandSelection } from './dag.js';
|
|
16
|
+
import { connect, ensureSchema } from './db.js';
|
|
17
|
+
import { runModel } from './materialize.js';
|
|
18
|
+
import { buildTests, runTest } from './tests.js';
|
|
19
|
+
import { loadSeed } from './seed.js';
|
|
20
|
+
import { render } from './render.js';
|
|
21
|
+
import { computeBatches } from './batches.js';
|
|
22
|
+
|
|
23
|
+
// Shared loader behind the public entry points: resolves the config, layers
// caller-supplied vars over the configured ones, loads models/seeds and builds
// the dependency graph.
//   projectDir — project root, defaults to process.cwd()
//   vars       — optional overrides merged on top of cfg.vars (caller wins)
//   config     — inline config object; when given, loadConfig is not called
//   models     — inline models, handed to loadProject as-is
// → { cfg, models, seeds, nodes, order, projectDir }
function loadAll({ projectDir = process.cwd(), vars, config, models: inlineModels } = {}) {
  let cfg;
  if (config) {
    // validation mutates its input (defaults, env interp, path resolve), so work on a deep copy
    const privateCopy = structuredClone(config);
    cfg = validateConfig(privateCopy, projectDir);
  } else {
    cfg = loadConfig(projectDir);
  }
  if (vars) {
    cfg.vars = { ...cfg.vars, ...vars };
  }

  const { models: loadedModels, seeds: loadedSeeds } = loadProject(projectDir, {
    models: inlineModels,
  });
  const { nodes, order } = buildDag(loadedModels, loadedSeeds);
  return { cfg, models: loadedModels, seeds: loadedSeeds, nodes, order, projectDir };
}
|
|
32
|
+
|
|
33
|
+
// Opens a connection for cfg, hands it to fn, and always ends the connection
// afterwards — whether fn resolved or threw. Resolves to whatever fn resolves to.
async function withClient(cfg, projectDir, fn) {
  const { connection, schema } = cfg;
  const connectOptions = { projectDir, schema };
  const client = await connect(connection, connectOptions);
  try {
    const outcome = await fn(client);
    return outcome;
  } finally {
    await client.end();
  }
}
|
|
41
|
+
|
|
42
|
+
// Runs the selected models one after another on a single connection.
// → { ok, models: [{ name, status: 'ok'|'fail'|'skip', materialized, action?,
//     rowCount?, batchCount?, failedBatches?, durationMs?, error? }] }
// A model is skipped when any of its deps failed or was skipped earlier in the
// same run. ok is true only when every record has status 'ok'. Each record is
// also passed to onEvent; batch progress arrives there as { type: 'batch', ... }.
// Throws when eventTimeEnd is given without eventTimeStart, or when the
// selection contains no models.
export async function run(opts = {}) {
  const { select, fullRefresh = false, eventTimeStart, eventTimeEnd, onEvent } = opts;
  if (eventTimeEnd && !eventTimeStart) {
    throw new Error('eventTimeEnd requires eventTimeStart');
  }

  const { cfg, nodes, order, projectDir } = loadAll(opts);
  const isModel = (nodeName) => nodes.get(nodeName).type === 'model';
  const selected = expandSelection(select, nodes, order).filter(isModel);
  if (selected.length === 0) {
    throw new Error('Nothing to run for this selection');
  }

  return withClient(cfg, projectDir, async (client) => {
    await ensureSchema(client, cfg.schema);

    const records = [];
    const blocked = new Set(); // failed or skipped — either one blocks downstream models
    const total = selected.length;

    // store the record first, then notify the observer
    const publish = (record) => {
      records.push(record);
      onEvent?.(record);
    };

    for (let position = 0; position < total; position += 1) {
      const name = selected[position];
      const node = nodes.get(name);
      const base = {
        type: 'model',
        name,
        materialized: node.config.materialized,
        index: position + 1,
        total,
      };

      const upstreamBlocked = node.deps.some((dep) => blocked.has(dep));
      if (upstreamBlocked) {
        blocked.add(name);
        publish({ ...base, status: 'skip' });
        continue;
      }

      const startedAt = Date.now();
      let record;
      try {
        const result = await runModel(client, node, cfg, {
          fullRefresh,
          vars: cfg.vars,
          eventTimeStart,
          eventTimeEnd,
          onBatch: (batch) => onEvent?.({ type: 'batch', model: name, ...batch }),
        });
        const failedBatches = result.failedBatches ?? [];
        const hasFailures = failedBatches.length > 0;
        record = {
          ...base,
          status: hasFailures ? 'fail' : 'ok',
          action: result.action,
          rowCount: result.rowCount,
          batchCount: result.batchCount,
          failedBatches,
          durationMs: Date.now() - startedAt,
        };
        if (hasFailures) {
          record.error = `${failedBatches.length} of ${result.batchCount} batches failed`;
          blocked.add(name);
        }
      } catch (err) {
        // a thrown model error becomes a 'fail' record instead of aborting the run
        record = {
          ...base,
          status: 'fail',
          error: err.message,
          durationMs: Date.now() - startedAt,
        };
        blocked.add(name);
      }
      publish(record);
    }

    const ok = records.every((entry) => entry.status === 'ok');
    return { ok, models: records };
  });
}
|
|
107
|
+
|
|
108
|
+
// Builds the tests for the selected models and runs them sequentially.
// → { ok, tests: [{ id, model, pass, violations, sample }] }
// Models are visited in DAG order. When no tests are built, no connection is
// opened and { ok: true, tests: [] } is returned. Each result record is also
// passed to onEvent.
export async function test(opts = {}) {
  const { select, onEvent } = opts;
  const { cfg, nodes, order, projectDir } = loadAll(opts);

  const chosen = new Set(expandSelection(select, nodes, order));
  const modelNodes = [];
  for (const nodeName of order) {
    if (!chosen.has(nodeName)) continue;
    const node = nodes.get(nodeName);
    if (node.type === 'model') modelNodes.push(node);
  }

  const tests = buildTests(modelNodes, cfg.schema);
  if (!tests.length) {
    return { ok: true, tests: [] };
  }

  return withClient(cfg, projectDir, async (client) => {
    const outcomes = [];
    for (const spec of tests) {
      const result = await runTest(client, spec);
      const record = {
        type: 'test',
        id: spec.id,
        model: spec.model,
        pass: result.pass,
        violations: result.violations ?? 0,
        sample: result.sample ?? [],
      };
      outcomes.push(record);
      onEvent?.(record);
    }
    const ok = outcomes.every(({ pass }) => pass);
    return { ok, tests: outcomes };
  });
}
|
|
137
|
+
|
|
138
|
+
// Loads the selected seeds (all of them when select is empty) one at a time.
// → { ok: true, seeds: [{ name, rowCount, durationMs }] }
// select is a comma-separated list of seed names. A failing seed is not caught
// here — the error propagates to the caller. Throws when no seed matches.
// Each record is also passed to onEvent.
export async function seed(opts = {}) {
  const { select, onEvent } = opts;
  const { cfg, seeds, projectDir } = loadAll(opts);

  let selected = seeds;
  if (select) {
    const wanted = new Set();
    for (const part of String(select).split(',')) {
      wanted.add(part.trim());
    }
    selected = seeds.filter(({ name }) => wanted.has(name));
  }
  if (!selected.length) {
    throw new Error('No seeds match this selection');
  }

  return withClient(cfg, projectDir, async (client) => {
    await ensureSchema(client, cfg.schema);

    const loaded = [];
    let position = 0;
    for (const entry of selected) {
      position += 1;
      const startedAt = Date.now();
      const { rowCount } = await loadSeed(client, entry, cfg);
      const record = {
        type: 'seed',
        name: entry.name,
        index: position,
        total: selected.length,
        rowCount,
        durationMs: Date.now() - startedAt,
      };
      loaded.push(record);
      onEvent?.(record);
    }
    return { ok: true, seeds: loaded };
  });
}
|
|
166
|
+
|
|
167
|
+
// Renders the selected models (and their hooks) to SQL without opening a
// database connection.
// → [{ name, materialized, sql, preHookSql, postHookSql }]
// isIncremental is always false here: compile is offline, only run can decide
// it against the live DB.
export async function compile(opts = {}) {
  const { select } = opts;
  const { cfg, nodes, order } = loadAll(opts);

  const modelNames = expandSelection(select, nodes, order).filter(
    (candidate) => nodes.get(candidate).type === 'model'
  );

  const compiled = [];
  for (const name of modelNames) {
    const node = nodes.get(name);
    const { config: modelConfig } = node;

    let batchWindow = {};
    if (modelConfig.strategy === 'microbatch') {
      // collapse the current normal-run window into one span so the output is runnable SQL
      const batches = computeBatches({
        begin: modelConfig.begin,
        batchSize: modelConfig.batch_size,
        lookback: modelConfig.lookback,
        firstBuild: false,
        timezone: modelConfig.timezone,
      });
      const firstBatch = batches[0];
      const lastBatch = batches[batches.length - 1];
      batchWindow = { batchStart: firstBatch.start, batchEnd: lastBatch.end };
    }

    const baseCtx = {
      name,
      schema: cfg.schema,
      vars: cfg.vars,
      isIncremental: false,
      sources: cfg.sources,
      timezone: modelConfig.timezone,
    };

    const body = render(node.rawSql, { ...baseCtx, ...batchWindow });
    // hooks get the base context only — batch_start/batch_end are body-only
    const renderHook = (hook) => render(hook, baseCtx).sql;
    const preHookSql = modelConfig.pre_hook.map((hook) => renderHook(hook));
    const postHookSql = modelConfig.post_hook.map((hook) => renderHook(hook));

    compiled.push({
      name,
      materialized: modelConfig.materialized,
      sql: body.sql,
      preHookSql,
      postHookSql,
    });
  }
  return compiled;
}
|
|
203
|
+
|
|
204
|
+
// Lists every node of the project in execution order, without opening a
// database connection.
// → [{ name, kind, deps }] — kind is 'seed' for seeds, otherwise the model's
//   configured materialization.
export async function ls(opts = {}) {
  const { nodes, order } = loadAll(opts);
  const listing = [];
  for (const name of order) {
    const node = nodes.get(name);
    let kind;
    if (node.type === 'seed') {
      kind = 'seed';
    } else {
      kind = node.config.materialized;
    }
    listing.push({ name, kind, deps: node.deps });
  }
  return listing;
}
|
|
216
|
+
|
|
217
|
+
// Executes one arbitrary statement against the project's warehouse and
// resolves to the client's result.
// → { rows, rowCount }
// Does not go through loadAll, so it also works on projects with zero models.
// readOnly (default true) is forwarded to connect: DuckDB is opened with
// access_mode READ_ONLY / the Postgres session is set read-only.
// Throws 'sql is required' when sql is not a non-blank string.
export async function query(opts = {}) {
  const { sql, params, readOnly = true, projectDir = process.cwd(), config } = opts;

  const hasStatement = typeof sql === 'string' && sql.trim() !== '';
  if (!hasStatement) {
    throw new Error('sql is required');
  }

  let cfg;
  if (config) {
    // validate a deep copy so the caller's config object is left untouched
    cfg = validateConfig(structuredClone(config), projectDir);
  } else {
    cfg = loadConfig(projectDir);
  }

  const connectOptions = { projectDir, readOnly, schema: cfg.schema };
  const client = await connect(cfg.connection, connectOptions);
  try {
    const result = await client.query(sql, params);
    return result;
  } finally {
    await client.end();
  }
}
|
|
233
|
+
|
|
234
|
+
// Connectivity check: loads the project, connects, and asks the server for
// its database name and version.
// → { schema, modelCount, seedCount, target, database, version, attached }
// `attached` lists DuckDB ATTACH catalogs and stays empty on other backends.
// `database` falls back to connection.path when the version query returns no db.
export async function debug(opts = {}) {
  const { cfg, models, seeds, projectDir } = loadAll(opts);
  const { connection } = cfg;

  // file-backed engines are described by their path, servers by host:port/database and user
  let target;
  if (connection.type === 'duckdb' || connection.type === 'sqlite') {
    target = `${connection.type} ${connection.path}`;
  } else {
    target = `${connection.host}:${connection.port}/${connection.database} as ${connection.user}`;
  }

  return withClient(cfg, projectDir, async (client) => {
    let versionSql;
    if (connection.type === 'mysql') {
      versionSql = 'SELECT DATABASE() AS db, VERSION() AS version';
    } else if (connection.type === 'sqlite') {
      versionSql = 'SELECT sqlite_version() AS version';
    } else {
      versionSql = 'SELECT current_database() AS db, version() AS version';
    }
    const { rows } = await client.query(versionSql);
    const info = rows[0];

    let attached = [];
    if (connection.type === 'duckdb') {
      // proves the ATTACHes actually ran, not just that config parsed
      const attachedSql = `SELECT database_name AS alias, path, type, readonly FROM duckdb_databases()
WHERE database_name NOT IN ('system', 'temp') AND NOT internal AND path IS NOT NULL
ORDER BY database_name`;
      const catalogs = await client.query(attachedSql);
      // drop the main database — its path equals connection.path
      attached = catalogs.rows.filter((row) => row.path !== connection.path);
    }

    return {
      schema: cfg.schema,
      modelCount: models.length,
      seedCount: seeds.length,
      target,
      database: info.db ?? connection.path,
      version: info.version,
      attached,
    };
  });
}
|