dbt-js 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +365 -325
- package/bin/dbt-js.js +4 -4
- package/package.json +53 -53
- package/src/api.js +271 -257
- package/src/batches.js +129 -120
- package/src/cli.js +178 -175
- package/src/config.js +139 -68
- package/src/dag.js +67 -67
- package/src/db.js +194 -182
- package/src/materialize.js +197 -197
- package/src/project.js +139 -107
- package/src/render.js +65 -62
- package/src/seed.js +68 -68
- package/src/tests.js +49 -49
package/src/materialize.js
CHANGED
|
@@ -1,197 +1,197 @@
|
|
|
1
|
-
import { quoteIdent, rel, relationKind, withTransaction } from './db.js';
|
|
2
|
-
import { render } from './render.js';
|
|
3
|
-
import { computeBatches } from './batches.js';
|
|
4
|
-
|
|
5
|
-
export async function runModel(client, node, projectCfg, opts = {}) {
|
|
6
|
-
const { fullRefresh = false, vars } = opts;
|
|
7
|
-
const { name, config, rawSql } = node;
|
|
8
|
-
const schema = projectCfg.schema;
|
|
9
|
-
const target = rel(schema, name);
|
|
10
|
-
const kind = await relationKind(client, schema, name);
|
|
11
|
-
const isIncremental = config.materialized === 'incremental' && !fullRefresh && kind === 'r';
|
|
12
|
-
|
|
13
|
-
const ctx = {
|
|
14
|
-
name,
|
|
15
|
-
schema,
|
|
16
|
-
vars: vars ?? projectCfg.vars,
|
|
17
|
-
isIncremental,
|
|
18
|
-
sources: projectCfg.sources,
|
|
19
|
-
timezone: config.timezone,
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
// Hooks run outside the materialization transaction, one statement each, so
|
|
23
|
-
// they can use statements Postgres forbids inside a txn (VACUUM, CREATE
|
|
24
|
-
// INDEX CONCURRENTLY). Microbatch runs them once per model, not per batch.
|
|
25
|
-
await runHooks(client, config.pre_hook, 'pre_hook', ctx);
|
|
26
|
-
|
|
27
|
-
if (config.materialized === 'incremental' && config.strategy === 'microbatch') {
|
|
28
|
-
return runMicrobatch(client, node, projectCfg, { ...opts, kind, hookCtx: ctx });
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
const { sql } = render(rawSql, ctx);
|
|
32
|
-
const result = await materialize(client, { name, config, sql, target, kind, isIncremental });
|
|
33
|
-
await runHooks(client, config.post_hook, 'post_hook', ctx);
|
|
34
|
-
return result;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
async function materialize(client, { name, config, sql, target, kind, isIncremental }) {
|
|
38
|
-
const sqlite = client.dialect === 'sqlite';
|
|
39
|
-
const cascade = sqlite ? '' : ' CASCADE'; // CASCADE is a SQLite syntax error
|
|
40
|
-
|
|
41
|
-
if (config.materialized === 'view') {
|
|
42
|
-
if (sqlite) {
|
|
43
|
-
// no CREATE OR REPLACE VIEW; SQLite DDL is transactional, so the wrap
|
|
44
|
-
// closes the window where the view would be absent
|
|
45
|
-
return withTransaction(client, async () => {
|
|
46
|
-
if (kind && kind !== 'v') await client.query(`DROP TABLE IF EXISTS ${target}`);
|
|
47
|
-
await client.query(`DROP VIEW IF EXISTS ${target}`);
|
|
48
|
-
await client.query(`CREATE VIEW ${target} AS\n${sql}`);
|
|
49
|
-
return { action: 'view' };
|
|
50
|
-
});
|
|
51
|
-
}
|
|
52
|
-
if (kind && kind !== 'v') await client.query(`DROP TABLE IF EXISTS ${target} CASCADE`);
|
|
53
|
-
await client.query(`CREATE OR REPLACE VIEW ${target} AS\n${sql}`);
|
|
54
|
-
return { action: 'view' };
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
if (!isIncremental) {
|
|
58
|
-
// table, or incremental first run / --full-refresh: transactional rebuild
|
|
59
|
-
return withTransaction(client, async () => {
|
|
60
|
-
if (kind === 'v') await client.query(`DROP VIEW IF EXISTS ${target}${cascade}`);
|
|
61
|
-
else await client.query(`DROP TABLE IF EXISTS ${target}${cascade}`);
|
|
62
|
-
const res = await client.query(`CREATE TABLE ${target} AS\n${sql}`);
|
|
63
|
-
const action = config.materialized === 'table' ? 'table' : 'incremental (full build)';
|
|
64
|
-
return { action, rowCount: res.rowCount };
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
if (config.strategy === 'append') {
|
|
69
|
-
const res = await client.query(`INSERT INTO ${target}\n${sql}`);
|
|
70
|
-
return { action: 'incremental append', rowCount: res.rowCount };
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// delete+insert: compute the SELECT once into a temp table, swap within one txn
|
|
74
|
-
const keys = Array.isArray(config.unique_key) ? config.unique_key : [config.unique_key];
|
|
75
|
-
const temp = quoteIdent(`${name}__dbtjs_incr`);
|
|
76
|
-
const mysql = client.dialect === 'mysql';
|
|
77
|
-
return withTransaction(client, async () => {
|
|
78
|
-
// explicit DROP rather than ON COMMIT DROP — DuckDB silently ignores the latter
|
|
79
|
-
await client.query(`CREATE TEMPORARY TABLE ${temp} AS\n${sql}`);
|
|
80
|
-
const match = keys.map((k) => `t.${quoteIdent(k)} = i.${quoteIdent(k)}`).join(' AND ');
|
|
81
|
-
// MySQL has no Postgres-style DELETE ... USING ... WHERE; its multi-table
|
|
82
|
-
// form references the temp table once per statement, satisfying MySQL's
|
|
83
|
-
// single-reference rule for TEMPORARY tables. SQLite has neither form —
|
|
84
|
-
// correlated EXISTS against the aliased target instead.
|
|
85
|
-
await client.query(
|
|
86
|
-
sqlite
|
|
87
|
-
? `DELETE FROM ${target} AS t WHERE EXISTS (SELECT 1 FROM ${temp} i WHERE ${match})`
|
|
88
|
-
: mysql
|
|
89
|
-
? `DELETE t FROM ${target} t JOIN ${temp} i ON ${match}`
|
|
90
|
-
: `DELETE FROM ${target} t USING ${temp} i WHERE ${match}`
|
|
91
|
-
);
|
|
92
|
-
const res = await client.query(`INSERT INTO ${target} SELECT * FROM ${temp}`);
|
|
93
|
-
// TEMPORARY keyword on MySQL: plain DROP TABLE implicitly commits,
|
|
94
|
-
// which would break this transaction's atomicity
|
|
95
|
-
await client.query(`DROP ${mysql ? 'TEMPORARY ' : ''}TABLE ${temp}`);
|
|
96
|
-
return { action: 'incremental delete+insert', rowCount: res.rowCount };
|
|
97
|
-
});
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
async function runHooks(client, hooks, which, ctx) {
|
|
101
|
-
for (const [i, hook] of hooks.entries()) {
|
|
102
|
-
const { sql } = render(hook, ctx);
|
|
103
|
-
try {
|
|
104
|
-
await client.query(sql);
|
|
105
|
-
} catch (e) {
|
|
106
|
-
throw new Error(`${which}[${i}]: ${e.message}`);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// Microbatch: split the event-time range into aligned windows; each batch is its
|
|
112
|
-
// own transaction that replaces the target rows inside its window. A failed
|
|
113
|
-
// batch is recorded and the rest keep running (retry via --event-time-start/-end).
|
|
114
|
-
async function runMicrobatch(client, node, projectCfg, opts) {
|
|
115
|
-
const { fullRefresh = false, vars, eventTimeStart, eventTimeEnd, onBatch, kind, hookCtx } = opts;
|
|
116
|
-
const { name, config, rawSql } = node;
|
|
117
|
-
const schema = projectCfg.schema;
|
|
118
|
-
const target = rel(schema, name);
|
|
119
|
-
const firstBuild = fullRefresh || kind !== 'r';
|
|
120
|
-
|
|
121
|
-
const batches = computeBatches({
|
|
122
|
-
begin: config.begin,
|
|
123
|
-
batchSize: config.batch_size,
|
|
124
|
-
lookback: config.lookback,
|
|
125
|
-
start: eventTimeStart,
|
|
126
|
-
end: eventTimeEnd,
|
|
127
|
-
firstBuild,
|
|
128
|
-
timezone: config.timezone,
|
|
129
|
-
});
|
|
130
|
-
|
|
131
|
-
const et = quoteIdent(config.event_time);
|
|
132
|
-
const sqlite = client.dialect === 'sqlite';
|
|
133
|
-
const cascade = sqlite ? '' : ' CASCADE';
|
|
134
|
-
const failed = [];
|
|
135
|
-
let total = 0;
|
|
136
|
-
let countUnknown = false;
|
|
137
|
-
let created = !firstBuild;
|
|
138
|
-
|
|
139
|
-
for (const b of batches) {
|
|
140
|
-
const { sql } = render(rawSql, {
|
|
141
|
-
name,
|
|
142
|
-
schema,
|
|
143
|
-
vars: vars ?? projectCfg.vars,
|
|
144
|
-
isIncremental: !firstBuild,
|
|
145
|
-
sources: projectCfg.sources,
|
|
146
|
-
batchStart: b.start,
|
|
147
|
-
batchEnd: b.end,
|
|
148
|
-
timezone: config.timezone,
|
|
149
|
-
});
|
|
150
|
-
try {
|
|
151
|
-
let rowCount;
|
|
152
|
-
if (!created) {
|
|
153
|
-
rowCount = await withTransaction(client, async () => {
|
|
154
|
-
if (kind === 'v') await client.query(`DROP VIEW IF EXISTS ${target}${cascade}`);
|
|
155
|
-
else await client.query(`DROP TABLE IF EXISTS ${target}${cascade}`);
|
|
156
|
-
const res = await client.query(`CREATE TABLE ${target} AS\n${sql}`);
|
|
157
|
-
return res.rowCount;
|
|
158
|
-
});
|
|
159
|
-
created = true;
|
|
160
|
-
} else {
|
|
161
|
-
rowCount = await withTransaction(client, async () => {
|
|
162
|
-
// SQLite compares timestamps as text, and a day-granularity event_time
|
|
163
|
-
// ('YYYY-MM-DD') sorts BELOW the batch boundary ('YYYY-MM-DD HH:MM:SS'
|
|
164
|
-
// from computeBatches) — datetime() normalizes both shapes
|
|
165
|
-
await client.query(
|
|
166
|
-
sqlite
|
|
167
|
-
? `DELETE FROM ${target} WHERE datetime(${et}) >= datetime('${b.start}') AND datetime(${et}) < datetime('${b.end}')`
|
|
168
|
-
: `DELETE FROM ${target} WHERE ${et} >= '${b.start}' AND ${et} < '${b.end}'`
|
|
169
|
-
);
|
|
170
|
-
const res = await client.query(`INSERT INTO ${target}\n${sql}`);
|
|
171
|
-
return res.rowCount;
|
|
172
|
-
});
|
|
173
|
-
}
|
|
174
|
-
if (rowCount == null) countUnknown = true;
|
|
175
|
-
else total += rowCount;
|
|
176
|
-
onBatch?.({ ...b, ok: true, rowCount });
|
|
177
|
-
} catch (e) {
|
|
178
|
-
onBatch?.({ ...b, ok: false, message: e.message });
|
|
179
|
-
if (!created) {
|
|
180
|
-
// the target doesn't exist yet, so no later batch can insert into it
|
|
181
|
-
throw new Error(`first batch (${b.start}) failed: ${e.message}`);
|
|
182
|
-
}
|
|
183
|
-
failed.push({ ...b, message: e.message });
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// skipped on partial failure: the model is already 'fail', don't stamp a
|
|
188
|
-
// success hook (grant, index, audit row) onto an incomplete build
|
|
189
|
-
if (!failed.length) await runHooks(client, config.post_hook, 'post_hook', hookCtx);
|
|
190
|
-
|
|
191
|
-
return {
|
|
192
|
-
action: 'incremental microbatch',
|
|
193
|
-
rowCount: countUnknown ? undefined : total,
|
|
194
|
-
batchCount: batches.length,
|
|
195
|
-
failedBatches: failed,
|
|
196
|
-
};
|
|
197
|
-
}
|
|
1
|
+
import { quoteIdent, rel, relationKind, withTransaction } from './db.js';
|
|
2
|
+
import { render } from './render.js';
|
|
3
|
+
import { computeBatches } from './batches.js';
|
|
4
|
+
|
|
5
|
+
// Build a single model end to end: look up what currently exists under the
// model's name, run pre-hooks, then either hand off to the microbatch runner
// or render the SQL once and materialize it, followed by post-hooks.
//
//   client     - database client; passed through to relationKind, the hooks
//                and the materialization helpers
//   node       - { name, config, rawSql } for the model
//   projectCfg - project settings; schema, vars and sources are read here
//   opts       - optional { fullRefresh, vars, ... }; forwarded as-is (plus
//                kind and hookCtx) to the microbatch runner
//
// Resolves to whatever materialize() / runMicrobatch() resolve to.
export async function runModel(client, node, projectCfg, opts = {}) {
  const { fullRefresh: forceRebuild = false, vars: varOverrides } = opts;
  const { name, config, rawSql } = node;
  const { schema } = projectCfg;
  const target = rel(schema, name);
  const kind = await relationKind(client, schema, name);

  // Incremental logic only applies when the relation already exists as kind
  // 'r' (presumably a plain table - confirm against relationKind in db.js)
  // and the caller did not ask for a full refresh.
  const incrementalModel = config.materialized === 'incremental';
  const isIncremental = incrementalModel && !forceRebuild && kind === 'r';

  // Template context shared by the model SQL and by both hook phases.
  const ctx = {
    name,
    schema,
    vars: varOverrides ?? projectCfg.vars,
    isIncremental,
    sources: projectCfg.sources,
    timezone: config.timezone,
  };

  // Hooks run outside the materialization transaction, one statement each, so
  // they can use statements Postgres forbids inside a txn (VACUUM, CREATE
  // INDEX CONCURRENTLY). Microbatch runs them once per model, not per batch.
  await runHooks(client, config.pre_hook, 'pre_hook', ctx);

  if (incrementalModel && config.strategy === 'microbatch') {
    // The microbatch runner renders per batch and fires the post-hooks itself.
    return runMicrobatch(client, node, projectCfg, { ...opts, kind, hookCtx: ctx });
  }

  const rendered = render(rawSql, ctx);
  const outcome = await materialize(client, {
    name,
    config,
    sql: rendered.sql,
    target,
    kind,
    isIncremental,
  });
  await runHooks(client, config.post_hook, 'post_hook', ctx);
  return outcome;
}
|
|
36
|
+
|
|
37
|
+
// Apply one rendered model to the database and report what was done.
//
//   client - database client exposing `dialect` and an async `query(sql)`
//   name          - model name; used to derive the temp-table name
//   config        - model config (materialized, strategy, unique_key are read)
//   sql           - the rendered SELECT for the model
//   target        - the already-quoted relation name to build
//   kind          - kind of the relation currently occupying `target`
//                   ('v' is treated as a view, anything else as a table;
//                   falsy presumably means "nothing exists" - confirm in db.js)
//   isIncremental - true when an incremental update (not a rebuild) is wanted
//
// Resolves to { action } for views and { action, rowCount } otherwise, where
// rowCount is whatever the driver reports for the CREATE/INSERT statement.
async function materialize(client, { name, config, sql, target, kind, isIncremental }) {
  const sqlite = client.dialect === 'sqlite';
  const cascade = sqlite ? '' : ' CASCADE'; // CASCADE is a SQLite syntax error

  if (config.materialized === 'view') {
    if (sqlite) {
      // no CREATE OR REPLACE VIEW; SQLite DDL is transactional, so the wrap
      // closes the window where the view would be absent
      return withTransaction(client, async () => {
        if (kind && kind !== 'v') await client.query(`DROP TABLE IF EXISTS ${target}`);
        await client.query(`DROP VIEW IF EXISTS ${target}`);
        await client.query(`CREATE VIEW ${target} AS\n${sql}`);
        return { action: 'view' };
      });
    }
    // A table left over from an earlier materialization must go first:
    // CREATE OR REPLACE VIEW only replaces views.
    if (kind && kind !== 'v') await client.query(`DROP TABLE IF EXISTS ${target}${cascade}`);
    await client.query(`CREATE OR REPLACE VIEW ${target} AS\n${sql}`);
    return { action: 'view' };
  }

  if (!isIncremental) {
    // table, or incremental first run / --full-refresh: transactional rebuild
    return withTransaction(client, async () => {
      if (kind === 'v') await client.query(`DROP VIEW IF EXISTS ${target}${cascade}`);
      else await client.query(`DROP TABLE IF EXISTS ${target}${cascade}`);
      const res = await client.query(`CREATE TABLE ${target} AS\n${sql}`);
      const action = config.materialized === 'table' ? 'table' : 'incremental (full build)';
      return { action, rowCount: res.rowCount };
    });
  }

  if (config.strategy === 'append') {
    const res = await client.query(`INSERT INTO ${target}\n${sql}`);
    return { action: 'incremental append', rowCount: res.rowCount };
  }

  // delete+insert: compute the SELECT once into a temp table, swap within one txn
  const keys = Array.isArray(config.unique_key) ? config.unique_key : [config.unique_key];
  const temp = quoteIdent(`${name}__dbtjs_incr`);
  const mysql = client.dialect === 'mysql';
  return withTransaction(client, async () => {
    // MySQL does not roll back CREATE TEMPORARY TABLE, so a previous attempt
    // that failed part-way leaves the temp table alive for the rest of the
    // session and the CREATE below would fail with "already exists". Clear it
    // first. DROP TEMPORARY only ever touches temp tables and does not
    // implicitly commit. Other dialects are left alone: an unqualified
    // DROP TABLE there could resolve to a permanent table of the same name.
    if (mysql) await client.query(`DROP TEMPORARY TABLE IF EXISTS ${temp}`);
    // explicit DROP rather than ON COMMIT DROP — DuckDB silently ignores the latter
    await client.query(`CREATE TEMPORARY TABLE ${temp} AS\n${sql}`);
    const match = keys.map((k) => `t.${quoteIdent(k)} = i.${quoteIdent(k)}`).join(' AND ');
    // MySQL has no Postgres-style DELETE ... USING ... WHERE; its multi-table
    // form references the temp table once per statement, satisfying MySQL's
    // single-reference rule for TEMPORARY tables. SQLite has neither form —
    // correlated EXISTS against the aliased target instead.
    await client.query(
      sqlite
        ? `DELETE FROM ${target} AS t WHERE EXISTS (SELECT 1 FROM ${temp} i WHERE ${match})`
        : mysql
          ? `DELETE t FROM ${target} t JOIN ${temp} i ON ${match}`
          : `DELETE FROM ${target} t USING ${temp} i WHERE ${match}`
    );
    const res = await client.query(`INSERT INTO ${target} SELECT * FROM ${temp}`);
    // TEMPORARY keyword on MySQL: plain DROP TABLE implicitly commits,
    // which would break this transaction's atomicity
    await client.query(`DROP ${mysql ? 'TEMPORARY ' : ''}TABLE ${temp}`);
    return { action: 'incremental delete+insert', rowCount: res.rowCount };
  });
}
|
|
99
|
+
|
|
100
|
+
// Render and execute a list of hook statements, strictly in order, each as
// its own client.query() call (this function opens no transaction).
//
//   client - database client with an async `query(sql)`
//   hooks  - array of SQL templates; a single string is treated as a
//            one-element list and null/undefined as "no hooks", matching the
//            shapes the model config accepts
//   which  - label used in error messages ('pre_hook' / 'post_hook')
//   ctx    - template context handed to render()
//
// Throws an Error whose message is `<which>[<index>]: <driver message>` on the
// first failing statement; the driver's original error is attached as `cause`
// so its stack and error code are not lost. Later hooks are not run.
async function runHooks(client, hooks, which, ctx) {
  let list;
  if (hooks == null) list = [];
  else if (Array.isArray(hooks)) list = hooks;
  else list = [hooks];

  for (const [i, hook] of list.entries()) {
    const { sql } = render(hook, ctx);
    try {
      await client.query(sql);
    } catch (e) {
      throw new Error(`${which}[${i}]: ${e.message}`, { cause: e });
    }
  }
}
|
|
110
|
+
|
|
111
|
+
// Microbatch: split the event-time range into aligned windows; each batch is its
// own transaction that replaces the target rows inside its window. A failed
// batch is recorded and the rest keep running (retry via --event-time-start/-end).
//
//   client     - database client exposing `dialect` and an async `query(sql)`
//   node       - { name, config, rawSql } for the model
//   projectCfg - project settings; schema, vars and sources are read here
//   opts       - { fullRefresh, vars, eventTimeStart, eventTimeEnd, onBatch,
//                  kind, hookCtx }; onBatch (optional) is called once per
//                  batch with the batch plus { ok, rowCount | message };
//                  hookCtx is the template context used for the post-hooks
//
// Resolves to { action, rowCount, batchCount, failedBatches }. rowCount is
// undefined when the driver reported no count for at least one batch.
// Throws if the batch that has to create the target fails, since no later
// batch could insert into a table that does not exist; the underlying error
// is attached as `cause`.
async function runMicrobatch(client, node, projectCfg, opts) {
  const { fullRefresh = false, vars, eventTimeStart, eventTimeEnd, onBatch, kind, hookCtx } = opts;
  const { name, config, rawSql } = node;
  const schema = projectCfg.schema;
  const target = rel(schema, name);
  // Anything other than an existing kind-'r' relation is rebuilt from scratch.
  const firstBuild = fullRefresh || kind !== 'r';

  const batches = computeBatches({
    begin: config.begin,
    batchSize: config.batch_size,
    lookback: config.lookback,
    start: eventTimeStart,
    end: eventTimeEnd,
    firstBuild,
    timezone: config.timezone,
  });

  const et = quoteIdent(config.event_time);
  const sqlite = client.dialect === 'sqlite';
  const cascade = sqlite ? '' : ' CASCADE';
  const failed = [];
  let total = 0;
  let countUnknown = false;
  // On a first build the first successful batch creates the table; every
  // batch after that takes the delete-window-then-insert path.
  let created = !firstBuild;

  // Everything in the template context except the batch window is the same
  // for every batch, so build it once.
  const baseCtx = {
    name,
    schema,
    vars: vars ?? projectCfg.vars,
    isIncremental: !firstBuild,
    sources: projectCfg.sources,
    timezone: config.timezone,
  };

  for (const b of batches) {
    const { sql } = render(rawSql, { ...baseCtx, batchStart: b.start, batchEnd: b.end });
    try {
      let rowCount;
      if (!created) {
        rowCount = await withTransaction(client, async () => {
          if (kind === 'v') await client.query(`DROP VIEW IF EXISTS ${target}${cascade}`);
          else await client.query(`DROP TABLE IF EXISTS ${target}${cascade}`);
          const res = await client.query(`CREATE TABLE ${target} AS\n${sql}`);
          return res.rowCount;
        });
        created = true;
      } else {
        rowCount = await withTransaction(client, async () => {
          // SQLite compares timestamps as text, and a day-granularity event_time
          // ('YYYY-MM-DD') sorts BELOW the batch boundary ('YYYY-MM-DD HH:MM:SS'
          // from computeBatches) — datetime() normalizes both shapes
          await client.query(
            sqlite
              ? `DELETE FROM ${target} WHERE datetime(${et}) >= datetime('${b.start}') AND datetime(${et}) < datetime('${b.end}')`
              : `DELETE FROM ${target} WHERE ${et} >= '${b.start}' AND ${et} < '${b.end}'`
          );
          const res = await client.query(`INSERT INTO ${target}\n${sql}`);
          return res.rowCount;
        });
      }
      if (rowCount == null) countUnknown = true;
      else total += rowCount;
      onBatch?.({ ...b, ok: true, rowCount });
    } catch (e) {
      onBatch?.({ ...b, ok: false, message: e.message });
      if (!created) {
        // the target doesn't exist yet, so no later batch can insert into it;
        // keep the driver error reachable via `cause` (stack, error code)
        throw new Error(`first batch (${b.start}) failed: ${e.message}`, { cause: e });
      }
      failed.push({ ...b, message: e.message });
    }
  }

  // skipped on partial failure: the model is already 'fail', don't stamp a
  // success hook (grant, index, audit row) onto an incomplete build
  if (!failed.length) await runHooks(client, config.post_hook, 'post_hook', hookCtx);

  return {
    action: 'incremental microbatch',
    rowCount: countUnknown ? undefined : total,
    batchCount: batches.length,
    failedBatches: failed,
  };
}
|
package/src/project.js
CHANGED
|
@@ -1,107 +1,139 @@
|
|
|
1
|
-
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
2
|
-
import { basename, join } from 'node:path';
|
|
3
|
-
|
|
4
|
-
const CONFIG_RE = /\/\*\s*config:\s*([\s\S]*?)\*\//;
|
|
5
|
-
const MATERIALIZATIONS = new Set(['view', 'table', 'incremental']);
|
|
6
|
-
const STRATEGIES = new Set(['append', 'delete+insert', 'microbatch']);
|
|
7
|
-
const BATCH_SIZES = new Set(['hour', 'day', 'month', 'year']);
|
|
8
|
-
|
|
9
|
-
// inlineModels: optional { name: rawSql } map (same format as a model file,
|
|
10
|
-
// config comment included) — when given, models/ is not scanned.
|
|
11
|
-
export function loadProject(cwd = process.cwd(), { models: inlineModels } = {}) {
|
|
12
|
-
const models = [];
|
|
13
|
-
if (inlineModels) {
|
|
14
|
-
for (const [name, rawSql] of Object.entries(inlineModels)) {
|
|
15
|
-
models.push({ name, rawSql, config: parseModelConfig(name, rawSql) });
|
|
16
|
-
}
|
|
17
|
-
} else {
|
|
18
|
-
const modelsDir = join(cwd, 'models');
|
|
19
|
-
if (existsSync(modelsDir)) {
|
|
20
|
-
for (const file of readdirSync(modelsDir).filter((f) => f.endsWith('.sql')).sort()) {
|
|
21
|
-
const path = join(modelsDir, file);
|
|
22
|
-
const rawSql = readFileSync(path, 'utf8');
|
|
23
|
-
const name = basename(file, '.sql');
|
|
24
|
-
models.push({ name, path, rawSql, config: parseModelConfig(name, rawSql) });
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
const seeds = [];
|
|
30
|
-
const seedsDir = join(cwd, 'seeds');
|
|
31
|
-
if (existsSync(seedsDir)) {
|
|
32
|
-
for (const file of readdirSync(seedsDir).filter((f) => f.endsWith('.csv')).sort()) {
|
|
33
|
-
seeds.push({ name: basename(file, '.csv'), path: join(seedsDir, file) });
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
const seen = new Set();
|
|
38
|
-
for (const { name } of [...models, ...seeds]) {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if (config.
|
|
85
|
-
throw new Error(`Model '${name}':
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
config.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
1
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
2
|
+
import { basename, join } from 'node:path';
|
|
3
|
+
|
|
4
|
+
const CONFIG_RE = /\/\*\s*config:\s*([\s\S]*?)\*\//;
|
|
5
|
+
const MATERIALIZATIONS = new Set(['view', 'table', 'incremental']);
|
|
6
|
+
const STRATEGIES = new Set(['append', 'delete+insert', 'microbatch']);
|
|
7
|
+
const BATCH_SIZES = new Set(['hour', 'day', 'month', 'year']);
|
|
8
|
+
|
|
9
|
+
// Collect the project's models and seeds and check their names.
//
// Models come from the optional inline map ({ name: rawSql }, same text a
// model file would hold, config comment included) or, when no map is given,
// from <cwd>/models/*.sql. Seeds always come from <cwd>/seeds/*.csv. A missing
// directory simply contributes nothing. Files are taken in sorted order.
//
// Returns { models, seeds }. Throws when a name is not purely word
// characters, when two nodes share a name, or when nothing was found at all.
export function loadProject(cwd = process.cwd(), { models: inlineModels } = {}) {
  // Sorted file names in `dir` ending with `ext`; empty when `dir` is absent.
  const filesWithExt = (dir, ext) =>
    existsSync(dir) ? readdirSync(dir).filter((entry) => entry.endsWith(ext)).sort() : [];

  let models;
  if (inlineModels) {
    models = Object.entries(inlineModels).map(([name, rawSql]) => ({
      name,
      rawSql,
      config: parseModelConfig(name, rawSql),
    }));
  } else {
    const modelsDir = join(cwd, 'models');
    models = filesWithExt(modelsDir, '.sql').map((file) => {
      const path = join(modelsDir, file);
      const rawSql = readFileSync(path, 'utf8');
      const name = basename(file, '.sql');
      return { name, path, rawSql, config: parseModelConfig(name, rawSql) };
    });
  }

  const seedsDir = join(cwd, 'seeds');
  const seeds = filesWithExt(seedsDir, '.csv').map((file) => ({
    name: basename(file, '.csv'),
    path: join(seedsDir, file),
  }));

  const takenNames = new Set();
  for (const { name } of models.concat(seeds)) {
    // ref()/source() in render.js only match \w+, so a name with other
    // characters (e.g. "my-model") loads but is unreferenceable — reject it
    // here with a clear message instead of a misleading template error later.
    const wordOnly = /^\w+$/.test(name);
    if (!wordOnly) {
      throw new Error(
        `Invalid node name '${name}' — model and seed names must be word characters only ` +
          `([A-Za-z0-9_]) so they can be used in ref()/source()`
      );
    }
    if (takenNames.has(name)) {
      throw new Error(`Duplicate node name '${name}' across models/ and seeds/`);
    }
    takenNames.add(name);
  }

  if (models.length === 0 && seeds.length === 0) {
    throw new Error(`No models/*.sql or seeds/*.csv found in ${cwd} (and no inline models given)`);
  }
  return { models, seeds };
}
|
|
56
|
+
|
|
57
|
+
// Extract the JSON object from a model's `config:` block comment, fill in
// defaults and validate it, so that a bad setting fails at load time with a
// message naming the model rather than later inside generated SQL.
//
//   name   - model name, used only in error messages
//   rawSql - full model text; the first `config:` block comment (if any) is
//            parsed as JSON
//
// Returns the config object with these defaults applied: materialized 'view',
// timezone 'UTC', pre_hook/post_hook as arrays, and for incremental models
// strategy 'append' (plus lookback 1 for microbatch).
// Throws Error on invalid JSON or any invalid setting.
function parseModelConfig(name, rawSql) {
  const match = rawSql.match(CONFIG_RE);
  let config = {};
  if (match) {
    try {
      config = JSON.parse(match[1]);
    } catch (e) {
      throw new Error(`Invalid JSON in config comment of model '${name}': ${e.message}`, { cause: e });
    }
    // JSON.parse happily returns numbers, strings, null or arrays; everything
    // below assumes a plain object, so reject the rest up front instead of
    // failing with an unrelated TypeError on the first property assignment.
    if (config === null || typeof config !== 'object' || Array.isArray(config)) {
      throw new Error(`Model '${name}': config comment must be a JSON object (e.g. {"materialized": "table"})`);
    }
  }
  config.materialized ??= 'view';
  if (!MATERIALIZATIONS.has(config.materialized)) {
    throw new Error(`Model '${name}': unknown materialized '${config.materialized}' (use view|table|incremental)`);
  }
  config.timezone ??= 'UTC';
  if (typeof config.timezone !== 'string') {
    throw new Error(`Model '${name}': "timezone" must be a string (e.g. "UTC", "America/New_York")`);
  }
  try {
    // RangeError on an unknown IANA zone; 'UTC' is always valid
    new Intl.DateTimeFormat('en-US', { timeZone: config.timezone });
  } catch {
    throw new Error(`Model '${name}': unknown timezone '${config.timezone}' (use an IANA name like "America/New_York" or "UTC")`);
  }
  for (const key of ['pre_hook', 'post_hook']) {
    // Accept a single statement as shorthand for a one-element list.
    if (typeof config[key] === 'string') config[key] = [config[key]];
    config[key] ??= [];
    if (!Array.isArray(config[key]) || config[key].some((h) => typeof h !== 'string' || !h.trim())) {
      throw new Error(`Model '${name}': "${key}" must be a SQL string or array of SQL strings`);
    }
  }
  if (config.materialized === 'incremental') {
    config.strategy ??= 'append';
    if (!STRATEGIES.has(config.strategy)) {
      throw new Error(`Model '${name}': unknown strategy '${config.strategy}' (use append|delete+insert|microbatch)`);
    }
    if (config.strategy === 'delete+insert') {
      if (!config.unique_key) {
        throw new Error(`Model '${name}': strategy delete+insert requires "unique_key"`);
      }
      // The key columns become the DELETE's match predicate. An empty array
      // (truthy, so it passes the check above) would yield an empty predicate
      // and a SQL syntax error at run time; non-string entries cannot be
      // column names at all.
      const keyList = Array.isArray(config.unique_key) ? config.unique_key : [config.unique_key];
      if (!keyList.length || keyList.some((k) => typeof k !== 'string' || !k.trim())) {
        throw new Error(`Model '${name}': "unique_key" must be a column name or a non-empty array of column names`);
      }
    }
    if (config.strategy === 'microbatch') {
      if (typeof config.event_time !== 'string' || !config.event_time) {
        throw new Error(`Model '${name}': microbatch requires "event_time" (a column of this model)`);
      }
      // 'YYYY-MM-DD HH:MM:SS' is accepted by swapping the space for 'T'
      // before handing the value to Date.parse.
      if (!config.begin || Number.isNaN(Date.parse(String(config.begin).replace(' ', 'T')))) {
        throw new Error(`Model '${name}': microbatch requires "begin" (start of history, e.g. "2026-01-01")`);
      }
      if (!BATCH_SIZES.has(config.batch_size)) {
        throw new Error(`Model '${name}': microbatch requires "batch_size" (hour|day|month|year)`);
      }
      config.lookback ??= 1;
      if (!Number.isInteger(config.lookback) || config.lookback < 0) {
        throw new Error(`Model '${name}': "lookback" must be a non-negative integer`);
      }
      if (config.unique_key) {
        throw new Error(`Model '${name}': "unique_key" is not used by microbatch (batches replace by event_time window)`);
      }
    }
  }
  if (config.tests !== undefined) {
    // Validate at load so compile/run/ls surface a bad spec, not just `test`.
    // typeof null is 'object', so null needs its own check or Object.entries
    // below would throw a bare TypeError.
    if (config.tests === null || typeof config.tests !== 'object' || Array.isArray(config.tests)) {
      throw new Error(`Model '${name}': "tests" must be an object mapping column -> array of tests`);
    }
    for (const [column, specs] of Object.entries(config.tests)) {
      if (!Array.isArray(specs)) {
        throw new Error(`Model '${name}': tests for column '${column}' must be an array (e.g. ["not_null", "unique"])`);
      }
      for (const spec of specs) {
        const ok =
          spec === 'not_null' ||
          spec === 'unique' ||
          (spec && typeof spec === 'object' && Array.isArray(spec.accepted_values) && spec.accepted_values.length > 0);
        if (!ok) {
          throw new Error(
            `Model '${name}': invalid test ${JSON.stringify(spec)} on column '${column}' ` +
              `(use "not_null", "unique", or { "accepted_values": [...] } with a non-empty list)`
          );
        }
      }
    }
  }
  return config;
}
|