dbt-js 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +365 -325
- package/bin/dbt-js.js +4 -4
- package/package.json +53 -53
- package/src/api.js +271 -257
- package/src/batches.js +129 -120
- package/src/cli.js +178 -175
- package/src/config.js +139 -68
- package/src/dag.js +67 -67
- package/src/db.js +194 -182
- package/src/materialize.js +197 -197
- package/src/project.js +139 -107
- package/src/render.js +65 -62
- package/src/seed.js +68 -68
- package/src/tests.js +49 -49
package/src/config.js
CHANGED
|
@@ -1,68 +1,139 @@
|
|
|
1
|
-
import { readFileSync } from 'node:fs';
|
|
2
|
-
import { join, resolve } from 'node:path';
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
let
|
|
13
|
-
try {
|
|
14
|
-
|
|
15
|
-
} catch
|
|
16
|
-
throw new Error(`
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
}
|
|
44
|
-
if (
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
if (
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
}
|
|
54
|
-
cfg.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { basename, extname, join, resolve } from 'node:path';
|
|
3
|
+
|
|
4
|
+
// Database kinds an entry of connection.attach may declare in its "type" field.
const ATTACH_TYPES = ['duckdb', 'sqlite', 'postgres', 'mysql'];

// Catalog names DuckDB already defines; an attachment alias must not reuse them.
const RESERVED_ALIASES = new Set(['memory', 'system', 'temp']);

// True when an attachment's "path" is a filesystem path: the type is omitted
// (or otherwise falsy), 'duckdb' or 'sqlite'. postgres/mysql attachments take
// a connection string instead.
const isFileAttach = (type) => {
  if (!type) return true;
  return type === 'duckdb' || type === 'sqlite';
};
|
|
9
|
+
|
|
10
|
+
/**
 * Loads `dbtjs.config.json` from a project directory and validates it.
 *
 * @param {string} [cwd=process.cwd()] - Directory expected to contain the config file.
 * @returns {object} The parsed config object as returned by `validateConfig`.
 * @throws {Error} If the file does not exist, cannot be read, or is not valid
 *   JSON (the underlying error is attached as `cause`), or if validation fails.
 */
export function loadConfig(cwd = process.cwd()) {
  const path = join(cwd, 'dbtjs.config.json');
  let raw;
  try {
    raw = readFileSync(path, 'utf8');
  } catch (err) {
    // Only a genuinely missing file/directory is "not found". Permission
    // problems, a directory in place of the file, etc. get their own message
    // so the user is not sent looking for a file that is actually there.
    if (err?.code === 'ENOENT' || err?.code === 'ENOTDIR') {
      throw new Error(`No dbtjs.config.json found in ${cwd}`, { cause: err });
    }
    throw new Error(`Cannot read ${path}: ${err.message}`, { cause: err });
  }
  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch (err) {
    throw new Error(`Invalid JSON in ${path}: ${err.message}`, { cause: err });
  }
  return validateConfig(cfg, cwd);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Validates a config object and normalizes it in place. Used both for configs
 * read from dbtjs.config.json and for inline `config` objects passed to the api.
 *
 * Mutates and returns `cfg`:
 *  - `connection.type` defaults to 'postgres'; mysql `port` defaults to 3306
 *  - `${VAR}` references in string values of `connection` and of each
 *    `connection.attach` entry are replaced from the environment
 *  - duckdb/sqlite `connection.path` and file-based attachment paths are
 *    resolved against `cwd` (':memory:' is left untouched)
 *  - `vars`, `sources` and `seeds` default to `{}`
 *
 * @param {object} cfg - Parsed configuration.
 * @param {string} [cwd=process.cwd()] - Directory that relative paths are resolved against.
 * @returns {object} The same `cfg` object, normalized.
 * @throws {Error} If the config is not an object or a required field is missing/invalid.
 */
export function validateConfig(cfg, cwd = process.cwd()) {
  // A JSON file may legally contain null, a string, an array, ... — reject
  // those with a clear message instead of a TypeError on property access.
  if (cfg === null || typeof cfg !== 'object' || Array.isArray(cfg)) {
    throw new Error('config must be an object');
  }
  if (!cfg.connection || typeof cfg.connection !== 'object') {
    throw new Error('config must have a "connection" object');
  }
  cfg.connection.type ??= 'postgres';
  if (!['postgres', 'duckdb', 'mysql', 'sqlite'].includes(cfg.connection.type)) {
    throw new Error(
      `connection.type must be "postgres", "duckdb", "mysql" or "sqlite", got "${cfg.connection.type}"`
    );
  }
  if (['duckdb', 'sqlite'].includes(cfg.connection.type) && typeof cfg.connection.path !== 'string') {
    throw new Error(
      `${cfg.connection.type} connection requires a "path" string (file path or ":memory:")`
    );
  }
  if (cfg.connection.type === 'mysql') {
    if (typeof cfg.connection.database !== 'string') {
      throw new Error('mysql connection requires a "database" string');
    }
    cfg.connection.port ??= 3306;
  }
  // structural checks on attachments run before env interpolation below
  if (cfg.connection.attach !== undefined) validateAttach(cfg.connection);
  if (!cfg.schema || typeof cfg.schema !== 'string') {
    throw new Error('config must have a "schema" string (target schema for models)');
  }
  for (const [key, value] of Object.entries(cfg.connection)) {
    if (typeof value === 'string') cfg.connection[key] = interpolateEnv(value, key);
  }
  if (['duckdb', 'sqlite'].includes(cfg.connection.type) && cfg.connection.path !== ':memory:') {
    // anchor to the project dir so embedding apps can run from any cwd
    cfg.connection.path = resolve(cwd, cfg.connection.path);
  }
  for (const [i, entry] of (cfg.connection.attach ?? []).entries()) {
    for (const [key, value] of Object.entries(entry)) {
      if (typeof value === 'string') entry[key] = interpolateEnv(value, `attach[${i}].${key}`);
    }
    // file-based attachments anchor to the project dir like connection.path;
    // postgres/mysql paths are connection strings — leave them untouched
    if (isFileAttach(entry.type) && entry.path !== ':memory:') {
      entry.path = resolve(cwd, entry.path);
    }
  }
  cfg.vars ??= {};
  cfg.sources ??= {};
  cfg.seeds ??= {};
  return cfg;
}
|
|
76
|
+
|
|
77
|
+
// Validates connection.attach (DuckDB only). Each entry mounts an external
// database as a catalog via ATTACH. A file-based entry without an alias gets
// one derived from its path basename (the entry is mutated). Env interpolation
// and path resolution are not done here; validateConfig runs them afterwards.
//
// Alias comparisons (reserved names, main catalog, duplicates) are made on
// lowercased names because DuckDB resolves identifiers case-insensitively, so
// "Raw" and "raw" would name the same catalog. The alias itself is stored as
// written.
//
// Throws an Error describing the first problem found; returns nothing.
function validateAttach(connection) {
  if (connection.type !== 'duckdb') {
    throw new Error('"attach" is only supported for duckdb connections');
  }
  if (!Array.isArray(connection.attach)) {
    throw new Error('connection.attach must be an array of { alias, path } objects');
  }
  // main catalog name DuckDB derives from the file path (basename sans extension)
  const mainName =
    connection.path && connection.path !== ':memory:'
      ? basename(connection.path, extname(connection.path)).toLowerCase()
      : null;
  const seen = new Set();
  for (const [i, entry] of connection.attach.entries()) {
    const at = `connection.attach[${i}]`;
    if (!entry || typeof entry !== 'object') throw new Error(`${at} must be an object`);
    if (typeof entry.path !== 'string' || !entry.path) {
      throw new Error(`${at} requires a non-empty "path" string (file path or connection string)`);
    }
    if (entry.type !== undefined && !ATTACH_TYPES.includes(entry.type)) {
      throw new Error(`${at}.type must be one of ${ATTACH_TYPES.join(', ')}, got "${entry.type}"`);
    }
    if (entry.read_only !== undefined && typeof entry.read_only !== 'boolean') {
      throw new Error(`${at}.read_only must be a boolean`);
    }
    // alias is optional for file-based attachments — DuckDB derives it from the
    // path basename, and we mirror that so source().database can reference it.
    // A connection string (postgres/mysql) has no meaningful basename, so its
    // alias must be given explicitly.
    if (entry.alias === undefined && isFileAttach(entry.type) && entry.path !== ':memory:') {
      entry.alias = basename(entry.path, extname(entry.path));
    }
    if (typeof entry.alias !== 'string' || !entry.alias) {
      throw new Error(
        `${at} requires a non-empty "alias" string` +
          (isFileAttach(entry.type) ? '' : ` (required for ${entry.type} connection strings)`)
      );
    }
    // case-folded form used for every collision check below
    const folded = entry.alias.toLowerCase();
    if (RESERVED_ALIASES.has(folded)) {
      throw new Error(`${at}.alias "${entry.alias}" is reserved by DuckDB — choose another name`);
    }
    if (mainName && folded === mainName) {
      throw new Error(`${at}.alias "${entry.alias}" collides with the main database catalog — choose another name`);
    }
    if (seen.has(folded)) {
      throw new Error(`${at}.alias "${entry.alias}" is used by more than one attachment`);
    }
    seen.add(folded);
  }
}
|
|
130
|
+
|
|
131
|
+
// Replaces every `${NAME}` reference in `value` with the value of the
// environment variable NAME. `key` is only used to build the error message.
// Throws when a referenced variable is not set; an empty value is accepted.
function interpolateEnv(value, key) {
  const substitute = (_match, varName) => {
    const resolved = process.env[varName];
    if (resolved !== undefined) return resolved;
    throw new Error(`connection.${key} references \${${varName}} but that environment variable is not set`);
  };
  return value.replace(/\$\{(\w+)\}/g, substitute);
}
|
package/src/dag.js
CHANGED
|
@@ -1,67 +1,67 @@
|
|
|
1
|
-
import { extractRefs } from './render.js';
|
|
2
|
-
|
|
3
|
-
// Builds the dependency graph for a project.
// Returns { nodes: Map<name, node>, order: string[] } with dependencies listed
// before their dependents. Seeds are registered as nodes without deps so that
// ref('seed_name') resolves and sorts correctly; loading them is left to the
// seed command rather than `run` (like dbt).
export function buildDag(models, seeds) {
  const nodes = new Map();
  for (const seed of seeds) {
    nodes.set(seed.name, { ...seed, type: 'seed', deps: [] });
  }
  for (const model of models) {
    // a model may ref the same node several times; keep each dep once
    const uniqueRefs = new Set(extractRefs(model.rawSql));
    nodes.set(model.name, { ...model, type: 'model', deps: Array.from(uniqueRefs) });
  }
  const order = topoSort(nodes);
  return { nodes, order };
}
|
|
14
|
-
|
|
15
|
-
// Depth-first topological sort over a Map<name, { deps: string[] }>.
// Returns the node names with every dependency placed before its dependents.
// Throws on a dependency cycle or on a dep that is not a key of `nodes`.
export function topoSort(nodes) {
  const IN_STACK = 1;
  const DONE = 2;
  const state = new Map(); // missing = unvisited
  const order = [];
  const trail = []; // names currently being visited, outermost first

  const visit = (name) => {
    const mark = state.get(name);
    if (mark === DONE) return;
    if (mark === IN_STACK) {
      throw new Error(`Cycle detected: ${[...trail, name].join(' -> ')}`);
    }
    if (!nodes.has(name)) {
      throw new Error(`'${trail.at(-1)}' refs unknown model/seed '${name}'`);
    }
    state.set(name, IN_STACK);
    trail.push(name);
    for (const dep of nodes.get(name).deps) visit(dep);
    trail.pop();
    state.set(name, DONE);
    order.push(name);
  };

  for (const name of nodes.keys()) visit(name);
  return order;
}
|
|
34
|
-
|
|
35
|
-
// Resolves a --select spec to node names, returned in `order`'s ordering.
// spec: "a,b" | "+name" (name + upstream) | "name+" (name + downstream);
// a null/empty spec selects everything (returns `order` itself).
// Throws when a token names a node that is not in `nodes`.
export function expandSelection(spec, nodes, order) {
  if (!spec) return order;

  // dependents index: name -> names of nodes that list it as a dep
  const dependents = new Map();
  for (const key of nodes.keys()) dependents.set(key, []);
  for (const [name, node] of nodes) {
    for (const dep of node.deps) {
      const list = dependents.get(dep);
      if (list) list.push(name);
    }
  }

  const tokens = spec
    .split(',')
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  const picked = new Set();
  const addAll = (names) => {
    for (const n of names) picked.add(n);
  };
  for (const token of tokens) {
    const wantsUpstream = token.startsWith('+');
    const wantsDownstream = token.endsWith('+');
    const name = token.replace(/^\+/, '').replace(/\+$/, '');
    if (!nodes.has(name)) throw new Error(`--select: unknown model/seed '${name}'`);
    picked.add(name);
    if (wantsUpstream) addAll(walk(name, (x) => nodes.get(x).deps));
    if (wantsDownstream) addAll(walk(name, (x) => dependents.get(x)));
  }
  return order.filter((name) => picked.has(name));
}
|
|
54
|
-
|
|
55
|
-
// Breadth-first traversal from `start`, following `next(name)` to get the
// neighbours of each visited name. Returns the Set of names reached; `start`
// itself is only included if some path leads back to it.
function walk(start, next) {
  const found = new Set();
  const pending = [start];
  // `pending` grows while we scan it; the cursor replaces queue.shift()
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    for (const neighbour of next(pending[cursor])) {
      if (found.has(neighbour)) continue;
      found.add(neighbour);
      pending.push(neighbour);
    }
  }
  return found;
}
|
|
1
|
+
import { extractRefs } from './render.js';
|
|
2
|
+
|
|
3
|
+
// Builds the dependency graph for a project.
// Returns { nodes: Map<name, node>, order: string[] } with dependencies listed
// before their dependents. Seeds are registered as nodes without deps so that
// ref('seed_name') resolves and sorts correctly; loading them is left to the
// seed command rather than `run` (like dbt).
export function buildDag(models, seeds) {
  const nodes = new Map();
  for (const seed of seeds) {
    nodes.set(seed.name, { ...seed, type: 'seed', deps: [] });
  }
  for (const model of models) {
    // a model may ref the same node several times; keep each dep once
    const uniqueRefs = new Set(extractRefs(model.rawSql));
    nodes.set(model.name, { ...model, type: 'model', deps: Array.from(uniqueRefs) });
  }
  const order = topoSort(nodes);
  return { nodes, order };
}
|
|
14
|
+
|
|
15
|
+
// Depth-first topological sort over a Map<name, { deps: string[] }>.
// Returns the node names with every dependency placed before its dependents.
// Throws on a dependency cycle or on a dep that is not a key of `nodes`.
export function topoSort(nodes) {
  const IN_STACK = 1;
  const DONE = 2;
  const state = new Map(); // missing = unvisited
  const order = [];
  const trail = []; // names currently being visited, outermost first

  const visit = (name) => {
    const mark = state.get(name);
    if (mark === DONE) return;
    if (mark === IN_STACK) {
      throw new Error(`Cycle detected: ${[...trail, name].join(' -> ')}`);
    }
    if (!nodes.has(name)) {
      throw new Error(`'${trail.at(-1)}' refs unknown model/seed '${name}'`);
    }
    state.set(name, IN_STACK);
    trail.push(name);
    for (const dep of nodes.get(name).deps) visit(dep);
    trail.pop();
    state.set(name, DONE);
    order.push(name);
  };

  for (const name of nodes.keys()) visit(name);
  return order;
}
|
|
34
|
+
|
|
35
|
+
// Resolves a --select spec to node names, returned in `order`'s ordering.
// spec: "a,b" | "+name" (name + upstream) | "name+" (name + downstream);
// a null/empty spec selects everything (returns `order` itself).
// Throws when a token names a node that is not in `nodes`.
export function expandSelection(spec, nodes, order) {
  if (!spec) return order;

  // dependents index: name -> names of nodes that list it as a dep
  const dependents = new Map();
  for (const key of nodes.keys()) dependents.set(key, []);
  for (const [name, node] of nodes) {
    for (const dep of node.deps) {
      const list = dependents.get(dep);
      if (list) list.push(name);
    }
  }

  const tokens = spec
    .split(',')
    .map((part) => part.trim())
    .filter((part) => part.length > 0);

  const picked = new Set();
  const addAll = (names) => {
    for (const n of names) picked.add(n);
  };
  for (const token of tokens) {
    const wantsUpstream = token.startsWith('+');
    const wantsDownstream = token.endsWith('+');
    const name = token.replace(/^\+/, '').replace(/\+$/, '');
    if (!nodes.has(name)) throw new Error(`--select: unknown model/seed '${name}'`);
    picked.add(name);
    if (wantsUpstream) addAll(walk(name, (x) => nodes.get(x).deps));
    if (wantsDownstream) addAll(walk(name, (x) => dependents.get(x)));
  }
  return order.filter((name) => picked.has(name));
}
|
|
54
|
+
|
|
55
|
+
// Breadth-first traversal from `start`, following `next(name)` to get the
// neighbours of each visited name. Returns the Set of names reached; `start`
// itself is only included if some path leads back to it.
function walk(start, next) {
  const found = new Set();
  const pending = [start];
  // `pending` grows while we scan it; the cursor replaces queue.shift()
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    for (const neighbour of next(pending[cursor])) {
      if (found.has(neighbour)) continue;
      found.add(neighbour);
      pending.push(neighbour);
    }
  }
  return found;
}
|