dbt-js 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/render.js CHANGED
@@ -1,62 +1,65 @@
1
- // Minimal template renderer. Supported constructs:
2
- // {{ ref('model') }} {{ this }} {{ source('src', 'table') }}
3
- // {{ var('name') }} {{ var('name', default) }}
4
- // {{ batch_start }} {{ batch_end }} (microbatch models only)
5
- // {{ timezone }} (the model's config timezone)
6
- // {% if is_incremental() %} ... {% endif %} (no nesting)
7
- const CONFIG_RE = /\/\*\s*config:\s*[\s\S]*?\*\//;
8
- const IF_INCREMENTAL_RE = /\{%\s*if\s+is_incremental\(\)\s*%\}([\s\S]*?)\{%\s*endif\s*%\}/g;
9
- const REF_RE = /\{\{\s*ref\(\s*['"](\w+)['"]\s*\)\s*\}\}/g;
10
- const THIS_RE = /\{\{\s*this\s*\}\}/g;
11
- const SOURCE_RE = /\{\{\s*source\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)\s*\}\}/g;
12
- const VAR_RE = /\{\{\s*var\(\s*['"](\w+)['"]\s*(?:,\s*('[^']*'|"[^"]*"|[^)\s]+))?\s*\)\s*\}\}/g;
13
- const BATCH_RE = /\{\{\s*(batch_start|batch_end)\s*\}\}/g;
14
- const TIMEZONE_RE = /\{\{\s*timezone\s*\}\}/g;
15
- const LEFTOVER_RE = /\{\{[\s\S]*?\}\}|\{%[\s\S]*?%\}|\{\{|\{%/;
16
-
17
- const quoteIdent = (s) => `"${s.replace(/"/g, '""')}"`;
18
- const stripQuotes = (s) => (/^(['"]).*\1$/s.test(s) ? s.slice(1, -1) : s);
19
-
20
- // Cheap dependency extraction for DAG building — scans ref() calls without
21
- // rendering, so missing vars or incremental branches can't hide a dependency.
22
- export function extractRefs(rawSql) {
23
- return [...rawSql.matchAll(REF_RE)].map((m) => m[1]);
24
- }
25
-
26
- // ctx: { name, schema, vars, isIncremental, sources, batchStart?, batchEnd?, timezone? }
27
- export function render(rawSql, ctx) {
28
- const refs = [];
29
- let sql = rawSql.replace(CONFIG_RE, '');
30
- sql = sql.replace(IF_INCREMENTAL_RE, (_, body) => (ctx.isIncremental ? body : ''));
31
- sql = sql.replace(REF_RE, (_, name) => {
32
- refs.push(name);
33
- return `${quoteIdent(ctx.schema)}.${quoteIdent(name)}`;
34
- });
35
- sql = sql.replace(THIS_RE, () => `${quoteIdent(ctx.schema)}.${quoteIdent(ctx.name)}`);
36
- sql = sql.replace(SOURCE_RE, (_, src, table) => {
37
- const decl = ctx.sources?.[src];
38
- if (!decl?.schema) {
39
- throw new Error(
40
- `'${ctx.name}' uses undeclared source '${src}' — add it under "sources" in dbtjs.config.json`
41
- );
42
- }
43
- return `${quoteIdent(decl.schema)}.${quoteIdent(table)}`;
44
- });
45
- if (ctx.batchStart != null) {
46
- // only microbatch runs supply these; elsewhere the token falls through to the leftover guard
47
- sql = sql.replace(BATCH_RE, (_, which) => (which === 'batch_start' ? ctx.batchStart : ctx.batchEnd));
48
- }
49
- // raw substitution (like batch_start) author quotes it in SQL if needed
50
- sql = sql.replace(TIMEZONE_RE, ctx.timezone ?? 'UTC');
51
- sql = sql.replace(VAR_RE, (_, name, def) => {
52
- const value = ctx.vars?.[name];
53
- if (value !== undefined && value !== null) return String(value);
54
- if (def !== undefined) return stripQuotes(def);
55
- throw new Error(`Missing var '${name}' in '${ctx.name}' (no default given) — pass --vars '{"${name}": ...}'`);
56
- });
57
- const leftover = sql.match(LEFTOVER_RE);
58
- if (leftover) {
59
- throw new Error(`Unrecognized template expression in '${ctx.name}': ${leftover[0].slice(0, 80)}`);
60
- }
61
- return { sql: sql.trim(), refs };
62
- }
1
+ // Minimal template renderer. Supported constructs:
2
+ // {{ ref('model') }} {{ this }} {{ source('src', 'table') }}
3
+ // {{ var('name') }} {{ var('name', default) }}
4
+ // {{ batch_start }} {{ batch_end }} (microbatch models only)
5
+ // {{ timezone }} (the model's config timezone)
6
+ // {% if is_incremental() %} ... {% endif %} (no nesting)
7
+ const CONFIG_RE = /\/\*\s*config:\s*[\s\S]*?\*\//;
8
+ const IF_INCREMENTAL_RE = /\{%\s*if\s+is_incremental\(\)\s*%\}([\s\S]*?)\{%\s*endif\s*%\}/g;
9
+ const REF_RE = /\{\{\s*ref\(\s*['"](\w+)['"]\s*\)\s*\}\}/g;
10
+ const THIS_RE = /\{\{\s*this\s*\}\}/g;
11
+ const SOURCE_RE = /\{\{\s*source\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)\s*\}\}/g;
12
+ const VAR_RE = /\{\{\s*var\(\s*['"](\w+)['"]\s*(?:,\s*('[^']*'|"[^"]*"|[^)\s]+))?\s*\)\s*\}\}/g;
13
+ const BATCH_RE = /\{\{\s*(batch_start|batch_end)\s*\}\}/g;
14
+ const TIMEZONE_RE = /\{\{\s*timezone\s*\}\}/g;
15
+ const LEFTOVER_RE = /\{\{[\s\S]*?\}\}|\{%[\s\S]*?%\}|\{\{|\{%/;
16
+
17
+ const quoteIdent = (s) => `"${s.replace(/"/g, '""')}"`;
18
+ const stripQuotes = (s) => (/^(['"]).*\1$/s.test(s) ? s.slice(1, -1) : s);
19
+
20
+ // Cheap dependency extraction for DAG building — scans ref() calls without
21
+ // rendering, so missing vars or incremental branches can't hide a dependency.
22
+ export function extractRefs(rawSql) {
23
+ return [...rawSql.matchAll(REF_RE)].map((m) => m[1]);
24
+ }
25
+
26
+ // ctx: { name, schema, vars, isIncremental, sources, batchStart?, batchEnd?, timezone? }
27
+ export function render(rawSql, ctx) {
28
+ const refs = [];
29
+ let sql = rawSql.replace(CONFIG_RE, '');
30
+ sql = sql.replace(IF_INCREMENTAL_RE, (_, body) => (ctx.isIncremental ? body : ''));
31
+ sql = sql.replace(REF_RE, (_, name) => {
32
+ refs.push(name);
33
+ return `${quoteIdent(ctx.schema)}.${quoteIdent(name)}`;
34
+ });
35
+ sql = sql.replace(THIS_RE, () => `${quoteIdent(ctx.schema)}.${quoteIdent(ctx.name)}`);
36
+ sql = sql.replace(SOURCE_RE, (_, src, table) => {
37
+ const decl = ctx.sources?.[src];
38
+ if (!decl?.schema) {
39
+ throw new Error(
40
+ `'${ctx.name}' uses undeclared source '${src}' — add it under "sources" in dbtjs.config.json`
41
+ );
42
+ }
43
+ // an attached database (DuckDB ATTACH) adds a catalog qualifier:
44
+ // "database"."schema"."table"; without it the name stays two-part
45
+ const prefix = decl.database ? `${quoteIdent(decl.database)}.` : '';
46
+ return `${prefix}${quoteIdent(decl.schema)}.${quoteIdent(table)}`;
47
+ });
48
+ if (ctx.batchStart != null) {
49
+ // only microbatch runs supply these; elsewhere the token falls through to the leftover guard
50
+ sql = sql.replace(BATCH_RE, (_, which) => (which === 'batch_start' ? ctx.batchStart : ctx.batchEnd));
51
+ }
52
+ // raw substitution (like batch_start) — author quotes it in SQL if needed
53
+ sql = sql.replace(TIMEZONE_RE, ctx.timezone ?? 'UTC');
54
+ sql = sql.replace(VAR_RE, (_, name, def) => {
55
+ const value = ctx.vars?.[name];
56
+ if (value !== undefined && value !== null) return String(value);
57
+ if (def !== undefined) return stripQuotes(def);
58
+ throw new Error(`Missing var '${name}' in '${ctx.name}' (no default given) — pass --vars '{"${name}": ...}'`);
59
+ });
60
+ const leftover = sql.match(LEFTOVER_RE);
61
+ if (leftover) {
62
+ throw new Error(`Unrecognized template expression in '${ctx.name}': ${leftover[0].slice(0, 80)}`);
63
+ }
64
+ return { sql: sql.trim(), refs };
65
+ }
package/src/seed.js CHANGED
@@ -1,68 +1,68 @@
1
- import { readFileSync } from 'node:fs';
2
- import { parse } from 'csv-parse/sync';
3
- import { quoteIdent, rel, withTransaction } from './db.js';
4
-
5
- const BATCH_SIZE = 500;
6
-
7
- export async function loadSeed(client, seed, projectCfg) {
8
- const rows = parse(readFileSync(seed.path, 'utf8'), {
9
- columns: true,
10
- skip_empty_lines: true,
11
- trim: true,
12
- });
13
- if (!rows.length) throw new Error(`Seed '${seed.name}' has no data rows`);
14
-
15
- const columns = Object.keys(rows[0]);
16
- const overrides = projectCfg.seeds?.columnTypes?.[seed.name] ?? {};
17
- const mysql = client.dialect === 'mysql';
18
- const sqlite = client.dialect === 'sqlite';
19
- const types = columns.map((c) => {
20
- const t = overrides[c] ?? inferType(rows.map((r) => r[c]));
21
- // bare NUMERIC is DECIMAL(10,0) on MySQL — would silently round decimals
22
- return mysql && t === 'numeric' ? 'decimal(38,10)' : t;
23
- });
24
- const target = rel(projectCfg.schema, seed.name);
25
- // stay under SQLite's 32766-bind-variable cap (and Postgres's 65535) for wide CSVs
26
- const batchSize = Math.max(1, Math.min(BATCH_SIZE, Math.floor(32000 / columns.length)));
27
-
28
- await withTransaction(client, async () => {
29
- await client.query(`DROP TABLE IF EXISTS ${target}${sqlite ? '' : ' CASCADE'}`);
30
- const defs = columns.map((c, i) => `${quoteIdent(c)} ${types[i]}`).join(', ');
31
- await client.query(`CREATE TABLE ${target} (${defs})`);
32
- for (let i = 0; i < rows.length; i += batchSize) {
33
- const batch = rows.slice(i, i + batchSize);
34
- const params = [];
35
- const tuples = batch.map(
36
- (row) =>
37
- `(${columns
38
- .map((c, j) => {
39
- let v = row[c] === '' ? null : row[c];
40
- // MySQL booleans are TINYINT(1); the string 'true' errors under
41
- // strict mode. SQLite would store the TEXT 'true', which is falsy
42
- // in CASE WHEN (and better-sqlite3 can't bind true/false anyway).
43
- if ((mysql || sqlite) && v !== null && types[j] === 'boolean')
44
- v = /^(true|t)$/i.test(v) ? 1 : 0;
45
- params.push(v);
46
- return `$${params.length}`;
47
- })
48
- .join(', ')})`
49
- );
50
- await client.query(`INSERT INTO ${target} VALUES ${tuples.join(', ')}`, params);
51
- }
52
- });
53
- return { rowCount: rows.length };
54
- }
55
-
56
- // Minimal inference: integer/bigint, numeric, boolean, else text.
57
- // Empty strings load as NULL and are excluded from inference.
58
- // Anything fancier (dates, etc.) → seeds.columnTypes override in dbtjs.config.json.
59
- export function inferType(values) {
60
- const present = values.filter((v) => v !== '');
61
- if (!present.length) return 'text';
62
- if (present.every((v) => /^-?\d+$/.test(v))) {
63
- return present.some((v) => Math.abs(Number(v)) > 2147483647) ? 'bigint' : 'integer';
64
- }
65
- if (present.every((v) => /^-?\d*\.?\d+$/.test(v))) return 'numeric';
66
- if (present.every((v) => /^(true|false|t|f)$/i.test(v))) return 'boolean';
67
- return 'text';
68
- }
1
+ import { readFileSync } from 'node:fs';
2
+ import { parse } from 'csv-parse/sync';
3
+ import { quoteIdent, rel, withTransaction } from './db.js';
4
+
5
+ const BATCH_SIZE = 500;
6
+
7
+ export async function loadSeed(client, seed, projectCfg) {
8
+ const rows = parse(readFileSync(seed.path, 'utf8'), {
9
+ columns: true,
10
+ skip_empty_lines: true,
11
+ trim: true,
12
+ });
13
+ if (!rows.length) throw new Error(`Seed '${seed.name}' has no data rows`);
14
+
15
+ const columns = Object.keys(rows[0]);
16
+ const overrides = projectCfg.seeds?.columnTypes?.[seed.name] ?? {};
17
+ const mysql = client.dialect === 'mysql';
18
+ const sqlite = client.dialect === 'sqlite';
19
+ const types = columns.map((c) => {
20
+ const t = overrides[c] ?? inferType(rows.map((r) => r[c]));
21
+ // bare NUMERIC is DECIMAL(10,0) on MySQL — would silently round decimals
22
+ return mysql && t === 'numeric' ? 'decimal(38,10)' : t;
23
+ });
24
+ const target = rel(projectCfg.schema, seed.name);
25
+ // stay under SQLite's 32766-bind-variable cap (and Postgres's 65535) for wide CSVs
26
+ const batchSize = Math.max(1, Math.min(BATCH_SIZE, Math.floor(32000 / columns.length)));
27
+
28
+ await withTransaction(client, async () => {
29
+ await client.query(`DROP TABLE IF EXISTS ${target}${sqlite ? '' : ' CASCADE'}`);
30
+ const defs = columns.map((c, i) => `${quoteIdent(c)} ${types[i]}`).join(', ');
31
+ await client.query(`CREATE TABLE ${target} (${defs})`);
32
+ for (let i = 0; i < rows.length; i += batchSize) {
33
+ const batch = rows.slice(i, i + batchSize);
34
+ const params = [];
35
+ const tuples = batch.map(
36
+ (row) =>
37
+ `(${columns
38
+ .map((c, j) => {
39
+ let v = row[c] === '' ? null : row[c];
40
+ // MySQL booleans are TINYINT(1); the string 'true' errors under
41
+ // strict mode. SQLite would store the TEXT 'true', which is falsy
42
+ // in CASE WHEN (and better-sqlite3 can't bind true/false anyway).
43
+ if ((mysql || sqlite) && v !== null && types[j] === 'boolean')
44
+ v = /^(true|t)$/i.test(v) ? 1 : 0;
45
+ params.push(v);
46
+ return `$${params.length}`;
47
+ })
48
+ .join(', ')})`
49
+ );
50
+ await client.query(`INSERT INTO ${target} VALUES ${tuples.join(', ')}`, params);
51
+ }
52
+ });
53
+ return { rowCount: rows.length };
54
+ }
55
+
56
+ // Minimal inference: integer/bigint, numeric, boolean, else text.
57
+ // Empty strings load as NULL and are excluded from inference.
58
+ // Anything fancier (dates, etc.) → seeds.columnTypes override in dbtjs.config.json.
59
+ export function inferType(values) {
60
+ const present = values.filter((v) => v !== '');
61
+ if (!present.length) return 'text';
62
+ if (present.every((v) => /^-?\d+$/.test(v))) {
63
+ return present.some((v) => Math.abs(Number(v)) > 2147483647) ? 'bigint' : 'integer';
64
+ }
65
+ if (present.every((v) => /^-?\d*\.?\d+$/.test(v))) return 'numeric';
66
+ if (present.every((v) => /^(true|false|t|f)$/i.test(v))) return 'boolean';
67
+ return 'text';
68
+ }
package/src/tests.js CHANGED
@@ -1,49 +1,49 @@
1
- import { quoteIdent, rel } from './db.js';
2
-
3
- // Each test compiles to a SELECT returning violating rows; any row = FAIL.
4
- // NULLs only violate not_null (dbt semantics).
5
- export function buildTests(models, schema) {
6
- const tests = [];
7
- for (const model of models) {
8
- for (const [column, specs] of Object.entries(model.config.tests ?? {})) {
9
- const target = rel(schema, model.name);
10
- const col = quoteIdent(column);
11
- for (const spec of specs) {
12
- if (spec === 'not_null') {
13
- tests.push({
14
- id: `${model.name}.${column}.not_null`,
15
- model: model.name,
16
- sql: `SELECT * FROM ${target} WHERE ${col} IS NULL`,
17
- params: [],
18
- });
19
- } else if (spec === 'unique') {
20
- tests.push({
21
- id: `${model.name}.${column}.unique`,
22
- model: model.name,
23
- sql: `SELECT ${col}, count(*) AS n FROM ${target} WHERE ${col} IS NOT NULL GROUP BY ${col} HAVING count(*) > 1`,
24
- params: [],
25
- });
26
- } else if (spec?.accepted_values?.length) {
27
- const placeholders = spec.accepted_values.map((_, i) => `$${i + 1}`).join(', ');
28
- tests.push({
29
- id: `${model.name}.${column}.accepted_values`,
30
- model: model.name,
31
- sql: `SELECT ${col}, count(*) AS n FROM ${target} WHERE ${col} IS NOT NULL AND ${col} NOT IN (${placeholders}) GROUP BY ${col}`,
32
- params: spec.accepted_values,
33
- });
34
- } else {
35
- throw new Error(`Unknown test ${JSON.stringify(spec)} on ${model.name}.${column}`);
36
- }
37
- }
38
- }
39
- }
40
- return tests;
41
- }
42
-
43
- export async function runTest(client, test) {
44
- const count = await client.query(`SELECT count(*) AS n FROM (${test.sql}) q`, test.params);
45
- const violations = Number(count.rows[0].n);
46
- if (violations === 0) return { pass: true };
47
- const sample = await client.query(`${test.sql} LIMIT 10`, test.params);
48
- return { pass: false, violations, sample: sample.rows };
49
- }
1
+ import { quoteIdent, rel } from './db.js';
2
+
3
+ // Each test compiles to a SELECT returning violating rows; any row = FAIL.
4
+ // NULLs only violate not_null (dbt semantics).
5
+ export function buildTests(models, schema) {
6
+ const tests = [];
7
+ for (const model of models) {
8
+ for (const [column, specs] of Object.entries(model.config.tests ?? {})) {
9
+ const target = rel(schema, model.name);
10
+ const col = quoteIdent(column);
11
+ for (const spec of specs) {
12
+ if (spec === 'not_null') {
13
+ tests.push({
14
+ id: `${model.name}.${column}.not_null`,
15
+ model: model.name,
16
+ sql: `SELECT * FROM ${target} WHERE ${col} IS NULL`,
17
+ params: [],
18
+ });
19
+ } else if (spec === 'unique') {
20
+ tests.push({
21
+ id: `${model.name}.${column}.unique`,
22
+ model: model.name,
23
+ sql: `SELECT ${col}, count(*) AS n FROM ${target} WHERE ${col} IS NOT NULL GROUP BY ${col} HAVING count(*) > 1`,
24
+ params: [],
25
+ });
26
+ } else if (spec?.accepted_values?.length) {
27
+ const placeholders = spec.accepted_values.map((_, i) => `$${i + 1}`).join(', ');
28
+ tests.push({
29
+ id: `${model.name}.${column}.accepted_values`,
30
+ model: model.name,
31
+ sql: `SELECT ${col}, count(*) AS n FROM ${target} WHERE ${col} IS NOT NULL AND ${col} NOT IN (${placeholders}) GROUP BY ${col}`,
32
+ params: spec.accepted_values,
33
+ });
34
+ } else {
35
+ throw new Error(`Unknown test ${JSON.stringify(spec)} on ${model.name}.${column}`);
36
+ }
37
+ }
38
+ }
39
+ }
40
+ return tests;
41
+ }
42
+
43
+ export async function runTest(client, test) {
44
+ const count = await client.query(`SELECT count(*) AS n FROM (${test.sql}) q`, test.params);
45
+ const violations = Number(count.rows[0].n);
46
+ if (violations === 0) return { pass: true };
47
+ const sample = await client.query(`${test.sql} LIMIT 10`, test.params);
48
+ return { pass: false, violations, sample: sample.rows };
49
+ }