khotan-data 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +196 -0
- package/dist/chunk-42CNRMAQ.js +99 -0
- package/dist/chunk-42CNRMAQ.js.map +1 -0
- package/dist/chunk-6PDC7DFX.cjs +215 -0
- package/dist/chunk-6PDC7DFX.cjs.map +1 -0
- package/dist/chunk-6R4QVX2Q.cjs +80 -0
- package/dist/chunk-6R4QVX2Q.cjs.map +1 -0
- package/dist/chunk-FRRSW3TN.cjs +105 -0
- package/dist/chunk-FRRSW3TN.cjs.map +1 -0
- package/dist/chunk-NVPI7OV3.js +71 -0
- package/dist/chunk-NVPI7OV3.js.map +1 -0
- package/dist/chunk-TK4HD4XA.js +213 -0
- package/dist/chunk-TK4HD4XA.js.map +1 -0
- package/dist/drizzle.cjs +28 -0
- package/dist/drizzle.cjs.map +1 -0
- package/dist/drizzle.d.cts +120 -0
- package/dist/drizzle.d.ts +120 -0
- package/dist/drizzle.js +3 -0
- package/dist/drizzle.js.map +1 -0
- package/dist/index.cjs +107 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +35 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/pipeline.cjs +12 -0
- package/dist/pipeline.cjs.map +1 -0
- package/dist/pipeline.d.cts +43 -0
- package/dist/pipeline.d.ts +43 -0
- package/dist/pipeline.js +3 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/transform.cjs +40 -0
- package/dist/transform.cjs.map +1 -0
- package/dist/transform.d.cts +38 -0
- package/dist/transform.d.ts +38 -0
- package/dist/transform.js +3 -0
- package/dist/transform.js.map +1 -0
- package/dist/types-EpLTQcN2.d.cts +54 -0
- package/dist/types-EpLTQcN2.d.ts +54 -0
- package/package.json +110 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// src/drizzle-extract.ts
|
|
4
|
+
/**
 * Build an extractor around a query function.
 *
 * The query is not executed here: `queryFn` is invoked each time
 * `extract()` is iterated, so the returned extractor can be run repeatedly.
 *
 * @param name Name stored on the returned extractor.
 * @param queryFn Zero-argument function returning (a promise-like of) the rows.
 * @returns An object `{ name, extract }` where `extract` is an async generator
 *   yielding the rows one at a time, in the order the query returned them.
 */
function fromQuery(name, queryFn) {
  async function* extract() {
    // The whole result set is awaited first, then streamed row by row.
    for (const record of await queryFn()) {
      yield record;
    }
  }
  return { name, extract };
}
|
|
15
|
+
/**
 * Build an extractor from a caller-supplied generator function.
 *
 * No wrapping takes place: the function passed in becomes the extractor's
 * `extract` member as-is, so all paging/streaming logic stays with the caller.
 *
 * @param name Name stored on the returned extractor.
 * @param generatorFn Function used directly as `extract`.
 * @returns An object `{ name, extract: generatorFn }`.
 */
function fromQueryCursor(name, generatorFn) {
  const extractor = { name: name, extract: generatorFn };
  return extractor;
}
|
|
21
|
+
/**
 * Build an extractor that walks a query page by page using limit/offset.
 *
 * @param name Name stored on the returned extractor.
 * @param opts Pagination settings.
 *   `opts.query(limit, offset)` returns (a promise-like of) one page of rows;
 *   `opts.pageSize` is the limit passed to every call (1000 when null/undefined).
 * @returns An object `{ name, extract }` where `extract` is an async generator
 *   yielding every row of every page, in order.
 */
function fromQueryPaginated(name, opts) {
  const limit = opts.pageSize ?? 1000;

  async function* extract() {
    // The offset advances by the requested limit, not by the rows received.
    for (let offset = 0; ; offset += limit) {
      const page = await opts.query(limit, offset);
      // An empty page means the previous page was the last one.
      if (page.length === 0) {
        return;
      }
      for (const row of page) {
        yield row;
      }
      // A short page is the final page, so the extra round-trip is skipped.
      if (page.length < limit) {
        return;
      }
    }
  }

  return { name, extract };
}
|
|
39
|
+
|
|
40
|
+
// src/drizzle-load.ts
|
|
41
|
+
// Upper bound on bind parameters used to size one statement
// (named after Postgres, whose limit this value mirrors).
var PG_MAX_PARAMETERS = 65535;

/**
 * Work out how many records may be written per statement.
 *
 * Resolution order:
 *   1. `options.maxRowsPerStatement`, when it is a number >= 1 (floored);
 *   2. `floor(PG_MAX_PARAMETERS / options.columnsPerRow)`, when
 *      `columnsPerRow` is a positive number;
 *   3. 1000.
 *
 * The result is always >= 1. Callers use it as the step of an index loop
 * (`i += maxRows`), so a zero or negative value would make that loop spin
 * forever; this previously happened for `columnsPerRow > 65535` (quotient
 * floors to 0) and for negative option values.
 *
 * @param options Optional `{ maxRowsPerStatement?, columnsPerRow? }`.
 * @returns The chunk size, an integer >= 1 (or Infinity if explicitly requested).
 */
function resolveMaxRows(options) {
  const explicit = options?.maxRowsPerStatement;
  if (typeof explicit === "number" && explicit >= 1) {
    return Math.floor(explicit);
  }

  const columns = options?.columnsPerRow;
  if (typeof columns === "number" && columns > 0) {
    // Clamp so that very wide rows still go out one row per statement.
    return Math.max(1, Math.floor(PG_MAX_PARAMETERS / columns));
  }

  return 1000;
}
|
|
49
|
+
/**
 * Build a loader that hands records to a caller-supplied write function,
 * split into chunks of at most `resolveMaxRows(options)` records.
 *
 * Chunks are written one after another. A chunk whose write throws is not
 * retried: every record in it is reported in `errors` (all sharing the same
 * Error object) and the remaining chunks are still attempted.
 *
 * @param name Name stored on the returned loader.
 * @param writeFn Called with one chunk (array of records) at a time; awaited.
 * @param options Optional sizing options forwarded to `resolveMaxRows`.
 * @returns An object `{ name, load }`; `load(records)` resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzle(name, writeFn, options) {
  const chunkSize = resolveMaxRows(options);

  async function load(records) {
    const failures = [];
    let written = 0;
    let start = 0;

    while (start < records.length) {
      const chunk = records.slice(start, start + chunkSize);
      start += chunkSize;
      try {
        await writeFn(chunk);
        written += chunk.length;
      } catch (cause) {
        // Non-Error throwables are wrapped so consumers always get an Error.
        const error = cause instanceof Error ? cause : new Error(String(cause));
        for (const record of chunk) {
          failures.push({ record, error });
        }
      }
    }

    return { recordsLoaded: written, errors: failures };
  }

  return { name, load };
}
|
|
72
|
+
/**
 * Build a loader that writes all chunks of a `load` call inside one
 * `db.transaction(...)` callback.
 *
 * If the transaction call rejects for any reason, the load is reported as a
 * total failure: `recordsLoaded` is 0 and every input record appears in
 * `errors` with the same Error object.
 *
 * @param name Name stored on the returned loader.
 * @param db Object exposing `transaction(callback)`; the callback receives `tx`.
 * @param writeFn Called as `writeFn(tx, chunk)` for each chunk; awaited.
 * @param options Optional sizing options forwarded to `resolveMaxRows`.
 * @returns An object `{ name, load }`; `load(records)` resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzleTx(name, db, writeFn, options) {
  const chunkSize = resolveMaxRows(options);

  async function load(records) {
    const failures = [];
    let written = 0;

    // Runs inside the transaction; the counter lives outside so the
    // failure branch below can reset it.
    const writeChunks = async (tx) => {
      let start = 0;
      while (start < records.length) {
        const chunk = records.slice(start, start + chunkSize);
        await writeFn(tx, chunk);
        written += chunk.length;
        start += chunkSize;
      }
    };

    try {
      await db.transaction(writeChunks);
    } catch (cause) {
      const error = cause instanceof Error ? cause : new Error(String(cause));
      for (const record of records) {
        failures.push({ record, error });
      }
      // Chunks counted before the failure are reported as not loaded.
      written = 0;
    }

    return { recordsLoaded: written, errors: failures };
  }

  return { name, load };
}
|
|
98
|
+
|
|
99
|
+
exports.fromQuery = fromQuery;
|
|
100
|
+
exports.fromQueryCursor = fromQueryCursor;
|
|
101
|
+
exports.fromQueryPaginated = fromQueryPaginated;
|
|
102
|
+
exports.toDrizzle = toDrizzle;
|
|
103
|
+
exports.toDrizzleTx = toDrizzleTx;
|
|
104
|
+
//# sourceMappingURL=chunk-FRRSW3TN.cjs.map
|
|
105
|
+
//# sourceMappingURL=chunk-FRRSW3TN.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/drizzle-extract.ts","../src/drizzle-load.ts"],"names":[],"mappings":";;;AAoBO,SAAS,SAAA,CACd,MACA,OAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,MAAM,IAAA,GAAO,MAAM,OAAA,EAAQ;AAC3B,MAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,QAAA,MAAM,GAAA;AAAA,MACR;AAAA,IACF;AAAA,GACF;AACF;AAqBO,SAAS,eAAA,CACd,MACA,WAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,GACX;AACF;AAcO,SAAS,kBAAA,CACd,MACA,IAAA,EAIc;AACd,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,GAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,WAAS;AACP,QAAA,MAAM,IAAA,GAAO,MAAM,IAAA,CAAK,KAAA,CAAM,UAAU,MAAM,CAAA;AAC9C,QAAA,IAAI,IAAA,CAAK,WAAW,CAAA,EAAG;AACvB,QAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,UAAA,MAAM,GAAA;AAAA,QACR;AACA,QAAA,IAAI,IAAA,CAAK,SAAS,QAAA,EAAU;AAC5B,QAAA,MAAA,IAAU,QAAA;AAAA,MACZ;AAAA,IACF;AAAA,GACF;AACF;;;ACjGA,IAAM,iBAAA,GAAoB,KAAA;AAiB1B,SAAS,eAAe,OAAA,EAAoC;AAC1D,EAAA,IAAI,OAAA,EAAS,mBAAA,EAAqB,OAAO,OAAA,CAAQ,mBAAA;AACjD,EAAA,IAAI,SAAS,aAAA,EAAe;AAC1B,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,iBAAA,GAAoB,OAAA,CAAQ,aAAa,CAAA;AAAA,EAC7D;AACA,EAAA,OAAO,GAAA;AACT;AA+BO,SAAS,SAAA,CACd,IAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,QAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,QAAA,IAAI;AACF,UAAA,MAAM,QAAQ,KAAK,CAAA;AACnB,UAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,QAClB,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,KAAA,MAAW,UAAU,KAAA,EAAO;AAC1B,YAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,UAC/B;AAAA,QACF;AAAA,MACF;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF;AAeO,SAAS,WAAA,CACd,IAAA,EACA,EAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;
AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,IAAI;AACF,QAAA,MAAM,EAAA,CAAG,WAAA,CAAY,OAAO,EAAA,KAAO;AACjC,UAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,YAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,YAAA,MAAM,OAAA,CAAQ,IAAI,KAAK,CAAA;AACvB,YAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,UAClB;AAAA,QACF,CAAC,CAAA;AAAA,MACH,SAAS,GAAA,EAAK;AACZ,QAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,QAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,UAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,QAC/B;AACA,QAAA,MAAA,GAAS,CAAA;AAAA,MACX;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF","file":"chunk-FRRSW3TN.cjs","sourcesContent":["import type { DataRecord, Extractor } from \"./types.js\";\n\n/**\n * Create an extractor from any Drizzle select query.\n *\n * Pass a function that returns the query — this makes the extractor\n * re-runnable and avoids consuming a one-shot promise.\n *\n * @example\n * ```ts\n * import { fromQuery } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { users } from \"@/db/schema\";\n * import { eq } from \"drizzle-orm\";\n *\n * const extractor = fromQuery(\"active-users\", () =>\n * db.select().from(users).where(eq(users.active, true))\n * );\n * ```\n */\nexport function fromQuery<T extends DataRecord>(\n name: string,\n queryFn: () => PromiseLike<T[]>,\n): Extractor<T> {\n return {\n name,\n async *extract() {\n const rows = await queryFn();\n for (const row of rows) {\n yield row;\n }\n },\n };\n}\n\n/**\n * Create an extractor from a Drizzle query that streams results in\n * chunks. 
Use this for large tables where materializing all rows\n * at once is too expensive.\n *\n * @example\n * ```ts\n * const extractor = fromQueryCursor(\"all-events\", async function* () {\n * let offset = 0;\n * const limit = 5000;\n * while (true) {\n * const batch = await db.select().from(events).limit(limit).offset(offset);\n * if (batch.length === 0) break;\n * yield* batch;\n * offset += limit;\n * }\n * });\n * ```\n */\nexport function fromQueryCursor<T extends DataRecord>(\n name: string,\n generatorFn: () => AsyncIterable<T>,\n): Extractor<T> {\n return {\n name,\n extract: generatorFn,\n };\n}\n\n/**\n * Create an extractor from a paginated Drizzle query. Automatically\n * handles offset-based pagination so you don't have to write the loop.\n *\n * @example\n * ```ts\n * const extractor = fromQueryPaginated(\"all-users\", {\n * pageSize: 2000,\n * query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),\n * });\n * ```\n */\nexport function fromQueryPaginated<T extends DataRecord>(\n name: string,\n opts: {\n query: (limit: number, offset: number) => PromiseLike<T[]>;\n pageSize?: number;\n },\n): Extractor<T> {\n const pageSize = opts.pageSize ?? 1000;\n return {\n name,\n async *extract() {\n let offset = 0;\n for (;;) {\n const rows = await opts.query(pageSize, offset);\n if (rows.length === 0) break;\n for (const row of rows) {\n yield row;\n }\n if (rows.length < pageSize) break;\n offset += pageSize;\n }\n },\n };\n}\n","import type { DataRecord, Loader, LoadResult } from \"./types.js\";\n\nconst PG_MAX_PARAMETERS = 65535;\n\ninterface ToDrizzleOptions {\n /**\n * Max records per INSERT statement. When a batch exceeds this,\n * it's automatically split into sub-batches to stay within Postgres\n * parameter limits. Defaults to auto-calculated from columnsPerRow.\n */\n maxRowsPerStatement?: number;\n /**\n * Number of columns per row. 
Used to auto-calculate maxRowsPerStatement\n * to stay under Postgres' 65535 parameter limit.\n * If not provided, falls back to maxRowsPerStatement or 1000.\n */\n columnsPerRow?: number;\n}\n\nfunction resolveMaxRows(options?: ToDrizzleOptions): number {\n if (options?.maxRowsPerStatement) return options.maxRowsPerStatement;\n if (options?.columnsPerRow) {\n return Math.floor(PG_MAX_PARAMETERS / options.columnsPerRow);\n }\n return 1000;\n}\n\n/**\n * Create a loader that writes records using a Drizzle insert/upsert.\n *\n * You provide the write function — this keeps the loader decoupled from\n * specific Drizzle driver types while giving you full control over\n * insert/upsert/conflict behavior.\n *\n * Automatically sub-batches to stay within Postgres' 65535 parameter limit.\n *\n * @example\n * ```ts\n * import { toDrizzle } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { processedUsers } from \"@/db/schema\";\n *\n * // Simple insert\n * const loader = toDrizzle(\"insert-users\", (rows) =>\n * db.insert(processedUsers).values(rows)\n * );\n *\n * // Upsert\n * const loader = toDrizzle(\"upsert-users\", (rows) =>\n * db.insert(processedUsers).values(rows).onConflictDoUpdate({\n * target: processedUsers.id,\n * set: { name: sql`excluded.name`, updatedAt: new Date() },\n * })\n * );\n * ```\n */\nexport function toDrizzle<T extends DataRecord>(\n name: string,\n writeFn: (records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n try {\n await writeFn(chunk);\n loaded += chunk.length;\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n for (const record of chunk) {\n errors.push({ record, error });\n }\n }\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n\n/**\n * Create a loader that writes records inside a Drizzle transaction.\n *\n * All sub-batches for a single load call are wrapped in a single\n * transaction — if any batch fails, the entire load is rolled back.\n *\n * @example\n * ```ts\n * const loader = toDrizzleTx(\"tx-insert\", db, (tx, rows) =>\n * tx.insert(processedUsers).values(rows)\n * );\n * ```\n */\nexport function toDrizzleTx<T extends DataRecord>(\n name: string,\n db: { transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R> },\n writeFn: (tx: never, records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n try {\n await db.transaction(async (tx) => {\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n await writeFn(tx, chunk);\n loaded += chunk.length;\n }\n });\n } catch (err) {\n const error = err instanceof Error ? err : new Error(String(err));\n for (const record of records) {\n errors.push({ record, error });\n }\n loaded = 0;\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n"]}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// src/transformers.ts
|
|
2
|
+
/**
 * Wrap a function as a named transformer.
 *
 * @param name Name stored on the returned transformer.
 * @param fn Function stored, unchanged, as the transformer's `transform`.
 * @returns An object `{ name, transform: fn }`.
 */
function createTransformer(name, fn) {
  const transformer = { name: name, transform: fn };
  return transformer;
}
|
|
5
|
+
/**
 * Create a transformer from a one-record-in, one-record-out function.
 *
 * @param name Name stored on the returned transformer.
 * @param fn Mapping function; stored unchanged as `transform`.
 * @returns An object `{ name, transform: fn }`.
 */
function map(name, fn) {
  return { name, transform: fn };
}
|
|
8
|
+
/**
 * Create a transformer that keeps or drops each record.
 *
 * @param name Name stored on the returned transformer.
 * @param predicate Sync or async test; its result is awaited.
 * @returns An object `{ name, transform }` whose `transform(record)` resolves
 *   to the record itself when the predicate is truthy, and to an empty array
 *   (i.e. "no output records") otherwise.
 */
function filter(name, predicate) {
  const transform = async (record) => {
    if (await predicate(record)) {
      return record;
    }
    return [];
  };
  return { name, transform };
}
|
|
14
|
+
/**
 * Create a transformer from a function that turns one record into a list
 * of records.
 *
 * @param name Name stored on the returned transformer.
 * @param fn Expanding function; stored unchanged as `transform`.
 * @returns An object `{ name, transform: fn }`.
 */
function flatMap(name, fn) {
  return { name, transform: fn };
}
|
|
17
|
+
/**
 * Create a transformer that copies only the listed keys of each record.
 *
 * Keys are tested with the `in` semantics (so inherited properties count),
 * and keys absent from the record are simply left out of the result.
 *
 * @param name Name stored on the returned transformer.
 * @param keys Keys to copy, in the order they should be visited.
 * @returns An object `{ name, transform }`; `transform(record)` returns a new
 *   object and does not modify `record`.
 */
function pick(name, keys) {
  const transform = (record) => {
    const picked = {};
    for (const key of keys) {
      if (!Reflect.has(record, key)) {
        continue;
      }
      picked[key] = record[key];
    }
    return picked;
  };
  return { name, transform };
}
|
|
28
|
+
/**
 * Create a transformer that copies each record without the listed keys.
 *
 * Only the record's own enumerable string-keyed properties are considered
 * (the set produced by `Object.entries`).
 *
 * @param name Name stored on the returned transformer.
 * @param keys Keys to leave out.
 * @returns An object `{ name, transform }`; `transform(record)` returns a new
 *   object and does not modify `record`.
 */
function omit(name, keys) {
  // Built once per transformer so each record costs O(1) per key lookup.
  const excluded = new Set(keys);
  const transform = (record) => {
    const kept = {};
    for (const entry of Object.entries(record)) {
      const [key, value] = entry;
      if (excluded.has(key)) {
        continue;
      }
      kept[key] = value;
    }
    return kept;
  };
  return { name, transform };
}
|
|
40
|
+
/**
 * Create a transformer that renames keys of each record.
 *
 * For every own enumerable key of the record, the new key is
 * `mapping[key]` when `mapping` has that key as an OWN property with a
 * non-nullish value; otherwise the key is kept unchanged.
 *
 * Only own properties of `mapping` are consulted. A plain-object mapping
 * also inherits members such as `toString` and `constructor` from
 * `Object.prototype`; without the own-property check a record key with one
 * of those names would be "renamed" to the stringified inherited function.
 *
 * @param name Name stored on the returned transformer.
 * @param mapping Object mapping old key -> new key.
 * @returns An object `{ name, transform }`; `transform(record)` returns a new
 *   object and does not modify `record`.
 */
function rename(name, mapping) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return {
    name,
    transform: (record) => {
      const result = {};
      for (const [key, value] of Object.entries(record)) {
        const newKey = hasOwn.call(mapping, key) ? (mapping[key] ?? key) : key;
        result[newKey] = value;
      }
      return result;
    },
  };
}
|
|
50
|
+
/**
 * Chain several transformers into one.
 *
 * The input record is fed to the first transformer; every record it produces
 * is fed to the second, and so on. A transformer may return a single record
 * or an array of records (an empty array drops the record, after which the
 * later transformers have nothing to process).
 *
 * @param name Name stored on the returned transformer.
 * @param transformers Transformers to apply, in order.
 * @returns An object `{ name, transform }`; `transform(record)` resolves to
 *   the array of records left after the last transformer.
 */
function compose(name, transformers) {
  const transform = async (record) => {
    let current = [record];
    for (const step of transformers) {
      const produced = [];
      // Records are processed sequentially so output order is deterministic.
      for (const item of current) {
        const output = await step.transform(item);
        const outputs = Array.isArray(output) ? output : [output];
        produced.push(...outputs);
      }
      current = produced;
    }
    return current;
  };
  return { name, transform };
}
|
|
68
|
+
|
|
69
|
+
export { compose, createTransformer, filter, flatMap, map, omit, pick, rename };
|
|
70
|
+
//# sourceMappingURL=chunk-NVPI7OV3.js.map
|
|
71
|
+
//# sourceMappingURL=chunk-NVPI7OV3.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/transformers.ts"],"names":[],"mappings":";AAKO,SAAS,iBAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,EAAA,EAAG;AAC/B;AAKO,SAAS,GAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAMO,SAAS,MAAA,CACd,MACA,SAAA,EACmB;AACnB,EAAA,OAAO,iBAAA,CAAkB,IAAA,EAAM,OAAO,MAAA,KAAc;AAClD,IAAA,MAAM,IAAA,GAAO,MAAM,SAAA,CAAU,MAAM,CAAA;AACnC,IAAA,OAAO,IAAA,GAAO,SAAS,EAAC;AAAA,EAC1B,CAAC,CAAA;AACH;AAKO,SAAS,OAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,MAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,MAAA,CAAO,GAAG,CAAA;AAAA,MACvD;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAY,IAAI,CAAA;AACnC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,IAAI,CAAC,MAAA,CAAO,GAAA,CAAI,GAAG,CAAA,EAAG;AACpB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,KAAA;AAAA,MAC7C;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,MAAA,CACd,MACA,OAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,CAAC,MAAA,KAAc;AAC/C,IAAA,MAAM,SAAqB,EAAC;AAC5B,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,MAAM,MAAA,GAAS,OAAA,CAAQ,GAAG,CAAA,IAAK,GAAA;AAC/B,MAAA,MAAA,CAAO,MAAM,CAAA,GAAI,KAAA;AAAA,IACnB;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAMO,SAAS,OAAA,CACd,MACA,YAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,OAAO,MAAA,KAAc;AACrD,IAAA,IAAI,OAAA,GAAwB,CAAC,MAAM,CAAA;AAEnC,IAAA,KAAA,MAAW,eAAe,YAAA,EAAc;AACtC,MAAA,MAAM,cAA4B,EAAC;AACnC,MAAA,KAAA,MAAW,KAAK,OAAA,EAAS;AACvB,QAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,CAAC,CAAA;AAC5C,QAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,EAAG;AACzB,UAAA,WAAA,CAAY,IAAA,CAAK,GAAG,MAAM,CAAA;AAAA,Q
AC5B,CAAA,MAAO;AACL,UAAA,WAAA,CAAY,KAAK,MAAM,CAAA;AAAA,QACzB;AAAA,MACF;AACA,MAAA,OAAA,GAAU,WAAA;AAAA,IACZ;AAEA,IAAA,OAAO,OAAA;AAAA,EACT,CAAC,CAAA;AACH","file":"chunk-NVPI7OV3.js","sourcesContent":["import type { DataRecord, Transformer } from \"./types.js\";\n\n/**\n * Create a custom transformer from a function.\n */\nexport function createTransformer<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | TOutput[] | Promise<TOutput | TOutput[]>,\n): Transformer<TInput, TOutput> {\n return { name, transform: fn };\n}\n\n/**\n * Transform each record using a mapping function.\n */\nexport function map<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | Promise<TOutput>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Filter records based on a predicate. Records that don't match\n * are dropped (returned as empty array).\n */\nexport function filter<T extends DataRecord = DataRecord>(\n name: string,\n predicate: (record: T) => boolean | Promise<boolean>,\n): Transformer<T, T> {\n return createTransformer(name, async (record: T) => {\n const keep = await predicate(record);\n return keep ? 
record : [];\n });\n}\n\n/**\n * Transform each record into zero or more records.\n */\nexport function flatMap<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput[] | Promise<TOutput[]>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Pick specific keys from each record.\n */\nexport function pick<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Pick<T, K> & DataRecord> {\n return createTransformer<T, Pick<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Pick<T, K> & DataRecord;\n for (const key of keys) {\n if (key in record) {\n (result as Record<string, unknown>)[key] = record[key];\n }\n }\n return result;\n });\n}\n\n/**\n * Omit specific keys from each record.\n */\nexport function omit<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Omit<T, K> & DataRecord> {\n const keySet = new Set<string>(keys);\n return createTransformer<T, Omit<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Omit<T, K> & DataRecord;\n for (const [key, value] of Object.entries(record)) {\n if (!keySet.has(key)) {\n (result as Record<string, unknown>)[key] = value;\n }\n }\n return result;\n });\n}\n\n/**\n * Rename keys in each record.\n */\nexport function rename<T extends DataRecord>(\n name: string,\n mapping: Record<string, string>,\n): Transformer<T> {\n return createTransformer<T>(name, (record: T) => {\n const result: DataRecord = {};\n for (const [key, value] of Object.entries(record)) {\n const newKey = mapping[key] ?? 
key;\n result[newKey] = value;\n }\n return result;\n });\n}\n\n/**\n * Compose multiple transformers into a single transformer that\n * applies them in sequence.\n */\nexport function compose<T extends DataRecord>(\n name: string,\n transformers: Transformer[],\n): Transformer<T> {\n return createTransformer<T>(name, async (record: T) => {\n let records: DataRecord[] = [record];\n\n for (const transformer of transformers) {\n const nextRecords: DataRecord[] = [];\n for (const r of records) {\n const result = await transformer.transform(r);\n if (Array.isArray(result)) {\n nextRecords.push(...result);\n } else {\n nextRecords.push(result);\n }\n }\n records = nextRecords;\n }\n\n return records;\n });\n}\n"]}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
// src/pipeline-builder.ts
|
|
2
|
+
/**
 * Immutable builder and runner for an extract -> transform -> load pipeline.
 *
 * Every builder method (`extract`, `transform`, `load`, `on`) returns a NEW
 * pipeline and leaves the receiver untouched. `run()` streams records from
 * the extractor, passes each through the transformers in order, collects the
 * results into batches and hands each batch to every loader.
 */
var Pipeline = class _Pipeline {
  #name;
  #extractor;
  #transformers;
  #loaders;
  #listeners;

  constructor(name, extractor, transformers, loaders, listeners) {
    this.#name = name;
    this.#extractor = extractor;
    this.#transformers = transformers;
    this.#loaders = loaders;
    this.#listeners = listeners;
  }

  /**
   * Start a pipeline with no extractor, transformers, loaders or listeners.
   */
  static create(name) {
    return new _Pipeline(name, null, [], [], []);
  }

  /** The name given to `create`. */
  get name() {
    return this.#name;
  }

  /**
   * Set the data source for this pipeline (replaces any previous extractor).
   */
  extract(extractor) {
    return new _Pipeline(
      this.#name,
      extractor,
      this.#transformers,
      this.#loaders,
      this.#listeners
    );
  }

  /**
   * Add a transformation step after the existing ones.
   */
  transform(transformer) {
    return new _Pipeline(
      this.#name,
      this.#extractor,
      [...this.#transformers, transformer],
      this.#loaders,
      this.#listeners
    );
  }

  /**
   * Add a load destination; every batch is sent to every loader.
   */
  load(loader) {
    return new _Pipeline(
      this.#name,
      this.#extractor,
      this.#transformers,
      [...this.#loaders, loader],
      this.#listeners
    );
  }

  /**
   * Subscribe to pipeline events. Listeners are called synchronously.
   */
  on(listener) {
    return new _Pipeline(
      this.#name,
      this.#extractor,
      this.#transformers,
      this.#loaders,
      [...this.#listeners, listener]
    );
  }

  /**
   * Execute the pipeline.
   *
   * @param options `batchSize` (default 1000): number of transformed records
   *   collected before the loaders are called. `continueOnError` (default
   *   false): keep going after a failing step instead of stopping. `signal`:
   *   when `signal.aborted` is seen before a record is processed, extraction
   *   stops and the pending batch is flushed.
   * @returns `{ recordsProcessed, recordsLoaded, errors, duration }`.
   *   `recordsLoaded` is the sum over all loaders; `duration` is in
   *   milliseconds as measured by `performance.now()`.
   * @throws Error when no extractor or no loader has been configured.
   *   Step failures do not reject: they are reported in `errors`.
   */
  async run(options = {}) {
    if (!this.#extractor) {
      throw new Error(
        `Pipeline "${this.#name}" has no extractor. Call .extract() before .run().`
      );
    }
    if (this.#loaders.length === 0) {
      throw new Error(
        `Pipeline "${this.#name}" has no loaders. Call .load() before .run().`
      );
    }
    const { batchSize = 1e3, continueOnError = false, signal } = options;
    const startTime = performance.now();
    const errors = [];
    let recordsProcessed = 0;
    let recordsLoaded = 0;
    this.#emit({
      type: "pipeline:start",
      timestamp: /* @__PURE__ */ new Date(),
      data: { name: this.#name }
    });
    let batch = [];

    // Sends the pending batch to every loader, then empties it.
    const flushBatch = async () => {
      if (batch.length === 0) return;
      for (const loader of this.#loaders) {
        try {
          this.#emit({
            type: "step:start",
            timestamp: /* @__PURE__ */ new Date(),
            stepName: loader.name
          });
          const result = await loader.load(batch);
          recordsLoaded += result.recordsLoaded;
          // Per-record failures reported by the loader are collected but
          // do not stop the pipeline.
          for (const err of result.errors) {
            errors.push({
              stepName: loader.name,
              error: err.error,
              record: err.record
            });
          }
          this.#emit({
            type: "step:end",
            timestamp: /* @__PURE__ */ new Date(),
            stepName: loader.name,
            data: result
          });
        } catch (err) {
          const error = err instanceof Error ? err : new Error(String(err));
          errors.push({ stepName: loader.name, error });
          // Mirror the transformer path so listeners see loader failures too.
          this.#emit({
            type: "error",
            timestamp: /* @__PURE__ */ new Date(),
            stepName: loader.name,
            data: error
          });
          if (!continueOnError) {
            throw error;
          }
        }
      }
      batch = [];
    };

    try {
      for await (const raw of this.#extractor.extract()) {
        if (signal?.aborted) {
          break;
        }
        this.#emit({
          type: "record:extracted",
          timestamp: /* @__PURE__ */ new Date(),
          stepName: this.#extractor.name,
          data: raw
        });
        let records = [raw];
        for (const transformer of this.#transformers) {
          const nextRecords = [];
          for (const record of records) {
            try {
              this.#emit({
                type: "step:start",
                timestamp: /* @__PURE__ */ new Date(),
                stepName: transformer.name
              });
              const result = await transformer.transform(record);
              const transformed = Array.isArray(result) ? result : [result];
              nextRecords.push(...transformed);
              this.#emit({
                type: "record:transformed",
                timestamp: /* @__PURE__ */ new Date(),
                stepName: transformer.name,
                data: transformed
              });
            } catch (err) {
              const error = err instanceof Error ? err : new Error(String(err));
              errors.push({
                stepName: transformer.name,
                error,
                record
              });
              this.#emit({
                type: "error",
                timestamp: /* @__PURE__ */ new Date(),
                stepName: transformer.name,
                data: error
              });
              if (!continueOnError) {
                throw error;
              }
            }
          }
          records = nextRecords;
        }
        batch.push(...records);
        recordsProcessed += records.length;
        if (batch.length >= batchSize) {
          await flushBatch();
        }
      }
      await flushBatch();
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      // Transformer and loader failures are recorded before being rethrown,
      // so the same Error object is already in `errors`. Anything else was
      // raised outside a step's own try/catch - in practice the extractor -
      // and used to vanish without a trace; record it against the extractor.
      if (!errors.some((entry) => entry.error === error)) {
        errors.push({ stepName: this.#extractor.name, error });
        this.#emit({
          type: "error",
          timestamp: /* @__PURE__ */ new Date(),
          stepName: this.#extractor.name,
          data: error
        });
      }
      if (continueOnError) {
        // Records already counted in recordsProcessed are still loaded
        // rather than silently discarded with the pending batch.
        await flushBatch();
      }
    }

    const duration = performance.now() - startTime;
    this.#emit({
      type: "pipeline:end",
      timestamp: /* @__PURE__ */ new Date(),
      data: { recordsProcessed, recordsLoaded, errors, duration }
    });
    return { recordsProcessed, recordsLoaded, errors, duration };
  }

  // Delivers one event to every listener, in subscription order.
  #emit(event) {
    for (const listener of this.#listeners) {
      listener(event);
    }
  }
};
|
|
210
|
+
|
|
211
|
+
export { Pipeline };
|
|
212
|
+
//# sourceMappingURL=chunk-TK4HD4XA.js.map
|
|
213
|
+
//# sourceMappingURL=chunk-TK4HD4XA.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/pipeline-builder.ts"],"names":["duration"],"mappings":";AAuBO,IAAM,QAAA,GAAN,MAAM,SAAA,CAAmD;AAAA,EACrD,KAAA;AAAA,EACA,UAAA;AAAA,EACA,aAAA;AAAA,EACA,QAAA;AAAA,EACA,UAAA;AAAA,EAED,WAAA,CACN,IAAA,EACA,SAAA,EACA,YAAA,EACA,SACA,SAAA,EACA;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAA;AACb,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,aAAA,GAAgB,YAAA;AACrB,IAAA,IAAA,CAAK,QAAA,GAAW,OAAA;AAChB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAAA,EACpB;AAAA,EAEA,OAAO,OAAO,IAAA,EAAwB;AACpC,IAAA,OAAO,IAAI,UAAS,IAAA,EAAM,IAAA,EAAM,EAAC,EAAG,EAAC,EAAG,EAAE,CAAA;AAAA,EAC5C;AAAA,EAEA,IAAI,IAAA,GAAe;AACjB,IAAA,OAAO,IAAA,CAAK,KAAA;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,QAA8B,SAAA,EAAsC;AAClE,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,SAAA;AAAA,MACA,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,UACE,WAAA,EACmB;AACnB,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,aAAA,EAAe,WAAW,CAAA;AAAA,MACnC,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,KAAK,MAAA,EAA8C;AACjD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,QAAA,EAAU,MAAM,CAAA;AAAA,MACzB,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,GAAG,QAAA,EAAqD;AACtD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,UAAA,EAAY,QAAQ;AAAA,KAC/B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,GAAA,CAAI,OAAA,GAA2B,EAAC,EAA4B;AAChE,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,kDAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,6CAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,SAAA,GAAY,GAAA,EAAM,eAAA,GAAkB,KAAA,EAAO,QAAO,GAAI,OAAA;AAE9D,IAAA,MAAM,SAAA,GAAY,YAAY,GAAA,EAAI;AAClC,I
AAA,MAAM,SAA8B,EAAC;AACrC,IAAA,IAAI,gBAAA,GAAmB,CAAA;AACvB,IAAA,IAAI,aAAA,GAAgB,CAAA;AAEpB,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,gBAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,IAAA,EAAM,IAAA,CAAK,KAAA;AAAM,KAC1B,CAAA;AAED,IAAA,IAAI,QAAsB,EAAC;AAE3B,IAAA,MAAM,aAAa,YAA2B;AAC5C,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AAExB,MAAA,KAAA,MAAW,MAAA,IAAU,KAAK,QAAA,EAAU;AAClC,QAAA,IAAI;AACF,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,YAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO;AAAA,WAClB,CAAA;AAED,UAAA,MAAM,MAAA,GAAS,MAAM,MAAA,CAAO,IAAA,CAAK,KAAK,CAAA;AACtC,UAAA,aAAA,IAAiB,MAAA,CAAO,aAAA;AAExB,UAAA,KAAA,MAAW,GAAA,IAAO,OAAO,MAAA,EAAQ;AAC/B,YAAA,MAAA,CAAO,IAAA,CAAK;AAAA,cACV,UAAU,MAAA,CAAO,IAAA;AAAA,cACjB,OAAO,GAAA,CAAI,KAAA;AAAA,cACX,QAAQ,GAAA,CAAI;AAAA,aACb,CAAA;AAAA,UACH;AAEA,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,UAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO,IAAA;AAAA,YACjB,IAAA,EAAM;AAAA,WACP,CAAA;AAAA,QACH,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,MAAA,CAAO,KAAK,EAAE,QAAA,EAAU,MAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AAE5C,UAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,YAAA,MAAM,KAAA;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,EAAC;AAAA,IACX,CAAA;AAEA,IAAA,IAAI;AACF,MAAA,WAAA,MAAiB,GAAA,IAAO,IAAA,CAAK,UAAA,CAAW,OAAA,EAAQ,EAAG;AACjD,QAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,UAAA;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,kBAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,QAAA,EAAU,KAAK,UAAA,CAAW,IAAA;AAAA,UAC1B,IAAA,EAAM;AAAA,SACP,CAAA;AAED,QAAA,IAAI,OAAA,GAAwB,CAAC,GAAG,CAAA;AAEhC,QAAA,KAAA,MAAW,WAAA,IAAe,KAAK,aAAA,EAAe;AAC5C,UAAA,MAAM,cAA4B,EAAC;AAEnC,UAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,YAAA,IAAI;AACF,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,YAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY;AAAA,eACvB,CAAA;AAED,cAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,MAAM,CAAA;AACjD,cAAA,MAAM,cAAc,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,GAAI,MAAA,GAAS,CAAC,MAAM
,CAAA;AAC5D,cAAA,WAAA,CAAY,IAAA,CAAK,GAAG,WAAW,CAAA;AAE/B,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,oBAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAAA,YACH,SAAS,GAAA,EAAK;AACZ,cAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,cAAA,MAAA,CAAO,IAAA,CAAK;AAAA,gBACV,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,KAAA;AAAA,gBACA;AAAA,eACD,CAAA;AAED,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,OAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAED,cAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,gBAAA,MAAM,KAAA;AAAA,cACR;AAAA,YACF;AAAA,UACF;AAEA,UAAA,OAAA,GAAU,WAAA;AAAA,QACZ;AAEA,QAAA,KAAA,CAAM,IAAA,CAAK,GAAG,OAAO,CAAA;AACrB,QAAA,gBAAA,IAAoB,OAAA,CAAQ,MAAA;AAE5B,QAAA,IAAI,KAAA,CAAM,UAAU,SAAA,EAAW;AAC7B,UAAA,MAAM,UAAA,EAAW;AAAA,QACnB;AAAA,MACF;AAEA,MAAA,MAAM,UAAA,EAAW;AAAA,IACnB,CAAA,CAAA,MAAQ;AACN,MAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,QAAA,MAAMA,SAAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AACrC,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,cAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,MAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA;AAAS,SAC3D,CAAA;AACD,QAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA,EAAS;AAAA,MAC7D;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AAErC,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,cAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,QAAQ,QAAA;AAAS,KAC3D,CAAA;AAED,IAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,QAAA,EAAS;AAAA,EAC7D;AAAA,EAEA,MAAM,KAAA,EAKG;AACP,IAAA,KAAA,MAAW,QAAA,IAAY,KAAK,UAAA,EAAY;AACtC,MAAA,QAAA,CAAS,KAA6C,CAAA;AAAA,IACxD;AAAA,EACF;AACF","file":"chunk-TK4HD4XA.js","sourcesContent":["import type {\n DataRecord,\n Extractor,\n Loader,\n PipelineEventListener,\n PipelineOptions,\n PipelineResult,\n PipelineStepError,\n Transformer,\n} from \"./types.js\";\n\n/**\n * A composable, type-safe ETL pipeline builder.\n *\n * @example\n * ```ts\n * 
const result = await Pipeline.create(\"my-pipeline\")\n * .extract(myExtractor)\n * .transform(myTransformer)\n * .load(myLoader)\n * .run();\n * ```\n */\nexport class Pipeline<TCurrent extends DataRecord = DataRecord> {\n readonly #name: string;\n readonly #extractor: Extractor | null;\n readonly #transformers: Transformer[];\n readonly #loaders: Loader[];\n readonly #listeners: PipelineEventListener[];\n\n private constructor(\n name: string,\n extractor: Extractor | null,\n transformers: Transformer[],\n loaders: Loader[],\n listeners: PipelineEventListener[],\n ) {\n this.#name = name;\n this.#extractor = extractor;\n this.#transformers = transformers;\n this.#loaders = loaders;\n this.#listeners = listeners;\n }\n\n static create(name: string): Pipeline {\n return new Pipeline(name, null, [], [], []);\n }\n\n get name(): string {\n return this.#name;\n }\n\n /**\n * Set the data source for this pipeline.\n */\n extract<T extends DataRecord>(extractor: Extractor<T>): Pipeline<T> {\n return new Pipeline(\n this.#name,\n extractor,\n this.#transformers,\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a transformation step.\n */\n transform<TOutput extends DataRecord>(\n transformer: Transformer<TCurrent, TOutput>,\n ): Pipeline<TOutput> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n [...this.#transformers, transformer],\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a load destination.\n */\n load(loader: Loader<TCurrent>): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n [...this.#loaders, loader],\n this.#listeners,\n );\n }\n\n /**\n * Subscribe to pipeline events.\n */\n on(listener: PipelineEventListener): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n this.#loaders,\n [...this.#listeners, listener],\n );\n }\n\n /**\n * Execute the pipeline.\n */\n async run(options: PipelineOptions = {}): Promise<PipelineResult> 
{\n if (!this.#extractor) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no extractor. Call .extract() before .run().`,\n );\n }\n\n if (this.#loaders.length === 0) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no loaders. Call .load() before .run().`,\n );\n }\n\n const { batchSize = 1000, continueOnError = false, signal } = options;\n\n const startTime = performance.now();\n const errors: PipelineStepError[] = [];\n let recordsProcessed = 0;\n let recordsLoaded = 0;\n\n this.#emit({\n type: \"pipeline:start\",\n timestamp: new Date(),\n data: { name: this.#name },\n });\n\n let batch: DataRecord[] = [];\n\n const flushBatch = async (): Promise<void> => {\n if (batch.length === 0) return;\n\n for (const loader of this.#loaders) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: loader.name,\n });\n\n const result = await loader.load(batch);\n recordsLoaded += result.recordsLoaded;\n\n for (const err of result.errors) {\n errors.push({\n stepName: loader.name,\n error: err.error,\n record: err.record,\n });\n }\n\n this.#emit({\n type: \"step:end\",\n timestamp: new Date(),\n stepName: loader.name,\n data: result,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({ stepName: loader.name, error });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n batch = [];\n };\n\n try {\n for await (const raw of this.#extractor.extract()) {\n if (signal?.aborted) {\n break;\n }\n\n this.#emit({\n type: \"record:extracted\",\n timestamp: new Date(),\n stepName: this.#extractor.name,\n data: raw,\n });\n\n let records: DataRecord[] = [raw];\n\n for (const transformer of this.#transformers) {\n const nextRecords: DataRecord[] = [];\n\n for (const record of records) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: transformer.name,\n });\n\n const result = await transformer.transform(record);\n const transformed = Array.isArray(result) ? result : [result];\n nextRecords.push(...transformed);\n\n this.#emit({\n type: \"record:transformed\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: transformed,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({\n stepName: transformer.name,\n error,\n record,\n });\n\n this.#emit({\n type: \"error\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: error,\n });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n records = nextRecords;\n }\n\n batch.push(...records);\n recordsProcessed += records.length;\n\n if (batch.length >= batchSize) {\n await flushBatch();\n }\n }\n\n await flushBatch();\n } catch {\n if (!continueOnError) {\n const duration = performance.now() - startTime;\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n }\n\n const duration = performance.now() - startTime;\n\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n\n #emit(event: {\n type: string;\n timestamp: Date;\n stepName?: string;\n data?: unknown;\n }): void {\n for (const listener of this.#listeners) {\n listener(event as Parameters<PipelineEventListener>[0]);\n }\n }\n}\n"]}
|
package/dist/drizzle.cjs
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var chunkFRRSW3TN_cjs = require('./chunk-FRRSW3TN.cjs');
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Object.defineProperty(exports, "fromQuery", {
|
|
8
|
+
enumerable: true,
|
|
9
|
+
get: function () { return chunkFRRSW3TN_cjs.fromQuery; }
|
|
10
|
+
});
|
|
11
|
+
Object.defineProperty(exports, "fromQueryCursor", {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
get: function () { return chunkFRRSW3TN_cjs.fromQueryCursor; }
|
|
14
|
+
});
|
|
15
|
+
Object.defineProperty(exports, "fromQueryPaginated", {
|
|
16
|
+
enumerable: true,
|
|
17
|
+
get: function () { return chunkFRRSW3TN_cjs.fromQueryPaginated; }
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports, "toDrizzle", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
get: function () { return chunkFRRSW3TN_cjs.toDrizzle; }
|
|
22
|
+
});
|
|
23
|
+
Object.defineProperty(exports, "toDrizzleTx", {
|
|
24
|
+
enumerable: true,
|
|
25
|
+
get: function () { return chunkFRRSW3TN_cjs.toDrizzleTx; }
|
|
26
|
+
});
|
|
27
|
+
//# sourceMappingURL=drizzle.cjs.map
|
|
28
|
+
//# sourceMappingURL=drizzle.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"drizzle.cjs"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { D as DataRecord, E as Extractor, b as Loader } from './types-EpLTQcN2.cjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Create an extractor from any Drizzle select query.
|
|
5
|
+
*
|
|
6
|
+
* Pass a function that returns the query — this makes the extractor
|
|
7
|
+
* re-runnable and avoids consuming a one-shot promise.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```ts
|
|
11
|
+
* import { fromQuery } from "khotan-data/drizzle";
|
|
12
|
+
* import { db } from "@/db";
|
|
13
|
+
* import { users } from "@/db/schema";
|
|
14
|
+
* import { eq } from "drizzle-orm";
|
|
15
|
+
*
|
|
16
|
+
* const extractor = fromQuery("active-users", () =>
|
|
17
|
+
* db.select().from(users).where(eq(users.active, true))
|
|
18
|
+
* );
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
declare function fromQuery<T extends DataRecord>(name: string, queryFn: () => PromiseLike<T[]>): Extractor<T>;
|
|
22
|
+
/**
|
|
23
|
+
* Create an extractor from a Drizzle query that streams results in
|
|
24
|
+
* chunks. Use this for large tables where materializing all rows
|
|
25
|
+
* at once is too expensive.
|
|
26
|
+
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```ts
|
|
29
|
+
* const extractor = fromQueryCursor("all-events", async function* () {
|
|
30
|
+
* let offset = 0;
|
|
31
|
+
* const limit = 5000;
|
|
32
|
+
* while (true) {
|
|
33
|
+
* const batch = await db.select().from(events).limit(limit).offset(offset);
|
|
34
|
+
* if (batch.length === 0) break;
|
|
35
|
+
* yield* batch;
|
|
36
|
+
* offset += limit;
|
|
37
|
+
* }
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
declare function fromQueryCursor<T extends DataRecord>(name: string, generatorFn: () => AsyncIterable<T>): Extractor<T>;
|
|
42
|
+
/**
|
|
43
|
+
* Create an extractor from a paginated Drizzle query. Automatically
|
|
44
|
+
* handles offset-based pagination so you don't have to write the loop.
|
|
45
|
+
*
|
|
46
|
+
* @example
|
|
47
|
+
* ```ts
|
|
48
|
+
* const extractor = fromQueryPaginated("all-users", {
|
|
49
|
+
* pageSize: 2000,
|
|
50
|
+
* query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),
|
|
51
|
+
* });
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
declare function fromQueryPaginated<T extends DataRecord>(name: string, opts: {
|
|
55
|
+
query: (limit: number, offset: number) => PromiseLike<T[]>;
|
|
56
|
+
pageSize?: number;
|
|
57
|
+
}): Extractor<T>;
|
|
58
|
+
|
|
59
|
+
interface ToDrizzleOptions {
|
|
60
|
+
/**
|
|
61
|
+
* Max records per INSERT statement. When a batch exceeds this,
|
|
62
|
+
* it's automatically split into sub-batches to stay within Postgres
|
|
63
|
+
* parameter limits. Defaults to auto-calculated from columnsPerRow.
|
|
64
|
+
*/
|
|
65
|
+
maxRowsPerStatement?: number;
|
|
66
|
+
/**
|
|
67
|
+
* Number of columns per row. Used to auto-calculate maxRowsPerStatement
|
|
68
|
+
* to stay under Postgres' 65535 parameter limit.
|
|
69
|
+
* If not provided, falls back to maxRowsPerStatement or 1000.
|
|
70
|
+
*/
|
|
71
|
+
columnsPerRow?: number;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Create a loader that writes records using a Drizzle insert/upsert.
|
|
75
|
+
*
|
|
76
|
+
* You provide the write function — this keeps the loader decoupled from
|
|
77
|
+
* specific Drizzle driver types while giving you full control over
|
|
78
|
+
* insert/upsert/conflict behavior.
|
|
79
|
+
*
|
|
80
|
+
* Automatically sub-batches to stay within Postgres' 65535 parameter limit.
|
|
81
|
+
*
|
|
82
|
+
* @example
|
|
83
|
+
* ```ts
|
|
84
|
+
* import { toDrizzle } from "khotan-data/drizzle";
|
|
85
|
+
* import { db } from "@/db";
|
|
86
|
+
* import { processedUsers } from "@/db/schema";
|
|
87
|
+
*
|
|
88
|
+
* // Simple insert
|
|
89
|
+
* const loader = toDrizzle("insert-users", (rows) =>
|
|
90
|
+
* db.insert(processedUsers).values(rows)
|
|
91
|
+
* );
|
|
92
|
+
*
|
|
93
|
+
* // Upsert
|
|
94
|
+
* const loader = toDrizzle("upsert-users", (rows) =>
|
|
95
|
+
* db.insert(processedUsers).values(rows).onConflictDoUpdate({
|
|
96
|
+
* target: processedUsers.id,
|
|
97
|
+
* set: { name: sql`excluded.name`, updatedAt: new Date() },
|
|
98
|
+
* })
|
|
99
|
+
* );
|
|
100
|
+
* ```
|
|
101
|
+
*/
|
|
102
|
+
declare function toDrizzle<T extends DataRecord>(name: string, writeFn: (records: T[]) => PromiseLike<unknown>, options?: ToDrizzleOptions): Loader<T>;
|
|
103
|
+
/**
|
|
104
|
+
* Create a loader that writes records inside a Drizzle transaction.
|
|
105
|
+
*
|
|
106
|
+
* All sub-batches for a single load call are wrapped in a single
|
|
107
|
+
* transaction — if any batch fails, the entire load is rolled back.
|
|
108
|
+
*
|
|
109
|
+
* @example
|
|
110
|
+
* ```ts
|
|
111
|
+
* const loader = toDrizzleTx("tx-insert", db, (tx, rows) =>
|
|
112
|
+
* tx.insert(processedUsers).values(rows)
|
|
113
|
+
* );
|
|
114
|
+
* ```
|
|
115
|
+
*/
|
|
116
|
+
declare function toDrizzleTx<T extends DataRecord>(name: string, db: {
|
|
117
|
+
transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R>;
|
|
118
|
+
}, writeFn: (tx: never, records: T[]) => PromiseLike<unknown>, options?: ToDrizzleOptions): Loader<T>;
|
|
119
|
+
|
|
120
|
+
export { fromQuery, fromQueryCursor, fromQueryPaginated, toDrizzle, toDrizzleTx };
|