khotan-data 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ 'use strict';
2
+
3
+ // src/drizzle-extract.ts
4
+ function fromQuery(name, queryFn) {
5
+ return {
6
+ name,
7
+ async *extract() {
8
+ const rows = await queryFn();
9
+ for (const row of rows) {
10
+ yield row;
11
+ }
12
+ }
13
+ };
14
+ }
15
/**
 * Wrap a caller-supplied generator function as an extractor.
 *
 * The function is stored as-is under `extract`; it is not invoked or
 * wrapped here, so iteration behaviour is entirely up to the caller.
 *
 * @param name - Step name exposed on the returned extractor.
 * @param generatorFn - Function used verbatim as the `extract` member.
 * @returns An object of the shape `{ name, extract: generatorFn }`.
 */
function fromQueryCursor(name, generatorFn) {
  const extractor = { name, extract: generatorFn };
  return extractor;
}
21
+ function fromQueryPaginated(name, opts) {
22
+ const pageSize = opts.pageSize ?? 1e3;
23
+ return {
24
+ name,
25
+ async *extract() {
26
+ let offset = 0;
27
+ for (; ; ) {
28
+ const rows = await opts.query(pageSize, offset);
29
+ if (rows.length === 0) break;
30
+ for (const row of rows) {
31
+ yield row;
32
+ }
33
+ if (rows.length < pageSize) break;
34
+ offset += pageSize;
35
+ }
36
+ }
37
+ };
38
+ }
39
+
40
+ // src/drizzle-load.ts
41
// Upper bound on bind parameters used to size one statement.
var PG_MAX_PARAMETERS = 65535;

/**
 * Work out how many records may go into a single write statement.
 *
 * Resolution order:
 *  1. `options.maxRowsPerStatement`, when it is a number >= 1 (floored);
 *  2. `floor(PG_MAX_PARAMETERS / options.columnsPerRow)`, when
 *     `columnsPerRow` is a positive number, but never less than 1;
 *  3. 1000 otherwise.
 *
 * The result is always a whole number >= 1 (or `Infinity` if the caller
 * explicitly passed it). Callers advance their chunking loop by this value,
 * so a zero, negative or NaN result would make that loop spin forever;
 * such inputs are therefore ignored rather than returned.
 *
 * @param options - Optional `{ maxRowsPerStatement?, columnsPerRow? }`.
 * @returns The chunk size to use.
 */
function resolveMaxRows(options) {
  const explicit = options?.maxRowsPerStatement;
  if (typeof explicit === "number" && explicit >= 1) {
    return Math.floor(explicit);
  }

  const columns = options?.columnsPerRow;
  if (typeof columns === "number" && columns > 0) {
    // More columns than the parameter limit would floor to 0; clamp to one
    // row per statement so the caller still makes progress.
    return Math.max(1, Math.floor(PG_MAX_PARAMETERS / columns));
  }

  return 1000;
}
49
/**
 * Build a loader that hands records to `writeFn` in bounded chunks.
 *
 * The chunk size comes from `resolveMaxRows(options)`. Chunks are written
 * sequentially; a chunk whose write throws is recorded (one error entry per
 * record in that chunk, all sharing the same Error) and processing moves on
 * to the next chunk.
 *
 * @param name - Step name exposed on the returned loader.
 * @param writeFn - Called with each chunk; its result is awaited and ignored.
 * @param options - Forwarded to `resolveMaxRows`.
 * @returns An object of the shape `{ name, load }`, where `load(records)`
 *   resolves to `{ recordsLoaded, errors }`.
 */
function toDrizzle(name, writeFn, options) {
  const chunkSize = resolveMaxRows(options);

  async function load(records) {
    const failures = [];
    let written = 0;
    let start = 0;

    while (start < records.length) {
      const slice = records.slice(start, start + chunkSize);
      start += chunkSize;
      try {
        await writeFn(slice);
        written += slice.length;
      } catch (cause) {
        // Non-Error throwables are wrapped so every entry carries an Error.
        const error = cause instanceof Error ? cause : new Error(String(cause));
        for (const record of slice) {
          failures.push({ record, error });
        }
      }
    }

    return { recordsLoaded: written, errors: failures };
  }

  return { name, load };
}
72
/**
 * Build a loader that writes all chunks of one `load()` call inside a single
 * `db.transaction(...)` callback.
 *
 * The chunk size comes from `resolveMaxRows(options)`. If the transaction
 * call rejects, every input record is reported as failed with the same Error
 * and `recordsLoaded` is reported as 0.
 *
 * @param name - Step name exposed on the returned loader.
 * @param db - Object exposing `transaction(fn)`; `fn` receives the handle
 *   that is passed on to `writeFn`.
 * @param writeFn - Called as `writeFn(tx, chunk)`; its result is awaited and
 *   ignored.
 * @param options - Forwarded to `resolveMaxRows`.
 * @returns An object of the shape `{ name, load }`, where `load(records)`
 *   resolves to `{ recordsLoaded, errors }`.
 */
function toDrizzleTx(name, db, writeFn, options) {
  const chunkSize = resolveMaxRows(options);

  async function load(records) {
    let written = 0;

    const writeAllChunks = async (tx) => {
      for (let start = 0; start < records.length; start += chunkSize) {
        const slice = records.slice(start, start + chunkSize);
        await writeFn(tx, slice);
        written += slice.length;
      }
    };

    try {
      await db.transaction(writeAllChunks);
    } catch (cause) {
      const error = cause instanceof Error ? cause : new Error(String(cause));
      // All-or-nothing reporting: the partial count is discarded.
      return {
        recordsLoaded: 0,
        errors: Array.from(records, (record) => ({ record, error })),
      };
    }

    return { recordsLoaded: written, errors: [] };
  }

  return { name, load };
}
98
+
99
+ exports.fromQuery = fromQuery;
100
+ exports.fromQueryCursor = fromQueryCursor;
101
+ exports.fromQueryPaginated = fromQueryPaginated;
102
+ exports.toDrizzle = toDrizzle;
103
+ exports.toDrizzleTx = toDrizzleTx;
104
+ //# sourceMappingURL=chunk-FRRSW3TN.cjs.map
105
+ //# sourceMappingURL=chunk-FRRSW3TN.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/drizzle-extract.ts","../src/drizzle-load.ts"],"names":[],"mappings":";;;AAoBO,SAAS,SAAA,CACd,MACA,OAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,MAAM,IAAA,GAAO,MAAM,OAAA,EAAQ;AAC3B,MAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,QAAA,MAAM,GAAA;AAAA,MACR;AAAA,IACF;AAAA,GACF;AACF;AAqBO,SAAS,eAAA,CACd,MACA,WAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,GACX;AACF;AAcO,SAAS,kBAAA,CACd,MACA,IAAA,EAIc;AACd,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,GAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,WAAS;AACP,QAAA,MAAM,IAAA,GAAO,MAAM,IAAA,CAAK,KAAA,CAAM,UAAU,MAAM,CAAA;AAC9C,QAAA,IAAI,IAAA,CAAK,WAAW,CAAA,EAAG;AACvB,QAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,UAAA,MAAM,GAAA;AAAA,QACR;AACA,QAAA,IAAI,IAAA,CAAK,SAAS,QAAA,EAAU;AAC5B,QAAA,MAAA,IAAU,QAAA;AAAA,MACZ;AAAA,IACF;AAAA,GACF;AACF;;;ACjGA,IAAM,iBAAA,GAAoB,KAAA;AAiB1B,SAAS,eAAe,OAAA,EAAoC;AAC1D,EAAA,IAAI,OAAA,EAAS,mBAAA,EAAqB,OAAO,OAAA,CAAQ,mBAAA;AACjD,EAAA,IAAI,SAAS,aAAA,EAAe;AAC1B,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,iBAAA,GAAoB,OAAA,CAAQ,aAAa,CAAA;AAAA,EAC7D;AACA,EAAA,OAAO,GAAA;AACT;AA+BO,SAAS,SAAA,CACd,IAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,QAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,QAAA,IAAI;AACF,UAAA,MAAM,QAAQ,KAAK,CAAA;AACnB,UAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,QAClB,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,KAAA,MAAW,UAAU,KAAA,EAAO;AAC1B,YAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,UAC/B;AAAA,QACF;AAAA,MACF;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF;AAeO,SAAS,WAAA,CACd,IAAA,EACA,EAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAA
A;AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,IAAI;AACF,QAAA,MAAM,EAAA,CAAG,WAAA,CAAY,OAAO,EAAA,KAAO;AACjC,UAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,YAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,YAAA,MAAM,OAAA,CAAQ,IAAI,KAAK,CAAA;AACvB,YAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,UAClB;AAAA,QACF,CAAC,CAAA;AAAA,MACH,SAAS,GAAA,EAAK;AACZ,QAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,QAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,UAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,QAC/B;AACA,QAAA,MAAA,GAAS,CAAA;AAAA,MACX;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF","file":"chunk-FRRSW3TN.cjs","sourcesContent":["import type { DataRecord, Extractor } from \"./types.js\";\n\n/**\n * Create an extractor from any Drizzle select query.\n *\n * Pass a function that returns the query — this makes the extractor\n * re-runnable and avoids consuming a one-shot promise.\n *\n * @example\n * ```ts\n * import { fromQuery } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { users } from \"@/db/schema\";\n * import { eq } from \"drizzle-orm\";\n *\n * const extractor = fromQuery(\"active-users\", () =>\n * db.select().from(users).where(eq(users.active, true))\n * );\n * ```\n */\nexport function fromQuery<T extends DataRecord>(\n name: string,\n queryFn: () => PromiseLike<T[]>,\n): Extractor<T> {\n return {\n name,\n async *extract() {\n const rows = await queryFn();\n for (const row of rows) {\n yield row;\n }\n },\n };\n}\n\n/**\n * Create an extractor from a Drizzle query that streams results in\n * chunks. 
Use this for large tables where materializing all rows\n * at once is too expensive.\n *\n * @example\n * ```ts\n * const extractor = fromQueryCursor(\"all-events\", async function* () {\n * let offset = 0;\n * const limit = 5000;\n * while (true) {\n * const batch = await db.select().from(events).limit(limit).offset(offset);\n * if (batch.length === 0) break;\n * yield* batch;\n * offset += limit;\n * }\n * });\n * ```\n */\nexport function fromQueryCursor<T extends DataRecord>(\n name: string,\n generatorFn: () => AsyncIterable<T>,\n): Extractor<T> {\n return {\n name,\n extract: generatorFn,\n };\n}\n\n/**\n * Create an extractor from a paginated Drizzle query. Automatically\n * handles offset-based pagination so you don't have to write the loop.\n *\n * @example\n * ```ts\n * const extractor = fromQueryPaginated(\"all-users\", {\n * pageSize: 2000,\n * query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),\n * });\n * ```\n */\nexport function fromQueryPaginated<T extends DataRecord>(\n name: string,\n opts: {\n query: (limit: number, offset: number) => PromiseLike<T[]>;\n pageSize?: number;\n },\n): Extractor<T> {\n const pageSize = opts.pageSize ?? 1000;\n return {\n name,\n async *extract() {\n let offset = 0;\n for (;;) {\n const rows = await opts.query(pageSize, offset);\n if (rows.length === 0) break;\n for (const row of rows) {\n yield row;\n }\n if (rows.length < pageSize) break;\n offset += pageSize;\n }\n },\n };\n}\n","import type { DataRecord, Loader, LoadResult } from \"./types.js\";\n\nconst PG_MAX_PARAMETERS = 65535;\n\ninterface ToDrizzleOptions {\n /**\n * Max records per INSERT statement. When a batch exceeds this,\n * it's automatically split into sub-batches to stay within Postgres\n * parameter limits. Defaults to auto-calculated from columnsPerRow.\n */\n maxRowsPerStatement?: number;\n /**\n * Number of columns per row. 
Used to auto-calculate maxRowsPerStatement\n * to stay under Postgres' 65535 parameter limit.\n * If not provided, falls back to maxRowsPerStatement or 1000.\n */\n columnsPerRow?: number;\n}\n\nfunction resolveMaxRows(options?: ToDrizzleOptions): number {\n if (options?.maxRowsPerStatement) return options.maxRowsPerStatement;\n if (options?.columnsPerRow) {\n return Math.floor(PG_MAX_PARAMETERS / options.columnsPerRow);\n }\n return 1000;\n}\n\n/**\n * Create a loader that writes records using a Drizzle insert/upsert.\n *\n * You provide the write function — this keeps the loader decoupled from\n * specific Drizzle driver types while giving you full control over\n * insert/upsert/conflict behavior.\n *\n * Automatically sub-batches to stay within Postgres' 65535 parameter limit.\n *\n * @example\n * ```ts\n * import { toDrizzle } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { processedUsers } from \"@/db/schema\";\n *\n * // Simple insert\n * const loader = toDrizzle(\"insert-users\", (rows) =>\n * db.insert(processedUsers).values(rows)\n * );\n *\n * // Upsert\n * const loader = toDrizzle(\"upsert-users\", (rows) =>\n * db.insert(processedUsers).values(rows).onConflictDoUpdate({\n * target: processedUsers.id,\n * set: { name: sql`excluded.name`, updatedAt: new Date() },\n * })\n * );\n * ```\n */\nexport function toDrizzle<T extends DataRecord>(\n name: string,\n writeFn: (records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n try {\n await writeFn(chunk);\n loaded += chunk.length;\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n for (const record of chunk) {\n errors.push({ record, error });\n }\n }\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n\n/**\n * Create a loader that writes records inside a Drizzle transaction.\n *\n * All sub-batches for a single load call are wrapped in a single\n * transaction — if any batch fails, the entire load is rolled back.\n *\n * @example\n * ```ts\n * const loader = toDrizzleTx(\"tx-insert\", db, (tx, rows) =>\n * tx.insert(processedUsers).values(rows)\n * );\n * ```\n */\nexport function toDrizzleTx<T extends DataRecord>(\n name: string,\n db: { transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R> },\n writeFn: (tx: never, records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n try {\n await db.transaction(async (tx) => {\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n await writeFn(tx, chunk);\n loaded += chunk.length;\n }\n });\n } catch (err) {\n const error = err instanceof Error ? err : new Error(String(err));\n for (const record of records) {\n errors.push({ record, error });\n }\n loaded = 0;\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n"]}
@@ -0,0 +1,71 @@
1
+ // src/transformers.ts
2
+ function createTransformer(name, fn) {
3
+ return { name, transform: fn };
4
+ }
5
/**
 * Create a transformer from a mapping function.
 *
 * This is a thin alias: `fn` is passed straight to `createTransformer`.
 *
 * @param name - Step name for the transformer.
 * @param fn - Mapping function used as the transform.
 * @returns The transformer produced by `createTransformer(name, fn)`.
 */
function map(name, fn) {
  const transformer = createTransformer(name, fn);
  return transformer;
}
8
/**
 * Create a transformer that keeps or drops each record.
 *
 * The predicate's result is awaited; a truthy result passes the record
 * through unchanged, a falsy one yields an empty array (i.e. no records).
 *
 * @param name - Step name for the transformer.
 * @param predicate - Sync or async test applied to each record.
 * @returns The transformer produced by `createTransformer`.
 */
function filter(name, predicate) {
  const keepOrDrop = async (record) => ((await predicate(record)) ? record : []);
  return createTransformer(name, keepOrDrop);
}
14
/**
 * Create a transformer from a function that returns a list of records.
 *
 * This is a thin alias: `fn` is passed straight to `createTransformer`.
 *
 * @param name - Step name for the transformer.
 * @param fn - Function used as the transform.
 * @returns The transformer produced by `createTransformer(name, fn)`.
 */
function flatMap(name, fn) {
  const transformer = createTransformer(name, fn);
  return transformer;
}
17
/**
 * Create a transformer that copies only the listed keys of each record.
 *
 * A key is copied when `key in record` holds, so keys absent from the record
 * are simply left out of the result.
 *
 * @param name - Step name for the transformer.
 * @param keys - Keys to copy into the new record.
 * @returns The transformer produced by `createTransformer`.
 */
function pick(name, keys) {
  const selectKeys = (record) => {
    const picked = {};
    for (const key of keys) {
      if (!(key in record)) continue;
      picked[key] = record[key];
    }
    return picked;
  };
  return createTransformer(name, selectKeys);
}
28
/**
 * Create a transformer that copies each record without the listed keys.
 *
 * Only the record's own enumerable string-keyed properties are considered
 * (that is what `Object.entries` returns).
 *
 * The result is assembled with `Object.fromEntries`, which defines every
 * entry as an own data property. Plain `result[key] = value` assignment
 * would instead treat an own `"__proto__"` key (as produced by
 * `JSON.parse`) as a prototype change, silently dropping that field and
 * re-parenting the output object.
 *
 * @param name - Step name for the transformer.
 * @param keys - Keys to leave out of the new record.
 * @returns The transformer produced by `createTransformer`.
 */
function omit(name, keys) {
  // Built once per transformer so each record gets O(1) membership checks.
  const excluded = new Set(keys);
  return createTransformer(name, (record) =>
    Object.fromEntries(
      Object.entries(record).filter(([key]) => !excluded.has(key)),
    ),
  );
}
40
/**
 * Create a transformer that renames keys of each record.
 *
 * For every own enumerable property of the record, the new key is
 * `mapping[key]` when `mapping` has that key as an OWN property with a
 * non-nullish value; otherwise the key is kept as-is. Values are copied
 * unchanged. If two source keys end up with the same target key, the one
 * visited later wins.
 *
 * Two hazards are avoided deliberately:
 *  - the lookup is restricted to own properties, so record keys such as
 *    "constructor" or "toString" are not "renamed" to members inherited from
 *    `Object.prototype`;
 *  - the result is assembled with `Object.fromEntries`, so a `"__proto__"`
 *    key becomes an ordinary own property instead of replacing the result's
 *    prototype.
 *
 * @param name - Step name for the transformer.
 * @param mapping - Object mapping old key names to new key names.
 * @returns The transformer produced by `createTransformer`.
 */
function rename(name, mapping) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return createTransformer(name, (record) =>
    Object.fromEntries(
      Object.entries(record).map(([key, value]) => {
        const target = hasOwn.call(mapping, key) ? (mapping[key] ?? key) : key;
        return [target, value];
      }),
    ),
  );
}
50
/**
 * Create a transformer that runs several transformers in sequence.
 *
 * Each stage is applied to every record produced by the previous stage, one
 * at a time and in order. A stage may return a single record or an array of
 * records; arrays are flattened one level into the input of the next stage.
 * The composed transform always resolves to an array.
 *
 * @param name - Step name for the composed transformer.
 * @param transformers - Stages to apply, first to last.
 * @returns The transformer produced by `createTransformer`.
 */
function compose(name, transformers) {
  const runStages = async (record) => {
    let pending = [record];
    for (const stage of transformers) {
      const produced = [];
      for (const item of pending) {
        const output = await stage.transform(item);
        const outputs = Array.isArray(output) ? output : [output];
        produced.push(...outputs);
      }
      pending = produced;
    }
    return pending;
  };
  return createTransformer(name, runStages);
}
68
+
69
+ export { compose, createTransformer, filter, flatMap, map, omit, pick, rename };
70
+ //# sourceMappingURL=chunk-NVPI7OV3.js.map
71
+ //# sourceMappingURL=chunk-NVPI7OV3.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/transformers.ts"],"names":[],"mappings":";AAKO,SAAS,iBAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,EAAA,EAAG;AAC/B;AAKO,SAAS,GAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAMO,SAAS,MAAA,CACd,MACA,SAAA,EACmB;AACnB,EAAA,OAAO,iBAAA,CAAkB,IAAA,EAAM,OAAO,MAAA,KAAc;AAClD,IAAA,MAAM,IAAA,GAAO,MAAM,SAAA,CAAU,MAAM,CAAA;AACnC,IAAA,OAAO,IAAA,GAAO,SAAS,EAAC;AAAA,EAC1B,CAAC,CAAA;AACH;AAKO,SAAS,OAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,MAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,MAAA,CAAO,GAAG,CAAA;AAAA,MACvD;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAY,IAAI,CAAA;AACnC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,IAAI,CAAC,MAAA,CAAO,GAAA,CAAI,GAAG,CAAA,EAAG;AACpB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,KAAA;AAAA,MAC7C;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,MAAA,CACd,MACA,OAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,CAAC,MAAA,KAAc;AAC/C,IAAA,MAAM,SAAqB,EAAC;AAC5B,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,MAAM,MAAA,GAAS,OAAA,CAAQ,GAAG,CAAA,IAAK,GAAA;AAC/B,MAAA,MAAA,CAAO,MAAM,CAAA,GAAI,KAAA;AAAA,IACnB;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAMO,SAAS,OAAA,CACd,MACA,YAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,OAAO,MAAA,KAAc;AACrD,IAAA,IAAI,OAAA,GAAwB,CAAC,MAAM,CAAA;AAEnC,IAAA,KAAA,MAAW,eAAe,YAAA,EAAc;AACtC,MAAA,MAAM,cAA4B,EAAC;AACnC,MAAA,KAAA,MAAW,KAAK,OAAA,EAAS;AACvB,QAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,CAAC,CAAA;AAC5C,QAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,EAAG;AACzB,UAAA,WAAA,CAAY,IAAA,CAAK,GAAG,MAAM,CAAA;AAAA
,QAC5B,CAAA,MAAO;AACL,UAAA,WAAA,CAAY,KAAK,MAAM,CAAA;AAAA,QACzB;AAAA,MACF;AACA,MAAA,OAAA,GAAU,WAAA;AAAA,IACZ;AAEA,IAAA,OAAO,OAAA;AAAA,EACT,CAAC,CAAA;AACH","file":"chunk-NVPI7OV3.js","sourcesContent":["import type { DataRecord, Transformer } from \"./types.js\";\n\n/**\n * Create a custom transformer from a function.\n */\nexport function createTransformer<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | TOutput[] | Promise<TOutput | TOutput[]>,\n): Transformer<TInput, TOutput> {\n return { name, transform: fn };\n}\n\n/**\n * Transform each record using a mapping function.\n */\nexport function map<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | Promise<TOutput>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Filter records based on a predicate. Records that don't match\n * are dropped (returned as empty array).\n */\nexport function filter<T extends DataRecord = DataRecord>(\n name: string,\n predicate: (record: T) => boolean | Promise<boolean>,\n): Transformer<T, T> {\n return createTransformer(name, async (record: T) => {\n const keep = await predicate(record);\n return keep ? 
record : [];\n });\n}\n\n/**\n * Transform each record into zero or more records.\n */\nexport function flatMap<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput[] | Promise<TOutput[]>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Pick specific keys from each record.\n */\nexport function pick<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Pick<T, K> & DataRecord> {\n return createTransformer<T, Pick<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Pick<T, K> & DataRecord;\n for (const key of keys) {\n if (key in record) {\n (result as Record<string, unknown>)[key] = record[key];\n }\n }\n return result;\n });\n}\n\n/**\n * Omit specific keys from each record.\n */\nexport function omit<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Omit<T, K> & DataRecord> {\n const keySet = new Set<string>(keys);\n return createTransformer<T, Omit<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Omit<T, K> & DataRecord;\n for (const [key, value] of Object.entries(record)) {\n if (!keySet.has(key)) {\n (result as Record<string, unknown>)[key] = value;\n }\n }\n return result;\n });\n}\n\n/**\n * Rename keys in each record.\n */\nexport function rename<T extends DataRecord>(\n name: string,\n mapping: Record<string, string>,\n): Transformer<T> {\n return createTransformer<T>(name, (record: T) => {\n const result: DataRecord = {};\n for (const [key, value] of Object.entries(record)) {\n const newKey = mapping[key] ?? 
key;\n result[newKey] = value;\n }\n return result;\n });\n}\n\n/**\n * Compose multiple transformers into a single transformer that\n * applies them in sequence.\n */\nexport function compose<T extends DataRecord>(\n name: string,\n transformers: Transformer[],\n): Transformer<T> {\n return createTransformer<T>(name, async (record: T) => {\n let records: DataRecord[] = [record];\n\n for (const transformer of transformers) {\n const nextRecords: DataRecord[] = [];\n for (const r of records) {\n const result = await transformer.transform(r);\n if (Array.isArray(result)) {\n nextRecords.push(...result);\n } else {\n nextRecords.push(result);\n }\n }\n records = nextRecords;\n }\n\n return records;\n });\n}\n"]}
@@ -0,0 +1,213 @@
1
+ // src/pipeline-builder.ts
2
+ var Pipeline = class _Pipeline {
3
+ #name;
4
+ #extractor;
5
+ #transformers;
6
+ #loaders;
7
+ #listeners;
8
+ constructor(name, extractor, transformers, loaders, listeners) {
9
+ this.#name = name;
10
+ this.#extractor = extractor;
11
+ this.#transformers = transformers;
12
+ this.#loaders = loaders;
13
+ this.#listeners = listeners;
14
+ }
15
+ static create(name) {
16
+ return new _Pipeline(name, null, [], [], []);
17
+ }
18
+ get name() {
19
+ return this.#name;
20
+ }
21
+ /**
22
+ * Set the data source for this pipeline.
23
+ */
24
+ extract(extractor) {
25
+ return new _Pipeline(
26
+ this.#name,
27
+ extractor,
28
+ this.#transformers,
29
+ this.#loaders,
30
+ this.#listeners
31
+ );
32
+ }
33
+ /**
34
+ * Add a transformation step.
35
+ */
36
+ transform(transformer) {
37
+ return new _Pipeline(
38
+ this.#name,
39
+ this.#extractor,
40
+ [...this.#transformers, transformer],
41
+ this.#loaders,
42
+ this.#listeners
43
+ );
44
+ }
45
+ /**
46
+ * Add a load destination.
47
+ */
48
+ load(loader) {
49
+ return new _Pipeline(
50
+ this.#name,
51
+ this.#extractor,
52
+ this.#transformers,
53
+ [...this.#loaders, loader],
54
+ this.#listeners
55
+ );
56
+ }
57
+ /**
58
+ * Subscribe to pipeline events.
59
+ */
60
+ on(listener) {
61
+ return new _Pipeline(
62
+ this.#name,
63
+ this.#extractor,
64
+ this.#transformers,
65
+ this.#loaders,
66
+ [...this.#listeners, listener]
67
+ );
68
+ }
69
+ /**
70
+ * Execute the pipeline.
71
+ */
72
+ async run(options = {}) {
73
+ if (!this.#extractor) {
74
+ throw new Error(
75
+ `Pipeline "${this.#name}" has no extractor. Call .extract() before .run().`
76
+ );
77
+ }
78
+ if (this.#loaders.length === 0) {
79
+ throw new Error(
80
+ `Pipeline "${this.#name}" has no loaders. Call .load() before .run().`
81
+ );
82
+ }
83
+ const { batchSize = 1e3, continueOnError = false, signal } = options;
84
+ const startTime = performance.now();
85
+ const errors = [];
86
+ let recordsProcessed = 0;
87
+ let recordsLoaded = 0;
88
+ this.#emit({
89
+ type: "pipeline:start",
90
+ timestamp: /* @__PURE__ */ new Date(),
91
+ data: { name: this.#name }
92
+ });
93
+ let batch = [];
94
+ const flushBatch = async () => {
95
+ if (batch.length === 0) return;
96
+ for (const loader of this.#loaders) {
97
+ try {
98
+ this.#emit({
99
+ type: "step:start",
100
+ timestamp: /* @__PURE__ */ new Date(),
101
+ stepName: loader.name
102
+ });
103
+ const result = await loader.load(batch);
104
+ recordsLoaded += result.recordsLoaded;
105
+ for (const err of result.errors) {
106
+ errors.push({
107
+ stepName: loader.name,
108
+ error: err.error,
109
+ record: err.record
110
+ });
111
+ }
112
+ this.#emit({
113
+ type: "step:end",
114
+ timestamp: /* @__PURE__ */ new Date(),
115
+ stepName: loader.name,
116
+ data: result
117
+ });
118
+ } catch (err) {
119
+ const error = err instanceof Error ? err : new Error(String(err));
120
+ errors.push({ stepName: loader.name, error });
121
+ if (!continueOnError) {
122
+ throw error;
123
+ }
124
+ }
125
+ }
126
+ batch = [];
127
+ };
128
+ try {
129
+ for await (const raw of this.#extractor.extract()) {
130
+ if (signal?.aborted) {
131
+ break;
132
+ }
133
+ this.#emit({
134
+ type: "record:extracted",
135
+ timestamp: /* @__PURE__ */ new Date(),
136
+ stepName: this.#extractor.name,
137
+ data: raw
138
+ });
139
+ let records = [raw];
140
+ for (const transformer of this.#transformers) {
141
+ const nextRecords = [];
142
+ for (const record of records) {
143
+ try {
144
+ this.#emit({
145
+ type: "step:start",
146
+ timestamp: /* @__PURE__ */ new Date(),
147
+ stepName: transformer.name
148
+ });
149
+ const result = await transformer.transform(record);
150
+ const transformed = Array.isArray(result) ? result : [result];
151
+ nextRecords.push(...transformed);
152
+ this.#emit({
153
+ type: "record:transformed",
154
+ timestamp: /* @__PURE__ */ new Date(),
155
+ stepName: transformer.name,
156
+ data: transformed
157
+ });
158
+ } catch (err) {
159
+ const error = err instanceof Error ? err : new Error(String(err));
160
+ errors.push({
161
+ stepName: transformer.name,
162
+ error,
163
+ record
164
+ });
165
+ this.#emit({
166
+ type: "error",
167
+ timestamp: /* @__PURE__ */ new Date(),
168
+ stepName: transformer.name,
169
+ data: error
170
+ });
171
+ if (!continueOnError) {
172
+ throw error;
173
+ }
174
+ }
175
+ }
176
+ records = nextRecords;
177
+ }
178
+ batch.push(...records);
179
+ recordsProcessed += records.length;
180
+ if (batch.length >= batchSize) {
181
+ await flushBatch();
182
+ }
183
+ }
184
+ await flushBatch();
185
+ } catch {
186
+ if (!continueOnError) {
187
+ const duration2 = performance.now() - startTime;
188
+ this.#emit({
189
+ type: "pipeline:end",
190
+ timestamp: /* @__PURE__ */ new Date(),
191
+ data: { recordsProcessed, recordsLoaded, errors, duration: duration2 }
192
+ });
193
+ return { recordsProcessed, recordsLoaded, errors, duration: duration2 };
194
+ }
195
+ }
196
+ const duration = performance.now() - startTime;
197
+ this.#emit({
198
+ type: "pipeline:end",
199
+ timestamp: /* @__PURE__ */ new Date(),
200
+ data: { recordsProcessed, recordsLoaded, errors, duration }
201
+ });
202
+ return { recordsProcessed, recordsLoaded, errors, duration };
203
+ }
204
+ #emit(event) {
205
+ for (const listener of this.#listeners) {
206
+ listener(event);
207
+ }
208
+ }
209
+ };
210
+
211
+ export { Pipeline };
212
+ //# sourceMappingURL=chunk-TK4HD4XA.js.map
213
+ //# sourceMappingURL=chunk-TK4HD4XA.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/pipeline-builder.ts"],"names":["duration"],"mappings":";AAuBO,IAAM,QAAA,GAAN,MAAM,SAAA,CAAmD;AAAA,EACrD,KAAA;AAAA,EACA,UAAA;AAAA,EACA,aAAA;AAAA,EACA,QAAA;AAAA,EACA,UAAA;AAAA,EAED,WAAA,CACN,IAAA,EACA,SAAA,EACA,YAAA,EACA,SACA,SAAA,EACA;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAA;AACb,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,aAAA,GAAgB,YAAA;AACrB,IAAA,IAAA,CAAK,QAAA,GAAW,OAAA;AAChB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAAA,EACpB;AAAA,EAEA,OAAO,OAAO,IAAA,EAAwB;AACpC,IAAA,OAAO,IAAI,UAAS,IAAA,EAAM,IAAA,EAAM,EAAC,EAAG,EAAC,EAAG,EAAE,CAAA;AAAA,EAC5C;AAAA,EAEA,IAAI,IAAA,GAAe;AACjB,IAAA,OAAO,IAAA,CAAK,KAAA;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,QAA8B,SAAA,EAAsC;AAClE,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,SAAA;AAAA,MACA,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,UACE,WAAA,EACmB;AACnB,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,aAAA,EAAe,WAAW,CAAA;AAAA,MACnC,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,KAAK,MAAA,EAA8C;AACjD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,QAAA,EAAU,MAAM,CAAA;AAAA,MACzB,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,GAAG,QAAA,EAAqD;AACtD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,UAAA,EAAY,QAAQ;AAAA,KAC/B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,GAAA,CAAI,OAAA,GAA2B,EAAC,EAA4B;AAChE,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,kDAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,6CAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,SAAA,GAAY,GAAA,EAAM,eAAA,GAAkB,KAAA,EAAO,QAAO,GAAI,OAAA;AAE9D,IAAA,MAAM,SAAA,GAAY,YAAY,GAAA,EAAI;AAClC
,IAAA,MAAM,SAA8B,EAAC;AACrC,IAAA,IAAI,gBAAA,GAAmB,CAAA;AACvB,IAAA,IAAI,aAAA,GAAgB,CAAA;AAEpB,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,gBAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,IAAA,EAAM,IAAA,CAAK,KAAA;AAAM,KAC1B,CAAA;AAED,IAAA,IAAI,QAAsB,EAAC;AAE3B,IAAA,MAAM,aAAa,YAA2B;AAC5C,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AAExB,MAAA,KAAA,MAAW,MAAA,IAAU,KAAK,QAAA,EAAU;AAClC,QAAA,IAAI;AACF,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,YAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO;AAAA,WAClB,CAAA;AAED,UAAA,MAAM,MAAA,GAAS,MAAM,MAAA,CAAO,IAAA,CAAK,KAAK,CAAA;AACtC,UAAA,aAAA,IAAiB,MAAA,CAAO,aAAA;AAExB,UAAA,KAAA,MAAW,GAAA,IAAO,OAAO,MAAA,EAAQ;AAC/B,YAAA,MAAA,CAAO,IAAA,CAAK;AAAA,cACV,UAAU,MAAA,CAAO,IAAA;AAAA,cACjB,OAAO,GAAA,CAAI,KAAA;AAAA,cACX,QAAQ,GAAA,CAAI;AAAA,aACb,CAAA;AAAA,UACH;AAEA,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,UAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO,IAAA;AAAA,YACjB,IAAA,EAAM;AAAA,WACP,CAAA;AAAA,QACH,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,MAAA,CAAO,KAAK,EAAE,QAAA,EAAU,MAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AAE5C,UAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,YAAA,MAAM,KAAA;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,EAAC;AAAA,IACX,CAAA;AAEA,IAAA,IAAI;AACF,MAAA,WAAA,MAAiB,GAAA,IAAO,IAAA,CAAK,UAAA,CAAW,OAAA,EAAQ,EAAG;AACjD,QAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,UAAA;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,kBAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,QAAA,EAAU,KAAK,UAAA,CAAW,IAAA;AAAA,UAC1B,IAAA,EAAM;AAAA,SACP,CAAA;AAED,QAAA,IAAI,OAAA,GAAwB,CAAC,GAAG,CAAA;AAEhC,QAAA,KAAA,MAAW,WAAA,IAAe,KAAK,aAAA,EAAe;AAC5C,UAAA,MAAM,cAA4B,EAAC;AAEnC,UAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,YAAA,IAAI;AACF,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,YAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY;AAAA,eACvB,CAAA;AAED,cAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,MAAM,CAAA;AACjD,cAAA,MAAM,cAAc,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,GAAI,MAAA,GAAS,CAAC,MA
AM,CAAA;AAC5D,cAAA,WAAA,CAAY,IAAA,CAAK,GAAG,WAAW,CAAA;AAE/B,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,oBAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAAA,YACH,SAAS,GAAA,EAAK;AACZ,cAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,cAAA,MAAA,CAAO,IAAA,CAAK;AAAA,gBACV,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,KAAA;AAAA,gBACA;AAAA,eACD,CAAA;AAED,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,OAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAED,cAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,gBAAA,MAAM,KAAA;AAAA,cACR;AAAA,YACF;AAAA,UACF;AAEA,UAAA,OAAA,GAAU,WAAA;AAAA,QACZ;AAEA,QAAA,KAAA,CAAM,IAAA,CAAK,GAAG,OAAO,CAAA;AACrB,QAAA,gBAAA,IAAoB,OAAA,CAAQ,MAAA;AAE5B,QAAA,IAAI,KAAA,CAAM,UAAU,SAAA,EAAW;AAC7B,UAAA,MAAM,UAAA,EAAW;AAAA,QACnB;AAAA,MACF;AAEA,MAAA,MAAM,UAAA,EAAW;AAAA,IACnB,CAAA,CAAA,MAAQ;AACN,MAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,QAAA,MAAMA,SAAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AACrC,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,cAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,MAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA;AAAS,SAC3D,CAAA;AACD,QAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA,EAAS;AAAA,MAC7D;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AAErC,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,cAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,QAAQ,QAAA;AAAS,KAC3D,CAAA;AAED,IAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,QAAA,EAAS;AAAA,EAC7D;AAAA,EAEA,MAAM,KAAA,EAKG;AACP,IAAA,KAAA,MAAW,QAAA,IAAY,KAAK,UAAA,EAAY;AACtC,MAAA,QAAA,CAAS,KAA6C,CAAA;AAAA,IACxD;AAAA,EACF;AACF","file":"chunk-TK4HD4XA.js","sourcesContent":["import type {\n DataRecord,\n Extractor,\n Loader,\n PipelineEventListener,\n PipelineOptions,\n PipelineResult,\n PipelineStepError,\n Transformer,\n} from \"./types.js\";\n\n/**\n * A composable, type-safe ETL pipeline builder.\n *\n * @example\n * ```ts\n 
* const result = await Pipeline.create(\"my-pipeline\")\n * .extract(myExtractor)\n * .transform(myTransformer)\n * .load(myLoader)\n * .run();\n * ```\n */\nexport class Pipeline<TCurrent extends DataRecord = DataRecord> {\n readonly #name: string;\n readonly #extractor: Extractor | null;\n readonly #transformers: Transformer[];\n readonly #loaders: Loader[];\n readonly #listeners: PipelineEventListener[];\n\n private constructor(\n name: string,\n extractor: Extractor | null,\n transformers: Transformer[],\n loaders: Loader[],\n listeners: PipelineEventListener[],\n ) {\n this.#name = name;\n this.#extractor = extractor;\n this.#transformers = transformers;\n this.#loaders = loaders;\n this.#listeners = listeners;\n }\n\n static create(name: string): Pipeline {\n return new Pipeline(name, null, [], [], []);\n }\n\n get name(): string {\n return this.#name;\n }\n\n /**\n * Set the data source for this pipeline.\n */\n extract<T extends DataRecord>(extractor: Extractor<T>): Pipeline<T> {\n return new Pipeline(\n this.#name,\n extractor,\n this.#transformers,\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a transformation step.\n */\n transform<TOutput extends DataRecord>(\n transformer: Transformer<TCurrent, TOutput>,\n ): Pipeline<TOutput> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n [...this.#transformers, transformer],\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a load destination.\n */\n load(loader: Loader<TCurrent>): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n [...this.#loaders, loader],\n this.#listeners,\n );\n }\n\n /**\n * Subscribe to pipeline events.\n */\n on(listener: PipelineEventListener): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n this.#loaders,\n [...this.#listeners, listener],\n );\n }\n\n /**\n * Execute the pipeline.\n */\n async run(options: PipelineOptions = {}): 
Promise<PipelineResult> {\n if (!this.#extractor) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no extractor. Call .extract() before .run().`,\n );\n }\n\n if (this.#loaders.length === 0) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no loaders. Call .load() before .run().`,\n );\n }\n\n const { batchSize = 1000, continueOnError = false, signal } = options;\n\n const startTime = performance.now();\n const errors: PipelineStepError[] = [];\n let recordsProcessed = 0;\n let recordsLoaded = 0;\n\n this.#emit({\n type: \"pipeline:start\",\n timestamp: new Date(),\n data: { name: this.#name },\n });\n\n let batch: DataRecord[] = [];\n\n const flushBatch = async (): Promise<void> => {\n if (batch.length === 0) return;\n\n for (const loader of this.#loaders) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: loader.name,\n });\n\n const result = await loader.load(batch);\n recordsLoaded += result.recordsLoaded;\n\n for (const err of result.errors) {\n errors.push({\n stepName: loader.name,\n error: err.error,\n record: err.record,\n });\n }\n\n this.#emit({\n type: \"step:end\",\n timestamp: new Date(),\n stepName: loader.name,\n data: result,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({ stepName: loader.name, error });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n batch = [];\n };\n\n try {\n for await (const raw of this.#extractor.extract()) {\n if (signal?.aborted) {\n break;\n }\n\n this.#emit({\n type: \"record:extracted\",\n timestamp: new Date(),\n stepName: this.#extractor.name,\n data: raw,\n });\n\n let records: DataRecord[] = [raw];\n\n for (const transformer of this.#transformers) {\n const nextRecords: DataRecord[] = [];\n\n for (const record of records) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: transformer.name,\n });\n\n const result = await transformer.transform(record);\n const transformed = Array.isArray(result) ? result : [result];\n nextRecords.push(...transformed);\n\n this.#emit({\n type: \"record:transformed\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: transformed,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({\n stepName: transformer.name,\n error,\n record,\n });\n\n this.#emit({\n type: \"error\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: error,\n });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n records = nextRecords;\n }\n\n batch.push(...records);\n recordsProcessed += records.length;\n\n if (batch.length >= batchSize) {\n await flushBatch();\n }\n }\n\n await flushBatch();\n } catch {\n if (!continueOnError) {\n const duration = performance.now() - startTime;\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n }\n\n const duration = performance.now() - startTime;\n\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n\n #emit(event: {\n type: string;\n timestamp: Date;\n stepName?: string;\n data?: unknown;\n }): void {\n for (const listener of this.#listeners) {\n listener(event as Parameters<PipelineEventListener>[0]);\n }\n }\n}\n"]}
@@ -0,0 +1,28 @@
1
+ 'use strict';
2
+
3
+ var chunkFRRSW3TN_cjs = require('./chunk-FRRSW3TN.cjs');
4
+ // Re-export each Drizzle helper from the shared chunk through an enumerable getter.
5
+
6
+
7
+ Object.defineProperty(exports, "fromQuery", {
8
+ enumerable: true,
9
+ get: function () { return chunkFRRSW3TN_cjs.fromQuery; }
10
+ });
11
+ Object.defineProperty(exports, "fromQueryCursor", {
12
+ enumerable: true,
13
+ get: function () { return chunkFRRSW3TN_cjs.fromQueryCursor; }
14
+ });
15
+ Object.defineProperty(exports, "fromQueryPaginated", {
16
+ enumerable: true,
17
+ get: function () { return chunkFRRSW3TN_cjs.fromQueryPaginated; }
18
+ });
19
+ Object.defineProperty(exports, "toDrizzle", {
20
+ enumerable: true,
21
+ get: function () { return chunkFRRSW3TN_cjs.toDrizzle; }
22
+ });
23
+ Object.defineProperty(exports, "toDrizzleTx", {
24
+ enumerable: true,
25
+ get: function () { return chunkFRRSW3TN_cjs.toDrizzleTx; }
26
+ });
27
+ //# sourceMappingURL=drizzle.cjs.map
28
+ //# sourceMappingURL=drizzle.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"drizzle.cjs"}
@@ -0,0 +1,120 @@
1
+ import { D as DataRecord, E as Extractor, b as Loader } from './types-EpLTQcN2.cjs';
2
+
3
+ /**
4
+ * Create an extractor from any Drizzle select query. Each run awaits the full result set, then yields its rows one at a time.
5
+ *
6
+ * Pass a function that returns the query — this makes the extractor
7
+ * re-runnable and avoids consuming a one-shot promise.
8
+ *
9
+ * @example
10
+ * ```ts
11
+ * import { fromQuery } from "khotan-data/drizzle";
12
+ * import { db } from "@/db";
13
+ * import { users } from "@/db/schema";
14
+ * import { eq } from "drizzle-orm";
15
+ *
16
+ * const extractor = fromQuery("active-users", () =>
17
+ * db.select().from(users).where(eq(users.active, true))
18
+ * );
19
+ * ```
20
+ */
21
+ declare function fromQuery<T extends DataRecord>(name: string, queryFn: () => PromiseLike<T[]>): Extractor<T>;
22
+ /**
23
+ * Create an extractor whose `extract` is the given function (typically an async
24
+ * generator), letting a Drizzle query stream results in chunks. Use this for large
25
+ * tables where materializing all rows at once is too expensive.
26
+ *
27
+ * @example
28
+ * ```ts
29
+ * const extractor = fromQueryCursor("all-events", async function* () {
30
+ * let offset = 0;
31
+ * const limit = 5000;
32
+ * while (true) {
33
+ * const batch = await db.select().from(events).limit(limit).offset(offset);
34
+ * if (batch.length === 0) break;
35
+ * yield* batch;
36
+ * offset += limit;
37
+ * }
38
+ * });
39
+ * ```
40
+ */
41
+ declare function fromQueryCursor<T extends DataRecord>(name: string, generatorFn: () => AsyncIterable<T>): Extractor<T>;
42
+ /**
43
+ * Create an extractor from a paginated Drizzle query. Automatically handles offset-based
44
+ * pagination: `pageSize` defaults to 1000, and paging stops at the first empty or short page.
45
+ *
46
+ * @example
47
+ * ```ts
48
+ * const extractor = fromQueryPaginated("all-users", {
49
+ * pageSize: 2000,
50
+ * query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),
51
+ * });
52
+ * ```
53
+ */
54
+ declare function fromQueryPaginated<T extends DataRecord>(name: string, opts: {
55
+ query: (limit: number, offset: number) => PromiseLike<T[]>;
56
+ pageSize?: number;
57
+ }): Extractor<T>;
58
+
59
+ interface ToDrizzleOptions {
60
+ /**
61
+ * Max records per INSERT statement. When a batch exceeds this,
62
+ * it's automatically split into sub-batches to stay within Postgres
63
+ * parameter limits. Takes precedence over columnsPerRow; when omitted (or 0), the limit is derived from columnsPerRow, else 1000.
64
+ */
65
+ maxRowsPerStatement?: number;
66
+ /**
67
+ * Number of columns per row. Used to auto-calculate the rows per statement as
68
+ * floor(65535 / columnsPerRow), staying under Postgres' 65535 parameter limit.
69
+ * Ignored when maxRowsPerStatement is set. If neither is provided, the limit is 1000 rows.
70
+ */
71
+ columnsPerRow?: number;
72
+ }
73
+ /**
74
+ * Create a loader that writes records using a Drizzle insert/upsert.
75
+ *
76
+ * You provide the write function — this keeps the loader decoupled from
77
+ * specific Drizzle driver types while giving you full control over
78
+ * insert/upsert/conflict behavior.
79
+ *
80
+ * Records are written in sub-batches (1000 rows by default; set `columnsPerRow` to size them against Postgres' 65535 parameter limit).
81
+ * A sub-batch whose write throws is recorded as an error for each of its records, and the remaining sub-batches are still attempted.
82
+ * @example
83
+ * ```ts
84
+ * import { toDrizzle } from "khotan-data/drizzle";
85
+ * import { db } from "@/db";
86
+ * import { processedUsers } from "@/db/schema";
87
+ *
88
+ * // Simple insert
89
+ * const loader = toDrizzle("insert-users", (rows) =>
90
+ * db.insert(processedUsers).values(rows)
91
+ * );
92
+ *
93
+ * // Upsert
94
+ * const loader = toDrizzle("upsert-users", (rows) =>
95
+ * db.insert(processedUsers).values(rows).onConflictDoUpdate({
96
+ * target: processedUsers.id,
97
+ * set: { name: sql`excluded.name`, updatedAt: new Date() },
98
+ * })
99
+ * );
100
+ * ```
101
+ */
102
+ declare function toDrizzle<T extends DataRecord>(name: string, writeFn: (records: T[]) => PromiseLike<unknown>, options?: ToDrizzleOptions): Loader<T>;
103
+ /**
104
+ * Create a loader that writes records inside a Drizzle transaction. `db` only needs to expose a `transaction(fn)` method.
105
+ *
106
+ * All sub-batches for a single load call are wrapped in a single
107
+ * transaction — if any batch fails, the entire load is rolled back.
108
+ * NOTE(review): `tx` is declared as `never`, so `writeFn` presumably needs an explicit type annotation on `tx` to type-check — confirm.
109
+ * @example
110
+ * ```ts
111
+ * const loader = toDrizzleTx("tx-insert", db, (tx, rows) =>
112
+ * tx.insert(processedUsers).values(rows)
113
+ * );
114
+ * ```
115
+ */
116
+ declare function toDrizzleTx<T extends DataRecord>(name: string, db: {
117
+ transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R>;
118
+ }, writeFn: (tx: never, records: T[]) => PromiseLike<unknown>, options?: ToDrizzleOptions): Loader<T>;
119
+
120
+ export { fromQuery, fromQueryCursor, fromQueryPaginated, toDrizzle, toDrizzleTx };