khotan-data 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 khotan-data contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,196 @@
1
+ # khotan-data
2
+
3
+ Data primitives for TypeScript — ETL pipelines, transforms, and Drizzle Postgres integration.
4
+
5
+ Built for **Next.js + Drizzle + Postgres** projects. Think better-auth for data management.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install khotan-data
11
+ ```
12
+
13
+ Requires `drizzle-orm` as a peer dependency (you almost certainly already have it).
14
+
15
+ ## Quick Start
16
+
17
+ ```typescript
18
+ import { Pipeline, fromQuery, map, filter, toDrizzle } from "khotan-data";
19
+ import { db } from "@/db";
20
+ import { users, analytics } from "@/db/schema";
21
+ import { eq } from "drizzle-orm";
22
+
23
+ const result = await Pipeline.create("user-analytics")
24
+ .extract(
25
+ fromQuery("active-users", () =>
26
+ db.select().from(users).where(eq(users.active, true))
27
+ ),
28
+ )
29
+ .transform(filter("adults", (r) => r.age >= 18))
30
+ .transform(
31
+ map("enrich", (r) => ({
32
+ userId: r.id,
33
+ email: r.email.toLowerCase(),
34
+ segment: r.age >= 65 ? "senior" : "standard",
35
+ processedAt: new Date(),
36
+ })),
37
+ )
38
+ .load(
39
+ toDrizzle("write-analytics", (rows) =>
40
+ db.insert(analytics).values(rows)
41
+ ),
42
+ )
43
+ .run();
44
+ ```
45
+
46
+ ## Extractors
47
+
48
+ Pull data from Drizzle queries:
49
+
50
+ ```typescript
51
+ import { fromQuery, fromQueryPaginated, fromQueryCursor } from "khotan-data/drizzle";
52
+
53
+ // One-shot query
54
+ const source = fromQuery("users", () =>
55
+ db.select().from(users).where(eq(users.active, true))
56
+ );
57
+
58
+ // Auto-paginated for large tables
59
+ const source = fromQueryPaginated("all-orders", {
60
+ pageSize: 5000,
61
+ query: (limit, offset) =>
62
+ db.select().from(orders).limit(limit).offset(offset),
63
+ });
64
+
65
+ // Full control with async generator
66
+ const source = fromQueryCursor("stream", async function* () {
67
+ // your custom cursor/streaming logic
68
+ });
69
+ ```
70
+
71
+ Generic extractors for testing and non-DB sources:
72
+
73
+ ```typescript
74
+ import { fromArray, createExtractor } from "khotan-data";
75
+
76
+ const testSource = fromArray("mock", [{ id: 1 }, { id: 2 }]);
77
+ ```
78
+
79
+ ## Transforms
80
+
81
+ Composable, type-safe record transformations:
82
+
83
+ ```typescript
84
+ import { map, filter, pick, omit, rename, flatMap, compose } from "khotan-data/transform";
85
+
86
+ // Map fields
87
+ .transform(map("normalize", (r) => ({ ...r, email: r.email.toLowerCase() })))
88
+
89
+ // Filter records (non-matching records are dropped)
90
+ .transform(filter("active-only", (r) => r.active))
91
+
92
+ // Pick/omit fields
93
+ .transform(pick("slim", ["id", "name", "email"]))
94
+ .transform(omit("strip-pii", ["ssn", "dob"]))
95
+
96
+ // Rename fields
97
+ .transform(rename("api-names", { firstName: "first_name" }))
98
+
99
+ // One-to-many expansion
100
+ .transform(flatMap("explode-tags", (r) =>
101
+ r.tags.map((tag) => ({ ...r, tag }))
102
+ ))
103
+
104
+ // Compose multiple transforms into one step
105
+ .transform(compose("pipeline", [filterStep, mapStep, renameStep]))
106
+ ```
107
+
108
+ ## Loaders
109
+
110
+ Write data into Drizzle tables:
111
+
112
+ ```typescript
113
+ import { toDrizzle, toDrizzleTx } from "khotan-data/drizzle";
114
+
115
+ // Simple insert (split into statements of 1000 rows by default; pass `columnsPerRow` so wide tables stay under the Postgres parameter limit)
116
+ const loader = toDrizzle("insert", (rows) =>
117
+ db.insert(analytics).values(rows)
118
+ );
119
+
120
+ // Upsert
121
+ const loader = toDrizzle("upsert", (rows) =>
122
+ db
123
+ .insert(analytics)
124
+ .values(rows)
125
+ .onConflictDoUpdate({
126
+ target: analytics.userId,
127
+ set: { segment: sql`excluded.segment`, updatedAt: new Date() },
128
+ })
129
+ );
130
+
131
+ // Transactional — all-or-nothing per batch
132
+ const loader = toDrizzleTx("tx-insert", db, (tx, rows) =>
133
+ tx.insert(analytics).values(rows)
134
+ );
135
+
136
+ // Control batching for wide tables
137
+ const loader = toDrizzle("wide-table", writeFn, {
138
+ columnsPerRow: 25, // auto-calculates safe batch size
139
+ });
140
+ ```
141
+
142
+ ## Pipeline
143
+
144
+ The `Pipeline` builder is immutable — each method returns a new instance:
145
+
146
+ ```typescript
147
+ const base = Pipeline.create("etl")
148
+ .extract(source)
149
+ .transform(filterStep);
150
+
151
+ // Branch into different outputs
152
+ const toDb = base.load(toDrizzle("db", writeFn)).run();
153
+ const toFile = base.load(toFileSink).run();
154
+ ```
155
+
156
+ ### Options
157
+
158
+ ```typescript
159
+ await pipeline.run({
160
+ batchSize: 500, // records per load batch (default: 1000)
161
+   continueOnError: true, // keep going after a failed step instead of stopping at the first one (errors are collected in the result either way)
162
+ signal: controller.signal, // AbortSignal for cancellation
163
+ });
164
+ ```
165
+
166
+ ### Events
167
+
168
+ ```typescript
169
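+ // `.on()` returns a new pipeline — call `.run()` on the returned instance to receive events
+ const observed = pipeline.on((event) => {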
170
+ if (event.type === "error") console.error(event.stepName, event.data);
171
+ if (event.type === "pipeline:end") console.log("Done:", event.data);
172
+ });
173
+ ```
174
+
175
+ ## Subpath Imports
176
+
177
+ ```typescript
178
+ import { Pipeline } from "khotan-data/pipeline";
179
+ import { map, filter } from "khotan-data/transform";
180
+ import { fromQuery, toDrizzle } from "khotan-data/drizzle";
181
+ ```
182
+
183
+ ## Development
184
+
185
+ ```bash
186
+ npm install
187
+ npm run dev # watch mode build
188
+ npm run test # run tests
189
+ npm run test:watch # watch mode tests
190
+ npm run check # typecheck + lint + format + test
191
+ npm run build # production build
192
+ ```
193
+
194
+ ## License
195
+
196
+ MIT
@@ -0,0 +1,99 @@
1
+ // src/drizzle-extract.ts
2
+ function fromQuery(name, queryFn) {
3
+ return {
4
+ name,
5
+ async *extract() {
6
+ const rows = await queryFn();
7
+ for (const row of rows) {
8
+ yield row;
9
+ }
10
+ }
11
+ };
12
+ }
13
+ function fromQueryCursor(name, generatorFn) {
14
+ return {
15
+ name,
16
+ extract: generatorFn
17
+ };
18
+ }
19
+ function fromQueryPaginated(name, opts) {
20
+ const pageSize = opts.pageSize ?? 1e3;
21
+ return {
22
+ name,
23
+ async *extract() {
24
+ let offset = 0;
25
+ for (; ; ) {
26
+ const rows = await opts.query(pageSize, offset);
27
+ if (rows.length === 0) break;
28
+ for (const row of rows) {
29
+ yield row;
30
+ }
31
+ if (rows.length < pageSize) break;
32
+ offset += pageSize;
33
+ }
34
+ }
35
+ };
36
+ }
37
+
38
+ // src/drizzle-load.ts
39
// Upper bound on parameters per statement that the row calculation divides by.
var PG_MAX_PARAMETERS = 65535;

/**
 * Work out how many rows the loaders put into a single write call.
 *
 * Precedence: `maxRowsPerStatement`, then a value derived from
 * `columnsPerRow` (`floor(PG_MAX_PARAMETERS / columnsPerRow)`), then 1000.
 *
 * The result is always a positive whole number (or Infinity). The loaders
 * advance their chunk loop by this value, so 0 or a negative number would
 * make that loop spin forever; such inputs are treated as "not provided",
 * and a `columnsPerRow` larger than the parameter budget yields 1.
 *
 * @param {object} [options]
 * @param {number} [options.maxRowsPerStatement] - Explicit rows per statement.
 * @param {number} [options.columnsPerRow] - Columns written per row.
 * @returns {number} Rows per statement, at least 1.
 */
function resolveMaxRows(options) {
  // NaN fails `> 0`, so it is rejected together with 0 and negatives.
  const isPositive = (value) => typeof value === "number" && value > 0;

  const explicit = options?.maxRowsPerStatement;
  if (isPositive(explicit)) {
    return Math.max(1, Math.floor(explicit));
  }

  const columns = options?.columnsPerRow;
  if (isPositive(columns)) {
    return Math.max(1, Math.floor(PG_MAX_PARAMETERS / columns));
  }

  return 1e3;
}
47
/**
 * Build a loader that hands records to `writeFn` in chunks.
 *
 * The chunk size comes from `resolveMaxRows(options)`. Chunks are written
 * one after another; a chunk whose write rejects is reported as one error
 * entry per record in that chunk, and the remaining chunks are still tried.
 *
 * @param {string} name - Step name reported by the loader.
 * @param {Function} writeFn - `(rows) => thenable`; performs the write.
 * @param {object} [options] - Forwarded to `resolveMaxRows`.
 * @returns An object with `name` and `load(records)`, which resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzle(name, writeFn, options) {
  const maxRows = resolveMaxRows(options);
  return {
    name,
    async load(records) {
      const errors = [];
      let loaded = 0;
      let start = 0;
      while (start < records.length) {
        const chunk = records.slice(start, start + maxRows);
        start += maxRows;
        try {
          await writeFn(chunk);
          loaded += chunk.length;
        } catch (err) {
          // Normalise non-Error throwables so callers always get an Error.
          const error = err instanceof Error ? err : new Error(String(err));
          chunk.forEach((record) => {
            errors.push({ record, error });
          });
        }
      }
      return { recordsLoaded: loaded, errors };
    }
  };
}
70
/**
 * Build a loader that writes every chunk of one `load()` call inside a
 * single `db.transaction(...)` callback.
 *
 * The chunk size comes from `resolveMaxRows(options)`. If the transaction
 * call rejects, the result reports zero loaded records and one error entry
 * per input record, all sharing the same Error.
 *
 * @param {string} name - Step name reported by the loader.
 * @param {object} db - Must expose `transaction(callback)`.
 * @param {Function} writeFn - `(tx, rows) => thenable`; performs the write.
 * @param {object} [options] - Forwarded to `resolveMaxRows`.
 * @returns An object with `name` and `load(records)`, which resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzleTx(name, db, writeFn, options) {
  const maxRows = resolveMaxRows(options);
  return {
    name,
    async load(records) {
      let loaded = 0;
      try {
        await db.transaction(async (tx) => {
          let start = 0;
          while (start < records.length) {
            const chunk = records.slice(start, start + maxRows);
            await writeFn(tx, chunk);
            loaded += chunk.length;
            start += maxRows;
          }
        });
      } catch (err) {
        // Normalise non-Error throwables so callers always get an Error.
        const error = err instanceof Error ? err : new Error(String(err));
        // Nothing counts as loaded once the transaction call has failed.
        return {
          recordsLoaded: 0,
          errors: records.map((record) => ({ record, error }))
        };
      }
      return { recordsLoaded: loaded, errors: [] };
    }
  };
}
96
+
97
+ export { fromQuery, fromQueryCursor, fromQueryPaginated, toDrizzle, toDrizzleTx };
98
+ //# sourceMappingURL=chunk-42CNRMAQ.js.map
99
+ //# sourceMappingURL=chunk-42CNRMAQ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/drizzle-extract.ts","../src/drizzle-load.ts"],"names":[],"mappings":";AAoBO,SAAS,SAAA,CACd,MACA,OAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,MAAM,IAAA,GAAO,MAAM,OAAA,EAAQ;AAC3B,MAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,QAAA,MAAM,GAAA;AAAA,MACR;AAAA,IACF;AAAA,GACF;AACF;AAqBO,SAAS,eAAA,CACd,MACA,WAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,GACX;AACF;AAcO,SAAS,kBAAA,CACd,MACA,IAAA,EAIc;AACd,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,GAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,WAAS;AACP,QAAA,MAAM,IAAA,GAAO,MAAM,IAAA,CAAK,KAAA,CAAM,UAAU,MAAM,CAAA;AAC9C,QAAA,IAAI,IAAA,CAAK,WAAW,CAAA,EAAG;AACvB,QAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,UAAA,MAAM,GAAA;AAAA,QACR;AACA,QAAA,IAAI,IAAA,CAAK,SAAS,QAAA,EAAU;AAC5B,QAAA,MAAA,IAAU,QAAA;AAAA,MACZ;AAAA,IACF;AAAA,GACF;AACF;;;ACjGA,IAAM,iBAAA,GAAoB,KAAA;AAiB1B,SAAS,eAAe,OAAA,EAAoC;AAC1D,EAAA,IAAI,OAAA,EAAS,mBAAA,EAAqB,OAAO,OAAA,CAAQ,mBAAA;AACjD,EAAA,IAAI,SAAS,aAAA,EAAe;AAC1B,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,iBAAA,GAAoB,OAAA,CAAQ,aAAa,CAAA;AAAA,EAC7D;AACA,EAAA,OAAO,GAAA;AACT;AA+BO,SAAS,SAAA,CACd,IAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,QAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,QAAA,IAAI;AACF,UAAA,MAAM,QAAQ,KAAK,CAAA;AACnB,UAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,QAClB,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,KAAA,MAAW,UAAU,KAAA,EAAO;AAC1B,YAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,UAC/B;AAAA,QACF;AAAA,MACF;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF;AAeO,SAAS,WAAA,CACd,IAAA,EACA,EAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;
AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,IAAI;AACF,QAAA,MAAM,EAAA,CAAG,WAAA,CAAY,OAAO,EAAA,KAAO;AACjC,UAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,YAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,YAAA,MAAM,OAAA,CAAQ,IAAI,KAAK,CAAA;AACvB,YAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,UAClB;AAAA,QACF,CAAC,CAAA;AAAA,MACH,SAAS,GAAA,EAAK;AACZ,QAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,QAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,UAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,QAC/B;AACA,QAAA,MAAA,GAAS,CAAA;AAAA,MACX;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF","file":"chunk-42CNRMAQ.js","sourcesContent":["import type { DataRecord, Extractor } from \"./types.js\";\n\n/**\n * Create an extractor from any Drizzle select query.\n *\n * Pass a function that returns the query — this makes the extractor\n * re-runnable and avoids consuming a one-shot promise.\n *\n * @example\n * ```ts\n * import { fromQuery } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { users } from \"@/db/schema\";\n * import { eq } from \"drizzle-orm\";\n *\n * const extractor = fromQuery(\"active-users\", () =>\n * db.select().from(users).where(eq(users.active, true))\n * );\n * ```\n */\nexport function fromQuery<T extends DataRecord>(\n name: string,\n queryFn: () => PromiseLike<T[]>,\n): Extractor<T> {\n return {\n name,\n async *extract() {\n const rows = await queryFn();\n for (const row of rows) {\n yield row;\n }\n },\n };\n}\n\n/**\n * Create an extractor from a Drizzle query that streams results in\n * chunks. 
Use this for large tables where materializing all rows\n * at once is too expensive.\n *\n * @example\n * ```ts\n * const extractor = fromQueryCursor(\"all-events\", async function* () {\n * let offset = 0;\n * const limit = 5000;\n * while (true) {\n * const batch = await db.select().from(events).limit(limit).offset(offset);\n * if (batch.length === 0) break;\n * yield* batch;\n * offset += limit;\n * }\n * });\n * ```\n */\nexport function fromQueryCursor<T extends DataRecord>(\n name: string,\n generatorFn: () => AsyncIterable<T>,\n): Extractor<T> {\n return {\n name,\n extract: generatorFn,\n };\n}\n\n/**\n * Create an extractor from a paginated Drizzle query. Automatically\n * handles offset-based pagination so you don't have to write the loop.\n *\n * @example\n * ```ts\n * const extractor = fromQueryPaginated(\"all-users\", {\n * pageSize: 2000,\n * query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),\n * });\n * ```\n */\nexport function fromQueryPaginated<T extends DataRecord>(\n name: string,\n opts: {\n query: (limit: number, offset: number) => PromiseLike<T[]>;\n pageSize?: number;\n },\n): Extractor<T> {\n const pageSize = opts.pageSize ?? 1000;\n return {\n name,\n async *extract() {\n let offset = 0;\n for (;;) {\n const rows = await opts.query(pageSize, offset);\n if (rows.length === 0) break;\n for (const row of rows) {\n yield row;\n }\n if (rows.length < pageSize) break;\n offset += pageSize;\n }\n },\n };\n}\n","import type { DataRecord, Loader, LoadResult } from \"./types.js\";\n\nconst PG_MAX_PARAMETERS = 65535;\n\ninterface ToDrizzleOptions {\n /**\n * Max records per INSERT statement. When a batch exceeds this,\n * it's automatically split into sub-batches to stay within Postgres\n * parameter limits. Defaults to auto-calculated from columnsPerRow.\n */\n maxRowsPerStatement?: number;\n /**\n * Number of columns per row. 
Used to auto-calculate maxRowsPerStatement\n * to stay under Postgres' 65535 parameter limit.\n * If not provided, falls back to maxRowsPerStatement or 1000.\n */\n columnsPerRow?: number;\n}\n\nfunction resolveMaxRows(options?: ToDrizzleOptions): number {\n if (options?.maxRowsPerStatement) return options.maxRowsPerStatement;\n if (options?.columnsPerRow) {\n return Math.floor(PG_MAX_PARAMETERS / options.columnsPerRow);\n }\n return 1000;\n}\n\n/**\n * Create a loader that writes records using a Drizzle insert/upsert.\n *\n * You provide the write function — this keeps the loader decoupled from\n * specific Drizzle driver types while giving you full control over\n * insert/upsert/conflict behavior.\n *\n * Automatically sub-batches to stay within Postgres' 65535 parameter limit.\n *\n * @example\n * ```ts\n * import { toDrizzle } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { processedUsers } from \"@/db/schema\";\n *\n * // Simple insert\n * const loader = toDrizzle(\"insert-users\", (rows) =>\n * db.insert(processedUsers).values(rows)\n * );\n *\n * // Upsert\n * const loader = toDrizzle(\"upsert-users\", (rows) =>\n * db.insert(processedUsers).values(rows).onConflictDoUpdate({\n * target: processedUsers.id,\n * set: { name: sql`excluded.name`, updatedAt: new Date() },\n * })\n * );\n * ```\n */\nexport function toDrizzle<T extends DataRecord>(\n name: string,\n writeFn: (records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n try {\n await writeFn(chunk);\n loaded += chunk.length;\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n for (const record of chunk) {\n errors.push({ record, error });\n }\n }\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n\n/**\n * Create a loader that writes records inside a Drizzle transaction.\n *\n * All sub-batches for a single load call are wrapped in a single\n * transaction — if any batch fails, the entire load is rolled back.\n *\n * @example\n * ```ts\n * const loader = toDrizzleTx(\"tx-insert\", db, (tx, rows) =>\n * tx.insert(processedUsers).values(rows)\n * );\n * ```\n */\nexport function toDrizzleTx<T extends DataRecord>(\n name: string,\n db: { transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R> },\n writeFn: (tx: never, records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n try {\n await db.transaction(async (tx) => {\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n await writeFn(tx, chunk);\n loaded += chunk.length;\n }\n });\n } catch (err) {\n const error = err instanceof Error ? err : new Error(String(err));\n for (const record of records) {\n errors.push({ record, error });\n }\n loaded = 0;\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n"]}
@@ -0,0 +1,215 @@
1
+ 'use strict';
2
+
3
+ // src/pipeline-builder.ts
4
+ var Pipeline = class _Pipeline {
5
+ #name;
6
+ #extractor;
7
+ #transformers;
8
+ #loaders;
9
+ #listeners;
10
+ constructor(name, extractor, transformers, loaders, listeners) {
11
+ this.#name = name;
12
+ this.#extractor = extractor;
13
+ this.#transformers = transformers;
14
+ this.#loaders = loaders;
15
+ this.#listeners = listeners;
16
+ }
17
+ static create(name) {
18
+ return new _Pipeline(name, null, [], [], []);
19
+ }
20
+ get name() {
21
+ return this.#name;
22
+ }
23
+ /**
24
+ * Set the data source for this pipeline.
25
+ */
26
+ extract(extractor) {
27
+ return new _Pipeline(
28
+ this.#name,
29
+ extractor,
30
+ this.#transformers,
31
+ this.#loaders,
32
+ this.#listeners
33
+ );
34
+ }
35
+ /**
36
+ * Add a transformation step.
37
+ */
38
+ transform(transformer) {
39
+ return new _Pipeline(
40
+ this.#name,
41
+ this.#extractor,
42
+ [...this.#transformers, transformer],
43
+ this.#loaders,
44
+ this.#listeners
45
+ );
46
+ }
47
+ /**
48
+ * Add a load destination.
49
+ */
50
+ load(loader) {
51
+ return new _Pipeline(
52
+ this.#name,
53
+ this.#extractor,
54
+ this.#transformers,
55
+ [...this.#loaders, loader],
56
+ this.#listeners
57
+ );
58
+ }
59
+ /**
60
+ * Subscribe to pipeline events.
61
+ */
62
+ on(listener) {
63
+ return new _Pipeline(
64
+ this.#name,
65
+ this.#extractor,
66
+ this.#transformers,
67
+ this.#loaders,
68
+ [...this.#listeners, listener]
69
+ );
70
+ }
71
+ /**
72
+ * Execute the pipeline.
73
+ */
74
+ async run(options = {}) {
75
+ if (!this.#extractor) {
76
+ throw new Error(
77
+ `Pipeline "${this.#name}" has no extractor. Call .extract() before .run().`
78
+ );
79
+ }
80
+ if (this.#loaders.length === 0) {
81
+ throw new Error(
82
+ `Pipeline "${this.#name}" has no loaders. Call .load() before .run().`
83
+ );
84
+ }
85
+ const { batchSize = 1e3, continueOnError = false, signal } = options;
86
+ const startTime = performance.now();
87
+ const errors = [];
88
+ let recordsProcessed = 0;
89
+ let recordsLoaded = 0;
90
+ this.#emit({
91
+ type: "pipeline:start",
92
+ timestamp: /* @__PURE__ */ new Date(),
93
+ data: { name: this.#name }
94
+ });
95
+ let batch = [];
96
+ const flushBatch = async () => {
97
+ if (batch.length === 0) return;
98
+ for (const loader of this.#loaders) {
99
+ try {
100
+ this.#emit({
101
+ type: "step:start",
102
+ timestamp: /* @__PURE__ */ new Date(),
103
+ stepName: loader.name
104
+ });
105
+ const result = await loader.load(batch);
106
+ recordsLoaded += result.recordsLoaded;
107
+ for (const err of result.errors) {
108
+ errors.push({
109
+ stepName: loader.name,
110
+ error: err.error,
111
+ record: err.record
112
+ });
113
+ }
114
+ this.#emit({
115
+ type: "step:end",
116
+ timestamp: /* @__PURE__ */ new Date(),
117
+ stepName: loader.name,
118
+ data: result
119
+ });
120
+ } catch (err) {
121
+ const error = err instanceof Error ? err : new Error(String(err));
122
+ errors.push({ stepName: loader.name, error });
123
+ if (!continueOnError) {
124
+ throw error;
125
+ }
126
+ }
127
+ }
128
+ batch = [];
129
+ };
130
+ try {
131
+ for await (const raw of this.#extractor.extract()) {
132
+ if (signal?.aborted) {
133
+ break;
134
+ }
135
+ this.#emit({
136
+ type: "record:extracted",
137
+ timestamp: /* @__PURE__ */ new Date(),
138
+ stepName: this.#extractor.name,
139
+ data: raw
140
+ });
141
+ let records = [raw];
142
+ for (const transformer of this.#transformers) {
143
+ const nextRecords = [];
144
+ for (const record of records) {
145
+ try {
146
+ this.#emit({
147
+ type: "step:start",
148
+ timestamp: /* @__PURE__ */ new Date(),
149
+ stepName: transformer.name
150
+ });
151
+ const result = await transformer.transform(record);
152
+ const transformed = Array.isArray(result) ? result : [result];
153
+ nextRecords.push(...transformed);
154
+ this.#emit({
155
+ type: "record:transformed",
156
+ timestamp: /* @__PURE__ */ new Date(),
157
+ stepName: transformer.name,
158
+ data: transformed
159
+ });
160
+ } catch (err) {
161
+ const error = err instanceof Error ? err : new Error(String(err));
162
+ errors.push({
163
+ stepName: transformer.name,
164
+ error,
165
+ record
166
+ });
167
+ this.#emit({
168
+ type: "error",
169
+ timestamp: /* @__PURE__ */ new Date(),
170
+ stepName: transformer.name,
171
+ data: error
172
+ });
173
+ if (!continueOnError) {
174
+ throw error;
175
+ }
176
+ }
177
+ }
178
+ records = nextRecords;
179
+ }
180
+ batch.push(...records);
181
+ recordsProcessed += records.length;
182
+ if (batch.length >= batchSize) {
183
+ await flushBatch();
184
+ }
185
+ }
186
+ await flushBatch();
187
+ } catch {
188
+ if (!continueOnError) {
189
+ const duration2 = performance.now() - startTime;
190
+ this.#emit({
191
+ type: "pipeline:end",
192
+ timestamp: /* @__PURE__ */ new Date(),
193
+ data: { recordsProcessed, recordsLoaded, errors, duration: duration2 }
194
+ });
195
+ return { recordsProcessed, recordsLoaded, errors, duration: duration2 };
196
+ }
197
+ }
198
+ const duration = performance.now() - startTime;
199
+ this.#emit({
200
+ type: "pipeline:end",
201
+ timestamp: /* @__PURE__ */ new Date(),
202
+ data: { recordsProcessed, recordsLoaded, errors, duration }
203
+ });
204
+ return { recordsProcessed, recordsLoaded, errors, duration };
205
+ }
206
+ #emit(event) {
207
+ for (const listener of this.#listeners) {
208
+ listener(event);
209
+ }
210
+ }
211
+ };
212
+
213
+ exports.Pipeline = Pipeline;
214
+ //# sourceMappingURL=chunk-6PDC7DFX.cjs.map
215
+ //# sourceMappingURL=chunk-6PDC7DFX.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/pipeline-builder.ts"],"names":["duration"],"mappings":";;;AAuBO,IAAM,QAAA,GAAN,MAAM,SAAA,CAAmD;AAAA,EACrD,KAAA;AAAA,EACA,UAAA;AAAA,EACA,aAAA;AAAA,EACA,QAAA;AAAA,EACA,UAAA;AAAA,EAED,WAAA,CACN,IAAA,EACA,SAAA,EACA,YAAA,EACA,SACA,SAAA,EACA;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAA;AACb,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,aAAA,GAAgB,YAAA;AACrB,IAAA,IAAA,CAAK,QAAA,GAAW,OAAA;AAChB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAAA,EACpB;AAAA,EAEA,OAAO,OAAO,IAAA,EAAwB;AACpC,IAAA,OAAO,IAAI,UAAS,IAAA,EAAM,IAAA,EAAM,EAAC,EAAG,EAAC,EAAG,EAAE,CAAA;AAAA,EAC5C;AAAA,EAEA,IAAI,IAAA,GAAe;AACjB,IAAA,OAAO,IAAA,CAAK,KAAA;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,QAA8B,SAAA,EAAsC;AAClE,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,SAAA;AAAA,MACA,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,UACE,WAAA,EACmB;AACnB,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,aAAA,EAAe,WAAW,CAAA;AAAA,MACnC,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,KAAK,MAAA,EAA8C;AACjD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,QAAA,EAAU,MAAM,CAAA;AAAA,MACzB,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,GAAG,QAAA,EAAqD;AACtD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,UAAA,EAAY,QAAQ;AAAA,KAC/B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,GAAA,CAAI,OAAA,GAA2B,EAAC,EAA4B;AAChE,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,kDAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,6CAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,SAAA,GAAY,GAAA,EAAM,eAAA,GAAkB,KAAA,EAAO,QAAO,GAAI,OAAA;AAE9D,IAAA,MAAM,SAAA,GAAY,YAAY,GAAA,EAAI;AAC
lC,IAAA,MAAM,SAA8B,EAAC;AACrC,IAAA,IAAI,gBAAA,GAAmB,CAAA;AACvB,IAAA,IAAI,aAAA,GAAgB,CAAA;AAEpB,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,gBAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,IAAA,EAAM,IAAA,CAAK,KAAA;AAAM,KAC1B,CAAA;AAED,IAAA,IAAI,QAAsB,EAAC;AAE3B,IAAA,MAAM,aAAa,YAA2B;AAC5C,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AAExB,MAAA,KAAA,MAAW,MAAA,IAAU,KAAK,QAAA,EAAU;AAClC,QAAA,IAAI;AACF,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,YAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO;AAAA,WAClB,CAAA;AAED,UAAA,MAAM,MAAA,GAAS,MAAM,MAAA,CAAO,IAAA,CAAK,KAAK,CAAA;AACtC,UAAA,aAAA,IAAiB,MAAA,CAAO,aAAA;AAExB,UAAA,KAAA,MAAW,GAAA,IAAO,OAAO,MAAA,EAAQ;AAC/B,YAAA,MAAA,CAAO,IAAA,CAAK;AAAA,cACV,UAAU,MAAA,CAAO,IAAA;AAAA,cACjB,OAAO,GAAA,CAAI,KAAA;AAAA,cACX,QAAQ,GAAA,CAAI;AAAA,aACb,CAAA;AAAA,UACH;AAEA,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,UAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO,IAAA;AAAA,YACjB,IAAA,EAAM;AAAA,WACP,CAAA;AAAA,QACH,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,MAAA,CAAO,KAAK,EAAE,QAAA,EAAU,MAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AAE5C,UAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,YAAA,MAAM,KAAA;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,EAAC;AAAA,IACX,CAAA;AAEA,IAAA,IAAI;AACF,MAAA,WAAA,MAAiB,GAAA,IAAO,IAAA,CAAK,UAAA,CAAW,OAAA,EAAQ,EAAG;AACjD,QAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,UAAA;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,kBAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,QAAA,EAAU,KAAK,UAAA,CAAW,IAAA;AAAA,UAC1B,IAAA,EAAM;AAAA,SACP,CAAA;AAED,QAAA,IAAI,OAAA,GAAwB,CAAC,GAAG,CAAA;AAEhC,QAAA,KAAA,MAAW,WAAA,IAAe,KAAK,aAAA,EAAe;AAC5C,UAAA,MAAM,cAA4B,EAAC;AAEnC,UAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,YAAA,IAAI;AACF,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,YAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY;AAAA,eACvB,CAAA;AAED,cAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,MAAM,CAAA;AACjD,cAAA,MAAM,cAAc,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,GAAI,MAAA,GAAS,CAAC,
MAAM,CAAA;AAC5D,cAAA,WAAA,CAAY,IAAA,CAAK,GAAG,WAAW,CAAA;AAE/B,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,oBAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAAA,YACH,SAAS,GAAA,EAAK;AACZ,cAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,cAAA,MAAA,CAAO,IAAA,CAAK;AAAA,gBACV,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,KAAA;AAAA,gBACA;AAAA,eACD,CAAA;AAED,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,OAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAED,cAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,gBAAA,MAAM,KAAA;AAAA,cACR;AAAA,YACF;AAAA,UACF;AAEA,UAAA,OAAA,GAAU,WAAA;AAAA,QACZ;AAEA,QAAA,KAAA,CAAM,IAAA,CAAK,GAAG,OAAO,CAAA;AACrB,QAAA,gBAAA,IAAoB,OAAA,CAAQ,MAAA;AAE5B,QAAA,IAAI,KAAA,CAAM,UAAU,SAAA,EAAW;AAC7B,UAAA,MAAM,UAAA,EAAW;AAAA,QACnB;AAAA,MACF;AAEA,MAAA,MAAM,UAAA,EAAW;AAAA,IACnB,CAAA,CAAA,MAAQ;AACN,MAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,QAAA,MAAMA,SAAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AACrC,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,cAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,MAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA;AAAS,SAC3D,CAAA;AACD,QAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA,EAAS;AAAA,MAC7D;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AAErC,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,cAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,QAAQ,QAAA;AAAS,KAC3D,CAAA;AAED,IAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,QAAA,EAAS;AAAA,EAC7D;AAAA,EAEA,MAAM,KAAA,EAKG;AACP,IAAA,KAAA,MAAW,QAAA,IAAY,KAAK,UAAA,EAAY;AACtC,MAAA,QAAA,CAAS,KAA6C,CAAA;AAAA,IACxD;AAAA,EACF;AACF","file":"chunk-6PDC7DFX.cjs","sourcesContent":["import type {\n DataRecord,\n Extractor,\n Loader,\n PipelineEventListener,\n PipelineOptions,\n PipelineResult,\n PipelineStepError,\n Transformer,\n} from \"./types.js\";\n\n/**\n * A composable, type-safe ETL pipeline builder.\n *\n * @example\n * 
```ts\n * const result = await Pipeline.create(\"my-pipeline\")\n * .extract(myExtractor)\n * .transform(myTransformer)\n * .load(myLoader)\n * .run();\n * ```\n */\nexport class Pipeline<TCurrent extends DataRecord = DataRecord> {\n readonly #name: string;\n readonly #extractor: Extractor | null;\n readonly #transformers: Transformer[];\n readonly #loaders: Loader[];\n readonly #listeners: PipelineEventListener[];\n\n private constructor(\n name: string,\n extractor: Extractor | null,\n transformers: Transformer[],\n loaders: Loader[],\n listeners: PipelineEventListener[],\n ) {\n this.#name = name;\n this.#extractor = extractor;\n this.#transformers = transformers;\n this.#loaders = loaders;\n this.#listeners = listeners;\n }\n\n static create(name: string): Pipeline {\n return new Pipeline(name, null, [], [], []);\n }\n\n get name(): string {\n return this.#name;\n }\n\n /**\n * Set the data source for this pipeline.\n */\n extract<T extends DataRecord>(extractor: Extractor<T>): Pipeline<T> {\n return new Pipeline(\n this.#name,\n extractor,\n this.#transformers,\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a transformation step.\n */\n transform<TOutput extends DataRecord>(\n transformer: Transformer<TCurrent, TOutput>,\n ): Pipeline<TOutput> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n [...this.#transformers, transformer],\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a load destination.\n */\n load(loader: Loader<TCurrent>): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n [...this.#loaders, loader],\n this.#listeners,\n );\n }\n\n /**\n * Subscribe to pipeline events.\n */\n on(listener: PipelineEventListener): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n this.#loaders,\n [...this.#listeners, listener],\n );\n }\n\n /**\n * Execute the pipeline.\n */\n async run(options: PipelineOptions = {}): 
Promise<PipelineResult> {\n if (!this.#extractor) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no extractor. Call .extract() before .run().`,\n );\n }\n\n if (this.#loaders.length === 0) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no loaders. Call .load() before .run().`,\n );\n }\n\n const { batchSize = 1000, continueOnError = false, signal } = options;\n\n const startTime = performance.now();\n const errors: PipelineStepError[] = [];\n let recordsProcessed = 0;\n let recordsLoaded = 0;\n\n this.#emit({\n type: \"pipeline:start\",\n timestamp: new Date(),\n data: { name: this.#name },\n });\n\n let batch: DataRecord[] = [];\n\n const flushBatch = async (): Promise<void> => {\n if (batch.length === 0) return;\n\n for (const loader of this.#loaders) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: loader.name,\n });\n\n const result = await loader.load(batch);\n recordsLoaded += result.recordsLoaded;\n\n for (const err of result.errors) {\n errors.push({\n stepName: loader.name,\n error: err.error,\n record: err.record,\n });\n }\n\n this.#emit({\n type: \"step:end\",\n timestamp: new Date(),\n stepName: loader.name,\n data: result,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({ stepName: loader.name, error });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n batch = [];\n };\n\n try {\n for await (const raw of this.#extractor.extract()) {\n if (signal?.aborted) {\n break;\n }\n\n this.#emit({\n type: \"record:extracted\",\n timestamp: new Date(),\n stepName: this.#extractor.name,\n data: raw,\n });\n\n let records: DataRecord[] = [raw];\n\n for (const transformer of this.#transformers) {\n const nextRecords: DataRecord[] = [];\n\n for (const record of records) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: transformer.name,\n });\n\n const result = await transformer.transform(record);\n const transformed = Array.isArray(result) ? result : [result];\n nextRecords.push(...transformed);\n\n this.#emit({\n type: \"record:transformed\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: transformed,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({\n stepName: transformer.name,\n error,\n record,\n });\n\n this.#emit({\n type: \"error\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: error,\n });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n records = nextRecords;\n }\n\n batch.push(...records);\n recordsProcessed += records.length;\n\n if (batch.length >= batchSize) {\n await flushBatch();\n }\n }\n\n await flushBatch();\n } catch {\n if (!continueOnError) {\n const duration = performance.now() - startTime;\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n }\n\n const duration = performance.now() - startTime;\n\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n\n #emit(event: {\n type: string;\n timestamp: Date;\n stepName?: string;\n data?: unknown;\n }): void {\n for (const listener of this.#listeners) {\n listener(event as Parameters<PipelineEventListener>[0]);\n }\n }\n}\n"]}
@@ -0,0 +1,80 @@
1
+ 'use strict';
2
+
3
+ // src/transformers.ts
4
+ function createTransformer(name, fn) {
5
+ return { name, transform: fn };
6
+ }
7
/**
 * Build a transformer that converts each record with a mapping function.
 *
 * This is a thin alias: `name` and `fn` are forwarded to `createTransformer`
 * untouched.
 *
 * @param name - Label for the transformer.
 * @param fn - Mapping function applied to a record; it may return its result
 *   directly or as a promise.
 * @returns The transformer produced by `createTransformer`.
 */
function map(name, fn) {
  const mapper = createTransformer(name, fn);
  return mapper;
}
10
/**
 * Build a transformer that keeps or drops records based on a predicate.
 *
 * @param name - Label for the transformer.
 * @param predicate - Called with each record; may return a boolean or a
 *   promise of one. The awaited result is tested for truthiness.
 * @returns A transformer whose `transform` resolves to the same record when
 *   the predicate passes, and to an empty array (meaning "drop") otherwise.
 */
function filter(name, predicate) {
  const keepOrDrop = async (record) => {
    if (await predicate(record)) {
      return record;
    }
    // An empty array is how a transformer signals "no output records".
    return [];
  };
  return createTransformer(name, keepOrDrop);
}
16
/**
 * Build a transformer that turns each record into zero or more records.
 *
 * This is a thin alias: `name` and `fn` are forwarded to `createTransformer`
 * untouched.
 *
 * @param name - Label for the transformer.
 * @param fn - Function applied to a record; expected to return an array of
 *   records, directly or as a promise.
 * @returns The transformer produced by `createTransformer`.
 */
function flatMap(name, fn) {
  const expander = createTransformer(name, fn);
  return expander;
}
19
/**
 * Build a transformer that keeps only the listed keys of each record.
 *
 * Only the record's own properties are considered. Inherited members such as
 * `toString` or `constructor` are never copied, which matches `omit` and
 * `rename` (both iterate own entries only). Keys the record does not own are
 * skipped, so they do not appear in the output at all.
 *
 * @param name - Label for the transformer.
 * @param keys - Property names to copy, in the order they should appear.
 * @returns A transformer whose `transform` returns a new object holding the
 *   selected own properties; the input record is not modified.
 */
function pick(name, keys) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return createTransformer(name, (record) => {
    const selected = [];
    for (const key of keys) {
      // The `in` operator would also match keys found on the prototype chain.
      if (hasOwn.call(record, key)) {
        selected.push([key, record[key]]);
      }
    }
    // fromEntries defines plain data properties, so an own "__proto__" key is
    // copied as data instead of triggering the prototype setter.
    return Object.fromEntries(selected);
  });
}
30
/**
 * Build a transformer that removes the listed keys from each record.
 *
 * @param name - Label for the transformer.
 * @param keys - Property names to drop. They are collected into a Set once,
 *   when the transformer is created.
 * @returns A transformer whose `transform` returns a new object containing
 *   every own enumerable property of the record except the excluded ones; the
 *   input record is not modified.
 */
function omit(name, keys) {
  const excluded = new Set(keys);
  return createTransformer(name, (record) =>
    // fromEntries defines plain data properties. Assigning `result[key] = value`
    // with an own "__proto__" key (e.g. from JSON.parse) would instead replace
    // the result's prototype and silently lose the key.
    Object.fromEntries(
      Object.entries(record).filter(([key]) => !excluded.has(key)),
    ),
  );
}
42
/**
 * Build a transformer that renames keys of each record.
 *
 * @param name - Label for the transformer.
 * @param mapping - Object whose own properties map an existing key to its new
 *   name. Keys without an own entry (or whose entry is null/undefined) keep
 *   their original name.
 * @returns A transformer whose `transform` returns a new object with the same
 *   values under the renamed keys; the input record is not modified. If two
 *   keys end up with the same name, the later entry wins.
 */
function rename(name, mapping) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return createTransformer(name, (record) => {
    const renamed = Object.entries(record).map(([key, value]) => {
      // Consult only own entries of `mapping`: a bare `mapping[key]` also finds
      // inherited members, turning a record key such as "constructor" or
      // "toString" into a stringified function.
      const target = hasOwn.call(mapping, key) ? (mapping[key] ?? key) : key;
      return [target, value];
    });
    // fromEntries defines plain data properties, so a "__proto__" key is kept
    // as data instead of replacing the result's prototype.
    return Object.fromEntries(renamed);
  });
}
52
/**
 * Combine several transformers into one that applies them in sequence.
 *
 * Each step receives every record produced by the previous step. A step may
 * return a single record or an array of records; arrays are flattened one
 * level, so an empty array drops the record. Steps and records are processed
 * one at a time, each `transform` call being awaited before the next.
 *
 * @param name - Label for the combined transformer.
 * @param transformers - Steps to run, in order. Each must expose `transform`.
 * @returns A transformer whose `transform` resolves to the array of records
 *   left after the final step (the single input record wrapped in an array
 *   when `transformers` is empty). A rejection from any step propagates.
 */
function compose(name, transformers) {
  return createTransformer(name, async (record) => {
    let current = [record];
    for (const step of transformers) {
      const produced = [];
      for (const item of current) {
        const output = await step.transform(item);
        if (Array.isArray(output)) {
          // Append element by element: `push(...output)` passes every element
          // as a call argument and throws RangeError for very large arrays.
          for (const entry of output) {
            produced.push(entry);
          }
        } else {
          produced.push(output);
        }
      }
      current = produced;
    }
    return current;
  });
}
70
+
71
+ exports.compose = compose;
72
+ exports.createTransformer = createTransformer;
73
+ exports.filter = filter;
74
+ exports.flatMap = flatMap;
75
+ exports.map = map;
76
+ exports.omit = omit;
77
+ exports.pick = pick;
78
+ exports.rename = rename;
79
+ //# sourceMappingURL=chunk-6R4QVX2Q.cjs.map
80
+ //# sourceMappingURL=chunk-6R4QVX2Q.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/transformers.ts"],"names":[],"mappings":";;;AAKO,SAAS,iBAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,EAAA,EAAG;AAC/B;AAKO,SAAS,GAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAMO,SAAS,MAAA,CACd,MACA,SAAA,EACmB;AACnB,EAAA,OAAO,iBAAA,CAAkB,IAAA,EAAM,OAAO,MAAA,KAAc;AAClD,IAAA,MAAM,IAAA,GAAO,MAAM,SAAA,CAAU,MAAM,CAAA;AACnC,IAAA,OAAO,IAAA,GAAO,SAAS,EAAC;AAAA,EAC1B,CAAC,CAAA;AACH;AAKO,SAAS,OAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,MAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,MAAA,CAAO,GAAG,CAAA;AAAA,MACvD;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAY,IAAI,CAAA;AACnC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,IAAI,CAAC,MAAA,CAAO,GAAA,CAAI,GAAG,CAAA,EAAG;AACpB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,KAAA;AAAA,MAC7C;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,MAAA,CACd,MACA,OAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,CAAC,MAAA,KAAc;AAC/C,IAAA,MAAM,SAAqB,EAAC;AAC5B,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,MAAM,MAAA,GAAS,OAAA,CAAQ,GAAG,CAAA,IAAK,GAAA;AAC/B,MAAA,MAAA,CAAO,MAAM,CAAA,GAAI,KAAA;AAAA,IACnB;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAMO,SAAS,OAAA,CACd,MACA,YAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,OAAO,MAAA,KAAc;AACrD,IAAA,IAAI,OAAA,GAAwB,CAAC,MAAM,CAAA;AAEnC,IAAA,KAAA,MAAW,eAAe,YAAA,EAAc;AACtC,MAAA,MAAM,cAA4B,EAAC;AACnC,MAAA,KAAA,MAAW,KAAK,OAAA,EAAS;AACvB,QAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,CAAC,CAAA;AAC5C,QAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,EAAG;AACzB,UAAA,WAAA,CAAY,IAAA,CAAK,GAAG,MAAM,CAAA;AA
AA,QAC5B,CAAA,MAAO;AACL,UAAA,WAAA,CAAY,KAAK,MAAM,CAAA;AAAA,QACzB;AAAA,MACF;AACA,MAAA,OAAA,GAAU,WAAA;AAAA,IACZ;AAEA,IAAA,OAAO,OAAA;AAAA,EACT,CAAC,CAAA;AACH","file":"chunk-6R4QVX2Q.cjs","sourcesContent":["import type { DataRecord, Transformer } from \"./types.js\";\n\n/**\n * Create a custom transformer from a function.\n */\nexport function createTransformer<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | TOutput[] | Promise<TOutput | TOutput[]>,\n): Transformer<TInput, TOutput> {\n return { name, transform: fn };\n}\n\n/**\n * Transform each record using a mapping function.\n */\nexport function map<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | Promise<TOutput>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Filter records based on a predicate. Records that don't match\n * are dropped (returned as empty array).\n */\nexport function filter<T extends DataRecord = DataRecord>(\n name: string,\n predicate: (record: T) => boolean | Promise<boolean>,\n): Transformer<T, T> {\n return createTransformer(name, async (record: T) => {\n const keep = await predicate(record);\n return keep ? 
record : [];\n });\n}\n\n/**\n * Transform each record into zero or more records.\n */\nexport function flatMap<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput[] | Promise<TOutput[]>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Pick specific keys from each record.\n */\nexport function pick<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Pick<T, K> & DataRecord> {\n return createTransformer<T, Pick<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Pick<T, K> & DataRecord;\n for (const key of keys) {\n if (key in record) {\n (result as Record<string, unknown>)[key] = record[key];\n }\n }\n return result;\n });\n}\n\n/**\n * Omit specific keys from each record.\n */\nexport function omit<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Omit<T, K> & DataRecord> {\n const keySet = new Set<string>(keys);\n return createTransformer<T, Omit<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Omit<T, K> & DataRecord;\n for (const [key, value] of Object.entries(record)) {\n if (!keySet.has(key)) {\n (result as Record<string, unknown>)[key] = value;\n }\n }\n return result;\n });\n}\n\n/**\n * Rename keys in each record.\n */\nexport function rename<T extends DataRecord>(\n name: string,\n mapping: Record<string, string>,\n): Transformer<T> {\n return createTransformer<T>(name, (record: T) => {\n const result: DataRecord = {};\n for (const [key, value] of Object.entries(record)) {\n const newKey = mapping[key] ?? 
key;\n result[newKey] = value;\n }\n return result;\n });\n}\n\n/**\n * Compose multiple transformers into a single transformer that\n * applies them in sequence.\n */\nexport function compose<T extends DataRecord>(\n name: string,\n transformers: Transformer[],\n): Transformer<T> {\n return createTransformer<T>(name, async (record: T) => {\n let records: DataRecord[] = [record];\n\n for (const transformer of transformers) {\n const nextRecords: DataRecord[] = [];\n for (const r of records) {\n const result = await transformer.transform(r);\n if (Array.isArray(result)) {\n nextRecords.push(...result);\n } else {\n nextRecords.push(result);\n }\n }\n records = nextRecords;\n }\n\n return records;\n });\n}\n"]}