khotan-data 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +196 -0
- package/dist/chunk-42CNRMAQ.js +99 -0
- package/dist/chunk-42CNRMAQ.js.map +1 -0
- package/dist/chunk-6PDC7DFX.cjs +215 -0
- package/dist/chunk-6PDC7DFX.cjs.map +1 -0
- package/dist/chunk-6R4QVX2Q.cjs +80 -0
- package/dist/chunk-6R4QVX2Q.cjs.map +1 -0
- package/dist/chunk-FRRSW3TN.cjs +105 -0
- package/dist/chunk-FRRSW3TN.cjs.map +1 -0
- package/dist/chunk-NVPI7OV3.js +71 -0
- package/dist/chunk-NVPI7OV3.js.map +1 -0
- package/dist/chunk-TK4HD4XA.js +213 -0
- package/dist/chunk-TK4HD4XA.js.map +1 -0
- package/dist/drizzle.cjs +28 -0
- package/dist/drizzle.cjs.map +1 -0
- package/dist/drizzle.d.cts +120 -0
- package/dist/drizzle.d.ts +120 -0
- package/dist/drizzle.js +3 -0
- package/dist/drizzle.js.map +1 -0
- package/dist/index.cjs +107 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +35 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/pipeline.cjs +12 -0
- package/dist/pipeline.cjs.map +1 -0
- package/dist/pipeline.d.cts +43 -0
- package/dist/pipeline.d.ts +43 -0
- package/dist/pipeline.js +3 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/transform.cjs +40 -0
- package/dist/transform.cjs.map +1 -0
- package/dist/transform.d.cts +38 -0
- package/dist/transform.d.ts +38 -0
- package/dist/transform.js +3 -0
- package/dist/transform.js.map +1 -0
- package/dist/types-EpLTQcN2.d.cts +54 -0
- package/dist/types-EpLTQcN2.d.ts +54 -0
- package/package.json +110 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 khotan-data contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# khotan-data
|
|
2
|
+
|
|
3
|
+
Data primitives for TypeScript — ETL pipelines, transforms, and Drizzle Postgres integration.
|
|
4
|
+
|
|
5
|
+
Built for **Next.js + Drizzle + Postgres** projects. Think better-auth for data management.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install khotan-data
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires `drizzle-orm` as a peer dependency (you almost certainly already have it).
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```typescript
|
|
18
|
+
import { Pipeline, fromQuery, map, filter, toDrizzle } from "khotan-data";
|
|
19
|
+
import { db } from "@/db";
|
|
20
|
+
import { users, analytics } from "@/db/schema";
|
|
21
|
+
import { eq } from "drizzle-orm";
|
|
22
|
+
|
|
23
|
+
const result = await Pipeline.create("user-analytics")
|
|
24
|
+
.extract(
|
|
25
|
+
fromQuery("active-users", () =>
|
|
26
|
+
db.select().from(users).where(eq(users.active, true))
|
|
27
|
+
),
|
|
28
|
+
)
|
|
29
|
+
.transform(filter("adults", (r) => r.age >= 18))
|
|
30
|
+
.transform(
|
|
31
|
+
map("enrich", (r) => ({
|
|
32
|
+
userId: r.id,
|
|
33
|
+
email: r.email.toLowerCase(),
|
|
34
|
+
segment: r.age >= 65 ? "senior" : "standard",
|
|
35
|
+
processedAt: new Date(),
|
|
36
|
+
})),
|
|
37
|
+
)
|
|
38
|
+
.load(
|
|
39
|
+
toDrizzle("write-analytics", (rows) =>
|
|
40
|
+
db.insert(analytics).values(rows)
|
|
41
|
+
),
|
|
42
|
+
)
|
|
43
|
+
.run();
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Extractors
|
|
47
|
+
|
|
48
|
+
Pull data from Drizzle queries:
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
import { fromQuery, fromQueryPaginated, fromQueryCursor } from "khotan-data/drizzle";
|
|
52
|
+
|
|
53
|
+
// One-shot query
|
|
54
|
+
const source = fromQuery("users", () =>
|
|
55
|
+
db.select().from(users).where(eq(users.active, true))
|
|
56
|
+
);
|
|
57
|
+
|
|
58
|
+
// Auto-paginated for large tables
|
|
59
|
+
const source = fromQueryPaginated("all-orders", {
|
|
60
|
+
pageSize: 5000,
|
|
61
|
+
query: (limit, offset) =>
|
|
62
|
+
db.select().from(orders).limit(limit).offset(offset),
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// Full control with async generator
|
|
66
|
+
const source = fromQueryCursor("stream", async function* () {
|
|
67
|
+
// your custom cursor/streaming logic
|
|
68
|
+
});
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Generic extractors for testing and non-DB sources:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import { fromArray, createExtractor } from "khotan-data";
|
|
75
|
+
|
|
76
|
+
const testSource = fromArray("mock", [{ id: 1 }, { id: 2 }]);
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Transforms
|
|
80
|
+
|
|
81
|
+
Composable, type-safe record transformations:
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
import { map, filter, pick, omit, rename, flatMap, compose } from "khotan-data/transform";
|
|
85
|
+
|
|
86
|
+
// Map fields
|
|
87
|
+
.transform(map("normalize", (r) => ({ ...r, email: r.email.toLowerCase() })))
|
|
88
|
+
|
|
89
|
+
// Filter records (non-matching records are dropped)
|
|
90
|
+
.transform(filter("active-only", (r) => r.active))
|
|
91
|
+
|
|
92
|
+
// Pick/omit fields
|
|
93
|
+
.transform(pick("slim", ["id", "name", "email"]))
|
|
94
|
+
.transform(omit("strip-pii", ["ssn", "dob"]))
|
|
95
|
+
|
|
96
|
+
// Rename fields
|
|
97
|
+
.transform(rename("api-names", { firstName: "first_name" }))
|
|
98
|
+
|
|
99
|
+
// One-to-many expansion
|
|
100
|
+
.transform(flatMap("explode-tags", (r) =>
|
|
101
|
+
r.tags.map((tag) => ({ ...r, tag }))
|
|
102
|
+
))
|
|
103
|
+
|
|
104
|
+
// Compose multiple transforms into one step
|
|
105
|
+
.transform(compose("pipeline", [filterStep, mapStep, renameStep]))
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Loaders
|
|
109
|
+
|
|
110
|
+
Write data into Drizzle tables:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
import { toDrizzle, toDrizzleTx } from "khotan-data/drizzle";
|
|
114
|
+
|
|
115
|
+
// Simple insert (split into statements of 1000 rows by default; set columnsPerRow or maxRowsPerStatement for wide tables)
|
|
116
|
+
const loader = toDrizzle("insert", (rows) =>
|
|
117
|
+
db.insert(analytics).values(rows)
|
|
118
|
+
);
|
|
119
|
+
|
|
120
|
+
// Upsert
|
|
121
|
+
const loader = toDrizzle("upsert", (rows) =>
|
|
122
|
+
db
|
|
123
|
+
.insert(analytics)
|
|
124
|
+
.values(rows)
|
|
125
|
+
.onConflictDoUpdate({
|
|
126
|
+
target: analytics.userId,
|
|
127
|
+
set: { segment: sql`excluded.segment`, updatedAt: new Date() },
|
|
128
|
+
})
|
|
129
|
+
);
|
|
130
|
+
|
|
131
|
+
// Transactional — all-or-nothing per batch
|
|
132
|
+
const loader = toDrizzleTx("tx-insert", db, (tx, rows) =>
|
|
133
|
+
tx.insert(analytics).values(rows)
|
|
134
|
+
);
|
|
135
|
+
|
|
136
|
+
// Control batching for wide tables
|
|
137
|
+
const loader = toDrizzle("wide-table", writeFn, {
|
|
138
|
+
columnsPerRow: 25, // auto-calculates safe batch size
|
|
139
|
+
});
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Pipeline
|
|
143
|
+
|
|
144
|
+
The `Pipeline` builder is immutable — each method returns a new instance:
|
|
145
|
+
|
|
146
|
+
```typescript
|
|
147
|
+
const base = Pipeline.create("etl")
|
|
148
|
+
.extract(source)
|
|
149
|
+
.transform(filterStep);
|
|
150
|
+
|
|
151
|
+
// Branch into different outputs
|
|
152
|
+
const toDb = base.load(toDrizzle("db", writeFn)).run();
|
|
153
|
+
const toFile = base.load(toFileSink).run();
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Options
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
await pipeline.run({
|
|
160
|
+
batchSize: 500, // records per load batch (default: 1000)
|
|
161
|
+
continueOnError: true, // keep processing after a step fails (default: false — stop at the first failure); errors are returned in result.errors
|
|
162
|
+
signal: controller.signal, // AbortSignal for cancellation
|
|
163
|
+
});
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Events
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
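// on() returns a new pipeline — call .run() on the returned instance, not the original
const observed = pipeline.on((event) => {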
|
|
170
|
+
if (event.type === "error") console.error(event.stepName, event.data);
|
|
171
|
+
if (event.type === "pipeline:end") console.log("Done:", event.data);
|
|
172
|
+
});
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Subpath Imports
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
import { Pipeline } from "khotan-data/pipeline";
|
|
179
|
+
import { map, filter } from "khotan-data/transform";
|
|
180
|
+
import { fromQuery, toDrizzle } from "khotan-data/drizzle";
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Development
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
npm install
|
|
187
|
+
npm run dev # watch mode build
|
|
188
|
+
npm run test # run tests
|
|
189
|
+
npm run test:watch # watch mode tests
|
|
190
|
+
npm run check # typecheck + lint + format + test
|
|
191
|
+
npm run build # production build
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## License
|
|
195
|
+
|
|
196
|
+
MIT
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// src/drizzle-extract.ts
|
|
2
|
+
function fromQuery(name, queryFn) {
|
|
3
|
+
return {
|
|
4
|
+
name,
|
|
5
|
+
async *extract() {
|
|
6
|
+
const rows = await queryFn();
|
|
7
|
+
for (const row of rows) {
|
|
8
|
+
yield row;
|
|
9
|
+
}
|
|
10
|
+
}
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
function fromQueryCursor(name, generatorFn) {
|
|
14
|
+
return {
|
|
15
|
+
name,
|
|
16
|
+
extract: generatorFn
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
function fromQueryPaginated(name, opts) {
|
|
20
|
+
const pageSize = opts.pageSize ?? 1e3;
|
|
21
|
+
return {
|
|
22
|
+
name,
|
|
23
|
+
async *extract() {
|
|
24
|
+
let offset = 0;
|
|
25
|
+
for (; ; ) {
|
|
26
|
+
const rows = await opts.query(pageSize, offset);
|
|
27
|
+
if (rows.length === 0) break;
|
|
28
|
+
for (const row of rows) {
|
|
29
|
+
yield row;
|
|
30
|
+
}
|
|
31
|
+
if (rows.length < pageSize) break;
|
|
32
|
+
offset += pageSize;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// src/drizzle-load.ts
|
|
39
|
+
/** Maximum number of bind parameters in one Postgres statement. */
var PG_MAX_PARAMETERS = 65535;
/**
 * Work out how many rows may go into a single write statement.
 *
 * Precedence: an explicit `maxRowsPerStatement` wins; otherwise the size is
 * derived from `columnsPerRow` so that rows × columns stays within
 * `PG_MAX_PARAMETERS`; otherwise 1000. A value of 0 or undefined for either
 * option counts as "not set".
 *
 * @param options - Optional `{ maxRowsPerStatement, columnsPerRow }`.
 * @returns A batch size that is always at least 1, rounded down to a whole
 *   number, so the chunking loops that step by it always make progress.
 * @throws RangeError if the selected option is negative or not a number.
 */
function resolveMaxRows(options) {
  let maxRows = 1e3;
  let origin = "default";
  if (options?.maxRowsPerStatement) {
    maxRows = options.maxRowsPerStatement;
    origin = "maxRowsPerStatement";
  } else if (options?.columnsPerRow) {
    maxRows = PG_MAX_PARAMETERS / options.columnsPerRow;
    origin = "columnsPerRow";
  }
  // `!(x > 0)` rejects negatives and NaN alike; both would stall or reverse
  // a loop that advances by this value.
  if (!(maxRows > 0)) {
    throw new RangeError(`${origin} must be a positive number`);
  }
  // More than 65535 columns per row would floor to 0, so clamp to one row.
  return Math.max(1, Math.floor(maxRows));
}
|
|
47
|
+
/**
 * Build a loader that hands records to a caller-supplied write function.
 *
 * Each `load(records)` call splits the records into chunks of at most
 * `resolveMaxRows(options)` rows and awaits `writeFn` once per chunk, in
 * order. A failing chunk does not stop later chunks: every record of the
 * failed chunk is reported with the same error and processing continues.
 *
 * @param name - Step name exposed as `name` on the returned loader.
 * @param writeFn - Receives one chunk of records and performs the write.
 * @param options - Optional batching options forwarded to `resolveMaxRows`.
 * @returns An object `{ name, load }`; `load` resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzle(name, writeFn, options) {
  const chunkSize = resolveMaxRows(options);
  async function load(records) {
    const failures = [];
    let written = 0;
    let start = 0;
    while (start < records.length) {
      const slice = records.slice(start, start + chunkSize);
      start += chunkSize;
      try {
        await writeFn(slice);
        written += slice.length;
      } catch (cause) {
        // Normalise non-Error throwables so consumers always get an Error.
        const error = cause instanceof Error ? cause : new Error(String(cause));
        for (const record of slice) {
          failures.push({ record, error });
        }
      }
    }
    return { recordsLoaded: written, errors: failures };
  }
  return { name, load };
}
|
|
70
|
+
/**
 * Build a loader that writes all chunks of one `load()` call inside a
 * single `db.transaction(...)` callback.
 *
 * If the transaction call rejects, every record passed to that `load()`
 * call is reported as failed with the same error and `recordsLoaded` is 0.
 *
 * @param name - Step name exposed as `name` on the returned loader.
 * @param db - Object exposing `transaction(fn)`; `fn` receives the tx handle.
 * @param writeFn - Receives the tx handle and one chunk of records.
 * @param options - Optional batching options forwarded to `resolveMaxRows`.
 * @returns An object `{ name, load }`; `load` resolves to
 *   `{ recordsLoaded, errors }`.
 */
function toDrizzleTx(name, db, writeFn, options) {
  const chunkSize = resolveMaxRows(options);
  async function load(records) {
    const failures = [];
    let written = 0;
    // Writes the chunks sequentially on the transaction handle.
    const writeAllChunks = async (tx) => {
      let start = 0;
      while (start < records.length) {
        const slice = records.slice(start, start + chunkSize);
        await writeFn(tx, slice);
        written += slice.length;
        start += chunkSize;
      }
    };
    try {
      await db.transaction(writeAllChunks);
    } catch (cause) {
      const error = cause instanceof Error ? cause : new Error(String(cause));
      for (const record of records) {
        failures.push({ record, error });
      }
      // Chunks counted before the failure are not reported as loaded.
      written = 0;
    }
    return { recordsLoaded: written, errors: failures };
  }
  return { name, load };
}
|
|
96
|
+
|
|
97
|
+
export { fromQuery, fromQueryCursor, fromQueryPaginated, toDrizzle, toDrizzleTx };
|
|
98
|
+
//# sourceMappingURL=chunk-42CNRMAQ.js.map
|
|
99
|
+
//# sourceMappingURL=chunk-42CNRMAQ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/drizzle-extract.ts","../src/drizzle-load.ts"],"names":[],"mappings":";AAoBO,SAAS,SAAA,CACd,MACA,OAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,MAAM,IAAA,GAAO,MAAM,OAAA,EAAQ;AAC3B,MAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,QAAA,MAAM,GAAA;AAAA,MACR;AAAA,IACF;AAAA,GACF;AACF;AAqBO,SAAS,eAAA,CACd,MACA,WAAA,EACc;AACd,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAA,EAAS;AAAA,GACX;AACF;AAcO,SAAS,kBAAA,CACd,MACA,IAAA,EAIc;AACd,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,GAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,OAAO,OAAA,GAAU;AACf,MAAA,IAAI,MAAA,GAAS,CAAA;AACb,MAAA,WAAS;AACP,QAAA,MAAM,IAAA,GAAO,MAAM,IAAA,CAAK,KAAA,CAAM,UAAU,MAAM,CAAA;AAC9C,QAAA,IAAI,IAAA,CAAK,WAAW,CAAA,EAAG;AACvB,QAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,UAAA,MAAM,GAAA;AAAA,QACR;AACA,QAAA,IAAI,IAAA,CAAK,SAAS,QAAA,EAAU;AAC5B,QAAA,MAAA,IAAU,QAAA;AAAA,MACZ;AAAA,IACF;AAAA,GACF;AACF;;;ACjGA,IAAM,iBAAA,GAAoB,KAAA;AAiB1B,SAAS,eAAe,OAAA,EAAoC;AAC1D,EAAA,IAAI,OAAA,EAAS,mBAAA,EAAqB,OAAO,OAAA,CAAQ,mBAAA;AACjD,EAAA,IAAI,SAAS,aAAA,EAAe;AAC1B,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,iBAAA,GAAoB,OAAA,CAAQ,aAAa,CAAA;AAAA,EAC7D;AACA,EAAA,OAAO,GAAA;AACT;AA+BO,SAAS,SAAA,CACd,IAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;AAEtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,QAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,QAAA,IAAI;AACF,UAAA,MAAM,QAAQ,KAAK,CAAA;AACnB,UAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,QAClB,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,KAAA,MAAW,UAAU,KAAA,EAAO;AAC1B,YAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,UAC/B;AAAA,QACF;AAAA,MACF;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF;AAeO,SAAS,WAAA,CACd,IAAA,EACA,EAAA,EACA,OAAA,EACA,OAAA,EACW;AACX,EAAA,MAAM,OAAA,GAAU,eAAe,OAAO,CAAA;AA
EtC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,MAAM,KAAK,OAAA,EAAmC;AAC5C,MAAA,MAAM,SAA+B,EAAC;AACtC,MAAA,IAAI,MAAA,GAAS,CAAA;AAEb,MAAA,IAAI;AACF,QAAA,MAAM,EAAA,CAAG,WAAA,CAAY,OAAO,EAAA,KAAO;AACjC,UAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,OAAA,EAAS;AAChD,YAAA,MAAM,KAAA,GAAQ,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,IAAI,OAAO,CAAA;AAC1C,YAAA,MAAM,OAAA,CAAQ,IAAI,KAAK,CAAA;AACvB,YAAA,MAAA,IAAU,KAAA,CAAM,MAAA;AAAA,UAClB;AAAA,QACF,CAAC,CAAA;AAAA,MACH,SAAS,GAAA,EAAK;AACZ,QAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,QAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,UAAA,MAAA,CAAO,IAAA,CAAK,EAAE,MAAA,EAAQ,KAAA,EAAO,CAAA;AAAA,QAC/B;AACA,QAAA,MAAA,GAAS,CAAA;AAAA,MACX;AAEA,MAAA,OAAO,EAAE,aAAA,EAAe,MAAA,EAAQ,MAAA,EAAO;AAAA,IACzC;AAAA,GACF;AACF","file":"chunk-42CNRMAQ.js","sourcesContent":["import type { DataRecord, Extractor } from \"./types.js\";\n\n/**\n * Create an extractor from any Drizzle select query.\n *\n * Pass a function that returns the query — this makes the extractor\n * re-runnable and avoids consuming a one-shot promise.\n *\n * @example\n * ```ts\n * import { fromQuery } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { users } from \"@/db/schema\";\n * import { eq } from \"drizzle-orm\";\n *\n * const extractor = fromQuery(\"active-users\", () =>\n * db.select().from(users).where(eq(users.active, true))\n * );\n * ```\n */\nexport function fromQuery<T extends DataRecord>(\n name: string,\n queryFn: () => PromiseLike<T[]>,\n): Extractor<T> {\n return {\n name,\n async *extract() {\n const rows = await queryFn();\n for (const row of rows) {\n yield row;\n }\n },\n };\n}\n\n/**\n * Create an extractor from a Drizzle query that streams results in\n * chunks. 
Use this for large tables where materializing all rows\n * at once is too expensive.\n *\n * @example\n * ```ts\n * const extractor = fromQueryCursor(\"all-events\", async function* () {\n * let offset = 0;\n * const limit = 5000;\n * while (true) {\n * const batch = await db.select().from(events).limit(limit).offset(offset);\n * if (batch.length === 0) break;\n * yield* batch;\n * offset += limit;\n * }\n * });\n * ```\n */\nexport function fromQueryCursor<T extends DataRecord>(\n name: string,\n generatorFn: () => AsyncIterable<T>,\n): Extractor<T> {\n return {\n name,\n extract: generatorFn,\n };\n}\n\n/**\n * Create an extractor from a paginated Drizzle query. Automatically\n * handles offset-based pagination so you don't have to write the loop.\n *\n * @example\n * ```ts\n * const extractor = fromQueryPaginated(\"all-users\", {\n * pageSize: 2000,\n * query: (limit, offset) => db.select().from(users).limit(limit).offset(offset),\n * });\n * ```\n */\nexport function fromQueryPaginated<T extends DataRecord>(\n name: string,\n opts: {\n query: (limit: number, offset: number) => PromiseLike<T[]>;\n pageSize?: number;\n },\n): Extractor<T> {\n const pageSize = opts.pageSize ?? 1000;\n return {\n name,\n async *extract() {\n let offset = 0;\n for (;;) {\n const rows = await opts.query(pageSize, offset);\n if (rows.length === 0) break;\n for (const row of rows) {\n yield row;\n }\n if (rows.length < pageSize) break;\n offset += pageSize;\n }\n },\n };\n}\n","import type { DataRecord, Loader, LoadResult } from \"./types.js\";\n\nconst PG_MAX_PARAMETERS = 65535;\n\ninterface ToDrizzleOptions {\n /**\n * Max records per INSERT statement. When a batch exceeds this,\n * it's automatically split into sub-batches to stay within Postgres\n * parameter limits. Defaults to auto-calculated from columnsPerRow.\n */\n maxRowsPerStatement?: number;\n /**\n * Number of columns per row. 
Used to auto-calculate maxRowsPerStatement\n * to stay under Postgres' 65535 parameter limit.\n * If not provided, falls back to maxRowsPerStatement or 1000.\n */\n columnsPerRow?: number;\n}\n\nfunction resolveMaxRows(options?: ToDrizzleOptions): number {\n if (options?.maxRowsPerStatement) return options.maxRowsPerStatement;\n if (options?.columnsPerRow) {\n return Math.floor(PG_MAX_PARAMETERS / options.columnsPerRow);\n }\n return 1000;\n}\n\n/**\n * Create a loader that writes records using a Drizzle insert/upsert.\n *\n * You provide the write function — this keeps the loader decoupled from\n * specific Drizzle driver types while giving you full control over\n * insert/upsert/conflict behavior.\n *\n * Automatically sub-batches to stay within Postgres' 65535 parameter limit.\n *\n * @example\n * ```ts\n * import { toDrizzle } from \"khotan-data/drizzle\";\n * import { db } from \"@/db\";\n * import { processedUsers } from \"@/db/schema\";\n *\n * // Simple insert\n * const loader = toDrizzle(\"insert-users\", (rows) =>\n * db.insert(processedUsers).values(rows)\n * );\n *\n * // Upsert\n * const loader = toDrizzle(\"upsert-users\", (rows) =>\n * db.insert(processedUsers).values(rows).onConflictDoUpdate({\n * target: processedUsers.id,\n * set: { name: sql`excluded.name`, updatedAt: new Date() },\n * })\n * );\n * ```\n */\nexport function toDrizzle<T extends DataRecord>(\n name: string,\n writeFn: (records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n try {\n await writeFn(chunk);\n loaded += chunk.length;\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n for (const record of chunk) {\n errors.push({ record, error });\n }\n }\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n\n/**\n * Create a loader that writes records inside a Drizzle transaction.\n *\n * All sub-batches for a single load call are wrapped in a single\n * transaction — if any batch fails, the entire load is rolled back.\n *\n * @example\n * ```ts\n * const loader = toDrizzleTx(\"tx-insert\", db, (tx, rows) =>\n * tx.insert(processedUsers).values(rows)\n * );\n * ```\n */\nexport function toDrizzleTx<T extends DataRecord>(\n name: string,\n db: { transaction: <R>(fn: (tx: never) => Promise<R>) => Promise<R> },\n writeFn: (tx: never, records: T[]) => PromiseLike<unknown>,\n options?: ToDrizzleOptions,\n): Loader<T> {\n const maxRows = resolveMaxRows(options);\n\n return {\n name,\n async load(records: T[]): Promise<LoadResult> {\n const errors: LoadResult[\"errors\"] = [];\n let loaded = 0;\n\n try {\n await db.transaction(async (tx) => {\n for (let i = 0; i < records.length; i += maxRows) {\n const chunk = records.slice(i, i + maxRows);\n await writeFn(tx, chunk);\n loaded += chunk.length;\n }\n });\n } catch (err) {\n const error = err instanceof Error ? err : new Error(String(err));\n for (const record of records) {\n errors.push({ record, error });\n }\n loaded = 0;\n }\n\n return { recordsLoaded: loaded, errors };\n },\n };\n}\n"]}
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// src/pipeline-builder.ts
|
|
4
|
+
var Pipeline = class _Pipeline {
|
|
5
|
+
#name;
|
|
6
|
+
#extractor;
|
|
7
|
+
#transformers;
|
|
8
|
+
#loaders;
|
|
9
|
+
#listeners;
|
|
10
|
+
constructor(name, extractor, transformers, loaders, listeners) {
|
|
11
|
+
this.#name = name;
|
|
12
|
+
this.#extractor = extractor;
|
|
13
|
+
this.#transformers = transformers;
|
|
14
|
+
this.#loaders = loaders;
|
|
15
|
+
this.#listeners = listeners;
|
|
16
|
+
}
|
|
17
|
+
static create(name) {
|
|
18
|
+
return new _Pipeline(name, null, [], [], []);
|
|
19
|
+
}
|
|
20
|
+
get name() {
|
|
21
|
+
return this.#name;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Set the data source for this pipeline.
|
|
25
|
+
*/
|
|
26
|
+
extract(extractor) {
|
|
27
|
+
return new _Pipeline(
|
|
28
|
+
this.#name,
|
|
29
|
+
extractor,
|
|
30
|
+
this.#transformers,
|
|
31
|
+
this.#loaders,
|
|
32
|
+
this.#listeners
|
|
33
|
+
);
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Add a transformation step.
|
|
37
|
+
*/
|
|
38
|
+
transform(transformer) {
|
|
39
|
+
return new _Pipeline(
|
|
40
|
+
this.#name,
|
|
41
|
+
this.#extractor,
|
|
42
|
+
[...this.#transformers, transformer],
|
|
43
|
+
this.#loaders,
|
|
44
|
+
this.#listeners
|
|
45
|
+
);
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Add a load destination.
|
|
49
|
+
*/
|
|
50
|
+
load(loader) {
|
|
51
|
+
return new _Pipeline(
|
|
52
|
+
this.#name,
|
|
53
|
+
this.#extractor,
|
|
54
|
+
this.#transformers,
|
|
55
|
+
[...this.#loaders, loader],
|
|
56
|
+
this.#listeners
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Subscribe to pipeline events.
|
|
61
|
+
*/
|
|
62
|
+
on(listener) {
|
|
63
|
+
return new _Pipeline(
|
|
64
|
+
this.#name,
|
|
65
|
+
this.#extractor,
|
|
66
|
+
this.#transformers,
|
|
67
|
+
this.#loaders,
|
|
68
|
+
[...this.#listeners, listener]
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Execute the pipeline.
|
|
73
|
+
*/
|
|
74
|
+
async run(options = {}) {
|
|
75
|
+
if (!this.#extractor) {
|
|
76
|
+
throw new Error(
|
|
77
|
+
`Pipeline "${this.#name}" has no extractor. Call .extract() before .run().`
|
|
78
|
+
);
|
|
79
|
+
}
|
|
80
|
+
if (this.#loaders.length === 0) {
|
|
81
|
+
throw new Error(
|
|
82
|
+
`Pipeline "${this.#name}" has no loaders. Call .load() before .run().`
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
const { batchSize = 1e3, continueOnError = false, signal } = options;
|
|
86
|
+
const startTime = performance.now();
|
|
87
|
+
const errors = [];
|
|
88
|
+
let recordsProcessed = 0;
|
|
89
|
+
let recordsLoaded = 0;
|
|
90
|
+
this.#emit({
|
|
91
|
+
type: "pipeline:start",
|
|
92
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
93
|
+
data: { name: this.#name }
|
|
94
|
+
});
|
|
95
|
+
let batch = [];
|
|
96
|
+
const flushBatch = async () => {
|
|
97
|
+
if (batch.length === 0) return;
|
|
98
|
+
for (const loader of this.#loaders) {
|
|
99
|
+
try {
|
|
100
|
+
this.#emit({
|
|
101
|
+
type: "step:start",
|
|
102
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
103
|
+
stepName: loader.name
|
|
104
|
+
});
|
|
105
|
+
const result = await loader.load(batch);
|
|
106
|
+
recordsLoaded += result.recordsLoaded;
|
|
107
|
+
for (const err of result.errors) {
|
|
108
|
+
errors.push({
|
|
109
|
+
stepName: loader.name,
|
|
110
|
+
error: err.error,
|
|
111
|
+
record: err.record
|
|
112
|
+
});
|
|
113
|
+
}
|
|
114
|
+
this.#emit({
|
|
115
|
+
type: "step:end",
|
|
116
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
117
|
+
stepName: loader.name,
|
|
118
|
+
data: result
|
|
119
|
+
});
|
|
120
|
+
} catch (err) {
|
|
121
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
122
|
+
errors.push({ stepName: loader.name, error });
|
|
123
|
+
if (!continueOnError) {
|
|
124
|
+
throw error;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
batch = [];
|
|
129
|
+
};
|
|
130
|
+
try {
|
|
131
|
+
for await (const raw of this.#extractor.extract()) {
|
|
132
|
+
if (signal?.aborted) {
|
|
133
|
+
break;
|
|
134
|
+
}
|
|
135
|
+
this.#emit({
|
|
136
|
+
type: "record:extracted",
|
|
137
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
138
|
+
stepName: this.#extractor.name,
|
|
139
|
+
data: raw
|
|
140
|
+
});
|
|
141
|
+
let records = [raw];
|
|
142
|
+
for (const transformer of this.#transformers) {
|
|
143
|
+
const nextRecords = [];
|
|
144
|
+
for (const record of records) {
|
|
145
|
+
try {
|
|
146
|
+
this.#emit({
|
|
147
|
+
type: "step:start",
|
|
148
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
149
|
+
stepName: transformer.name
|
|
150
|
+
});
|
|
151
|
+
const result = await transformer.transform(record);
|
|
152
|
+
const transformed = Array.isArray(result) ? result : [result];
|
|
153
|
+
nextRecords.push(...transformed);
|
|
154
|
+
this.#emit({
|
|
155
|
+
type: "record:transformed",
|
|
156
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
157
|
+
stepName: transformer.name,
|
|
158
|
+
data: transformed
|
|
159
|
+
});
|
|
160
|
+
} catch (err) {
|
|
161
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
162
|
+
errors.push({
|
|
163
|
+
stepName: transformer.name,
|
|
164
|
+
error,
|
|
165
|
+
record
|
|
166
|
+
});
|
|
167
|
+
this.#emit({
|
|
168
|
+
type: "error",
|
|
169
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
170
|
+
stepName: transformer.name,
|
|
171
|
+
data: error
|
|
172
|
+
});
|
|
173
|
+
if (!continueOnError) {
|
|
174
|
+
throw error;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
records = nextRecords;
|
|
179
|
+
}
|
|
180
|
+
batch.push(...records);
|
|
181
|
+
recordsProcessed += records.length;
|
|
182
|
+
if (batch.length >= batchSize) {
|
|
183
|
+
await flushBatch();
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
await flushBatch();
|
|
187
|
+
} catch {
|
|
188
|
+
if (!continueOnError) {
|
|
189
|
+
const duration2 = performance.now() - startTime;
|
|
190
|
+
this.#emit({
|
|
191
|
+
type: "pipeline:end",
|
|
192
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
193
|
+
data: { recordsProcessed, recordsLoaded, errors, duration: duration2 }
|
|
194
|
+
});
|
|
195
|
+
return { recordsProcessed, recordsLoaded, errors, duration: duration2 };
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
const duration = performance.now() - startTime;
|
|
199
|
+
this.#emit({
|
|
200
|
+
type: "pipeline:end",
|
|
201
|
+
timestamp: /* @__PURE__ */ new Date(),
|
|
202
|
+
data: { recordsProcessed, recordsLoaded, errors, duration }
|
|
203
|
+
});
|
|
204
|
+
return { recordsProcessed, recordsLoaded, errors, duration };
|
|
205
|
+
}
|
|
206
|
+
#emit(event) {
|
|
207
|
+
for (const listener of this.#listeners) {
|
|
208
|
+
listener(event);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
};
|
|
212
|
+
|
|
213
|
+
exports.Pipeline = Pipeline;
|
|
214
|
+
//# sourceMappingURL=chunk-6PDC7DFX.cjs.map
|
|
215
|
+
//# sourceMappingURL=chunk-6PDC7DFX.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/pipeline-builder.ts"],"names":["duration"],"mappings":";;;AAuBO,IAAM,QAAA,GAAN,MAAM,SAAA,CAAmD;AAAA,EACrD,KAAA;AAAA,EACA,UAAA;AAAA,EACA,aAAA;AAAA,EACA,QAAA;AAAA,EACA,UAAA;AAAA,EAED,WAAA,CACN,IAAA,EACA,SAAA,EACA,YAAA,EACA,SACA,SAAA,EACA;AACA,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAA;AACb,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAClB,IAAA,IAAA,CAAK,aAAA,GAAgB,YAAA;AACrB,IAAA,IAAA,CAAK,QAAA,GAAW,OAAA;AAChB,IAAA,IAAA,CAAK,UAAA,GAAa,SAAA;AAAA,EACpB;AAAA,EAEA,OAAO,OAAO,IAAA,EAAwB;AACpC,IAAA,OAAO,IAAI,UAAS,IAAA,EAAM,IAAA,EAAM,EAAC,EAAG,EAAC,EAAG,EAAE,CAAA;AAAA,EAC5C;AAAA,EAEA,IAAI,IAAA,GAAe;AACjB,IAAA,OAAO,IAAA,CAAK,KAAA;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,QAA8B,SAAA,EAAsC;AAClE,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,SAAA;AAAA,MACA,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,UACE,WAAA,EACmB;AACnB,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,aAAA,EAAe,WAAW,CAAA;AAAA,MACnC,IAAA,CAAK,QAAA;AAAA,MACL,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,KAAK,MAAA,EAA8C;AACjD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,QAAA,EAAU,MAAM,CAAA;AAAA,MACzB,IAAA,CAAK;AAAA,KACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,GAAG,QAAA,EAAqD;AACtD,IAAA,OAAO,IAAI,SAAA;AAAA,MACT,IAAA,CAAK,KAAA;AAAA,MACL,IAAA,CAAK,UAAA;AAAA,MACL,IAAA,CAAK,aAAA;AAAA,MACL,IAAA,CAAK,QAAA;AAAA,MACL,CAAC,GAAG,IAAA,CAAK,UAAA,EAAY,QAAQ;AAAA,KAC/B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,GAAA,CAAI,OAAA,GAA2B,EAAC,EAA4B;AAChE,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,kDAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAA,KAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,UAAA,EAAa,KAAK,KAAK,CAAA,6CAAA;AAAA,OACzB;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,SAAA,GAAY,GAAA,EAAM,eAAA,GAAkB,KAAA,EAAO,QAAO,GAAI,OAAA;AAE9D,IAAA,MAAM,SAAA,GAAY,YAAY,GAAA,EAAI;AAClC
,IAAA,MAAM,SAA8B,EAAC;AACrC,IAAA,IAAI,gBAAA,GAAmB,CAAA;AACvB,IAAA,IAAI,aAAA,GAAgB,CAAA;AAEpB,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,gBAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,IAAA,EAAM,IAAA,CAAK,KAAA;AAAM,KAC1B,CAAA;AAED,IAAA,IAAI,QAAsB,EAAC;AAE3B,IAAA,MAAM,aAAa,YAA2B;AAC5C,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AAExB,MAAA,KAAA,MAAW,MAAA,IAAU,KAAK,QAAA,EAAU;AAClC,QAAA,IAAI;AACF,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,YAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO;AAAA,WAClB,CAAA;AAED,UAAA,MAAM,MAAA,GAAS,MAAM,MAAA,CAAO,IAAA,CAAK,KAAK,CAAA;AACtC,UAAA,aAAA,IAAiB,MAAA,CAAO,aAAA;AAExB,UAAA,KAAA,MAAW,GAAA,IAAO,OAAO,MAAA,EAAQ;AAC/B,YAAA,MAAA,CAAO,IAAA,CAAK;AAAA,cACV,UAAU,MAAA,CAAO,IAAA;AAAA,cACjB,OAAO,GAAA,CAAI,KAAA;AAAA,cACX,QAAQ,GAAA,CAAI;AAAA,aACb,CAAA;AAAA,UACH;AAEA,UAAA,IAAA,CAAK,KAAA,CAAM;AAAA,YACT,IAAA,EAAM,UAAA;AAAA,YACN,SAAA,sBAAe,IAAA,EAAK;AAAA,YACpB,UAAU,MAAA,CAAO,IAAA;AAAA,YACjB,IAAA,EAAM;AAAA,WACP,CAAA;AAAA,QACH,SAAS,GAAA,EAAK;AACZ,UAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,UAAA,MAAA,CAAO,KAAK,EAAE,QAAA,EAAU,MAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AAE5C,UAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,YAAA,MAAM,KAAA;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,EAAC;AAAA,IACX,CAAA;AAEA,IAAA,IAAI;AACF,MAAA,WAAA,MAAiB,GAAA,IAAO,IAAA,CAAK,UAAA,CAAW,OAAA,EAAQ,EAAG;AACjD,QAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,UAAA;AAAA,QACF;AAEA,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,kBAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,QAAA,EAAU,KAAK,UAAA,CAAW,IAAA;AAAA,UAC1B,IAAA,EAAM;AAAA,SACP,CAAA;AAED,QAAA,IAAI,OAAA,GAAwB,CAAC,GAAG,CAAA;AAEhC,QAAA,KAAA,MAAW,WAAA,IAAe,KAAK,aAAA,EAAe;AAC5C,UAAA,MAAM,cAA4B,EAAC;AAEnC,UAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,YAAA,IAAI;AACF,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,YAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY;AAAA,eACvB,CAAA;AAED,cAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,MAAM,CAAA;AACjD,cAAA,MAAM,cAAc,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,GAAI,MAAA,GAAS,CAAC,MA
AM,CAAA;AAC5D,cAAA,WAAA,CAAY,IAAA,CAAK,GAAG,WAAW,CAAA;AAE/B,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,oBAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAAA,YACH,SAAS,GAAA,EAAK;AACZ,cAAA,MAAM,KAAA,GAAQ,eAAe,KAAA,GAAQ,GAAA,GAAM,IAAI,KAAA,CAAM,MAAA,CAAO,GAAG,CAAC,CAAA;AAChE,cAAA,MAAA,CAAO,IAAA,CAAK;AAAA,gBACV,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,KAAA;AAAA,gBACA;AAAA,eACD,CAAA;AAED,cAAA,IAAA,CAAK,KAAA,CAAM;AAAA,gBACT,IAAA,EAAM,OAAA;AAAA,gBACN,SAAA,sBAAe,IAAA,EAAK;AAAA,gBACpB,UAAU,WAAA,CAAY,IAAA;AAAA,gBACtB,IAAA,EAAM;AAAA,eACP,CAAA;AAED,cAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,gBAAA,MAAM,KAAA;AAAA,cACR;AAAA,YACF;AAAA,UACF;AAEA,UAAA,OAAA,GAAU,WAAA;AAAA,QACZ;AAEA,QAAA,KAAA,CAAM,IAAA,CAAK,GAAG,OAAO,CAAA;AACrB,QAAA,gBAAA,IAAoB,OAAA,CAAQ,MAAA;AAE5B,QAAA,IAAI,KAAA,CAAM,UAAU,SAAA,EAAW;AAC7B,UAAA,MAAM,UAAA,EAAW;AAAA,QACnB;AAAA,MACF;AAEA,MAAA,MAAM,UAAA,EAAW;AAAA,IACnB,CAAA,CAAA,MAAQ;AACN,MAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,QAAA,MAAMA,SAAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AACrC,QAAA,IAAA,CAAK,KAAA,CAAM;AAAA,UACT,IAAA,EAAM,cAAA;AAAA,UACN,SAAA,sBAAe,IAAA,EAAK;AAAA,UACpB,MAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA;AAAS,SAC3D,CAAA;AACD,QAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,UAAAA,SAAAA,EAAS;AAAA,MAC7D;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,WAAA,CAAY,GAAA,EAAI,GAAI,SAAA;AAErC,IAAA,IAAA,CAAK,KAAA,CAAM;AAAA,MACT,IAAA,EAAM,cAAA;AAAA,MACN,SAAA,sBAAe,IAAA,EAAK;AAAA,MACpB,IAAA,EAAM,EAAE,gBAAA,EAAkB,aAAA,EAAe,QAAQ,QAAA;AAAS,KAC3D,CAAA;AAED,IAAA,OAAO,EAAE,gBAAA,EAAkB,aAAA,EAAe,MAAA,EAAQ,QAAA,EAAS;AAAA,EAC7D;AAAA,EAEA,MAAM,KAAA,EAKG;AACP,IAAA,KAAA,MAAW,QAAA,IAAY,KAAK,UAAA,EAAY;AACtC,MAAA,QAAA,CAAS,KAA6C,CAAA;AAAA,IACxD;AAAA,EACF;AACF","file":"chunk-6PDC7DFX.cjs","sourcesContent":["import type {\n DataRecord,\n Extractor,\n Loader,\n PipelineEventListener,\n PipelineOptions,\n PipelineResult,\n PipelineStepError,\n Transformer,\n} from \"./types.js\";\n\n/**\n * A composable, type-safe ETL pipeline builder.\n *\n * @example\n * 
```ts\n * const result = await Pipeline.create(\"my-pipeline\")\n * .extract(myExtractor)\n * .transform(myTransformer)\n * .load(myLoader)\n * .run();\n * ```\n */\nexport class Pipeline<TCurrent extends DataRecord = DataRecord> {\n readonly #name: string;\n readonly #extractor: Extractor | null;\n readonly #transformers: Transformer[];\n readonly #loaders: Loader[];\n readonly #listeners: PipelineEventListener[];\n\n private constructor(\n name: string,\n extractor: Extractor | null,\n transformers: Transformer[],\n loaders: Loader[],\n listeners: PipelineEventListener[],\n ) {\n this.#name = name;\n this.#extractor = extractor;\n this.#transformers = transformers;\n this.#loaders = loaders;\n this.#listeners = listeners;\n }\n\n static create(name: string): Pipeline {\n return new Pipeline(name, null, [], [], []);\n }\n\n get name(): string {\n return this.#name;\n }\n\n /**\n * Set the data source for this pipeline.\n */\n extract<T extends DataRecord>(extractor: Extractor<T>): Pipeline<T> {\n return new Pipeline(\n this.#name,\n extractor,\n this.#transformers,\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a transformation step.\n */\n transform<TOutput extends DataRecord>(\n transformer: Transformer<TCurrent, TOutput>,\n ): Pipeline<TOutput> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n [...this.#transformers, transformer],\n this.#loaders,\n this.#listeners,\n );\n }\n\n /**\n * Add a load destination.\n */\n load(loader: Loader<TCurrent>): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n [...this.#loaders, loader],\n this.#listeners,\n );\n }\n\n /**\n * Subscribe to pipeline events.\n */\n on(listener: PipelineEventListener): Pipeline<TCurrent> {\n return new Pipeline(\n this.#name,\n this.#extractor,\n this.#transformers,\n this.#loaders,\n [...this.#listeners, listener],\n );\n }\n\n /**\n * Execute the pipeline.\n */\n async run(options: PipelineOptions = {}): 
Promise<PipelineResult> {\n if (!this.#extractor) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no extractor. Call .extract() before .run().`,\n );\n }\n\n if (this.#loaders.length === 0) {\n throw new Error(\n `Pipeline \"${this.#name}\" has no loaders. Call .load() before .run().`,\n );\n }\n\n const { batchSize = 1000, continueOnError = false, signal } = options;\n\n const startTime = performance.now();\n const errors: PipelineStepError[] = [];\n let recordsProcessed = 0;\n let recordsLoaded = 0;\n\n this.#emit({\n type: \"pipeline:start\",\n timestamp: new Date(),\n data: { name: this.#name },\n });\n\n let batch: DataRecord[] = [];\n\n const flushBatch = async (): Promise<void> => {\n if (batch.length === 0) return;\n\n for (const loader of this.#loaders) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: loader.name,\n });\n\n const result = await loader.load(batch);\n recordsLoaded += result.recordsLoaded;\n\n for (const err of result.errors) {\n errors.push({\n stepName: loader.name,\n error: err.error,\n record: err.record,\n });\n }\n\n this.#emit({\n type: \"step:end\",\n timestamp: new Date(),\n stepName: loader.name,\n data: result,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({ stepName: loader.name, error });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n batch = [];\n };\n\n try {\n for await (const raw of this.#extractor.extract()) {\n if (signal?.aborted) {\n break;\n }\n\n this.#emit({\n type: \"record:extracted\",\n timestamp: new Date(),\n stepName: this.#extractor.name,\n data: raw,\n });\n\n let records: DataRecord[] = [raw];\n\n for (const transformer of this.#transformers) {\n const nextRecords: DataRecord[] = [];\n\n for (const record of records) {\n try {\n this.#emit({\n type: \"step:start\",\n timestamp: new Date(),\n stepName: transformer.name,\n });\n\n const result = await transformer.transform(record);\n const transformed = Array.isArray(result) ? result : [result];\n nextRecords.push(...transformed);\n\n this.#emit({\n type: \"record:transformed\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: transformed,\n });\n } catch (err) {\n const error = err instanceof Error ? 
err : new Error(String(err));\n errors.push({\n stepName: transformer.name,\n error,\n record,\n });\n\n this.#emit({\n type: \"error\",\n timestamp: new Date(),\n stepName: transformer.name,\n data: error,\n });\n\n if (!continueOnError) {\n throw error;\n }\n }\n }\n\n records = nextRecords;\n }\n\n batch.push(...records);\n recordsProcessed += records.length;\n\n if (batch.length >= batchSize) {\n await flushBatch();\n }\n }\n\n await flushBatch();\n } catch {\n if (!continueOnError) {\n const duration = performance.now() - startTime;\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n }\n\n const duration = performance.now() - startTime;\n\n this.#emit({\n type: \"pipeline:end\",\n timestamp: new Date(),\n data: { recordsProcessed, recordsLoaded, errors, duration },\n });\n\n return { recordsProcessed, recordsLoaded, errors, duration };\n }\n\n #emit(event: {\n type: string;\n timestamp: Date;\n stepName?: string;\n data?: unknown;\n }): void {\n for (const listener of this.#listeners) {\n listener(event as Parameters<PipelineEventListener>[0]);\n }\n }\n}\n"]}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// src/transformers.ts
|
|
4
|
+
function createTransformer(name, fn) {
|
|
5
|
+
return { name, transform: fn };
|
|
6
|
+
}
|
|
7
|
+
/**
 * Create a transformer named `name` whose transform callback is `fn`.
 *
 * This is a thin alias: both arguments are forwarded to `createTransformer`
 * untouched and its result is returned as-is.
 *
 * @param name - Name of the resulting transformer.
 * @param fn - Mapping callback applied to each record.
 * @returns The transformer produced by `createTransformer`.
 */
function map(name, fn) {
  const transformer = createTransformer(name, fn);
  return transformer;
}
|
|
10
|
+
/**
 * Create a transformer that keeps or drops records based on `predicate`.
 *
 * The predicate result is awaited, so it may be synchronous or return a
 * promise. A truthy result yields the record itself; anything else yields an
 * empty array.
 *
 * @param name - Name of the resulting transformer.
 * @param predicate - Called with each record; decides whether it is kept.
 * @returns The transformer produced by `createTransformer`.
 */
function filter(name, predicate) {
  return createTransformer(name, async (candidate) => {
    const verdict = await predicate(candidate);
    if (verdict) {
      return candidate;
    }
    // An empty array is how a transform signals "no output for this record".
    return [];
  });
}
|
|
16
|
+
/**
 * Create a transformer named `name` whose transform callback is `fn`.
 *
 * No flattening happens here: the arguments are forwarded to
 * `createTransformer` unchanged and whatever `fn` returns is passed through.
 *
 * @param name - Name of the resulting transformer.
 * @param fn - Callback applied to each record.
 * @returns The transformer produced by `createTransformer`.
 */
function flatMap(name, fn) {
  const transformer = createTransformer(name, fn);
  return transformer;
}
|
|
19
|
+
/**
 * Create a transformer that copies only the listed keys of each record.
 *
 * For every entry of `keys`, the value is copied into a fresh object when
 * `key in record` is true; keys that fail that check are skipped rather than
 * being written as `undefined`.
 *
 * @param name - Name of the resulting transformer.
 * @param keys - Keys to copy from each record.
 * @returns The transformer produced by `createTransformer`.
 */
function pick(name, keys) {
  return createTransformer(name, (source) => {
    const picked = {};
    for (const wanted of keys) {
      if (!(wanted in source)) {
        continue;
      }
      picked[wanted] = source[wanted];
    }
    return picked;
  });
}
|
|
30
|
+
/**
 * Create a transformer that copies each record without the listed keys.
 *
 * The keys are collected into a `Set` once, when `omit` is called. Each
 * record's own enumerable entries (as reported by `Object.entries`) are then
 * copied into a fresh object unless their key is in that set.
 *
 * @param name - Name of the resulting transformer.
 * @param keys - Keys to leave out of each output record.
 * @returns The transformer produced by `createTransformer`.
 */
function omit(name, keys) {
  const excluded = new Set(keys);
  return createTransformer(name, (source) => {
    const kept = {};
    Object.entries(source).forEach(([field, fieldValue]) => {
      if (excluded.has(field)) {
        return;
      }
      kept[field] = fieldValue;
    });
    return kept;
  });
}
|
|
42
|
+
function rename(name, mapping) {
|
|
43
|
+
return createTransformer(name, (record) => {
|
|
44
|
+
const result = {};
|
|
45
|
+
for (const [key, value] of Object.entries(record)) {
|
|
46
|
+
const newKey = mapping[key] ?? key;
|
|
47
|
+
result[newKey] = value;
|
|
48
|
+
}
|
|
49
|
+
return result;
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
/**
 * Create a transformer that runs several transformers in sequence.
 *
 * Processing starts with a one-element list holding the incoming record.
 * Each transformer in `transformers` is applied, in order, to every record
 * currently in the list; its awaited result is appended to the next list,
 * spread when it is an array and pushed as a single item otherwise. The list
 * left after the last transformer is returned.
 *
 * @param name - Name of the resulting transformer.
 * @param transformers - Transformers to apply, in order.
 * @returns The transformer produced by `createTransformer`; its transform
 *   resolves to an array of records.
 */
function compose(name, transformers) {
  return createTransformer(name, async (record) => {
    let current = [record];
    for (const step of transformers) {
      const produced = [];
      for (const item of current) {
        const output = await step.transform(item);
        // Normalise single results to a one-element list before appending.
        const outputs = Array.isArray(output) ? output : [output];
        produced.push(...outputs);
      }
      current = produced;
    }
    return current;
  });
}
|
|
70
|
+
|
|
71
|
+
exports.compose = compose;
|
|
72
|
+
exports.createTransformer = createTransformer;
|
|
73
|
+
exports.filter = filter;
|
|
74
|
+
exports.flatMap = flatMap;
|
|
75
|
+
exports.map = map;
|
|
76
|
+
exports.omit = omit;
|
|
77
|
+
exports.pick = pick;
|
|
78
|
+
exports.rename = rename;
|
|
79
|
+
//# sourceMappingURL=chunk-6R4QVX2Q.cjs.map
|
|
80
|
+
//# sourceMappingURL=chunk-6R4QVX2Q.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/transformers.ts"],"names":[],"mappings":";;;AAKO,SAAS,iBAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,EAAE,IAAA,EAAM,SAAA,EAAW,EAAA,EAAG;AAC/B;AAKO,SAAS,GAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAMO,SAAS,MAAA,CACd,MACA,SAAA,EACmB;AACnB,EAAA,OAAO,iBAAA,CAAkB,IAAA,EAAM,OAAO,MAAA,KAAc;AAClD,IAAA,MAAM,IAAA,GAAO,MAAM,SAAA,CAAU,MAAM,CAAA;AACnC,IAAA,OAAO,IAAA,GAAO,SAAS,EAAC;AAAA,EAC1B,CAAC,CAAA;AACH;AAKO,SAAS,OAAA,CAId,MACA,EAAA,EAC8B;AAC9B,EAAA,OAAO,iBAAA,CAAmC,MAAM,EAAE,CAAA;AACpD;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,OAAO,IAAA,EAAM;AACtB,MAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,MAAA,CAAO,GAAG,CAAA;AAAA,MACvD;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,IAAA,CACd,MACA,IAAA,EACyC;AACzC,EAAA,MAAM,MAAA,GAAS,IAAI,GAAA,CAAY,IAAI,CAAA;AACnC,EAAA,OAAO,iBAAA,CAA8C,IAAA,EAAM,CAAC,MAAA,KAAc;AACxE,IAAA,MAAM,SAAS,EAAC;AAChB,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,IAAI,CAAC,MAAA,CAAO,GAAA,CAAI,GAAG,CAAA,EAAG;AACpB,QAAC,MAAA,CAAmC,GAAG,CAAA,GAAI,KAAA;AAAA,MAC7C;AAAA,IACF;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAKO,SAAS,MAAA,CACd,MACA,OAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,CAAC,MAAA,KAAc;AAC/C,IAAA,MAAM,SAAqB,EAAC;AAC5B,IAAA,KAAA,MAAW,CAAC,GAAA,EAAK,KAAK,KAAK,MAAA,CAAO,OAAA,CAAQ,MAAM,CAAA,EAAG;AACjD,MAAA,MAAM,MAAA,GAAS,OAAA,CAAQ,GAAG,CAAA,IAAK,GAAA;AAC/B,MAAA,MAAA,CAAO,MAAM,CAAA,GAAI,KAAA;AAAA,IACnB;AACA,IAAA,OAAO,MAAA;AAAA,EACT,CAAC,CAAA;AACH;AAMO,SAAS,OAAA,CACd,MACA,YAAA,EACgB;AAChB,EAAA,OAAO,iBAAA,CAAqB,IAAA,EAAM,OAAO,MAAA,KAAc;AACrD,IAAA,IAAI,OAAA,GAAwB,CAAC,MAAM,CAAA;AAEnC,IAAA,KAAA,MAAW,eAAe,YAAA,EAAc;AACtC,MAAA,MAAM,cAA4B,EAAC;AACnC,MAAA,KAAA,MAAW,KAAK,OAAA,EAAS;AACvB,QAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,SAAA,CAAU,CAAC,CAAA;AAC5C,QAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,MAAM,CAAA,EAAG;AACzB,UAAA,WAAA,CAAY,IAAA,CAAK,GAAG,MAAM,CAAA;AAAA
,QAC5B,CAAA,MAAO;AACL,UAAA,WAAA,CAAY,KAAK,MAAM,CAAA;AAAA,QACzB;AAAA,MACF;AACA,MAAA,OAAA,GAAU,WAAA;AAAA,IACZ;AAEA,IAAA,OAAO,OAAA;AAAA,EACT,CAAC,CAAA;AACH","file":"chunk-6R4QVX2Q.cjs","sourcesContent":["import type { DataRecord, Transformer } from \"./types.js\";\n\n/**\n * Create a custom transformer from a function.\n */\nexport function createTransformer<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | TOutput[] | Promise<TOutput | TOutput[]>,\n): Transformer<TInput, TOutput> {\n return { name, transform: fn };\n}\n\n/**\n * Transform each record using a mapping function.\n */\nexport function map<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput | Promise<TOutput>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Filter records based on a predicate. Records that don't match\n * are dropped (returned as empty array).\n */\nexport function filter<T extends DataRecord = DataRecord>(\n name: string,\n predicate: (record: T) => boolean | Promise<boolean>,\n): Transformer<T, T> {\n return createTransformer(name, async (record: T) => {\n const keep = await predicate(record);\n return keep ? 
record : [];\n });\n}\n\n/**\n * Transform each record into zero or more records.\n */\nexport function flatMap<\n TInput extends DataRecord = DataRecord,\n TOutput extends DataRecord = DataRecord,\n>(\n name: string,\n fn: (record: TInput) => TOutput[] | Promise<TOutput[]>,\n): Transformer<TInput, TOutput> {\n return createTransformer<TInput, TOutput>(name, fn);\n}\n\n/**\n * Pick specific keys from each record.\n */\nexport function pick<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Pick<T, K> & DataRecord> {\n return createTransformer<T, Pick<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Pick<T, K> & DataRecord;\n for (const key of keys) {\n if (key in record) {\n (result as Record<string, unknown>)[key] = record[key];\n }\n }\n return result;\n });\n}\n\n/**\n * Omit specific keys from each record.\n */\nexport function omit<T extends DataRecord, K extends keyof T & string>(\n name: string,\n keys: K[],\n): Transformer<T, Omit<T, K> & DataRecord> {\n const keySet = new Set<string>(keys);\n return createTransformer<T, Omit<T, K> & DataRecord>(name, (record: T) => {\n const result = {} as Omit<T, K> & DataRecord;\n for (const [key, value] of Object.entries(record)) {\n if (!keySet.has(key)) {\n (result as Record<string, unknown>)[key] = value;\n }\n }\n return result;\n });\n}\n\n/**\n * Rename keys in each record.\n */\nexport function rename<T extends DataRecord>(\n name: string,\n mapping: Record<string, string>,\n): Transformer<T> {\n return createTransformer<T>(name, (record: T) => {\n const result: DataRecord = {};\n for (const [key, value] of Object.entries(record)) {\n const newKey = mapping[key] ?? 
key;\n result[newKey] = value;\n }\n return result;\n });\n}\n\n/**\n * Compose multiple transformers into a single transformer that\n * applies them in sequence.\n */\nexport function compose<T extends DataRecord>(\n name: string,\n transformers: Transformer[],\n): Transformer<T> {\n return createTransformer<T>(name, async (record: T) => {\n let records: DataRecord[] = [record];\n\n for (const transformer of transformers) {\n const nextRecords: DataRecord[] = [];\n for (const r of records) {\n const result = await transformer.transform(r);\n if (Array.isArray(result)) {\n nextRecords.push(...result);\n } else {\n nextRecords.push(result);\n }\n }\n records = nextRecords;\n }\n\n return records;\n });\n}\n"]}
|