@bulkimport/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +373 -0
- package/dist/index.cjs +819 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +335 -0
- package/dist/index.d.ts +335 -0
- package/dist/index.js +777 -0
- package/dist/index.js.map +1 -0
- package/package.json +72 -0

package/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 @bulkimport/core contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

package/README.md
ADDED
@@ -0,0 +1,373 @@
# @bulkimport/core

Backend-agnostic bulk data import library for TypeScript/JavaScript. Schema validation, batch processing, pause/resume, and a full event lifecycle — without coupling to any framework or database.

## Features

- **Schema validation** — Define field types, required fields, patterns, custom validators, and transforms
- **Batch processing** — Split large datasets into configurable batches
- **Pause / Resume / Abort** — Full control over long-running imports
- **Event-driven** — Subscribe to granular lifecycle events (import, batch, and record level)
- **Preview mode** — Sample and validate records before committing to a full import
- **Pluggable architecture** — Bring your own parser, data source, or state store
- **Zero framework coupling** — Works with Express, Fastify, Hono, serverless, or standalone
- **Dual format** — Ships ESM and CJS with full TypeScript declarations

## Install

```bash
npm install @bulkimport/core
```

## Quick Start

```typescript
import { BulkImport, CsvParser, BufferSource } from '@bulkimport/core';

const importer = new BulkImport({
  schema: {
    fields: [
      { name: 'email', type: 'email', required: true },
      { name: 'name', type: 'string', required: true },
      { name: 'age', type: 'number', required: false },
    ],
  },
  batchSize: 500,
  continueOnError: true,
});

importer.from(new BufferSource(csvString), new CsvParser());

// Preview first
const preview = await importer.preview(10);
console.log(preview.validRecords);
console.log(preview.invalidRecords);

// Process; `db` stands in for your own database client
await importer.start(async (record, context) => {
  await db.users.insert(record);
});
```

## Schema Definition

Each field supports:

| Property | Type | Description |
|---|---|---|
| `name` | `string` | Column name to match |
| `type` | `'string' \| 'number' \| 'boolean' \| 'date' \| 'email' \| 'custom'` | Built-in type validation |
| `required` | `boolean` | Fail if missing or empty |
| `pattern` | `RegExp` | Regex validation |
| `customValidator` | `(value: unknown) => { valid: boolean; message?: string }` | Custom validation logic |
| `transform` | `(value: unknown) => unknown` | Transform value before validation |
| `defaultValue` | `unknown` | Applied when the field is undefined |

```typescript
const schema = {
  fields: [
    { name: 'email', type: 'email', required: true },
    {
      name: 'role',
      type: 'string',
      required: true,
      pattern: /^(admin|user|editor)$/,
    },
    {
      name: 'name',
      type: 'string',
      required: true,
      transform: (v) => String(v).trim().toUpperCase(),
    },
    {
      name: 'nif',
      type: 'custom',
      required: true,
      customValidator: (value) => ({
        valid: /^\d{8}[A-Z]$/.test(String(value)),
        message: 'Invalid NIF format',
      }),
    },
  ],
  strict: true, // Reject unknown fields
};
```

## Events

Subscribe to lifecycle events for progress tracking, logging, or UI updates:

```typescript
importer.on('import:started', (e) => {
  console.log(`Starting: ${e.totalRecords} records in ${e.totalBatches} batches`);
});

importer.on('batch:completed', (e) => {
  console.log(`Batch ${e.batchIndex}: ${e.processedCount}/${e.totalCount}`);
});

importer.on('record:failed', (e) => {
  console.log(`Record ${e.recordIndex} failed: ${e.error}`);
});

importer.on('import:progress', (e) => {
  console.log(`${e.progress.percentage}% complete`);
});

importer.on('import:completed', (e) => {
  console.log(`Done: ${e.summary.processed} processed, ${e.summary.failed} failed`);
});
```

**Available events:** `import:started`, `import:completed`, `import:paused`, `import:aborted`, `import:failed`, `import:progress`, `batch:started`, `batch:completed`, `batch:failed`, `record:processed`, `record:failed`
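
These events compose into more than logging. As a sketch, the `record:failed` payload shown above (`recordIndex`, `error`) is enough to accumulate an error report for the end of the run:

```typescript
// Collect failures as they happen; the fields match the payload used above
const failures: Array<{ row: number; reason: string }> = [];

importer.on('record:failed', (e) => {
  failures.push({ row: e.recordIndex, reason: String(e.error) });
});

importer.on('import:completed', () => {
  console.log(`${failures.length} records need attention`);
});
```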

## Pause / Resume / Abort

```typescript
// Pause after a specific batch via events
importer.on('batch:completed', (e) => {
  if (e.batchIndex === 2) importer.pause();
});

await importer.start(processor);

// Later...
await importer.resume();

// Or cancel entirely
await importer.abort();

// Check status at any time
const { state, progress, batches } = await importer.getStatus();
```

## Real-World Example: Express + PostgreSQL

A complete example showing how to use `@bulkimport/core` in a REST API with a database:

```typescript
import express from 'express';
import { Pool } from 'pg';
import { BulkImport, CsvParser, BufferSource } from '@bulkimport/core';

const app = express();
const pool = new Pool({ connectionString: process.env.DATABASE_URL });

// Parse text/csv request bodies into a plain string
app.use(express.text({ type: 'text/csv' }));

app.post('/api/import/users', async (req, res) => {
  const csvBody = req.body; // Raw CSV body (string, via express.text above)

  const importer = new BulkImport({
    schema: {
      fields: [
        { name: 'email', type: 'email', required: true },
        { name: 'name', type: 'string', required: true },
        {
          name: 'role',
          type: 'string',
          required: false,
          pattern: /^(admin|user|editor)$/,
          defaultValue: 'user',
        },
      ],
      strict: true,
    },
    batchSize: 500,
    continueOnError: true,
  });

  importer.from(new BufferSource(csvBody), new CsvParser());

  // Preview before committing
  const preview = await importer.preview(5);
  if (preview.invalidRecords.length > 0) {
    return res.status(422).json({
      message: 'Validation errors found in sample',
      errors: preview.invalidRecords.map((r) => ({
        row: r.index,
        fields: r.errors.map((e) => ({ field: e.field, message: e.message })),
      })),
    });
  }

  // Track progress via SSE, WebSocket, or just log
  importer.on('import:progress', (e) => {
    console.log(`Import ${e.jobId}: ${e.progress.percentage}%`);
  });

  // Process each valid record
  await importer.start(async (record, context) => {
    await pool.query(
      'INSERT INTO users (email, name, role) VALUES ($1, $2, $3) ON CONFLICT (email) DO NOTHING',
      [record.email, record.name, record.role],
    );
  });

  const { progress } = await importer.getStatus();
  const failed = await importer.getFailedRecords();

  res.json({
    total: progress.totalRecords,
    processed: progress.processedRecords,
    failed: progress.failedRecords,
    errors: failed.map((r) => ({
      row: r.index,
      data: r.raw,
      reason: r.errors.map((e) => e.message).join('; '),
    })),
  });
});
```

## Works With

`@bulkimport/core` is framework and database agnostic. It works with anything that runs JavaScript:

### Frameworks

| Framework | Compatible | Notes |
|---|---|---|
| Express | Yes | Use with `multer` or a raw body parser for file uploads |
| Fastify | Yes | Use `@fastify/multipart` for file handling (see the sketch below) |
| Hono | Yes | Works in Node, Deno, Bun, and Cloudflare Workers |
| NestJS | Yes | Wrap in a service, inject via DI |
| Next.js API Routes | Yes | Server-side only (API routes / Server Actions) |
| Nuxt Server Routes | Yes | `server/api/` handlers |
| tRPC | Yes | Call from procedures |
| AWS Lambda | Yes | Pair with S3 events or API Gateway |
| Serverless (Vercel/Netlify) | Yes | Mind the function timeout for large files |
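
For example, a Fastify route can feed an uploaded CSV straight into the importer. This is a minimal sketch, assuming a trimmed-down schema; `@fastify/multipart` provides `req.file()` and `toBuffer()`:

```typescript
import Fastify from 'fastify';
import multipart from '@fastify/multipart';
import { BulkImport, CsvParser, BufferSource } from '@bulkimport/core';

const fastify = Fastify();
await fastify.register(multipart);

fastify.post('/import/users', async (req, reply) => {
  const file = await req.file();
  if (!file) return reply.code(400).send({ message: 'No file uploaded' });

  const importer = new BulkImport({
    schema: { fields: [{ name: 'email', type: 'email', required: true }] },
    batchSize: 500,
    continueOnError: true,
  });
  importer.from(new BufferSource(await file.toBuffer()), new CsvParser());

  await importer.start(async (record) => {
    // Persist with your database client of choice
  });

  const { progress } = await importer.getStatus();
  return reply.send({ processed: progress.processedRecords });
});
```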

### Databases / ORMs

| Database / ORM | Compatible | Notes |
|---|---|---|
| PostgreSQL (`pg`) | Yes | Use in the processor callback |
| MySQL (`mysql2`) | Yes | Use in the processor callback |
| MongoDB (`mongoose`) | Yes | `Model.insertMany` or per-record inserts |
| Prisma | Yes | `prisma.model.create()` in the processor (see the sketch below) |
| Drizzle ORM | Yes | Use `.insert()` in the processor |
| TypeORM | Yes | Use repository methods in the processor |
| SQLite (`better-sqlite3`) | Yes | Sync driver works inside the async callback |
| Redis | Yes | Use for caching or as a queue |
| DynamoDB | Yes | `PutItem` per record or `BatchWriteItem` |
| Supabase | Yes | Use the JS client in the processor |
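
For instance, wiring Prisma into the processor callback. This sketch assumes a hypothetical `User` model in your Prisma schema and an `importer` configured as in the Quick Start:

```typescript
import { PrismaClient } from '@prisma/client';
import type { BulkImport } from '@bulkimport/core';

const prisma = new PrismaClient();
declare const importer: BulkImport; // configured as in the Quick Start

await importer.start(async (record) => {
  // One validated record per row; adjust the fields to your own model
  await prisma.user.create({
    data: { email: String(record.email), name: String(record.name) },
  });
});
```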

### Runtimes

| Runtime | Compatible | Notes |
|---|---|---|
| Node.js >= 16.7 | Yes | Full support |
| Bun | Yes | Full support |
| Deno | Yes | Via npm specifiers |
| Browsers | Partial | Parsing and validation work; file system sources do not |
| Cloudflare Workers | Partial | No `Buffer`; use string sources (see the sketch below) |
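
On Workers that means reading the request body as a string rather than a `Buffer`; a minimal sketch:

```typescript
import { BulkImport, CsvParser, BufferSource } from '@bulkimport/core';

export default {
  async fetch(request: Request): Promise<Response> {
    // Workers have no Buffer, but BufferSource also accepts plain strings
    const csv = await request.text();

    const importer = new BulkImport({
      schema: { fields: [{ name: 'email', type: 'email', required: true }] },
    });
    importer.from(new BufferSource(csv), new CsvParser());

    const preview = await importer.preview(10);
    return Response.json({ invalidInSample: preview.invalidRecords.length });
  },
};
```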

## Custom Adapters

The library is designed to be extended. Implement the port interfaces to plug in your own sources, parsers, or state stores.

### Data Source

Implement `DataSource` to read from any origin:

```typescript
import type { DataSource, SourceMetadata } from '@bulkimport/core';

class S3Source implements DataSource {
  async *read(): AsyncIterable<string | Buffer> {
    const response = await s3.getObject({ Bucket: '...', Key: '...' });
    yield await response.Body.transformToString();
  }

  async sample(maxBytes = 65536): Promise<string | Buffer> {
    // One way to return a small chunk for preview: a ranged GET
    const response = await s3.getObject({
      Bucket: '...',
      Key: '...',
      Range: `bytes=0-${maxBytes - 1}`,
    });
    return response.Body.transformToString();
  }

  metadata(): SourceMetadata {
    return { fileName: 'data.csv', mimeType: 'text/csv' };
  }
}
```
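
Once implemented, the source drops straight into the same pipeline: `importer.from(new S3Source(), new CsvParser())`.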

### Parser

Implement `SourceParser` for any format:

```typescript
import type { SourceParser, RawRecord } from '@bulkimport/core';

class JsonParser implements SourceParser {
  async *parse(data: string | Buffer): AsyncIterable<RawRecord> {
    const items = JSON.parse(String(data));
    for (const item of items) {
      yield item;
    }
  }
}
```
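
Use it anywhere a parser is expected, e.g. `importer.from(new BufferSource(jsonString), new JsonParser())`.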

### State Store

Implement `StateStore` to persist state to your database:

```typescript
import type { StateStore } from '@bulkimport/core';

class PostgresStateStore implements StateStore {
  async saveJobState(job) { /* INSERT/UPDATE */ }
  async getJobState(jobId) { /* SELECT */ }
  // ... implement all methods
}

const importer = new BulkImport({
  schema: { /* ... */ },
  stateStore: new PostgresStateStore(pool),
});
```

## API Reference

### `BulkImport`

| Method | Description |
|---|---|
| `from(source, parser)` | Set the data source and parser. Returns `this` for chaining. |
| `on(event, handler)` | Subscribe to a lifecycle event. Returns `this`. |
| `preview(maxRecords?)` | Validate a sample of records without processing. |
| `start(processor)` | Begin processing all records through the provided callback. |
| `pause()` | Pause processing after the current record. |
| `resume()` | Resume a paused import. |
| `abort()` | Cancel the import permanently. |
| `getStatus()` | Get current state, progress, and batch details. |
| `getFailedRecords()` | Get all records that failed validation or processing. |
| `getPendingRecords()` | Get records not yet processed. |
| `getJobId()` | Get the unique job identifier. |
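
Because `from` and `on` return `this`, setup reads as a single chain:

```typescript
await importer
  .from(new BufferSource(csvString), new CsvParser())
  .on('import:progress', (e) => console.log(`${e.progress.percentage}%`))
  .start(async (record) => {
    // persist the record
  });
```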

### `BulkImportConfig`

| Property | Type | Default | Description |
|---|---|---|---|
| `schema` | `SchemaDefinition` | required | Field definitions and validation rules |
| `batchSize` | `number` | `100` | Records per batch |
| `continueOnError` | `boolean` | `false` | Keep processing when a record fails |
| `stateStore` | `StateStore` | `InMemoryStateStore` | Where to persist job state |

## Built-in Adapters

| Adapter | Description |
|---|---|
| `CsvParser` | CSV parsing with auto-delimiter detection (uses PapaParse) |
| `BufferSource` | Read from a string or Buffer in memory |
| `InMemoryStateStore` | Non-persistent state store (default) |

## Requirements

- Node.js >= 16.7.0 (uses `crypto.randomUUID` — stable since 16.7)
- TypeScript >= 5.0 (for consumers using TypeScript)

## Contributing

See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup, coding standards, and how to submit changes.

## License

[MIT](./LICENSE)