@yutaura/csv-batch-reader 0.0.0-1 → 1.0.0
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- package/README.md +39 -1
- package/dist/csv-batch-read.d.ts +2 -0
- package/dist/csv-batch-read.d.ts.map +1 -0
- package/dist/csv-batch-read.test.d.ts +2 -0
- package/dist/csv-batch-read.test.d.ts.map +1 -0
- package/dist/index.d.ts +1 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +86 -8
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +74 -7
- package/dist/index.mjs.map +1 -1
- package/dist/promise-with-resolvers.d.ts +6 -0
- package/dist/promise-with-resolvers.d.ts.map +1 -0
- package/dist/promise-with-resolvers.test.d.ts +2 -0
- package/dist/promise-with-resolvers.test.d.ts.map +1 -0
- package/package.json +15 -9
- package/src/csv-batch-read.test.ts +207 -0
- package/src/csv-batch-read.ts +73 -0
- package/src/index.ts +1 -7
- package/src/promise-with-resolvers.test.ts +45 -0
- package/src/promise-with-resolvers.ts +15 -0
package/README.md
CHANGED
@@ -1 +1,39 @@
-# csv-batch-reader
+# @yutaura/csv-batch-reader
+
+## Description
+
+This is a library for reading CSV files in batches.
+
+```typescript
+import { csvBatchRead } from '@yutaura/csv-batch-reader';
+
+
+await csvBatchRead(
+  'path/to/csv-file.csv',
+  100, // batch size
+  async (headers) => {
+    // do something with headers
+  },
+  async (batch) => {
+    // do something with batch
+    for (const row of batch) {
+      // do something with row
+      console.log(row);
+    }
+  },
+);
+```
+
+## Installation
+
+```bash
+pnpm install @yutaura/csv-batch-reader
+```
+
+## License
+
+MIT
+
+## Funding
+
+If you like this library, please consider supporting me on [GitHub Sponsors](https://github.com/sponsors/YutaUra)

package/dist/csv-batch-read.d.ts
ADDED
@@ -0,0 +1,2 @@
+export declare const csvBatchRead: <T extends Record<string, string> = Record<string, string>>(filePath: string, batchSize: number, handleHeader: (header: (keyof T)[]) => unknown, handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown) => Promise<void>;
+//# sourceMappingURL=csv-batch-read.d.ts.map

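This two-line declaration is, judging from src/index.ts later in this diff, the package's entire public API. A usage sketch inferred from the signature alone (the row type, file name, and handler bodies below are hypothetical, not taken from the package):

```typescript
import { csvBatchRead } from "@yutaura/csv-batch-reader";

// Hypothetical row shape; T defaults to Record<string, string>.
type UserRow = { id: string; name: string; email: string };

await csvBatchRead<UserRow>(
  "users.csv", // hypothetical path
  500, // batch size
  (header) => {
    // header: (keyof UserRow)[], i.e. the CSV column names
    console.log("columns:", header);
  },
  async (rows, batchCount, isLastChunk) => {
    // rows: UserRow[]; batchCount is the zero-based batch index;
    // isLastChunk is true only for the final (possibly short) batch.
    console.log(`batch ${batchCount}: ${rows.length} rows, last: ${isLastChunk}`);
  },
);
```
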
package/dist/csv-batch-read.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,YAAY,GACvB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,qCAEtB,MAAM,aACL,MAAM,gBACH,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,OAAO,eACjC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,KAAK,OAAO,kBA8D9E,CAAC"}

package/dist/csv-batch-read.test.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"csv-batch-read.test.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.test.ts"],"names":[],"mappings":""}

package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}

package/dist/index.js
CHANGED
@@ -1,7 +1,9 @@
 "use strict";
+var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
 var __export = (target, all) => {
   for (var name in all)
@@ -15,23 +17,99 @@ var __copyProps = (to, from, except, desc) => {
   }
   return to;
 };
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-
-  x: () => x
+  csvBatchRead: () => csvBatchRead
 });
 module.exports = __toCommonJS(src_exports);
-
-
-var
-
+
+// src/csv-batch-read.ts
+var import_node_fs = require("fs");
+var import_papaparse = __toESM(require("papaparse"));
+
+// src/promise-with-resolvers.ts
+var promiseWithResolvers = () => {
+  let resolve = () => void 0;
+  let reject = () => void 0;
+  const promise = new Promise((res, rej) => {
+    resolve = res;
+    reject = rej;
+  });
+  return {
+    promise,
+    resolve,
+    reject
+  };
+};
+
+// src/csv-batch-read.ts
+var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
+  return await new Promise((resolve, reject) => {
+    const stream = (0, import_node_fs.createReadStream)(filePath).pipe(
+      import_papaparse.default.parse(import_papaparse.default.NODE_STREAM_INPUT, { header: true })
+    );
+    let buf = [];
+    let batchCount = 0;
+    let isFirstChunk = true;
+    const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
+    const headerResolved = headerPromise.then(
+      (headers) => handleHeader(headers)
+    );
+    const shouldResolve = [headerResolved];
+    stream.on("data", (chunk) => {
+      if (isFirstChunk) {
+        resolveHeaders(Object.keys(chunk));
+        isFirstChunk = false;
+      }
+      if (buf.length === batchSize) {
+        const currentBatchCount = batchCount;
+        const currentBuf = buf.slice();
+        shouldResolve.push(
+          headerResolved.then(
+            () => handleBatch(currentBuf, currentBatchCount, false)
+          )
+        );
+        buf = [];
+        batchCount++;
+      }
+      buf.push(chunk);
+    });
+    stream.on("end", () => {
+      const currentBatchCount = batchCount;
+      const currentBuf = buf.slice();
+      shouldResolve.push(
+        headerResolved.then(
+          () => handleBatch(currentBuf, currentBatchCount, true)
+        )
+      );
+      Promise.all(shouldResolve).then(() => {
+        stream.destroy();
+        resolve();
+      }).catch((error) => {
+        stream.destroy();
+        reject(error);
+      });
+    });
+    stream.on("error", (error) => {
+      console.error("error", error);
+      stream.destroy();
+      reject(error);
+    });
+  });
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-
-  x
+  csvBatchRead
 });
 //# sourceMappingURL=index.js.map

package/dist/index.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":["../src/index.ts"],"sourcesContent":["
+{"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,\n) => {\n  return await new Promise<void>((resolve, reject) => {\n    const stream = createReadStream(filePath).pipe(\n      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n    );\n\n    let buf: T[] = [];\n    let batchCount = 0;\n    let isFirstChunk = true;\n    const { promise: headerPromise, resolve: resolveHeaders } =\n      promiseWithResolvers<(keyof T)[]>();\n    const headerResolved = headerPromise.then((headers) =>\n      handleHeader(headers),\n    );\n    const shouldResolve: unknown[] = [headerResolved];\n\n    stream.on(\"data\", (chunk: T) => {\n      if (isFirstChunk) {\n        resolveHeaders(Object.keys(chunk));\n        isFirstChunk = false;\n      }\n      if (buf.length === batchSize) {\n        const currentBatchCount = batchCount;\n        const currentBuf = buf.slice();\n        shouldResolve.push(\n          headerResolved.then(() =>\n            handleBatch(currentBuf, currentBatchCount, false),\n          ),\n        );\n        buf = [];\n        batchCount++;\n      }\n      buf.push(chunk);\n    });\n\n    stream.on(\"end\", () => {\n      const currentBatchCount = batchCount;\n      const currentBuf = buf.slice();\n      shouldResolve.push(\n        headerResolved.then(() =>\n          handleBatch(currentBuf, currentBatchCount, true),\n        ),\n      );\n\n      Promise.all(shouldResolve)\n        .then(() => {\n          stream.destroy();\n          resolve();\n        })\n        .catch((error) => {\n          stream.destroy();\n          reject(error);\n        });\n    });\n\n    stream.on(\"error\", (error) => {\n      console.error(\"error\", error);\n      stream.destroy();\n      reject(error);\n    });\n  });\n};\n","export const promiseWithResolvers = <T = void>() => {\n  // new Promise のコンストラクタ自体は非同期処理を行わないため、\n  // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n  let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n  let reject: (reason?: unknown) => void = () => undefined;\n  const promise = new Promise<T>((res, rej) => {\n    resolve = res;\n    reject = rej;\n  });\n  return {\n    promise,\n    resolve,\n    reject,\n  };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,qBAAiC;AACjC,uBAAiB;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBACG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,aAAS,iCAAiB,QAAQ,EAAE;AAAA,MACxC,iBAAAA,QAAK,MAAM,iBAAAA,QAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc;AAAA,MAAK,CAAC,YACzC,aAAa,OAAO;AAAA,IACtB;AACA,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,MAClB,YAAY,YAAY,mBAAmB,KAAK;AAAA,UAClD;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,MAClB,YAAY,YAAY,mBAAmB,IAAI;AAAA,QACjD;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["Papa"]}

package/dist/index.mjs
CHANGED
@@ -1,11 +1,78 @@
-// src/
-
-
-
-
+// src/csv-batch-read.ts
+import { createReadStream } from "node:fs";
+import Papa from "papaparse";
+
+// src/promise-with-resolvers.ts
+var promiseWithResolvers = () => {
+  let resolve = () => void 0;
+  let reject = () => void 0;
+  const promise = new Promise((res, rej) => {
+    resolve = res;
+    reject = rej;
+  });
+  return {
+    promise,
+    resolve,
+    reject
+  };
+};
+
+// src/csv-batch-read.ts
+var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
+  return await new Promise((resolve, reject) => {
+    const stream = createReadStream(filePath).pipe(
+      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true })
+    );
+    let buf = [];
+    let batchCount = 0;
+    let isFirstChunk = true;
+    const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
+    const headerResolved = headerPromise.then(
+      (headers) => handleHeader(headers)
+    );
+    const shouldResolve = [headerResolved];
+    stream.on("data", (chunk) => {
+      if (isFirstChunk) {
+        resolveHeaders(Object.keys(chunk));
+        isFirstChunk = false;
+      }
+      if (buf.length === batchSize) {
+        const currentBatchCount = batchCount;
+        const currentBuf = buf.slice();
+        shouldResolve.push(
+          headerResolved.then(
+            () => handleBatch(currentBuf, currentBatchCount, false)
+          )
+        );
+        buf = [];
+        batchCount++;
+      }
+      buf.push(chunk);
+    });
+    stream.on("end", () => {
+      const currentBatchCount = batchCount;
+      const currentBuf = buf.slice();
+      shouldResolve.push(
+        headerResolved.then(
+          () => handleBatch(currentBuf, currentBatchCount, true)
+        )
+      );
+      Promise.all(shouldResolve).then(() => {
+        stream.destroy();
+        resolve();
+      }).catch((error) => {
+        stream.destroy();
+        reject(error);
+      });
+    });
+    stream.on("error", (error) => {
+      console.error("error", error);
+      stream.destroy();
+      reject(error);
+    });
+  });
 };
 export {
-
-  x
+  csvBatchRead
 };
 //# sourceMappingURL=index.mjs.map

package/dist/index.mjs.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":["../src/
+{"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,\n) => {\n  return await new Promise<void>((resolve, reject) => {\n    const stream = createReadStream(filePath).pipe(\n      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n    );\n\n    let buf: T[] = [];\n    let batchCount = 0;\n    let isFirstChunk = true;\n    const { promise: headerPromise, resolve: resolveHeaders } =\n      promiseWithResolvers<(keyof T)[]>();\n    const headerResolved = headerPromise.then((headers) =>\n      handleHeader(headers),\n    );\n    const shouldResolve: unknown[] = [headerResolved];\n\n    stream.on(\"data\", (chunk: T) => {\n      if (isFirstChunk) {\n        resolveHeaders(Object.keys(chunk));\n        isFirstChunk = false;\n      }\n      if (buf.length === batchSize) {\n        const currentBatchCount = batchCount;\n        const currentBuf = buf.slice();\n        shouldResolve.push(\n          headerResolved.then(() =>\n            handleBatch(currentBuf, currentBatchCount, false),\n          ),\n        );\n        buf = [];\n        batchCount++;\n      }\n      buf.push(chunk);\n    });\n\n    stream.on(\"end\", () => {\n      const currentBatchCount = batchCount;\n      const currentBuf = buf.slice();\n      shouldResolve.push(\n        headerResolved.then(() =>\n          handleBatch(currentBuf, currentBatchCount, true),\n        ),\n      );\n\n      Promise.all(shouldResolve)\n        .then(() => {\n          stream.destroy();\n          resolve();\n        })\n        .catch((error) => {\n          stream.destroy();\n          reject(error);\n        });\n    });\n\n    stream.on(\"error\", (error) => {\n      console.error(\"error\", error);\n      stream.destroy();\n      reject(error);\n    });\n  });\n};\n","export const promiseWithResolvers = <T = void>() => {\n  // new Promise のコンストラクタ自体は非同期処理を行わないため、\n  // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n  let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n  let reject: (reason?: unknown) => void = () => undefined;\n  const promise = new Promise<T>((res, rej) => {\n    resolve = res;\n    reject = rej;\n  });\n  return {\n    promise,\n    resolve,\n    reject,\n  };\n};\n"],"mappings":";AAAA,SAAS,wBAAwB;AACjC,OAAO,UAAU;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBACG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,SAAS,iBAAiB,QAAQ,EAAE;AAAA,MACxC,KAAK,MAAM,KAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc;AAAA,MAAK,CAAC,YACzC,aAAa,OAAO;AAAA,IACtB;AACA,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,MAClB,YAAY,YAAY,mBAAmB,KAAK;AAAA,UAClD;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,MAClB,YAAY,YAAY,mBAAmB,IAAI;AAAA,QACjD;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":[]}

package/dist/promise-with-resolvers.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"promise-with-resolvers.d.ts","sourceRoot":"","sources":["../src/promise-with-resolvers.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,oBAAoB,GAAI,CAAC;;qBAGf,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,IAAI;sBAC1B,OAAO,KAAK,IAAI;CAUvC,CAAC"}

package/dist/promise-with-resolvers.test.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"promise-with-resolvers.test.d.ts","sourceRoot":"","sources":["../src/promise-with-resolvers.test.ts"],"names":[],"mappings":""}

package/package.json
CHANGED
@@ -1,13 +1,9 @@
 {
   "name": "@yutaura/csv-batch-reader",
-  "version": "
+  "version": "1.0.0",
   "publishConfig": {
     "access": "public"
   },
-  "scripts": {
-    "build": "tsup",
-    "prepublish": "pnpm run build"
-  },
   "types": "dist/index.d.ts",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
@@ -21,13 +17,23 @@
   ],
   "author": "YutaUra",
   "license": "MIT",
-  "
+  "dependencies": {
+    "papaparse": "5.4.1"
+  },
   "devDependencies": {
+    "@biomejs/biome": "1.9.2",
     "@changesets/cli": "2.27.8",
-    "@microsoft/api-extractor": "7.47.9",
     "@tsconfig/strictest": "2.0.5",
     "@types/node": "22.6.1",
+    "@types/papaparse": "5.3.14",
     "tsup": "8.3.0",
-    "typescript": "5.6.2"
+    "typescript": "5.6.2",
+    "vitest": "2.1.1"
+  },
+  "scripts": {
+    "build": "tsup",
+    "check": "tsc --noEmit && biome check --fix",
+    "test": "vitest run",
+    "publish-packages": "pnpm run build && changeset version && changeset publish"
   }
-}
+}

package/src/csv-batch-read.test.ts
ADDED
@@ -0,0 +1,207 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import { setTimeout } from "node:timers/promises";
+import { csvBatchRead } from "./csv-batch-read.js";
+
+const TEST_CSV_FILE = join(await mkdtemp("test-csv-file"), "test.csv");
+
+beforeAll(async () => {
+  await mkdir(dirname(TEST_CSV_FILE), { recursive: true });
+});
+
+afterAll(async () => {
+  await rm(dirname(TEST_CSV_FILE), { recursive: true });
+});
+
+beforeEach(async () => {
+  await writeFile(
+    TEST_CSV_FILE,
+    `\
+a,b,c
+100,101,102
+200,201,202
+300,301,302
+400,401,402
+500,501,502
+600,601,602
+700,701,702
+800,801,802
+900,901,902
+`,
+    { encoding: "utf-8", flag: "w" },
+  );
+});
+
+describe("csvBatchRead", () => {
+  it("should read headers", async () => {
+    // given
+    let headers: string[] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      100,
+      (h) => {
+        headers = h;
+      },
+      () => {},
+    );
+    // then
+    expect(headers).toStrictEqual(["a", "b", "c"]);
+  });
+
+  it("should read rows", async () => {
+    // given
+    const rows: Record<string, string>[][] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      3,
+      () => {},
+      (r) => {
+        rows.push(r);
+      },
+    );
+    // then
+    expect(rows).toStrictEqual([
+      [
+        { a: "100", b: "101", c: "102" },
+        { a: "200", b: "201", c: "202" },
+        { a: "300", b: "301", c: "302" },
+      ],
+      [
+        { a: "400", b: "401", c: "402" },
+        { a: "500", b: "501", c: "502" },
+        { a: "600", b: "601", c: "602" },
+      ],
+      [
+        { a: "700", b: "701", c: "702" },
+        { a: "800", b: "801", c: "802" },
+        { a: "900", b: "901", c: "902" },
+      ],
+    ]);
+  });
+
+  it("when batchSize is 1, should read rows one by one", async () => {
+    // given
+    const rows: Record<string, string>[][] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      1,
+      () => {},
+      (r) => {
+        rows.push(r);
+      },
+    );
+    // then
+    expect(rows).toStrictEqual([
+      [{ a: "100", b: "101", c: "102" }],
+      [{ a: "200", b: "201", c: "202" }],
+      [{ a: "300", b: "301", c: "302" }],
+      [{ a: "400", b: "401", c: "402" }],
+      [{ a: "500", b: "501", c: "502" }],
+      [{ a: "600", b: "601", c: "602" }],
+      [{ a: "700", b: "701", c: "702" }],
+      [{ a: "800", b: "801", c: "802" }],
+      [{ a: "900", b: "901", c: "902" }],
+    ]);
+  });
+
+  it("when batchSize is csv row count, should read rows in one batch", async () => {
+    // given
+    const rows: Record<string, string>[][] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      9,
+      () => {},
+      (r) => {
+        rows.push(r);
+      },
+    );
+    // then
+    expect(rows).toStrictEqual([
+      [
+        { a: "100", b: "101", c: "102" },
+        { a: "200", b: "201", c: "202" },
+        { a: "300", b: "301", c: "302" },
+        { a: "400", b: "401", c: "402" },
+        { a: "500", b: "501", c: "502" },
+        { a: "600", b: "601", c: "602" },
+        { a: "700", b: "701", c: "702" },
+        { a: "800", b: "801", c: "802" },
+        { a: "900", b: "901", c: "902" },
+      ],
+    ]);
+  });
+
+  it("when batchSize is larger than csv row count, should read rows in one batch", async () => {
+    // given
+    const rows: Record<string, string>[][] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      1000,
+      () => {},
+      (r) => {
+        rows.push(r);
+      },
+    );
+    // then
+    expect(rows).toStrictEqual([
+      [
+        { a: "100", b: "101", c: "102" },
+        { a: "200", b: "201", c: "202" },
+        { a: "300", b: "301", c: "302" },
+        { a: "400", b: "401", c: "402" },
+        { a: "500", b: "501", c: "502" },
+        { a: "600", b: "601", c: "602" },
+        { a: "700", b: "701", c: "702" },
+        { a: "800", b: "801", c: "802" },
+        { a: "900", b: "901", c: "902" },
+      ],
+    ]);
+  });
+
+  it("rows handler should be called after header handler", async () => {
+    // given
+    let headerResolvedAt = 0;
+    const rowsResolvedAt: number[] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      3,
+      async () => {
+        await setTimeout(500);
+        headerResolvedAt = performance.now();
+      },
+      () => {
+        rowsResolvedAt.push(performance.now());
+      },
+    );
+    // then
+    for (const rowResolvedAt of rowsResolvedAt) {
+      expect(rowResolvedAt).toBeGreaterThan(headerResolvedAt);
+    }
+  });
+
+  it("csvBatchRead should be done after all rows handlers resolved", async () => {
+    // given
+    const rowsResolvedAt: number[] = [];
+    // when
+    await csvBatchRead(
+      TEST_CSV_FILE,
+      3,
+      () => {},
+      async () => {
+        await setTimeout(500);
+        rowsResolvedAt.push(performance.now());
+      },
+    );
+    const csvBatchReadResolvedAt = performance.now();
+    // then
+    for (const rowResolvedAt of rowsResolvedAt) {
+      expect(rowResolvedAt).toBeLessThan(csvBatchReadResolvedAt);
+    }
+  });
+});

package/src/csv-batch-read.ts
ADDED
@@ -0,0 +1,73 @@
+import { createReadStream } from "node:fs";
+import Papa from "papaparse";
+import { promiseWithResolvers } from "./promise-with-resolvers.js";
+
+export const csvBatchRead = async <
+  T extends Record<string, string> = Record<string, string>,
+>(
+  filePath: string,
+  batchSize: number,
+  handleHeader: (header: (keyof T)[]) => unknown,
+  handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,
+) => {
+  return await new Promise<void>((resolve, reject) => {
+    const stream = createReadStream(filePath).pipe(
+      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
+    );
+
+    let buf: T[] = [];
+    let batchCount = 0;
+    let isFirstChunk = true;
+    const { promise: headerPromise, resolve: resolveHeaders } =
+      promiseWithResolvers<(keyof T)[]>();
+    const headerResolved = headerPromise.then((headers) =>
+      handleHeader(headers),
+    );
+    const shouldResolve: unknown[] = [headerResolved];
+
+    stream.on("data", (chunk: T) => {
+      if (isFirstChunk) {
+        resolveHeaders(Object.keys(chunk));
+        isFirstChunk = false;
+      }
+      if (buf.length === batchSize) {
+        const currentBatchCount = batchCount;
+        const currentBuf = buf.slice();
+        shouldResolve.push(
+          headerResolved.then(() =>
+            handleBatch(currentBuf, currentBatchCount, false),
+          ),
+        );
+        buf = [];
+        batchCount++;
+      }
+      buf.push(chunk);
+    });
+
+    stream.on("end", () => {
+      const currentBatchCount = batchCount;
+      const currentBuf = buf.slice();
+      shouldResolve.push(
+        headerResolved.then(() =>
+          handleBatch(currentBuf, currentBatchCount, true),
+        ),
+      );
+
+      Promise.all(shouldResolve)
+        .then(() => {
+          stream.destroy();
+          resolve();
+        })
+        .catch((error) => {
+          stream.destroy();
+          reject(error);
+        });
+    });
+
+    stream.on("error", (error) => {
+      console.error("error", error);
+      stream.destroy();
+      reject(error);
+    });
+  });
+};

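One detail of the implementation above is easy to miss: every handleBatch call is chained off headerResolved instead of being invoked directly, and the resulting promises are collected into shouldResolve. That is what the "rows handler should be called after header handler" and "csvBatchRead should be done after all rows handlers resolved" tests earlier in this diff rely on. A stripped-down sketch of the same pattern, with the CSV parsing removed:

```typescript
// Every consumer chains off the same gate promise, so none can start
// before the gate's handler has settled; Promise.all then makes the
// caller wait until every consumer has settled as well.
const gate = Promise.resolve(["a", "b", "c"]).then((headers) => {
  console.log("header:", headers);
});
const pending: Promise<void>[] = [gate];
for (const i of [0, 1, 2]) {
  pending.push(gate.then(() => console.log("batch", i)));
}
await Promise.all(pending);
// Logs the header line first, then "batch 0", "batch 1", "batch 2".
```
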
package/src/index.ts
CHANGED
package/src/promise-with-resolvers.test.ts
ADDED
@@ -0,0 +1,45 @@
+import { setTimeout } from "node:timers/promises";
+import { promiseWithResolvers } from "./promise-with-resolvers.js";
+
+describe("promiseWithResolvers", () => {
+  it("promiseWithResolvers returns promise, resolve, reject", () => {
+    // given
+    // when
+    const returns = promiseWithResolvers();
+    // then
+    expect(returns).toHaveProperty("promise");
+    expect(returns).toHaveProperty("resolve");
+    expect(returns).toHaveProperty("reject");
+  });
+
+  it("resolve should resolve promise", async () => {
+    // given
+    const { promise, resolve } = promiseWithResolvers<string>();
+    // when
+    resolve("resolved");
+    // then
+    await expect(promise).resolves.toBe("resolved");
+  });
+
+  it("when not resolved, promise should be pending", async () => {
+    // given
+    const { promise } = promiseWithResolvers();
+    // when
+    const res = await Promise.race([
+      promise.then(() => "resolved"),
+      setTimeout(3000).then(() => "pending"),
+    ]);
+    // then
+    // We only wait up to 3 seconds; we cannot wait forever, so this is a compromise.
+    expect(res).toBe("pending");
+  });
+
+  it("reject should reject promise", async () => {
+    // given
+    const { promise, reject } = promiseWithResolvers();
+    // when
+    reject("rejected");
+    // then
+    await expect(promise).rejects.toBe("rejected");
+  });
+});

package/src/promise-with-resolvers.ts
ADDED
@@ -0,0 +1,15 @@
+export const promiseWithResolvers = <T = void>() => {
+  // Because the Promise constructor itself does nothing asynchronous,
+  // the assignments to resolve and reject are complete by the time the promise has been created.
+  let resolve: (value: T | PromiseLike<T>) => void = () => undefined;
+  let reject: (reason?: unknown) => void = () => undefined;
+  const promise = new Promise<T>((res, rej) => {
+    resolve = res;
+    reject = rej;
+  });
+  return {
+    promise,
+    resolve,
+    reject,
+  };
+};

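This helper is a userland stand-in for Promise.withResolvers(), which is part of ES2024 and available natively in Node.js 22+; shipping a local copy presumably keeps the package usable on older runtimes. The native equivalent, for comparison:

```typescript
// Native ES2024 API (Node.js 22+): same shape as the helper above.
const { promise, resolve } = Promise.withResolvers<string>();
resolve("done");
console.log(await promise); // "done"
```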