@yutaura/csv-batch-reader 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv-batch-read.d.ts +2 -1
- package/dist/csv-batch-read.d.ts.map +1 -1
- package/dist/index.js +17 -14
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +17 -14
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
- package/src/csv-batch-read.test.ts +12 -10
- package/src/csv-batch-read.ts +18 -15
package/dist/csv-batch-read.d.ts
CHANGED
@@ -1,2 +1,3 @@
-
+import { PassThrough } from "node:stream";
+export declare const csvBatchRead: <T extends Record<string, string> = Record<string, string>>(filePath: string, batchSize: number, handleHeader: (header: (keyof T)[]) => unknown, handleBatch: (rows: PassThrough, batchCount: number, isLastChunk: boolean, header: (keyof T)[]) => unknown) => Promise<void>;
 //# sourceMappingURL=csv-batch-read.d.ts.map

package/dist/csv-batch-read.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"
+{"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAK1C,eAAO,MAAM,YAAY,GACvB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,qCAEtB,MAAM,aACL,MAAM,gBACH,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,OAAO,eACjC,CACX,IAAI,EAAE,WAAW,EACjB,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,OAAO,EACpB,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAChB,OAAO,kBAgEb,CAAC"}
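The new declaration above changes the `rows` argument of `handleBatch` from a buffered value to an object-mode PassThrough from `node:stream`. A minimal, hypothetical caller of the 1.2.x signature might look like the sketch below; the Row shape, the CSV path, and the use of Readable.prototype.toArray() (Node 17.5+) are assumptions, though the package's own tests (further down) drain each batch the same way:

import { csvBatchRead } from "@yutaura/csv-batch-reader";

// Hypothetical row type and file path, for illustration only.
type Row = { id: string; name: string };

await csvBatchRead<Row>(
  "./users.csv",
  100, // batchSize: rows per batch
  (header) => {
    console.log("columns:", header);
  },
  async (rows, batchCount, isLastChunk, header) => {
    // `rows` is an object-mode PassThrough in 1.2.x (see the .d.ts diff above).
    const batch = (await rows.toArray()) as Row[];
    console.log(`batch #${batchCount}: ${batch.length} rows (last: ${isLastChunk})`);
  },
);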
package/dist/index.js
CHANGED
@@ -36,6 +36,8 @@ module.exports = __toCommonJS(src_exports);

 // src/csv-batch-read.ts
 var import_node_fs = require("fs");
+var import_node_stream = require("stream");
+var import_promises = require("stream/promises");
 var import_papaparse = __toESM(require("papaparse"));

 // src/promise-with-resolvers.ts
@@ -59,7 +61,8 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
     const stream = (0, import_node_fs.createReadStream)(filePath).pipe(
       import_papaparse.default.parse(import_papaparse.default.NODE_STREAM_INPUT, { header: true })
     );
-    let
+    let currentStream = new import_node_stream.PassThrough({ objectMode: true });
+    let currentStreamCount = 0;
     let batchCount = 0;
     let isFirstChunk = true;
     const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
@@ -73,27 +76,27 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
         resolveHeaders(Object.keys(chunk));
         isFirstChunk = false;
       }
-      if (
+      if (currentStreamCount === batchSize) {
+        shouldResolve.push((0, import_promises.finished)(currentStream.end()));
+        currentStream = new import_node_stream.PassThrough({ objectMode: true });
+        currentStreamCount = 0;
+        batchCount++;
+      }
+      if (currentStreamCount === 0) {
+        const stream2 = currentStream;
         const currentBatchCount = batchCount;
-        const currentBuf = buf.slice();
         shouldResolve.push(
           headerResolved.then(
-            (headers) => handleBatch(
+            (headers) => handleBatch(stream2, currentBatchCount, false, headers)
           )
         );
-        buf = [];
-        batchCount++;
       }
-
+      currentStream.push(chunk);
+      currentStreamCount++;
     });
     stream.on("end", () => {
-
-
-      shouldResolve.push(
-        headerResolved.then(
-          (headers) => handleBatch(currentBuf, currentBatchCount, true, headers)
-        )
-      );
+      console.log("stream end");
+      currentStream.end();
       Promise.all(shouldResolve).then(() => {
         stream.destroy();
         resolve();
package/dist/index.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (\n    rows:
+{"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport { PassThrough } from \"node:stream\";\nimport { finished } from \"node:stream/promises\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (\n    rows: PassThrough,\n    batchCount: number,\n    isLastChunk: boolean,\n    header: (keyof T)[],\n  ) => unknown,\n) => {\n  return await new Promise<void>((resolve, reject) => {\n    const stream = createReadStream(filePath).pipe(\n      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n    );\n\n    let currentStream = new PassThrough({ objectMode: true });\n    let currentStreamCount = 0;\n    let batchCount = 0;\n    let isFirstChunk = true;\n    const { promise: headerPromise, resolve: resolveHeaders } =\n      promiseWithResolvers<(keyof T)[]>();\n    const headerResolved = headerPromise.then(async (headers) => {\n      await handleHeader(headers);\n      return headers;\n    });\n    const shouldResolve: unknown[] = [headerResolved];\n\n    stream.on(\"data\", (chunk: T) => {\n      if (isFirstChunk) {\n        resolveHeaders(Object.keys(chunk));\n        isFirstChunk = false;\n      }\n      if (currentStreamCount === batchSize) {\n        shouldResolve.push(finished(currentStream.end()));\n        currentStream = new PassThrough({ objectMode: true });\n        currentStreamCount = 0;\n        batchCount++;\n      }\n      if (currentStreamCount === 0) {\n        const stream = currentStream;\n        const currentBatchCount = batchCount;\n        shouldResolve.push(\n          headerResolved.then((headers) =>\n            handleBatch(stream, currentBatchCount, false, headers),\n          ),\n        );\n      }\n      currentStream.push(chunk);\n      currentStreamCount++;\n    });\n\n    stream.on(\"end\", () => {\n      console.log(\"stream end\");\n      currentStream.end();\n\n      Promise.all(shouldResolve)\n        .then(() => {\n          stream.destroy();\n          resolve();\n        })\n        .catch((error) => {\n          stream.destroy();\n          reject(error);\n        });\n    });\n\n    stream.on(\"error\", (error) => {\n      console.error(\"error\", error);\n      stream.destroy();\n      reject(error);\n    });\n  });\n};\n","export const promiseWithResolvers = <T = void>() => {\n  // new Promise のコンストラクタ自体は非同期処理を行わないため、\n  // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n  let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n  let reject: (reason?: unknown) => void = () => undefined;\n  const promise = new Promise<T>((res, rej) => {\n    resolve = res;\n    reject = rej;\n  });\n  return {\n    promise,\n    resolve,\n    reject,\n  };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,qBAAiC;AACjC,yBAA4B;AAC5B,sBAAyB;AACzB,uBAAiB;;;ACHV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADRO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,aAAS,iCAAiB,QAAQ,EAAE;AAAA,MACxC,iBAAAA,QAAK,MAAM,iBAAAA,QAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,gBAAgB,IAAI,+BAAY,EAAE,YAAY,KAAK,CAAC;AACxD,QAAI,qBAAqB;AACzB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,uBAAuB,WAAW;AACpC,sBAAc,SAAK,0BAAS,cAAc,IAAI,CAAC,CAAC;AAChD,wBAAgB,IAAI,+BAAY,EAAE,YAAY,KAAK,CAAC;AACpD,6BAAqB;AACrB;AAAA,MACF;AACA,UAAI,uBAAuB,GAAG;AAC5B,cAAMC,UAAS;AACf,cAAM,oBAAoB;AAC1B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAYA,SAAQ,mBAAmB,OAAO,OAAO;AAAA,UACvD;AAAA,QACF;AAAA,MACF;AACA,oBAAc,KAAK,KAAK;AACxB;AAAA,IACF,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,cAAQ,IAAI,YAAY;AACxB,oBAAc,IAAI;AAElB,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["Papa","stream"]}
package/dist/index.mjs
CHANGED
@@ -1,5 +1,7 @@
 // src/csv-batch-read.ts
 import { createReadStream } from "node:fs";
+import { PassThrough } from "node:stream";
+import { finished } from "node:stream/promises";
 import Papa from "papaparse";

 // src/promise-with-resolvers.ts
@@ -23,7 +25,8 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
     const stream = createReadStream(filePath).pipe(
       Papa.parse(Papa.NODE_STREAM_INPUT, { header: true })
     );
-    let
+    let currentStream = new PassThrough({ objectMode: true });
+    let currentStreamCount = 0;
     let batchCount = 0;
     let isFirstChunk = true;
     const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
@@ -37,27 +40,27 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
         resolveHeaders(Object.keys(chunk));
         isFirstChunk = false;
       }
-      if (
+      if (currentStreamCount === batchSize) {
+        shouldResolve.push(finished(currentStream.end()));
+        currentStream = new PassThrough({ objectMode: true });
+        currentStreamCount = 0;
+        batchCount++;
+      }
+      if (currentStreamCount === 0) {
+        const stream2 = currentStream;
         const currentBatchCount = batchCount;
-        const currentBuf = buf.slice();
         shouldResolve.push(
           headerResolved.then(
-            (headers) => handleBatch(
+            (headers) => handleBatch(stream2, currentBatchCount, false, headers)
          )
        );
-        buf = [];
-        batchCount++;
       }
-
+      currentStream.push(chunk);
+      currentStreamCount++;
     });
     stream.on("end", () => {
-
-
-      shouldResolve.push(
-        headerResolved.then(
-          (headers) => handleBatch(currentBuf, currentBatchCount, true, headers)
-        )
-      );
+      console.log("stream end");
+      currentStream.end();
       Promise.all(shouldResolve).then(() => {
         stream.destroy();
         resolve();
package/dist/index.mjs.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (\n    rows:
+{"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport { PassThrough } from \"node:stream\";\nimport { finished } from \"node:stream/promises\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n  T extends Record<string, string> = Record<string, string>,\n>(\n  filePath: string,\n  batchSize: number,\n  handleHeader: (header: (keyof T)[]) => unknown,\n  handleBatch: (\n    rows: PassThrough,\n    batchCount: number,\n    isLastChunk: boolean,\n    header: (keyof T)[],\n  ) => unknown,\n) => {\n  return await new Promise<void>((resolve, reject) => {\n    const stream = createReadStream(filePath).pipe(\n      Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n    );\n\n    let currentStream = new PassThrough({ objectMode: true });\n    let currentStreamCount = 0;\n    let batchCount = 0;\n    let isFirstChunk = true;\n    const { promise: headerPromise, resolve: resolveHeaders } =\n      promiseWithResolvers<(keyof T)[]>();\n    const headerResolved = headerPromise.then(async (headers) => {\n      await handleHeader(headers);\n      return headers;\n    });\n    const shouldResolve: unknown[] = [headerResolved];\n\n    stream.on(\"data\", (chunk: T) => {\n      if (isFirstChunk) {\n        resolveHeaders(Object.keys(chunk));\n        isFirstChunk = false;\n      }\n      if (currentStreamCount === batchSize) {\n        shouldResolve.push(finished(currentStream.end()));\n        currentStream = new PassThrough({ objectMode: true });\n        currentStreamCount = 0;\n        batchCount++;\n      }\n      if (currentStreamCount === 0) {\n        const stream = currentStream;\n        const currentBatchCount = batchCount;\n        shouldResolve.push(\n          headerResolved.then((headers) =>\n            handleBatch(stream, currentBatchCount, false, headers),\n          ),\n        );\n      }\n      currentStream.push(chunk);\n      currentStreamCount++;\n    });\n\n    stream.on(\"end\", () => {\n      console.log(\"stream end\");\n      currentStream.end();\n\n      Promise.all(shouldResolve)\n        .then(() => {\n          stream.destroy();\n          resolve();\n        })\n        .catch((error) => {\n          stream.destroy();\n          reject(error);\n        });\n    });\n\n    stream.on(\"error\", (error) => {\n      console.error(\"error\", error);\n      stream.destroy();\n      reject(error);\n    });\n  });\n};\n","export const promiseWithResolvers = <T = void>() => {\n  // new Promise のコンストラクタ自体は非同期処理を行わないため、\n  // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n  let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n  let reject: (reason?: unknown) => void = () => undefined;\n  const promise = new Promise<T>((res, rej) => {\n    resolve = res;\n    reject = rej;\n  });\n  return {\n    promise,\n    resolve,\n    reject,\n  };\n};\n"],"mappings":";AAAA,SAAS,wBAAwB;AACjC,SAAS,mBAAmB;AAC5B,SAAS,gBAAgB;AACzB,OAAO,UAAU;;;ACHV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADRO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,SAAS,iBAAiB,QAAQ,EAAE;AAAA,MACxC,KAAK,MAAM,KAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,gBAAgB,IAAI,YAAY,EAAE,YAAY,KAAK,CAAC;AACxD,QAAI,qBAAqB;AACzB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,uBAAuB,WAAW;AACpC,sBAAc,KAAK,SAAS,cAAc,IAAI,CAAC,CAAC;AAChD,wBAAgB,IAAI,YAAY,EAAE,YAAY,KAAK,CAAC;AACpD,6BAAqB;AACrB;AAAA,MACF;AACA,UAAI,uBAAuB,GAAG;AAC5B,cAAMA,UAAS;AACf,cAAM,oBAAoB;AAC1B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAYA,SAAQ,mBAAmB,OAAO,OAAO;AAAA,UACvD;AAAA,QACF;AAAA,MACF;AACA,oBAAc,KAAK,KAAK;AACxB;AAAA,IACF,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,cAAQ,IAAI,YAAY;AACxB,oBAAc,IAAI;AAElB,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["stream"]}
package/package.json
CHANGED

package/src/csv-batch-read.test.ts
CHANGED
@@ -57,8 +57,8 @@ describe("csvBatchRead", () => {
       TEST_CSV_FILE,
       3,
       () => {},
-      (
-        rows.push(
+      async (stream) => {
+        rows.push(await stream.toArray());
       },
     );
     // then
@@ -89,8 +89,8 @@ describe("csvBatchRead", () => {
       TEST_CSV_FILE,
       1,
       () => {},
-      (r) => {
-        rows.push(r);
+      async (r) => {
+        rows.push(await r.toArray());
       },
     );
     // then
@@ -115,8 +115,8 @@ describe("csvBatchRead", () => {
       TEST_CSV_FILE,
       9,
       () => {},
-      (r) => {
-        rows.push(r);
+      async (r) => {
+        rows.push(await r.toArray());
       },
     );
     // then
@@ -143,8 +143,8 @@ describe("csvBatchRead", () => {
       TEST_CSV_FILE,
       1000,
       () => {},
-      (r) => {
-        rows.push(r);
+      async (r) => {
+        rows.push(await r.toArray());
       },
     );
     // then
@@ -175,7 +175,8 @@ describe("csvBatchRead", () => {
         await setTimeout(500);
         headerResolvedAt = performance.now();
       },
-      () => {
+      async (stream) => {
+        await stream.toArray();
         rowsResolvedAt.push(performance.now());
       },
     );
@@ -193,7 +194,8 @@ describe("csvBatchRead", () => {
       TEST_CSV_FILE,
       3,
       () => {},
-      async () => {
+      async (stream) => {
+        await stream.toArray();
         await setTimeout(500);
         rowsResolvedAt.push(performance.now());
       },
package/src/csv-batch-read.ts
CHANGED
@@ -1,4 +1,6 @@
 import { createReadStream } from "node:fs";
+import { PassThrough } from "node:stream";
+import { finished } from "node:stream/promises";
 import Papa from "papaparse";
 import { promiseWithResolvers } from "./promise-with-resolvers.js";

@@ -9,7 +11,7 @@ export const csvBatchRead = async <
   batchSize: number,
   handleHeader: (header: (keyof T)[]) => unknown,
   handleBatch: (
-    rows:
+    rows: PassThrough,
     batchCount: number,
     isLastChunk: boolean,
     header: (keyof T)[],
@@ -20,7 +22,8 @@ export const csvBatchRead = async <
       Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
     );

-    let
+    let currentStream = new PassThrough({ objectMode: true });
+    let currentStreamCount = 0;
     let batchCount = 0;
     let isFirstChunk = true;
     const { promise: headerPromise, resolve: resolveHeaders } =
@@ -36,28 +39,28 @@ export const csvBatchRead = async <
         resolveHeaders(Object.keys(chunk));
         isFirstChunk = false;
       }
-      if (
+      if (currentStreamCount === batchSize) {
+        shouldResolve.push(finished(currentStream.end()));
+        currentStream = new PassThrough({ objectMode: true });
+        currentStreamCount = 0;
+        batchCount++;
+      }
+      if (currentStreamCount === 0) {
+        const stream = currentStream;
         const currentBatchCount = batchCount;
-        const currentBuf = buf.slice();
         shouldResolve.push(
           headerResolved.then((headers) =>
-            handleBatch(
+            handleBatch(stream, currentBatchCount, false, headers),
           ),
         );
-        buf = [];
-        batchCount++;
       }
-
+      currentStream.push(chunk);
+      currentStreamCount++;
     });

     stream.on("end", () => {
-
-
-      shouldResolve.push(
-        headerResolved.then((headers) =>
-          handleBatch(currentBuf, currentBatchCount, true, headers),
-        ),
-      );
+      console.log("stream end");
+      currentStream.end();

       Promise.all(shouldResolve)
         .then(() => {
|