@yutaura/csv-batch-reader 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,3 @@
1
- export declare const csvBatchRead: <T extends Record<string, string> = Record<string, string>>(filePath: string, batchSize: number, handleHeader: (header: (keyof T)[]) => unknown, handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean, header: (keyof T)[]) => unknown) => Promise<void>;
1
+ import { PassThrough } from "node:stream";
2
+ export declare const csvBatchRead: <T extends Record<string, string> = Record<string, string>>(filePath: string, batchSize: number, handleHeader: (header: (keyof T)[]) => unknown, handleBatch: (rows: PassThrough, batchCount: number, isLastChunk: boolean, header: (keyof T)[]) => unknown) => Promise<void>;
2
3
  //# sourceMappingURL=csv-batch-read.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,YAAY,GACvB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,qCAEtB,MAAM,aACL,MAAM,gBACH,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,OAAO,eACjC,CACX,IAAI,EAAE,CAAC,EAAE,EACT,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,OAAO,EACpB,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAChB,OAAO,kBA+Db,CAAC"}
1
+ {"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAK1C,eAAO,MAAM,YAAY,GACvB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,qCAEtB,MAAM,aACL,MAAM,gBACH,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,OAAO,eACjC,CACX,IAAI,EAAE,WAAW,EACjB,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,OAAO,EACpB,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAChB,OAAO,kBAgEb,CAAC"}
package/dist/index.js CHANGED
@@ -36,6 +36,8 @@ module.exports = __toCommonJS(src_exports);
36
36
 
37
37
  // src/csv-batch-read.ts
38
38
  var import_node_fs = require("fs");
39
+ var import_node_stream = require("stream");
40
+ var import_promises = require("stream/promises");
39
41
  var import_papaparse = __toESM(require("papaparse"));
40
42
 
41
43
  // src/promise-with-resolvers.ts
@@ -59,7 +61,8 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
59
61
  const stream = (0, import_node_fs.createReadStream)(filePath).pipe(
60
62
  import_papaparse.default.parse(import_papaparse.default.NODE_STREAM_INPUT, { header: true })
61
63
  );
62
- let buf = [];
64
+ let currentStream = new import_node_stream.PassThrough({ objectMode: true });
65
+ let currentStreamCount = 0;
63
66
  let batchCount = 0;
64
67
  let isFirstChunk = true;
65
68
  const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
@@ -73,27 +76,27 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
73
76
  resolveHeaders(Object.keys(chunk));
74
77
  isFirstChunk = false;
75
78
  }
76
- if (buf.length === batchSize) {
79
+ if (currentStreamCount === batchSize) {
80
+ shouldResolve.push((0, import_promises.finished)(currentStream.end()));
81
+ currentStream = new import_node_stream.PassThrough({ objectMode: true });
82
+ currentStreamCount = 0;
83
+ batchCount++;
84
+ }
85
+ if (currentStreamCount === 0) {
86
+ const stream2 = currentStream;
77
87
  const currentBatchCount = batchCount;
78
- const currentBuf = buf.slice();
79
88
  shouldResolve.push(
80
89
  headerResolved.then(
81
- (headers) => handleBatch(currentBuf, currentBatchCount, false, headers)
90
+ (headers) => handleBatch(stream2, currentBatchCount, false, headers)
82
91
  )
83
92
  );
84
- buf = [];
85
- batchCount++;
86
93
  }
87
- buf.push(chunk);
94
+ currentStream.push(chunk);
95
+ currentStreamCount++;
88
96
  });
89
97
  stream.on("end", () => {
90
- const currentBatchCount = batchCount;
91
- const currentBuf = buf.slice();
92
- shouldResolve.push(
93
- headerResolved.then(
94
- (headers) => handleBatch(currentBuf, currentBatchCount, true, headers)
95
- )
96
- );
98
+ console.log("stream end");
99
+ currentStream.end();
97
100
  Promise.all(shouldResolve).then(() => {
98
101
  stream.destroy();
99
102
  resolve();
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (\n rows: T[],\n batchCount: number,\n isLastChunk: boolean,\n header: (keyof T)[],\n ) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let buf: T[] = [];\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then(async (headers) => {\n await handleHeader(headers);\n return headers;\n });\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (buf.length === batchSize) {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(currentBuf, currentBatchCount, false, headers),\n ),\n );\n buf = [];\n batchCount++;\n }\n buf.push(chunk);\n });\n\n stream.on(\"end\", () => {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(currentBuf, currentBatchCount, true, headers),\n ),\n );\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n });\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,qBAAiC;AACjC,uBAAiB;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,aAAS,iCAAiB,QAAQ,EAAE;AAAA,MACxC,iBAAAA,QAAK,MAAM,iBAAAA,QAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAY,YAAY,mBAAmB,OAAO,OAAO;AAAA,UAC3D;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,CAAC,YACnB,YAAY,YAAY,mBAAmB,MAAM,OAAO;AAAA,QAC1D;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["Papa"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport { PassThrough } from \"node:stream\";\nimport { finished } from \"node:stream/promises\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (\n rows: PassThrough,\n batchCount: number,\n isLastChunk: boolean,\n header: (keyof T)[],\n ) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let currentStream = new PassThrough({ objectMode: true });\n let currentStreamCount = 0;\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then(async (headers) => {\n await handleHeader(headers);\n return headers;\n });\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (currentStreamCount === batchSize) {\n shouldResolve.push(finished(currentStream.end()));\n currentStream = new PassThrough({ objectMode: true });\n currentStreamCount = 0;\n batchCount++;\n }\n if (currentStreamCount === 0) {\n const stream = currentStream;\n const currentBatchCount = batchCount;\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(stream, currentBatchCount, false, headers),\n ),\n );\n }\n currentStream.push(chunk);\n currentStreamCount++;\n });\n\n stream.on(\"end\", () => {\n console.log(\"stream end\");\n currentStream.end();\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n });\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,qBAAiC;AACjC,yBAA4B;AAC5B,sBAAyB;AACzB,uBAAiB;;;ACHV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADRO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,aAAS,iCAAiB,QAAQ,EAAE;AAAA,MACxC,iBAAAA,QAAK,MAAM,iBAAAA,QAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,gBAAgB,IAAI,+BAAY,EAAE,YAAY,KAAK,CAAC;AACxD,QAAI,qBAAqB;AACzB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,uBAAuB,WAAW;AACpC,sBAAc,SAAK,0BAAS,cAAc,IAAI,CAAC,CAAC;AAChD,wBAAgB,IAAI,+BAAY,EAAE,YAAY,KAAK,CAAC;AACpD,6BAAqB;AACrB;AAAA,MACF;AACA,UAAI,uBAAuB,GAAG;AAC5B,cAAMC,UAAS;AACf,cAAM,oBAAoB;AAC1B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAYA,SAAQ,mBAAmB,OAAO,OAAO;AAAA,UACvD;AAAA,QACF;AAAA,MACF;AACA,oBAAc,KAAK,KAAK;AACxB;AAAA,IACF,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,cAAQ,IAAI,YAAY;AACxB,oBAAc,IAAI;AAElB,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["Papa","stream"]}
package/dist/index.mjs CHANGED
@@ -1,5 +1,7 @@
1
1
  // src/csv-batch-read.ts
2
2
  import { createReadStream } from "node:fs";
3
+ import { PassThrough } from "node:stream";
4
+ import { finished } from "node:stream/promises";
3
5
  import Papa from "papaparse";
4
6
 
5
7
  // src/promise-with-resolvers.ts
@@ -23,7 +25,8 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
23
25
  const stream = createReadStream(filePath).pipe(
24
26
  Papa.parse(Papa.NODE_STREAM_INPUT, { header: true })
25
27
  );
26
- let buf = [];
28
+ let currentStream = new PassThrough({ objectMode: true });
29
+ let currentStreamCount = 0;
27
30
  let batchCount = 0;
28
31
  let isFirstChunk = true;
29
32
  const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
@@ -37,27 +40,27 @@ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
37
40
  resolveHeaders(Object.keys(chunk));
38
41
  isFirstChunk = false;
39
42
  }
40
- if (buf.length === batchSize) {
43
+ if (currentStreamCount === batchSize) {
44
+ shouldResolve.push(finished(currentStream.end()));
45
+ currentStream = new PassThrough({ objectMode: true });
46
+ currentStreamCount = 0;
47
+ batchCount++;
48
+ }
49
+ if (currentStreamCount === 0) {
50
+ const stream2 = currentStream;
41
51
  const currentBatchCount = batchCount;
42
- const currentBuf = buf.slice();
43
52
  shouldResolve.push(
44
53
  headerResolved.then(
45
- (headers) => handleBatch(currentBuf, currentBatchCount, false, headers)
54
+ (headers) => handleBatch(stream2, currentBatchCount, false, headers)
46
55
  )
47
56
  );
48
- buf = [];
49
- batchCount++;
50
57
  }
51
- buf.push(chunk);
58
+ currentStream.push(chunk);
59
+ currentStreamCount++;
52
60
  });
53
61
  stream.on("end", () => {
54
- const currentBatchCount = batchCount;
55
- const currentBuf = buf.slice();
56
- shouldResolve.push(
57
- headerResolved.then(
58
- (headers) => handleBatch(currentBuf, currentBatchCount, true, headers)
59
- )
60
- );
62
+ console.log("stream end");
63
+ currentStream.end();
61
64
  Promise.all(shouldResolve).then(() => {
62
65
  stream.destroy();
63
66
  resolve();
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (\n rows: T[],\n batchCount: number,\n isLastChunk: boolean,\n header: (keyof T)[],\n ) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let buf: T[] = [];\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then(async (headers) => {\n await handleHeader(headers);\n return headers;\n });\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (buf.length === batchSize) {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(currentBuf, currentBatchCount, false, headers),\n ),\n );\n buf = [];\n batchCount++;\n }\n buf.push(chunk);\n });\n\n stream.on(\"end\", () => {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(currentBuf, currentBatchCount, true, headers),\n ),\n );\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n });\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";AAAA,SAAS,wBAAwB;AACjC,OAAO,UAAU;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,SAAS,iBAAiB,QAAQ,EAAE;AAAA,MACxC,KAAK,MAAM,KAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAY,YAAY,mBAAmB,OAAO,OAAO;AAAA,UAC3D;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,CAAC,YACnB,YAAY,YAAY,mBAAmB,MAAM,OAAO;AAAA,QAC1D;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport { PassThrough } from \"node:stream\";\nimport { finished } from \"node:stream/promises\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (\n rows: PassThrough,\n batchCount: number,\n isLastChunk: boolean,\n header: (keyof T)[],\n ) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let currentStream = new PassThrough({ objectMode: true });\n let currentStreamCount = 0;\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then(async (headers) => {\n await handleHeader(headers);\n return headers;\n });\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (currentStreamCount === batchSize) {\n shouldResolve.push(finished(currentStream.end()));\n currentStream = new PassThrough({ objectMode: true });\n currentStreamCount = 0;\n batchCount++;\n }\n if (currentStreamCount === 0) {\n const stream = currentStream;\n const currentBatchCount = batchCount;\n shouldResolve.push(\n headerResolved.then((headers) =>\n handleBatch(stream, currentBatchCount, false, headers),\n ),\n );\n }\n currentStream.push(chunk);\n currentStreamCount++;\n });\n\n stream.on(\"end\", () => {\n console.log(\"stream end\");\n currentStream.end();\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n });\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";AAAA,SAAS,wBAAwB;AACjC,SAAS,mBAAmB;AAC5B,SAAS,gBAAgB;AACzB,OAAO,UAAU;;;ACHV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADRO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBAMG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,SAAS,iBAAiB,QAAQ,EAAE;AAAA,MACxC,KAAK,MAAM,KAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,gBAAgB,IAAI,YAAY,EAAE,YAAY,KAAK,CAAC;AACxD,QAAI,qBAAqB;AACzB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc,KAAK,OAAO,YAAY;AAC3D,YAAM,aAAa,OAAO;AAC1B,aAAO;AAAA,IACT,CAAC;AACD,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,uBAAuB,WAAW;AACpC,sBAAc,KAAK,SAAS,cAAc,IAAI,CAAC,CAAC;AAChD,wBAAgB,IAAI,YAAY,EAAE,YAAY,KAAK,CAAC;AACpD,6BAAqB;AACrB;AAAA,MACF;AACA,UAAI,uBAAuB,GAAG;AAC5B,cAAMA,UAAS;AACf,cAAM,oBAAoB;AAC1B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,CAAC,YACnB,YAAYA,SAAQ,mBAAmB,OAAO,OAAO;AAAA,UACvD;AAAA,QACF;AAAA,MACF;AACA,oBAAc,KAAK,KAAK;AACxB;AAAA,IACF,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,cAAQ,IAAI,YAAY;AACxB,oBAAc,IAAI;AAElB,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["stream"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yutaura/csv-batch-reader",
3
- "version": "1.1.0",
3
+ "version": "1.2.1",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -57,8 +57,8 @@ describe("csvBatchRead", () => {
57
57
  TEST_CSV_FILE,
58
58
  3,
59
59
  () => {},
60
- (r) => {
61
- rows.push(r);
60
+ async (stream) => {
61
+ rows.push(await stream.toArray());
62
62
  },
63
63
  );
64
64
  // then
@@ -89,8 +89,8 @@ describe("csvBatchRead", () => {
89
89
  TEST_CSV_FILE,
90
90
  1,
91
91
  () => {},
92
- (r) => {
93
- rows.push(r);
92
+ async (r) => {
93
+ rows.push(await r.toArray());
94
94
  },
95
95
  );
96
96
  // then
@@ -115,8 +115,8 @@ describe("csvBatchRead", () => {
115
115
  TEST_CSV_FILE,
116
116
  9,
117
117
  () => {},
118
- (r) => {
119
- rows.push(r);
118
+ async (r) => {
119
+ rows.push(await r.toArray());
120
120
  },
121
121
  );
122
122
  // then
@@ -143,8 +143,8 @@ describe("csvBatchRead", () => {
143
143
  TEST_CSV_FILE,
144
144
  1000,
145
145
  () => {},
146
- (r) => {
147
- rows.push(r);
146
+ async (r) => {
147
+ rows.push(await r.toArray());
148
148
  },
149
149
  );
150
150
  // then
@@ -175,7 +175,8 @@ describe("csvBatchRead", () => {
175
175
  await setTimeout(500);
176
176
  headerResolvedAt = performance.now();
177
177
  },
178
- () => {
178
+ async (stream) => {
179
+ await stream.toArray();
179
180
  rowsResolvedAt.push(performance.now());
180
181
  },
181
182
  );
@@ -193,7 +194,8 @@ describe("csvBatchRead", () => {
193
194
  TEST_CSV_FILE,
194
195
  3,
195
196
  () => {},
196
- async () => {
197
+ async (stream) => {
198
+ await stream.toArray();
197
199
  await setTimeout(500);
198
200
  rowsResolvedAt.push(performance.now());
199
201
  },
@@ -1,4 +1,6 @@
1
1
  import { createReadStream } from "node:fs";
2
+ import { PassThrough } from "node:stream";
3
+ import { finished } from "node:stream/promises";
2
4
  import Papa from "papaparse";
3
5
  import { promiseWithResolvers } from "./promise-with-resolvers.js";
4
6
 
@@ -9,7 +11,7 @@ export const csvBatchRead = async <
9
11
  batchSize: number,
10
12
  handleHeader: (header: (keyof T)[]) => unknown,
11
13
  handleBatch: (
12
- rows: T[],
14
+ rows: PassThrough,
13
15
  batchCount: number,
14
16
  isLastChunk: boolean,
15
17
  header: (keyof T)[],
@@ -20,7 +22,8 @@ export const csvBatchRead = async <
20
22
  Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
21
23
  );
22
24
 
23
- let buf: T[] = [];
25
+ let currentStream = new PassThrough({ objectMode: true });
26
+ let currentStreamCount = 0;
24
27
  let batchCount = 0;
25
28
  let isFirstChunk = true;
26
29
  const { promise: headerPromise, resolve: resolveHeaders } =
@@ -36,28 +39,28 @@ export const csvBatchRead = async <
36
39
  resolveHeaders(Object.keys(chunk));
37
40
  isFirstChunk = false;
38
41
  }
39
- if (buf.length === batchSize) {
42
+ if (currentStreamCount === batchSize) {
43
+ shouldResolve.push(finished(currentStream.end()));
44
+ currentStream = new PassThrough({ objectMode: true });
45
+ currentStreamCount = 0;
46
+ batchCount++;
47
+ }
48
+ if (currentStreamCount === 0) {
49
+ const stream = currentStream;
40
50
  const currentBatchCount = batchCount;
41
- const currentBuf = buf.slice();
42
51
  shouldResolve.push(
43
52
  headerResolved.then((headers) =>
44
- handleBatch(currentBuf, currentBatchCount, false, headers),
53
+ handleBatch(stream, currentBatchCount, false, headers),
45
54
  ),
46
55
  );
47
- buf = [];
48
- batchCount++;
49
56
  }
50
- buf.push(chunk);
57
+ currentStream.push(chunk);
58
+ currentStreamCount++;
51
59
  });
52
60
 
53
61
  stream.on("end", () => {
54
- const currentBatchCount = batchCount;
55
- const currentBuf = buf.slice();
56
- shouldResolve.push(
57
- headerResolved.then((headers) =>
58
- handleBatch(currentBuf, currentBatchCount, true, headers),
59
- ),
60
- );
62
+ console.log("stream end");
63
+ currentStream.end();
61
64
 
62
65
  Promise.all(shouldResolve)
63
66
  .then(() => {