@yutaura/csv-batch-reader 0.0.0-0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1 +1,39 @@
1
- # csv-batch-reader
1
+ # @yutaura/csv-batch-reader
2
+
3
+ ## Description
4
+
5
+ This is a library for reading CSV files in batches.
6
+
7
+ ```typescript
8
+ import { csvBatchRead } from '@yutaura/csv-batch-reader';
9
+
10
+
11
+ await csvBatchRead(
12
+ 'path/to/csv-file.csv',
13
+ 100, // batch size
14
+ async (headers) => {
15
+ // do something with headers
16
+ },
17
+ async (batch) => {
18
+ // do something with batch
19
+ for (const row of batch) {
20
+ // do something with row
21
+ console.log(row);
22
+ }
23
+ },
24
+ );
25
+ ```
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ pnpm install @yutaura/csv-batch-reader
31
+ ```
32
+
33
+ ## License
34
+
35
+ MIT
36
+
37
+ ## Funding
38
+
39
+ If you like this library, please consider supporting me on [GitHub Sponsors](https://github.com/sponsors/YutaUra)
@@ -0,0 +1,2 @@
1
+ export declare const csvBatchRead: <T extends Record<string, string> = Record<string, string>>(filePath: string, batchSize: number, handleHeader: (header: (keyof T)[]) => unknown, handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown) => Promise<void>;
2
+ //# sourceMappingURL=csv-batch-read.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv-batch-read.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,YAAY,GACvB,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,qCAEtB,MAAM,aACL,MAAM,gBACH,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,OAAO,eACjC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,KAAK,OAAO,kBA8D9E,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=csv-batch-read.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv-batch-read.test.d.ts","sourceRoot":"","sources":["../src/csv-batch-read.test.ts"],"names":[],"mappings":""}
package/dist/index.d.ts CHANGED
@@ -1,3 +1,2 @@
1
- export declare const x = 1;
2
- export declare const main: () => never;
1
+ export { csvBatchRead } from "./csv-batch-read.js";
3
2
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,CAAC,IAAI,CAAC;AAEnB,eAAO,MAAM,IAAI,aAEhB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}
package/dist/index.js CHANGED
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
6
8
  var __export = (target, all) => {
7
9
  for (var name in all)
@@ -15,23 +17,99 @@ var __copyProps = (to, from, except, desc) => {
15
17
  }
16
18
  return to;
17
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
18
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
29
 
20
30
  // src/index.ts
21
31
  var src_exports = {};
22
32
  __export(src_exports, {
23
- main: () => main,
24
- x: () => x
33
+ csvBatchRead: () => csvBatchRead
25
34
  });
26
35
  module.exports = __toCommonJS(src_exports);
27
- console.log("Hello World!");
28
- var x = 1;
29
- var main = () => {
30
- throw new Error("This is an error");
36
+
37
+ // src/csv-batch-read.ts
38
+ var import_node_fs = require("fs");
39
+ var import_papaparse = __toESM(require("papaparse"));
40
+
41
+ // src/promise-with-resolvers.ts
42
+ var promiseWithResolvers = () => {
43
+ let resolve = () => void 0;
44
+ let reject = () => void 0;
45
+ const promise = new Promise((res, rej) => {
46
+ resolve = res;
47
+ reject = rej;
48
+ });
49
+ return {
50
+ promise,
51
+ resolve,
52
+ reject
53
+ };
54
+ };
55
+
56
+ // src/csv-batch-read.ts
57
+ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
58
+ return await new Promise((resolve, reject) => {
59
+ const stream = (0, import_node_fs.createReadStream)(filePath).pipe(
60
+ import_papaparse.default.parse(import_papaparse.default.NODE_STREAM_INPUT, { header: true })
61
+ );
62
+ let buf = [];
63
+ let batchCount = 0;
64
+ let isFirstChunk = true;
65
+ const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
66
+ const headerResolved = headerPromise.then(
67
+ (headers) => handleHeader(headers)
68
+ );
69
+ const shouldResolve = [headerResolved];
70
+ stream.on("data", (chunk) => {
71
+ if (isFirstChunk) {
72
+ resolveHeaders(Object.keys(chunk));
73
+ isFirstChunk = false;
74
+ }
75
+ if (buf.length === batchSize) {
76
+ const currentBatchCount = batchCount;
77
+ const currentBuf = buf.slice();
78
+ shouldResolve.push(
79
+ headerResolved.then(
80
+ () => handleBatch(currentBuf, currentBatchCount, false)
81
+ )
82
+ );
83
+ buf = [];
84
+ batchCount++;
85
+ }
86
+ buf.push(chunk);
87
+ });
88
+ stream.on("end", () => {
89
+ const currentBatchCount = batchCount;
90
+ const currentBuf = buf.slice();
91
+ shouldResolve.push(
92
+ headerResolved.then(
93
+ () => handleBatch(currentBuf, currentBatchCount, true)
94
+ )
95
+ );
96
+ Promise.all(shouldResolve).then(() => {
97
+ stream.destroy();
98
+ resolve();
99
+ }).catch((error) => {
100
+ stream.destroy();
101
+ reject(error);
102
+ });
103
+ });
104
+ stream.on("error", (error) => {
105
+ console.error("error", error);
106
+ stream.destroy();
107
+ reject(error);
108
+ });
109
+ });
31
110
  };
32
111
  // Annotate the CommonJS export names for ESM import in node:
33
112
  0 && (module.exports = {
34
- main,
35
- x
113
+ csvBatchRead
36
114
  });
37
115
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["console.log(\"Hello World!\");\n\nexport const x = 1;\n\nexport const main = () => {\n throw new Error(\"This is an error\");\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAAQ,IAAI,cAAc;AAEnB,IAAM,IAAI;AAEV,IAAM,OAAO,MAAM;AACxB,QAAM,IAAI,MAAM,kBAAkB;AACpC;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["export { csvBatchRead } from \"./csv-batch-read.js\";\n","import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let buf: T[] = [];\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then((headers) =>\n handleHeader(headers),\n );\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (buf.length === batchSize) {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then(() =>\n handleBatch(currentBuf, currentBatchCount, false),\n ),\n );\n buf = [];\n batchCount++;\n }\n buf.push(chunk);\n });\n\n stream.on(\"end\", () => {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then(() =>\n handleBatch(currentBuf, currentBatchCount, true),\n ),\n );\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n 
});\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,qBAAiC;AACjC,uBAAiB;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBACG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,aAAS,iCAAiB,QAAQ,EAAE;AAAA,MACxC,iBAAAA,QAAK,MAAM,iBAAAA,QAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc;AAAA,MAAK,CAAC,YACzC,aAAa,OAAO;AAAA,IACtB;AACA,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,MAClB,YAAY,YAAY,mBAAmB,KAAK;AAAA,UAClD;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,MAClB,YAAY,YAAY,mBAAmB,IAAI;AAAA,QACjD;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":["Papa"]}
package/dist/index.mjs CHANGED
@@ -1,11 +1,78 @@
1
- // src/index.ts
2
- console.log("Hello World!");
3
- var x = 1;
4
- var main = () => {
5
- throw new Error("This is an error");
1
+ // src/csv-batch-read.ts
2
+ import { createReadStream } from "node:fs";
3
+ import Papa from "papaparse";
4
+
5
+ // src/promise-with-resolvers.ts
6
+ var promiseWithResolvers = () => {
7
+ let resolve = () => void 0;
8
+ let reject = () => void 0;
9
+ const promise = new Promise((res, rej) => {
10
+ resolve = res;
11
+ reject = rej;
12
+ });
13
+ return {
14
+ promise,
15
+ resolve,
16
+ reject
17
+ };
18
+ };
19
+
20
+ // src/csv-batch-read.ts
21
+ var csvBatchRead = async (filePath, batchSize, handleHeader, handleBatch) => {
22
+ return await new Promise((resolve, reject) => {
23
+ const stream = createReadStream(filePath).pipe(
24
+ Papa.parse(Papa.NODE_STREAM_INPUT, { header: true })
25
+ );
26
+ let buf = [];
27
+ let batchCount = 0;
28
+ let isFirstChunk = true;
29
+ const { promise: headerPromise, resolve: resolveHeaders } = promiseWithResolvers();
30
+ const headerResolved = headerPromise.then(
31
+ (headers) => handleHeader(headers)
32
+ );
33
+ const shouldResolve = [headerResolved];
34
+ stream.on("data", (chunk) => {
35
+ if (isFirstChunk) {
36
+ resolveHeaders(Object.keys(chunk));
37
+ isFirstChunk = false;
38
+ }
39
+ if (buf.length === batchSize) {
40
+ const currentBatchCount = batchCount;
41
+ const currentBuf = buf.slice();
42
+ shouldResolve.push(
43
+ headerResolved.then(
44
+ () => handleBatch(currentBuf, currentBatchCount, false)
45
+ )
46
+ );
47
+ buf = [];
48
+ batchCount++;
49
+ }
50
+ buf.push(chunk);
51
+ });
52
+ stream.on("end", () => {
53
+ const currentBatchCount = batchCount;
54
+ const currentBuf = buf.slice();
55
+ shouldResolve.push(
56
+ headerResolved.then(
57
+ () => handleBatch(currentBuf, currentBatchCount, true)
58
+ )
59
+ );
60
+ Promise.all(shouldResolve).then(() => {
61
+ stream.destroy();
62
+ resolve();
63
+ }).catch((error) => {
64
+ stream.destroy();
65
+ reject(error);
66
+ });
67
+ });
68
+ stream.on("error", (error) => {
69
+ console.error("error", error);
70
+ stream.destroy();
71
+ reject(error);
72
+ });
73
+ });
6
74
  };
7
75
  export {
8
- main,
9
- x
76
+ csvBatchRead
10
77
  };
11
78
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["console.log(\"Hello World!\");\n\nexport const x = 1;\n\nexport const main = () => {\n throw new Error(\"This is an error\");\n};\n"],"mappings":";AAAA,QAAQ,IAAI,cAAc;AAEnB,IAAM,IAAI;AAEV,IAAM,OAAO,MAAM;AACxB,QAAM,IAAI,MAAM,kBAAkB;AACpC;","names":[]}
1
+ {"version":3,"sources":["../src/csv-batch-read.ts","../src/promise-with-resolvers.ts"],"sourcesContent":["import { createReadStream } from \"node:fs\";\nimport Papa from \"papaparse\";\nimport { promiseWithResolvers } from \"./promise-with-resolvers.js\";\n\nexport const csvBatchRead = async <\n T extends Record<string, string> = Record<string, string>,\n>(\n filePath: string,\n batchSize: number,\n handleHeader: (header: (keyof T)[]) => unknown,\n handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,\n) => {\n return await new Promise<void>((resolve, reject) => {\n const stream = createReadStream(filePath).pipe(\n Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),\n );\n\n let buf: T[] = [];\n let batchCount = 0;\n let isFirstChunk = true;\n const { promise: headerPromise, resolve: resolveHeaders } =\n promiseWithResolvers<(keyof T)[]>();\n const headerResolved = headerPromise.then((headers) =>\n handleHeader(headers),\n );\n const shouldResolve: unknown[] = [headerResolved];\n\n stream.on(\"data\", (chunk: T) => {\n if (isFirstChunk) {\n resolveHeaders(Object.keys(chunk));\n isFirstChunk = false;\n }\n if (buf.length === batchSize) {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then(() =>\n handleBatch(currentBuf, currentBatchCount, false),\n ),\n );\n buf = [];\n batchCount++;\n }\n buf.push(chunk);\n });\n\n stream.on(\"end\", () => {\n const currentBatchCount = batchCount;\n const currentBuf = buf.slice();\n shouldResolve.push(\n headerResolved.then(() =>\n handleBatch(currentBuf, currentBatchCount, true),\n ),\n );\n\n Promise.all(shouldResolve)\n .then(() => {\n stream.destroy();\n resolve();\n })\n .catch((error) => {\n stream.destroy();\n reject(error);\n });\n });\n\n stream.on(\"error\", (error) => {\n console.error(\"error\", error);\n stream.destroy();\n reject(error);\n });\n });\n};\n","export const promiseWithResolvers = <T = void>() => {\n // new 
Promise のコンストラクタ自体は非同期処理を行わないため、\n // resolve, reject への代入は promise の生成が終わったタイミングで完了している\n let resolve: (value: T | PromiseLike<T>) => void = () => undefined;\n let reject: (reason?: unknown) => void = () => undefined;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return {\n promise,\n resolve,\n reject,\n };\n};\n"],"mappings":";AAAA,SAAS,wBAAwB;AACjC,OAAO,UAAU;;;ACDV,IAAM,uBAAuB,MAAgB;AAGlD,MAAI,UAA+C,MAAM;AACzD,MAAI,SAAqC,MAAM;AAC/C,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ADVO,IAAM,eAAe,OAG1B,UACA,WACA,cACA,gBACG;AACH,SAAO,MAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAClD,UAAM,SAAS,iBAAiB,QAAQ,EAAE;AAAA,MACxC,KAAK,MAAM,KAAK,mBAAmB,EAAE,QAAQ,KAAK,CAAC;AAAA,IACrD;AAEA,QAAI,MAAW,CAAC;AAChB,QAAI,aAAa;AACjB,QAAI,eAAe;AACnB,UAAM,EAAE,SAAS,eAAe,SAAS,eAAe,IACtD,qBAAkC;AACpC,UAAM,iBAAiB,cAAc;AAAA,MAAK,CAAC,YACzC,aAAa,OAAO;AAAA,IACtB;AACA,UAAM,gBAA2B,CAAC,cAAc;AAEhD,WAAO,GAAG,QAAQ,CAAC,UAAa;AAC9B,UAAI,cAAc;AAChB,uBAAe,OAAO,KAAK,KAAK,CAAC;AACjC,uBAAe;AAAA,MACjB;AACA,UAAI,IAAI,WAAW,WAAW;AAC5B,cAAM,oBAAoB;AAC1B,cAAM,aAAa,IAAI,MAAM;AAC7B,sBAAc;AAAA,UACZ,eAAe;AAAA,YAAK,MAClB,YAAY,YAAY,mBAAmB,KAAK;AAAA,UAClD;AAAA,QACF;AACA,cAAM,CAAC;AACP;AAAA,MACF;AACA,UAAI,KAAK,KAAK;AAAA,IAChB,CAAC;AAED,WAAO,GAAG,OAAO,MAAM;AACrB,YAAM,oBAAoB;AAC1B,YAAM,aAAa,IAAI,MAAM;AAC7B,oBAAc;AAAA,QACZ,eAAe;AAAA,UAAK,MAClB,YAAY,YAAY,mBAAmB,IAAI;AAAA,QACjD;AAAA,MACF;AAEA,cAAQ,IAAI,aAAa,EACtB,KAAK,MAAM;AACV,eAAO,QAAQ;AACf,gBAAQ;AAAA,MACV,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,eAAO,QAAQ;AACf,eAAO,KAAK;AAAA,MACd,CAAC;AAAA,IACL,CAAC;AAED,WAAO,GAAG,SAAS,CAAC,UAAU;AAC5B,cAAQ,MAAM,SAAS,KAAK;AAC5B,aAAO,QAAQ;AACf,aAAO,KAAK;AAAA,IACd,CAAC;AAAA,EACH,CAAC;AACH;","names":[]}
@@ -0,0 +1,6 @@
1
+ export declare const promiseWithResolvers: <T = void>() => {
2
+ promise: Promise<T>;
3
+ resolve: (value: T | PromiseLike<T>) => void;
4
+ reject: (reason?: unknown) => void;
5
+ };
6
+ //# sourceMappingURL=promise-with-resolvers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"promise-with-resolvers.d.ts","sourceRoot":"","sources":["../src/promise-with-resolvers.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,oBAAoB,GAAI,CAAC;;qBAGf,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,IAAI;sBAC1B,OAAO,KAAK,IAAI;CAUvC,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=promise-with-resolvers.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"promise-with-resolvers.test.d.ts","sourceRoot":"","sources":["../src/promise-with-resolvers.test.ts"],"names":[],"mappings":""}
package/package.json CHANGED
@@ -1,17 +1,12 @@
1
1
  {
2
2
  "name": "@yutaura/csv-batch-reader",
3
- "version": "0.0.0-0",
3
+ "version": "1.0.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
7
- "scripts": {
8
- "build": "tsup"
9
- },
10
- "exports": {
11
- "types": "./dist/index.d.ts",
12
- "import": "./dist/index.mjs",
13
- "require": "./dist/index.js"
14
- },
7
+ "types": "dist/index.d.ts",
8
+ "main": "dist/index.js",
9
+ "module": "dist/index.mjs",
15
10
  "files": [
16
11
  "dist",
17
12
  "src",
@@ -22,13 +17,23 @@
22
17
  ],
23
18
  "author": "YutaUra",
24
19
  "license": "MIT",
25
- "packageManager": "pnpm@9.10.0",
20
+ "dependencies": {
21
+ "papaparse": "5.4.1"
22
+ },
26
23
  "devDependencies": {
24
+ "@biomejs/biome": "1.9.2",
27
25
  "@changesets/cli": "2.27.8",
28
- "@microsoft/api-extractor": "7.47.9",
29
26
  "@tsconfig/strictest": "2.0.5",
30
27
  "@types/node": "22.6.1",
28
+ "@types/papaparse": "5.3.14",
31
29
  "tsup": "8.3.0",
32
- "typescript": "5.6.2"
30
+ "typescript": "5.6.2",
31
+ "vitest": "2.1.1"
32
+ },
33
+ "scripts": {
34
+ "build": "tsup",
35
+ "check": "tsc --noEmit && biome check --fix",
36
+ "test": "vitest run",
37
+ "publish-packages": "pnpm run build && changeset version && changeset publish"
33
38
  }
34
- }
39
+ }
@@ -0,0 +1,207 @@
1
+ import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
2
+ import { dirname, join } from "node:path";
3
+ import { setTimeout } from "node:timers/promises";
4
+ import { csvBatchRead } from "./csv-batch-read.js";
5
+
6
+ const TEST_CSV_FILE = join(await mkdtemp("test-csv-file"), "test.csv");
7
+
8
+ beforeAll(async () => {
9
+ await mkdir(dirname(TEST_CSV_FILE), { recursive: true });
10
+ });
11
+
12
+ afterAll(async () => {
13
+ await rm(dirname(TEST_CSV_FILE), { recursive: true });
14
+ });
15
+
16
+ beforeEach(async () => {
17
+ await writeFile(
18
+ TEST_CSV_FILE,
19
+ `\
20
+ a,b,c
21
+ 100,101,102
22
+ 200,201,202
23
+ 300,301,302
24
+ 400,401,402
25
+ 500,501,502
26
+ 600,601,602
27
+ 700,701,702
28
+ 800,801,802
29
+ 900,901,902
30
+ `,
31
+ { encoding: "utf-8", flag: "w" },
32
+ );
33
+ });
34
+
35
+ describe("csvBatchRead", () => {
36
+ it("should read headers", async () => {
37
+ // given
38
+ let headers: string[] = [];
39
+ // when
40
+ await csvBatchRead(
41
+ TEST_CSV_FILE,
42
+ 100,
43
+ (h) => {
44
+ headers = h;
45
+ },
46
+ () => {},
47
+ );
48
+ // then
49
+ expect(headers).toStrictEqual(["a", "b", "c"]);
50
+ });
51
+
52
+ it("should read rows", async () => {
53
+ // given
54
+ const rows: Record<string, string>[][] = [];
55
+ // when
56
+ await csvBatchRead(
57
+ TEST_CSV_FILE,
58
+ 3,
59
+ () => {},
60
+ (r) => {
61
+ rows.push(r);
62
+ },
63
+ );
64
+ // then
65
+ expect(rows).toStrictEqual([
66
+ [
67
+ { a: "100", b: "101", c: "102" },
68
+ { a: "200", b: "201", c: "202" },
69
+ { a: "300", b: "301", c: "302" },
70
+ ],
71
+ [
72
+ { a: "400", b: "401", c: "402" },
73
+ { a: "500", b: "501", c: "502" },
74
+ { a: "600", b: "601", c: "602" },
75
+ ],
76
+ [
77
+ { a: "700", b: "701", c: "702" },
78
+ { a: "800", b: "801", c: "802" },
79
+ { a: "900", b: "901", c: "902" },
80
+ ],
81
+ ]);
82
+ });
83
+
84
+ it("when batchSize is 1, should read rows one by one", async () => {
85
+ // given
86
+ const rows: Record<string, string>[][] = [];
87
+ // when
88
+ await csvBatchRead(
89
+ TEST_CSV_FILE,
90
+ 1,
91
+ () => {},
92
+ (r) => {
93
+ rows.push(r);
94
+ },
95
+ );
96
+ // then
97
+ expect(rows).toStrictEqual([
98
+ [{ a: "100", b: "101", c: "102" }],
99
+ [{ a: "200", b: "201", c: "202" }],
100
+ [{ a: "300", b: "301", c: "302" }],
101
+ [{ a: "400", b: "401", c: "402" }],
102
+ [{ a: "500", b: "501", c: "502" }],
103
+ [{ a: "600", b: "601", c: "602" }],
104
+ [{ a: "700", b: "701", c: "702" }],
105
+ [{ a: "800", b: "801", c: "802" }],
106
+ [{ a: "900", b: "901", c: "902" }],
107
+ ]);
108
+ });
109
+
110
+ it("when batchSize is csv row count, should read rows in one batch", async () => {
111
+ // given
112
+ const rows: Record<string, string>[][] = [];
113
+ // when
114
+ await csvBatchRead(
115
+ TEST_CSV_FILE,
116
+ 9,
117
+ () => {},
118
+ (r) => {
119
+ rows.push(r);
120
+ },
121
+ );
122
+ // then
123
+ expect(rows).toStrictEqual([
124
+ [
125
+ { a: "100", b: "101", c: "102" },
126
+ { a: "200", b: "201", c: "202" },
127
+ { a: "300", b: "301", c: "302" },
128
+ { a: "400", b: "401", c: "402" },
129
+ { a: "500", b: "501", c: "502" },
130
+ { a: "600", b: "601", c: "602" },
131
+ { a: "700", b: "701", c: "702" },
132
+ { a: "800", b: "801", c: "802" },
133
+ { a: "900", b: "901", c: "902" },
134
+ ],
135
+ ]);
136
+ });
137
+
138
+ it("when batchSize is larger than csv row count, should read rows in one batch", async () => {
139
+ // given
140
+ const rows: Record<string, string>[][] = [];
141
+ // when
142
+ await csvBatchRead(
143
+ TEST_CSV_FILE,
144
+ 1000,
145
+ () => {},
146
+ (r) => {
147
+ rows.push(r);
148
+ },
149
+ );
150
+ // then
151
+ expect(rows).toStrictEqual([
152
+ [
153
+ { a: "100", b: "101", c: "102" },
154
+ { a: "200", b: "201", c: "202" },
155
+ { a: "300", b: "301", c: "302" },
156
+ { a: "400", b: "401", c: "402" },
157
+ { a: "500", b: "501", c: "502" },
158
+ { a: "600", b: "601", c: "602" },
159
+ { a: "700", b: "701", c: "702" },
160
+ { a: "800", b: "801", c: "802" },
161
+ { a: "900", b: "901", c: "902" },
162
+ ],
163
+ ]);
164
+ });
165
+
166
+ it("rows handler should be called after header handler", async () => {
167
+ // given
168
+ let headerResolvedAt = 0;
169
+ const rowsResolvedAt: number[] = [];
170
+ // when
171
+ await csvBatchRead(
172
+ TEST_CSV_FILE,
173
+ 3,
174
+ async () => {
175
+ await setTimeout(500);
176
+ headerResolvedAt = performance.now();
177
+ },
178
+ () => {
179
+ rowsResolvedAt.push(performance.now());
180
+ },
181
+ );
182
+ // then
183
+ for (const rowResolvedAt of rowsResolvedAt) {
184
+ expect(rowResolvedAt).toBeGreaterThan(headerResolvedAt);
185
+ }
186
+ });
187
+
188
+ it("csvBatchRead should done after all rows handler resolved", async () => {
189
+ // given
190
+ const rowsResolvedAt: number[] = [];
191
+ // when
192
+ await csvBatchRead(
193
+ TEST_CSV_FILE,
194
+ 3,
195
+ () => {},
196
+ async () => {
197
+ await setTimeout(500);
198
+ rowsResolvedAt.push(performance.now());
199
+ },
200
+ );
201
+ const csvBatchReadResolvedAt = performance.now();
202
+ // then
203
+ for (const rowResolvedAt of rowsResolvedAt) {
204
+ expect(rowResolvedAt).toBeLessThan(csvBatchReadResolvedAt);
205
+ }
206
+ });
207
+ });
@@ -0,0 +1,73 @@
1
+ import { createReadStream } from "node:fs";
2
+ import Papa from "papaparse";
3
+ import { promiseWithResolvers } from "./promise-with-resolvers.js";
4
+
5
+ export const csvBatchRead = async <
6
+ T extends Record<string, string> = Record<string, string>,
7
+ >(
8
+ filePath: string,
9
+ batchSize: number,
10
+ handleHeader: (header: (keyof T)[]) => unknown,
11
+ handleBatch: (rows: T[], batchCount: number, isLastChunk: boolean) => unknown,
12
+ ) => {
13
+ return await new Promise<void>((resolve, reject) => {
14
+ const stream = createReadStream(filePath).pipe(
15
+ Papa.parse(Papa.NODE_STREAM_INPUT, { header: true }),
16
+ );
17
+
18
+ let buf: T[] = [];
19
+ let batchCount = 0;
20
+ let isFirstChunk = true;
21
+ const { promise: headerPromise, resolve: resolveHeaders } =
22
+ promiseWithResolvers<(keyof T)[]>();
23
+ const headerResolved = headerPromise.then((headers) =>
24
+ handleHeader(headers),
25
+ );
26
+ const shouldResolve: unknown[] = [headerResolved];
27
+
28
+ stream.on("data", (chunk: T) => {
29
+ if (isFirstChunk) {
30
+ resolveHeaders(Object.keys(chunk));
31
+ isFirstChunk = false;
32
+ }
33
+ if (buf.length === batchSize) {
34
+ const currentBatchCount = batchCount;
35
+ const currentBuf = buf.slice();
36
+ shouldResolve.push(
37
+ headerResolved.then(() =>
38
+ handleBatch(currentBuf, currentBatchCount, false),
39
+ ),
40
+ );
41
+ buf = [];
42
+ batchCount++;
43
+ }
44
+ buf.push(chunk);
45
+ });
46
+
47
+ stream.on("end", () => {
48
+ const currentBatchCount = batchCount;
49
+ const currentBuf = buf.slice();
50
+ shouldResolve.push(
51
+ headerResolved.then(() =>
52
+ handleBatch(currentBuf, currentBatchCount, true),
53
+ ),
54
+ );
55
+
56
+ Promise.all(shouldResolve)
57
+ .then(() => {
58
+ stream.destroy();
59
+ resolve();
60
+ })
61
+ .catch((error) => {
62
+ stream.destroy();
63
+ reject(error);
64
+ });
65
+ });
66
+
67
+ stream.on("error", (error) => {
68
+ console.error("error", error);
69
+ stream.destroy();
70
+ reject(error);
71
+ });
72
+ });
73
+ };
package/src/index.ts CHANGED
@@ -1,7 +1 @@
1
- console.log("Hello World!");
2
-
3
- export const x = 1;
4
-
5
- export const main = () => {
6
- throw new Error("This is an error");
7
- };
1
+ export { csvBatchRead } from "./csv-batch-read.js";
@@ -0,0 +1,45 @@
1
+ import { setTimeout } from "node:timers/promises";
2
+ import { promiseWithResolvers } from "./promise-with-resolvers.js";
3
+
4
+ describe("promiseWithResolvers", () => {
5
+ it("promiseWithResolvers returns promise, resolve, reject", () => {
6
+ // given
7
+ // when
8
+ const returns = promiseWithResolvers();
9
+ // then
10
+ expect(returns).toHaveProperty("promise");
11
+ expect(returns).toHaveProperty("resolve");
12
+ expect(returns).toHaveProperty("reject");
13
+ });
14
+
15
+ it("resolve should resolve promise", async () => {
16
+ // given
17
+ const { promise, resolve } = promiseWithResolvers<string>();
18
+ // when
19
+ resolve("resolved");
20
+ // then
21
+ await expect(promise).resolves.toBe("resolved");
22
+ });
23
+
24
+ it("when not resolved, promise should be pending", async () => {
25
+ // given
26
+ const { promise } = promiseWithResolvers();
27
+ // when
28
+ const res = await Promise.race([
29
+ promise.then(() => "resolved"),
30
+ setTimeout(3000).then(() => "pending"),
31
+ ]);
32
+ // then
33
+ // 3秒までしか待っていないけど、ずっと待ち続けることはできないので妥協
34
+ expect(res).toBe("pending");
35
+ });
36
+
37
+ it("reject should reject promise", async () => {
38
+ // given
39
+ const { promise, reject } = promiseWithResolvers();
40
+ // when
41
+ reject("rejected");
42
+ // then
43
+ await expect(promise).rejects.toBe("rejected");
44
+ });
45
+ });
@@ -0,0 +1,15 @@
1
+ export const promiseWithResolvers = <T = void>() => {
2
+ // new Promise のコンストラクタ自体は非同期処理を行わないため、
3
+ // resolve, reject への代入は promise の生成が終わったタイミングで完了している
4
+ let resolve: (value: T | PromiseLike<T>) => void = () => undefined;
5
+ let reject: (reason?: unknown) => void = () => undefined;
6
+ const promise = new Promise<T>((res, rej) => {
7
+ resolve = res;
8
+ reject = rej;
9
+ });
10
+ return {
11
+ promise,
12
+ resolve,
13
+ reject,
14
+ };
15
+ };