select-csv 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +363 -127
  2. package/package.json +1 -1
  3. package/test.js +28 -0
package/README.md CHANGED
@@ -4,7 +4,7 @@ The fastest, simplest, and most powerful CSV parser for Node.js. Optimized for h
4
4
 
5
5
  `select-csv` converts `.csv` files into arrays, JSON objects, or raw lines. It provides two main functions, `parseCsv` (for local files) and `parseText` (for raw strings), both sharing the same methods and features.
6
6
 
7
- ## ✨ Key Features
7
+ ## Key Features
8
8
  - **Ultra-Lightweight:** Package size is less than 30 KB.
9
9
  - **Fast Mode:** Synchronous execution for maximum speed and zero overhead.
10
10
  - **Memory Efficient:** Streams large files using chunks and row offsets instead of loading the entire file into RAM.
@@ -14,21 +14,32 @@ The fastest, simplest, and most powerful CSV parser for Node.js. Optimized for h
14
14
 
15
15
  ---
16
16
 
17
- ## 🚀 Installation
18
17
 
19
- ```bash
20
- npm install select-csv
21
- 🛠 Usage Examples
22
- Initialization
23
- JavaScript
18
+ Install:
19
+ -------
24
20
 
25
- const { parseCsv, parseText } = require("select-csv");
26
- let parse;
21
+ select-csv is available on [npm](https://www.npmjs.com/package/select-csv). It
22
+ can be installed with the following command:
27
23
 
28
- // 1. Create object from a local .csv file
24
+ npm install select-csv
25
+
26
+
27
+
28
+ Usage:
29
+ -------
30
+
31
+ Here are several different examples:
32
+
33
+
34
+ ```js
35
+ const {parseCsv,parseText} = require("select-csv");
36
+
37
+ var parse;
38
+
39
+ // First create object from .csv file
29
40
  parse = parseCsv('file_path.csv');
30
41
 
31
- // 2. Create object from raw text string
42
+ // Or, if you just want to create an object from text
32
43
  parse = parseText(
33
44
  `Index,User Id,First Name,Last Name,Sex
34
45
  1,5f10e9D33fC5f2b,Sara,Mcguire,Female
@@ -40,179 +51,404 @@ parse = parseText(
40
51
  7,FCdfFf08196f633,Collin,Allison,Male
41
52
  8,356279dAa0F7CbD,Nicholas,Branch,Male
42
53
  9,F563CcbFBfEcf5a,Emma,Robinson,Female
43
- 10,f2dceFc00F62542,Pedro,Cordova,Male`
54
+ 10,f2dceFc00F62542,Pedro,Cordova,Male`,
55
+ {
56
+ linebreak: '\n',
57
+ header: true
58
+ }
44
59
  );
45
- Get All Rows
46
- JavaScript
47
60
 
48
- const result = parse.get();
61
+
62
+ ```
63
+
64
+ * If you want to get all rows :
65
+ ```js
66
+ const result = parse.get(); //Return all rows
49
67
  /*
50
- Returns:
51
- {
52
- time: '1 ms',
53
- header: ["Index","User Id","First Name","Last Name","Sex"],
54
- rows: [
55
- ["1","5f10e9D33fC5f2b","Sara","Mcguire","Female"],
56
- ["2","751cD1cbF77e005","Alisha","Hebert","Male"],
57
- ...
58
- ],
59
- row_count: 10
60
- }
68
+ {
69
+ time:1,
70
+ header:["Index","User Id","First Name","Last Name","Sex"],
71
+ rows:[
72
+ ["1","5f10e9D33fC5f2b","Sara","Mcguire","Female"],
73
+ ["2","751cD1cbF77e005","Alisha","Hebert","Male"],
74
+ ["3","DcEFDB2D2e62bF9","Gwendolyn","Sheppard","Male"],
75
+ ["4","C88661E02EEDA9e","Kristine","Mccann","Female"],
76
+ ["5","fafF1aBDebaB2a6","Bobby","Pittman","Female"],
77
+ ["6","BdDb6C8Af309202","Calvin","Ramsey","Female"],
78
+ ["7","FCdfFf08196f633","Collin","Allison","Male"],
79
+ ["8","356279dAa0F7CbD","Nicholas","Branch","Male"],
80
+ ["9","F563CcbFBfEcf5a","Emma","Robinson","Female"],
81
+ ["10","f2dceFc00F62542","Pedro","Cordova","Male"]
82
+ ],
83
+ row_count:10
84
+ }
61
85
  */
62
- Parsing in Chunks
63
- The chunk(c) method allows you to fetch a specific number of rows. The parser saves the current offset automatically.
64
86
 
65
- JavaScript
87
+ ```
66
88
 
67
- let result;
89
+ * If you want to get a chunk of rows :
68
90
 
69
- // Get rows 0 and 1
70
- result = parse.chunk(2);
91
+ ```js
92
+ var result;
93
+ result = parse.chunk(c)
94
+ //The 'c' parameter must be an integer and greater than or equal to 1
95
+
96
+ //Examples:
97
+ result = parse.chunk(2) //Return row 0 and 1
71
98
  /*
72
99
  {
73
- "time": "0 ms",
74
- "header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
75
- "rows": [
100
+ "Time": 0,
101
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
102
+ "Rows": [
76
103
  [ "1", "5f10e9D33fC5f2b", "Sara", "Mcguire", "Female" ],
77
104
  [ "2", "751cD1cbF77e005", "Alisha", "Hebert", "Male" ]
78
105
  ],
79
- "row_count": 2
106
+ "row_count:": 2
107
+ }
108
+ */
109
+
110
+ result = parse.chunk(3) //Return row 2,3 and 4 (Get rows from last offset saved)
111
+ /*
112
+ {
113
+ "Time": 0,
114
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
115
+ "Rows": [
116
+ [ "3", "DcEFDB2D2e62bF9", "Gwendolyn", "Sheppard", "Male" ],
117
+ [ "4", "C88661E02EEDA9e", "Kristine", "Mccann", "Female" ],
118
+ [ "5", "fafF1aBDebaB2a6", "Bobby", "Pittman", "Female" ]
119
+ ],
120
+ "row_count:": 3
80
121
  }
81
122
  */
82
123
 
83
- // Get rows 2, 3, and 4 (continues from last offset)
84
- result = parse.chunk(3);
124
+ result = parse.chunk(1) //Return row 5 (Get rows from last offset saved)
125
+ /*
126
+ {
127
+ "Time": 0,
128
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
129
+ "Rows": [
130
+ [ "6", "BdDb6C8Af309202", "Calvin", "Ramsey", "Female" ]
131
+ ],
132
+ "row_count:": 1
133
+ }
134
+ */
135
+
136
+
137
+ ```
85
138
 
86
- // Get row 5
87
- result = parse.chunk(1);
88
- Row Offsets (Specific Range)
89
- Use rowOffset(from, to) to fetch a specific range of rows.
139
+ * If you want to get specific rows :
90
140
 
91
- JavaScript
141
+ ```js
142
+ var result
143
+ result = parse.rowOffset(from)
144
+ // The 'from' parameter must be an integer and greater than or equal to 0
92
145
 
93
- // Get all rows from the 6th row to the last row
94
- let result = parse.rowOffset(6);
146
+ // Or
147
+ result = parse.rowOffset(from,to)
148
+ // The 'to' parameter must be an integer and greater than or equal to 1
95
149
 
96
- // Get rows from 5th to 8th row
97
- result = parse.rowOffset(5, 8);
150
+ //Examples:
151
+ result = parse.rowOffset(6) //Returns all rows from the sixth row to the last row
98
152
  /*
99
153
  {
100
- "time": "1 ms",
101
- "header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
102
- "rows": [
154
+ "Time": 0,
155
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
156
+ "Rows": [
157
+ [ "7", "FCdfFf08196f633", "Collin", "Allison", "Male" ],
158
+ [ "8", "356279dAa0F7CbD", "Nicholas", "Branch", "Male" ],
159
+ [ "9", "F563CcbFBfEcf5a", "Emma", "Robinson", "Female" ],
160
+ [ "10", "f2dceFc00F62542", "Pedro", "Cordova", "Male" ]
161
+ ],
162
+ "row_count:": 4
163
+ }
164
+ */
165
+
166
+ result = parse.rowOffset(5,8) //Returns all rows from 5th to 8th row
167
+ /*
168
+ {
169
+ "Time": 1,
170
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
171
+ "Rows": [
103
172
  [ "6", "BdDb6C8Af309202", "Calvin", "Ramsey", "Female" ],
104
173
  [ "7", "FCdfFf08196f633", "Collin", "Allison", "Male" ],
105
174
  [ "8", "356279dAa0F7CbD", "Nicholas", "Branch", "Male" ]
106
175
  ],
107
- "row_count": 3
176
+ "row_count:": 3
108
177
  }
109
178
  */
110
- Manual Offset Control
111
- You can manually set the row pointer using setRowOffset(offs).
112
179
 
113
- JavaScript
114
180
 
115
- // If offset exists, returns [byte_offset, row_number]
116
- let status = parse.setRowOffset(5); // [236, 5]
117
181
 
118
- // Get row 6 after setting offset
119
- let nextRow = parse.chunk(1);
182
+ ```
183
+
184
+ * If you want to change the row offset :
120
185
 
121
- // Returns false if row number does not exist
122
- let fail = parse.setRowOffset(20); // false
123
- ⚙️ Configuration Options
124
- The default configuration is:
186
+ ```js
187
+ parse.setRowOffset(offs)
125
188
 
126
- JavaScript
189
+ // The 'offs' parameter must be an integer and greater than or equal to 0.
127
190
 
191
+ // If the offset exists, return [offset,row_number].
192
+ result = parse.setRowOffset(5)
193
+ /*
194
+ [236,5]
195
+ */
196
+ result = parse.chunk(1) // Get rows from last offset saved
197
+ /*
128
198
  {
129
- 'header': true, // Treat first row as header
130
- 'quote': false, // Handle quoted values
131
- 'linebreak': '\r\n', // Custom line break
132
- 'delimiter': ",", // Column separator (set to false for raw lines)
133
- 'json': false, // Return rows as JSON objects (requires header: true)
134
- 'bufferSize': 1048576 // 1MB buffer (for parseCsv only)
199
+ "Time": 0,
200
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
201
+ "Rows": [
202
+ [ "6", "BdDb6C8Af309202", "Calvin", "Ramsey", "Female" ]
203
+ ],
204
+ "row_count:": 1
135
205
  }
136
- Custom Options Example:
137
- JavaScript
206
+ */
207
+
208
+ // If not, returns false and the offset is not changed.
209
+ result = parse.setRowOffset(20)
210
+ /*
211
+ false
212
+ */
213
+ result = parse.chunk(1) // Get rows from last offset saved
214
+ /*
215
+ {
216
+ "Time": 0,
217
+ "Header": [ "Index", "User Id", "First Name", "Last Name", "Sex" ],
218
+ "Rows": [
219
+ [ "7", "FCdfFf08196f633", "Collin", "Allison", "Male" ]
220
+ ],
221
+ "row_count:": 1
222
+ }
223
+ */
224
+
225
+
226
+ ```
227
+
228
+ * The default object option :
229
+
230
+ ```js
231
+ {
232
+ 'header': true,
233
+ 'quote': false,
234
+ 'linebreak': '\r\n',
235
+ 'delimiter': ",",
236
+ 'bufferSize':1024*1024
237
+ }
238
+ // delimiter: (String: get rows containing columns, false: get lines without columns)
239
+ // bufferSize: only applies when parsing a CSV file; it is the maximum number of characters that can be read at a time. The minimum value is 1024.
240
+ ```
241
+
242
+ * If you want to use specific option :
243
+ ```js
244
+ var option = {
245
+ 'header': false, /* or true */
246
+ 'quote': true, /* or false */
247
+ 'linebreak': '\n', /* '\n' or '\r' or any other string */
248
+ 'delimiter': "," /* ';' or any other string or false */
249
+ 'bufferSize':2000 /* It only works with a CSV file */
250
+ }
251
+
252
+ var parse;
253
+ // Create object from .csv file
254
+ parse = parseCsv('file_path.csv',option);
138
255
 
139
- let option = {
140
- 'header': false,
141
- 'quote': true,
142
- 'linebreak': '\n',
143
- 'delimiter': ",",
144
- 'json': false,
145
- 'bufferSize': 2000
146
- };
256
+ // Or, if you just want to create an object from text
257
+ parse = parseText(
258
+ `Index,User Id,First Name,Last Name,Sex
259
+ 1,5f10e9D33fC5f2b,Sara,Mcguire,Female
260
+ 2,751cD1cbF77e005,Alisha,Hebert,Male
261
+ 3,DcEFDB2D2e62bF9,Gwendolyn,Sheppard,Male`
262
+ , option);
147
263
 
148
- parse = parseCsv('data.csv', option);
149
- Working with JSON Output:
150
- If you want the result as an array of objects based on the header:
264
+ option = {
265
+ 'header': false,
266
+ }
151
267
 
152
- JavaScript
268
+ result = parse.rowOffset(2)
269
+ /*
270
+ {
271
+ "Time": 0,
272
+ "Header": false,
273
+ "Rows": [
274
+ [ "2", "751cD1cbF77e005", "Alisha", "Hebert", "Male" ]
275
+ ],
276
+ "row_count:": 1
277
+ }
278
+ */
153
279
 
154
- const parse = parseCsv('data.csv', { 'header': true, 'json': true });
155
- const result = parse.get();
280
+ option = {
281
+ 'header': true,
282
+ 'delimiter': false
283
+ }
284
+ // delimiter: (String: get rows containing columns, false: get lines without columns)
156
285
  /*
157
- rows: [
158
- { "Index": "1", "User Id": "5f10e9D33fC5f2b", "First Name": "Sara", ... },
159
- ...
160
- ]
286
+ {
287
+ "Time": 0,
288
+ "Header": false,
289
+ "Rows": [
290
+ [ "2,751cD1cbF77e005,Alisha,Hebert,Male" ] // No columns, just string (all line)
291
+ ],
292
+ "row_count:": 1
293
+ }
161
294
  */
162
- Raw Lines (No Delimiter):
163
- If delimiter is set to false, the parser returns full lines as strings.
164
295
 
165
- JavaScript
296
+ ```
166
297
 
167
- parse.resetOption({ 'header': true, 'delimiter': false });
168
- let result = parse.rowOffset(2);
169
- // rows: [ ["2,751cD1cbF77e005,Alisha,Hebert,Male"] ]
170
- 📊 Benchmarking Large Files
171
- Tested with a 100 Million Row Dataset:
298
+ * If you want to reset option after multiple uses of your code :
299
+ ```js
300
+ const option = { // Just an example
301
+ 'header': false,
302
+ 'quote': true,
303
+ 'linebreak': '\n'
304
+ }
305
+
306
+ parse.resetOption(option); // All saved values are erased and the object is restarted
307
+
308
+
309
+ ```
172
310
 
173
- JavaScript
311
+ * If you want to get information of your object :
312
+ ```js
313
+
314
+ const result = parse.getInfo();
315
+ /*
316
+ {
317
+ "offset": 275,
318
+ "rowOffset": 7,
319
+ "option": {
320
+ "header": false,
321
+ "quote": false,
322
+ "linebreak": "\n",
323
+ "delimiter": false
324
+ }
325
+ }
326
+ */
174
327
 
175
- const parse = parseCsv('huge-data.csv', {"header": false});
328
+ ```
176
329
 
177
- // Fetch 100,000 rows
178
- let result = parse.chunk(100000); // ~222ms
330
+ * Examples of parsing a large CSV file:
331
+ (https://www.kaggle.com/datasets/zanjibar/100-million-data-csv)
179
332
 
180
- // Jump to row 30,000,000
181
- result = parse.rowOffset(30000000, 30000005); // ~3743ms
182
333
 
183
- // Jump to row 90,000,000
184
- result = parse.rowOffset(90000000, 90000004); // ~44126ms
334
+ ```js
185
335
 
186
- // Current metadata
187
- console.log(parse.getInfo());
336
+ const parse = parseCsv('100-million-data.csv',{"header": false});
337
+ var result;
338
+ result = parse.chunk(100000)
188
339
  /*
189
340
  {
190
- offset: 3599945660,
191
- rowOffset: 90000008,
192
- option: { ... }
341
+ time: 222,
342
+ header: false,
343
+ rows: [
344
+ [ '198801', '1', '103', '100', '000000190', '0', '35843', '34353' ],
345
+ [ '198801', '1', '103', '100', '120991000', '0', '1590', '4154' ],
346
+ [ '198801', '1', '103', '100', '210390900', '0', '4500', '2565' ],
347
+ .
348
+ .
349
+ .
350
+ [ '198801', '1', '103', '100', '391590000', '0', '95000', '7850' ],
351
+ [ '198801', '1', '103', '100', '391620000', '0', '1000', '404' ],
352
+ [ '198801', '1', '103', '100', '391723000', '0', '545', '479' ],
353
+ [ '198801', '1', '103', '100', '391732100', '0', '24', '393' ],
354
+ [ '198801', '1', '103', '100', '391732900', '0', '60', '758' ],
355
+ [ '198801', '1', '103', '100', '391810100', '0', '1935', '1042' ],
356
+ [ '198801', '1', '103', '100', '391910200', '0', '510', '1303' ],
357
+ [ '198801', '1', '103', '100', '391910300', '0', '133', '379' ],
358
+ [ '198801', '1', '103', '100', '391990300', '0', '450', '1668' ],
359
+ [ '198801', '1', '103', '100', '391990500', '0', '942', '1721' ],
360
+ [ '198801', '1', '103', '100', '391990900', '0', '40', '235' ],
361
+ [ '198801', '1', '103', '100', '392030000', '0', '406', '652' ],
362
+ ... 99900 more items
363
+ ],
364
+ row_count: 100000
193
365
  }
194
366
  */
195
- 🧪 Advanced Methods
196
- getInfo()
197
- Returns current byte offset, row offset, and active options.
198
367
 
199
- JavaScript
368
+ result = parse.chunk(3) // Return row 100001,100002 and 100003 (Get rows from last offset saved)
369
+ /*
370
+ {
371
+ time: 1,
372
+ header: false,
373
+ rows: [
374
+ [ '198801', '1', '326', '500', '841330000', '90', '81', '246' ],
375
+ [ '198801', '1', '326', '500', '841510000', '0', '35', '1366' ],
376
+ [ '198801', '1', '326', '500', '841582100', '0', '6', '334' ]
377
+ ],
378
+ row_count: 3
379
+ }
380
+ */
200
381
 
201
- const info = parse.getInfo();
202
- resetOption(newOptions)
203
- Clears internal state and re-initializes the parser with new settings.
382
+ const from = 1000*1000*30;
383
+ const to = from + 5;
384
+ result = parse.rowOffset(from,to)
385
+ /*
386
+ {
387
+ time: 3743,
388
+ header: false,
389
+ rows: [
390
+ [
391
+ '199804', '2',
392
+ '213', '502',
393
+ '848130000', '16035',
394
+ '746', '8380'
395
+ ],
396
+ [ '199804', '2', '213', '502', '848140000', '168', '152', '1891' ],
397
+ [ '199804', '2', '213', '502', '848180010', '77', '404', '1366' ],
398
+ [ '199804', '2', '213', '502', '848190000', '0', '131', '570' ],
399
+ [ '199804', '2', '213', '502', '848230000', '300', '4', '882' ]
400
+ ],
401
+ row_count: 5
402
+ }
403
+ */
204
404
 
205
- JavaScript
405
+ const from = 1000*1000*90;
406
+ const to = from + 4;
407
+ result = parse.rowOffset(from,to)
408
+ /*
409
+ {
410
+ time: 44126,
411
+ header: false,
412
+ rows: [
413
+ [ '201412', '1', '125', '400', '283525000', '0', '160000', '6492' ],
414
+ [ '201412', '1', '125', '400', '390740100', '0', '17500', '5579' ],
415
+ [ '201412', '1', '125', '400', '390950000', '0', '36000', '21423' ],
416
+ [ '201412', '1', '125', '400', '392329000', '0', '520', '1413' ]
417
+ ],
418
+ row_count: 4
419
+ }
420
+ */
206
421
 
207
- parse.resetOption({ 'header': false, 'quote': true });
208
- header()
209
- Returns the detected CSV header array.
422
+ result = parse.chunk(3) // Get rows from last offset saved ( row to,to+1 and to+2 )
423
+ /*
424
+ {
425
+ time: 29,
426
+ header: false,
427
+ rows: [
428
+ [ '201412', '1', '125', '400', '400932000', '0', '18', '526' ],
429
+ [ '201412', '1', '125', '400', '401110000', '173', '1735', '1197' ],
430
+ [ '201412', '1', '125', '400', '401120000', '133', '1707', '1099' ]
431
+ ],
432
+ row_count: 3
433
+ }
434
+ */
210
435
 
211
- JavaScript
436
+ result = parse.getInfo() // Get all the information
437
+ /*
438
+ {
439
+ offset: 3599945660,
440
+ rowOffset: 90000008,
441
+ option: {
442
+ header: false,
443
+ quote: false,
444
+ linebreak: '\r\n',
445
+ delimiter: ',',
446
+ bufferSize: 1048576
447
+ }
448
+ }
449
+ */
450
+ ```
212
451
 
213
- const headers = parse.header();
214
- 🤝 Contributing
215
- Found a bug or have a suggestion? Open an issue at GitHub Issues.
452
+ ## 📄 License
216
453
 
217
- 📄 License
218
- This project is licensed under the MIT License.
454
+ MIT License
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "select-csv",
3
- "version": "1.1.21",
3
+ "version": "1.1.23",
4
4
  "description": "A high-performance, memory-efficient CSV parser for Node.js. Supports streaming large files via chunks and row offsets with zero dependencies.",
5
5
  "keywords": [
6
6
  "csv",
package/test.js ADDED
@@ -0,0 +1,28 @@
1
+ const {parseCsv,parseText} = require("select-csv");
2
+
3
+ var parse;
4
+
5
+ // First create object from .csv file
6
+ //parse = parseCsv('file_path.csv');
7
+
8
+ // Or, if you just want to create an object from text
9
+ parse = parseText(
10
+ `Index,User Id,First Name,Last Name,Sex
11
+ 1,5f10e9D33fC5f2b,Sara,Mcguire,Female
12
+ 2,751cD1cbF77e005,Alisha,Hebert,Male
13
+ 3,DcEFDB2D2e62bF9,Gwendolyn,Sheppard,Male
14
+ 4,C88661E02EEDA9e,Kristine,Mccann,Female
15
+ 5,fafF1aBDebaB2a6,Bobby,Pittman,Female
16
+ 6,BdDb6C8Af309202,Calvin,Ramsey,Female
17
+ 7,FCdfFf08196f633,Collin,Allison,Male
18
+ 8,356279dAa0F7CbD,Nicholas,Branch,Male
19
+ 9,F563CcbFBfEcf5a,Emma,Robinson,Female
20
+ 10,f2dceFc00F62542,Pedro,Cordova,Male`,
21
+ {
22
+ linebreak: '\n',
23
+ header: true
24
+ }
25
+ );
26
+
27
+ const result = parse.get();
28
+ console.log(result)