file-to-json-converter 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -0
- package/package.json +26 -0
- package/src/index.js +74 -0
- package/src/parsers/csv.js +228 -0
- package/src/parsers/excel.js +157 -0
- package/src/transformers/batch.js +0 -0
- package/src/transformers/stream.js +0 -0
- package/src/utils/detector.js +95 -0
- package/src/utils/memory.js +110 -0
- package/src/writers/json.js +0 -0
- package/src/writers/ndjson.js +0 -0
- package/test/test.js +156 -0
- package/types/index.d.ts +156 -0
- package/workers/processor.worker.js +0 -0
package/README.md
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# File to JSON Converter
|
|
2
|
+
|
|
3
|
+
High-performance CSV and Excel to JSON converter with streaming support for Node.js.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- ✅ **CSV & Excel Support** - Parse both CSV and XLSX/XLS files
|
|
8
|
+
- 🚀 **Streaming Mode** - Process large files without loading into memory
|
|
9
|
+
- 📦 **Batch Mode** - Fast processing for smaller files
|
|
10
|
+
- 🎯 **Smart Detection** - Automatic file type detection
|
|
11
|
+
- 🔧 **Transform & Filter** - Apply custom transformations and filters
|
|
12
|
+
- 📊 **Progress Tracking** - Real-time progress callbacks
|
|
13
|
+
- 💾 **Memory Efficient** - Optimized for large datasets (20K+ rows)
|
|
14
|
+
- 🎨 **Multiple Formats** - Output as JSON or NDJSON (newline-delimited JSON)
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install file-to-json-converter
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```javascript
|
|
25
|
+
const { convertToJSON } = require('file-to-json-converter');
|
|
26
|
+
|
|
27
|
+
// Simple conversion
|
|
28
|
+
const data = await convertToJSON('data.csv');
|
|
29
|
+
console.log(data);
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Usage Examples
|
|
33
|
+
|
|
34
|
+
### Basic CSV to JSON
|
|
35
|
+
|
|
36
|
+
```javascript
|
|
37
|
+
const { convertBatch } = require('file-to-json-converter');
|
|
38
|
+
|
|
39
|
+
const result = await convertBatch('users.csv');
|
|
40
|
+
// Returns: [{ Name: 'John', Age: '30', ... }, ...]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Streaming Large Files
|
|
44
|
+
|
|
45
|
+
```javascript
|
|
46
|
+
const { convertStream } = require('file-to-json-converter');
|
|
47
|
+
|
|
48
|
+
await convertStream('large-data.csv', 'output.json', {
|
|
49
|
+
onProgress: (progress) => {
|
|
50
|
+
console.log(`Processed: ${progress.rowsProcessed} rows`);
|
|
51
|
+
}
|
|
52
|
+
});
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Excel File Conversion
|
|
56
|
+
|
|
57
|
+
```javascript
|
|
58
|
+
const { convertToJSON } = require('file-to-json-converter');
|
|
59
|
+
|
|
60
|
+
// Convert specific sheet
|
|
61
|
+
const data = await convertToJSON('report.xlsx', {
|
|
62
|
+
sheetName: 'Sales Data'
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// Get available sheets
|
|
66
|
+
const { getSheetNames } = require('file-to-json-converter/src/parsers/excel');
|
|
67
|
+
const sheets = await getSheetNames('report.xlsx');
|
|
68
|
+
console.log('Available sheets:', sheets);
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Transform & Filter Data
|
|
72
|
+
|
|
73
|
+
```javascript
|
|
74
|
+
await convertStream('data.csv', 'output.json', {
|
|
75
|
+
// Filter rows
|
|
76
|
+
filter: (row) => parseInt(row.Age) > 18,
|
|
77
|
+
|
|
78
|
+
// Transform data
|
|
79
|
+
transform: (row) => ({
|
|
80
|
+
fullName: row.Name.toUpperCase(),
|
|
81
|
+
age: parseInt(row.Age),
|
|
82
|
+
email: row.Email.toLowerCase()
|
|
83
|
+
})
|
|
84
|
+
});
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### NDJSON Format
|
|
88
|
+
|
|
89
|
+
```javascript
|
|
90
|
+
await convertStream('data.csv', 'output.ndjson', {
|
|
91
|
+
outputFormat: 'ndjson' // One JSON object per line
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Custom Options
|
|
96
|
+
|
|
97
|
+
```javascript
|
|
98
|
+
const { convertToJSON } = require('file-to-json-converter');
|
|
99
|
+
|
|
100
|
+
const data = await convertToJSON('data.csv', {
|
|
101
|
+
batchSize: 5000, // Custom batch size (or 'auto')
|
|
102
|
+
delimiter: ';', // Custom delimiter for CSV
|
|
103
|
+
encoding: 'utf16le', // File encoding
|
|
104
|
+
skipErrors: true, // Skip malformed rows
|
|
105
|
+
headers: true, // First row is headers
|
|
106
|
+
onProgress: (progress) => {
|
|
107
|
+
console.log(`Progress: ${progress.rowsProcessed}`);
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## API Reference
|
|
113
|
+
|
|
114
|
+
### `convertToJSON(input, options)`
|
|
115
|
+
|
|
116
|
+
Main conversion function that auto-detects file type and processing mode.
|
|
117
|
+
|
|
118
|
+
**Parameters:**
|
|
119
|
+
- `input` (string | Buffer) - File path or buffer
|
|
120
|
+
- `options` (object) - Configuration options
|
|
121
|
+
|
|
122
|
+
**Returns:** `Promise<Array | ConversionResult>`
|
|
123
|
+
|
|
124
|
+
### `convertStream(inputPath, outputPath, options)`
|
|
125
|
+
|
|
126
|
+
Stream-based conversion for large files.
|
|
127
|
+
|
|
128
|
+
**Parameters:**
|
|
129
|
+
- `inputPath` (string) - Input file path
|
|
130
|
+
- `outputPath` (string) - Output file path
|
|
131
|
+
- `options` (object) - Configuration options
|
|
132
|
+
|
|
133
|
+
**Returns:** `Promise<ConversionResult>`
|
|
134
|
+
|
|
135
|
+
### `convertBatch(inputPath, options)`
|
|
136
|
+
|
|
137
|
+
Batch conversion (loads entire file into memory).
|
|
138
|
+
|
|
139
|
+
**Parameters:**
|
|
140
|
+
- `inputPath` (string) - Input file path
|
|
141
|
+
- `options` (object) - Configuration options
|
|
142
|
+
|
|
143
|
+
**Returns:** `Promise<Array>`
|
|
144
|
+
|
|
145
|
+
### Options
|
|
146
|
+
|
|
147
|
+
| Option | Type | Default | Description |
|
|
148
|
+
|--------|------|---------|-------------|
|
|
149
|
+
| `batchSize` | number \| 'auto' | 'auto' | Rows per batch |
|
|
150
|
+
| `streaming` | boolean | true | Enable streaming mode |
|
|
151
|
+
| `outputFormat` | 'json' \| 'ndjson' | 'json' | Output format |
|
|
152
|
+
| `outputFile` | string | null | Output file path |
|
|
153
|
+
| `encoding` | string | 'utf8' | File encoding |
|
|
154
|
+
| `skipErrors` | boolean | false | Skip malformed rows |
|
|
155
|
+
| `transform` | function | null | Transform each row |
|
|
156
|
+
| `filter` | function | null | Filter rows |
|
|
157
|
+
| `onProgress` | function | null | Progress callback |
|
|
158
|
+
| `sheetName` | string | null | Excel sheet name |
|
|
159
|
+
| `headers` | boolean | true | First row is headers |
|
|
160
|
+
| `delimiter` | string | ',' | CSV delimiter |
|
|
161
|
+
|
|
162
|
+
## Performance
|
|
163
|
+
|
|
164
|
+
- **Small files (<10K rows)**: Batch mode recommended
|
|
165
|
+
- **Large files (10K-100K rows)**: Streaming mode with automatic batching
|
|
166
|
+
- **Very large files (>100K rows)**: Streaming mode with NDJSON format
|
|
167
|
+
|
|
168
|
+
### Benchmarks
|
|
169
|
+
|
|
170
|
+
- 25K rows: ~2-3 seconds
|
|
171
|
+
- 100K rows: ~8-12 seconds
|
|
172
|
+
- Memory usage: ~50-100MB for streaming mode
|
|
173
|
+
|
|
174
|
+
## TypeScript Support
|
|
175
|
+
|
|
176
|
+
Full TypeScript definitions included:
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
import { convertToJSON, ConversionOptions } from 'file-to-json-converter';
|
|
180
|
+
|
|
181
|
+
const options: ConversionOptions = {
|
|
182
|
+
filter: (row: any) => row.Age > 18,
|
|
183
|
+
transform: (row: any) => ({ ...row })
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
const data = await convertToJSON('data.csv', options);
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Error Handling
|
|
190
|
+
|
|
191
|
+
```javascript
|
|
192
|
+
try {
|
|
193
|
+
const data = await convertToJSON('data.csv', {
|
|
194
|
+
skipErrors: true // Continue on malformed rows
|
|
195
|
+
});
|
|
196
|
+
} catch (err) {
|
|
197
|
+
console.error('Conversion failed:', err.message);
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Testing
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
npm test
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
MIT
|
|
210
|
+
|
|
211
|
+
## Contributing
|
|
212
|
+
|
|
213
|
+
Contributions welcome! Please open an issue or PR.
|
|
214
|
+
|
|
215
|
+
## Support
|
|
216
|
+
|
|
217
|
+
For issues or questions, please open an issue on GitHub.
|
package/package.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "file-to-json-converter",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "High-performance CSV/Excel to JSON converter with streaming support",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"types": "types/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test": "node test/test.js"
|
|
9
|
+
},
|
|
10
|
+
"keywords": [
|
|
11
|
+
"csv",
|
|
12
|
+
"excel",
|
|
13
|
+
"json",
|
|
14
|
+
"converter",
|
|
15
|
+
"streaming",
|
|
16
|
+
"parser"
|
|
17
|
+
],
|
|
18
|
+
"author": "",
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"dependencies": {
|
|
21
|
+
"exceljs": "^4.4.0"
|
|
22
|
+
},
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=14.0.0"
|
|
25
|
+
}
|
|
26
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const { parseCSV } = require('./parsers/csv');
|
|
4
|
+
const { parseExcel } = require('./parsers/excel');
|
|
5
|
+
const { detectFileType } = require('./utils/detector');
|
|
6
|
+
const { estimateRowCount } = require('./utils/memory');
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Main converter function
|
|
10
|
+
* @param {string|Buffer} input - File path or Buffer
|
|
11
|
+
* @param {Object} options - Configuration options
|
|
12
|
+
* @returns {Promise<Array|void>} JSON data or void for streaming
|
|
13
|
+
*/
|
|
14
|
+
async function convertToJSON(input, options = {}) {
|
|
15
|
+
const config = {
|
|
16
|
+
batchSize: options.batchSize || 'auto',
|
|
17
|
+
streaming: options.streaming !== false,
|
|
18
|
+
outputFormat: options.outputFormat || 'json',
|
|
19
|
+
outputFile: options.outputFile || null,
|
|
20
|
+
encoding: options.encoding || 'utf8',
|
|
21
|
+
skipErrors: options.skipErrors || false,
|
|
22
|
+
transform: options.transform || null,
|
|
23
|
+
filter: options.filter || null,
|
|
24
|
+
onProgress: options.onProgress || null,
|
|
25
|
+
sheetName: options.sheetName || null, // For Excel
|
|
26
|
+
headers: options.headers !== false, // Auto-detect headers
|
|
27
|
+
...options
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
// Detect file type
|
|
31
|
+
const fileType = await detectFileType(input);
|
|
32
|
+
|
|
33
|
+
// Route to appropriate parser
|
|
34
|
+
if (fileType === 'csv') {
|
|
35
|
+
return await parseCSV(input, config);
|
|
36
|
+
} else if (fileType === 'excel') {
|
|
37
|
+
return await parseExcel(input, config);
|
|
38
|
+
} else {
|
|
39
|
+
throw new Error(`Unsupported file type: ${fileType}`);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Stream converter - processes data in chunks
|
|
45
|
+
* @param {string} inputPath - Input file path
|
|
46
|
+
* @param {string} outputPath - Output file path
|
|
47
|
+
* @param {Object} options - Configuration options
|
|
48
|
+
*/
|
|
49
|
+
async function convertStream(inputPath, outputPath, options = {}) {
|
|
50
|
+
return convertToJSON(inputPath, {
|
|
51
|
+
...options,
|
|
52
|
+
streaming: true,
|
|
53
|
+
outputFile: outputPath
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Batch converter - loads entire file (for small files)
|
|
59
|
+
* @param {string} inputPath - Input file path
|
|
60
|
+
* @param {Object} options - Configuration options
|
|
61
|
+
* @returns {Promise<Array>} JSON data
|
|
62
|
+
*/
|
|
63
|
+
async function convertBatch(inputPath, options = {}) {
|
|
64
|
+
return convertToJSON(inputPath, {
|
|
65
|
+
...options,
|
|
66
|
+
streaming: false
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
module.exports = {
|
|
71
|
+
convertToJSON,
|
|
72
|
+
convertStream,
|
|
73
|
+
convertBatch
|
|
74
|
+
};
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const { pipeline, Transform } = require('stream');
|
|
3
|
+
const { promisify } = require('util');
|
|
4
|
+
const { calculateBatchSize } = require('../utils/memory');
|
|
5
|
+
|
|
6
|
+
const pipelineAsync = promisify(pipeline);
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Parse CSV line into array
|
|
10
|
+
*/
|
|
11
|
+
function parseCSVLine(line, delimiter = ',') {
|
|
12
|
+
const result = [];
|
|
13
|
+
let current = '';
|
|
14
|
+
let inQuotes = false;
|
|
15
|
+
|
|
16
|
+
for (let i = 0; i < line.length; i++) {
|
|
17
|
+
const char = line[i];
|
|
18
|
+
const nextChar = line[i + 1];
|
|
19
|
+
|
|
20
|
+
if (char === '"') {
|
|
21
|
+
if (inQuotes && nextChar === '"') {
|
|
22
|
+
current += '"';
|
|
23
|
+
i++;
|
|
24
|
+
} else {
|
|
25
|
+
inQuotes = !inQuotes;
|
|
26
|
+
}
|
|
27
|
+
} else if (char === delimiter && !inQuotes) {
|
|
28
|
+
result.push(current.trim());
|
|
29
|
+
current = '';
|
|
30
|
+
} else {
|
|
31
|
+
current += char;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
result.push(current.trim());
|
|
35
|
+
return result;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Transform stream for CSV parsing
|
|
40
|
+
*/
|
|
41
|
+
class CSVTransform extends Transform {
|
|
42
|
+
constructor(options = {}) {
|
|
43
|
+
super({ objectMode: true });
|
|
44
|
+
this.headers = null;
|
|
45
|
+
this.buffer = '';
|
|
46
|
+
this.rowCount = 0;
|
|
47
|
+
this.delimiter = options.delimiter || ',';
|
|
48
|
+
this.skipErrors = options.skipErrors || false;
|
|
49
|
+
this.transform = options.transform;
|
|
50
|
+
this.filter = options.filter;
|
|
51
|
+
this.onProgress = options.onProgress;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
_transform(chunk, encoding, callback) {
|
|
55
|
+
this.buffer += chunk.toString();
|
|
56
|
+
const lines = this.buffer.split('\n');
|
|
57
|
+
this.buffer = lines.pop(); // Keep incomplete line
|
|
58
|
+
|
|
59
|
+
for (const line of lines) {
|
|
60
|
+
if (!line.trim()) continue;
|
|
61
|
+
|
|
62
|
+
try {
|
|
63
|
+
const values = parseCSVLine(line, this.delimiter);
|
|
64
|
+
|
|
65
|
+
if (!this.headers) {
|
|
66
|
+
this.headers = values;
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const obj = {};
|
|
71
|
+
this.headers.forEach((header, i) => {
|
|
72
|
+
obj[header] = values[i] || '';
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
// Apply filter
|
|
76
|
+
if (this.filter && !this.filter(obj)) {
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Apply transform
|
|
81
|
+
const transformed = this.transform ? this.transform(obj) : obj;
|
|
82
|
+
|
|
83
|
+
this.rowCount++;
|
|
84
|
+
this.push(transformed);
|
|
85
|
+
|
|
86
|
+
// Progress callback
|
|
87
|
+
if (this.onProgress && this.rowCount % 1000 === 0) {
|
|
88
|
+
this.onProgress({ rowsProcessed: this.rowCount });
|
|
89
|
+
}
|
|
90
|
+
} catch (err) {
|
|
91
|
+
if (!this.skipErrors) {
|
|
92
|
+
return callback(err);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
callback();
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
_flush(callback) {
|
|
101
|
+
if (this.buffer.trim()) {
|
|
102
|
+
try {
|
|
103
|
+
const values = parseCSVLine(this.buffer, this.delimiter);
|
|
104
|
+
const obj = {};
|
|
105
|
+
this.headers.forEach((header, i) => {
|
|
106
|
+
obj[header] = values[i] || '';
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
if (!this.filter || this.filter(obj)) {
|
|
110
|
+
const transformed = this.transform ? this.transform(obj) : obj;
|
|
111
|
+
this.push(transformed);
|
|
112
|
+
this.rowCount++;
|
|
113
|
+
}
|
|
114
|
+
} catch (err) {
|
|
115
|
+
if (!this.skipErrors) {
|
|
116
|
+
return callback(err);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
if (this.onProgress) {
|
|
122
|
+
this.onProgress({ rowsProcessed: this.rowCount, completed: true });
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
callback();
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* JSON writer stream
|
|
131
|
+
*/
|
|
132
|
+
class JSONWriter extends Transform {
|
|
133
|
+
constructor(outputFormat = 'json') {
|
|
134
|
+
super({ objectMode: true });
|
|
135
|
+
this.outputFormat = outputFormat;
|
|
136
|
+
this.isFirst = true;
|
|
137
|
+
|
|
138
|
+
if (outputFormat === 'json') {
|
|
139
|
+
this.push('[');
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
_transform(chunk, encoding, callback) {
|
|
144
|
+
if (this.outputFormat === 'json') {
|
|
145
|
+
if (!this.isFirst) {
|
|
146
|
+
this.push(',');
|
|
147
|
+
}
|
|
148
|
+
this.push(JSON.stringify(chunk));
|
|
149
|
+
this.isFirst = false;
|
|
150
|
+
} else if (this.outputFormat === 'ndjson') {
|
|
151
|
+
this.push(JSON.stringify(chunk) + '\n');
|
|
152
|
+
}
|
|
153
|
+
callback();
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
_flush(callback) {
|
|
157
|
+
if (this.outputFormat === 'json') {
|
|
158
|
+
this.push(']');
|
|
159
|
+
}
|
|
160
|
+
callback();
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Parse CSV file
|
|
166
|
+
*/
|
|
167
|
+
async function parseCSV(input, options = {}) {
|
|
168
|
+
const inputPath = typeof input === 'string' ? input : null;
|
|
169
|
+
|
|
170
|
+
// For streaming mode
|
|
171
|
+
if (options.streaming && options.outputFile) {
|
|
172
|
+
const readStream = inputPath
|
|
173
|
+
? fs.createReadStream(inputPath, { encoding: options.encoding })
|
|
174
|
+
: null;
|
|
175
|
+
|
|
176
|
+
if (!readStream) {
|
|
177
|
+
throw new Error('Streaming mode requires file path');
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
const csvTransform = new CSVTransform({
|
|
181
|
+
delimiter: options.delimiter,
|
|
182
|
+
skipErrors: options.skipErrors,
|
|
183
|
+
transform: options.transform,
|
|
184
|
+
filter: options.filter,
|
|
185
|
+
onProgress: options.onProgress
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
const jsonWriter = new JSONWriter(options.outputFormat);
|
|
189
|
+
const writeStream = fs.createWriteStream(options.outputFile);
|
|
190
|
+
|
|
191
|
+
await pipelineAsync(
|
|
192
|
+
readStream,
|
|
193
|
+
csvTransform,
|
|
194
|
+
jsonWriter,
|
|
195
|
+
writeStream
|
|
196
|
+
);
|
|
197
|
+
|
|
198
|
+
return { success: true, file: options.outputFile };
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// For batch mode (load all into memory)
|
|
202
|
+
return new Promise((resolve, reject) => {
|
|
203
|
+
const results = [];
|
|
204
|
+
const readStream = inputPath
|
|
205
|
+
? fs.createReadStream(inputPath, { encoding: options.encoding })
|
|
206
|
+
: null;
|
|
207
|
+
|
|
208
|
+
if (!readStream) {
|
|
209
|
+
throw new Error('Batch mode requires file path');
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
const csvTransform = new CSVTransform({
|
|
213
|
+
delimiter: options.delimiter,
|
|
214
|
+
skipErrors: options.skipErrors,
|
|
215
|
+
transform: options.transform,
|
|
216
|
+
filter: options.filter,
|
|
217
|
+
onProgress: options.onProgress
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
csvTransform.on('data', (row) => results.push(row));
|
|
221
|
+
csvTransform.on('end', () => resolve(results));
|
|
222
|
+
csvTransform.on('error', reject);
|
|
223
|
+
|
|
224
|
+
readStream.pipe(csvTransform);
|
|
225
|
+
});
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
module.exports = { parseCSV, CSVTransform };
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const ExcelJS = require('exceljs');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Parse Excel file with streaming
|
|
6
|
+
*/
|
|
7
|
+
async function parseExcel(input, options = {}) {
|
|
8
|
+
const inputPath = typeof input === 'string' ? input : null;
|
|
9
|
+
|
|
10
|
+
if (!inputPath) {
|
|
11
|
+
throw new Error('Excel parsing requires file path');
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const workbook = new ExcelJS.Workbook();
|
|
15
|
+
const results = [];
|
|
16
|
+
let headers = null;
|
|
17
|
+
let rowCount = 0;
|
|
18
|
+
|
|
19
|
+
// For streaming mode with output file
|
|
20
|
+
if (options.streaming && options.outputFile) {
|
|
21
|
+
const writeStream = fs.createWriteStream(options.outputFile);
|
|
22
|
+
|
|
23
|
+
if (options.outputFormat === 'json') {
|
|
24
|
+
writeStream.write('[');
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
let isFirst = true;
|
|
28
|
+
|
|
29
|
+
return new Promise(async (resolve, reject) => {
|
|
30
|
+
try {
|
|
31
|
+
const worksheetReader = workbook.xlsx.createInputStream(fs.createReadStream(inputPath));
|
|
32
|
+
|
|
33
|
+
worksheetReader.on('worksheet', (worksheet) => {
|
|
34
|
+
// Skip if specific sheet requested and this isn't it
|
|
35
|
+
if (options.sheetName && worksheet.name !== options.sheetName) {
|
|
36
|
+
return;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
worksheet.on('row', (row) => {
|
|
40
|
+
if (!headers) {
|
|
41
|
+
headers = row.values.slice(1); // Remove first empty element
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const obj = {};
|
|
46
|
+
headers.forEach((header, i) => {
|
|
47
|
+
obj[header] = row.values[i + 1] || '';
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// Apply filter
|
|
51
|
+
if (options.filter && !options.filter(obj)) {
|
|
52
|
+
return;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Apply transform
|
|
56
|
+
const transformed = options.transform ? options.transform(obj) : obj;
|
|
57
|
+
|
|
58
|
+
// Write to output
|
|
59
|
+
if (options.outputFormat === 'json') {
|
|
60
|
+
if (!isFirst) writeStream.write(',');
|
|
61
|
+
writeStream.write(JSON.stringify(transformed));
|
|
62
|
+
isFirst = false;
|
|
63
|
+
} else if (options.outputFormat === 'ndjson') {
|
|
64
|
+
writeStream.write(JSON.stringify(transformed) + '\n');
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
rowCount++;
|
|
68
|
+
|
|
69
|
+
// Progress callback
|
|
70
|
+
if (options.onProgress && rowCount % 1000 === 0) {
|
|
71
|
+
options.onProgress({ rowsProcessed: rowCount });
|
|
72
|
+
}
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
worksheetReader.on('end', () => {
|
|
77
|
+
if (options.outputFormat === 'json') {
|
|
78
|
+
writeStream.write(']');
|
|
79
|
+
}
|
|
80
|
+
writeStream.end();
|
|
81
|
+
|
|
82
|
+
if (options.onProgress) {
|
|
83
|
+
options.onProgress({ rowsProcessed: rowCount, completed: true });
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
resolve({ success: true, file: options.outputFile });
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
worksheetReader.on('error', reject);
|
|
90
|
+
} catch (err) {
|
|
91
|
+
reject(err);
|
|
92
|
+
}
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// For batch mode (load all into memory)
|
|
97
|
+
try {
|
|
98
|
+
await workbook.xlsx.readFile(inputPath);
|
|
99
|
+
|
|
100
|
+
let worksheet;
|
|
101
|
+
if (options.sheetName) {
|
|
102
|
+
worksheet = workbook.getWorksheet(options.sheetName);
|
|
103
|
+
if (!worksheet) {
|
|
104
|
+
throw new Error(`Sheet "${options.sheetName}" not found`);
|
|
105
|
+
}
|
|
106
|
+
} else {
|
|
107
|
+
worksheet = workbook.worksheets[0]; // Get first sheet
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
worksheet.eachRow((row, rowNumber) => {
|
|
111
|
+
if (rowNumber === 1 && options.headers !== false) {
|
|
112
|
+
headers = row.values.slice(1); // Remove first empty element
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const obj = {};
|
|
117
|
+
headers.forEach((header, i) => {
|
|
118
|
+
obj[header] = row.values[i + 1] || '';
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
// Apply filter
|
|
122
|
+
if (options.filter && !options.filter(obj)) {
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Apply transform
|
|
127
|
+
const transformed = options.transform ? options.transform(obj) : obj;
|
|
128
|
+
results.push(transformed);
|
|
129
|
+
|
|
130
|
+
rowCount++;
|
|
131
|
+
|
|
132
|
+
// Progress callback
|
|
133
|
+
if (options.onProgress && rowCount % 1000 === 0) {
|
|
134
|
+
options.onProgress({ rowsProcessed: rowCount });
|
|
135
|
+
}
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
if (options.onProgress) {
|
|
139
|
+
options.onProgress({ rowsProcessed: rowCount, completed: true });
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return results;
|
|
143
|
+
} catch (err) {
|
|
144
|
+
throw new Error(`Failed to parse Excel file: ${err.message}`);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Get sheet names from Excel file
|
|
150
|
+
*/
|
|
151
|
+
async function getSheetNames(filePath) {
|
|
152
|
+
const workbook = new ExcelJS.Workbook();
|
|
153
|
+
await workbook.xlsx.readFile(filePath);
|
|
154
|
+
return workbook.worksheets.map(ws => ws.name);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
module.exports = { parseExcel, getSheetNames };
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const { promisify } = require('util');
|
|
4
|
+
|
|
5
|
+
const readFileAsync = promisify(fs.readFile);
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Detect file type based on extension and magic numbers
|
|
9
|
+
*/
|
|
10
|
+
async function detectFileType(input) {
|
|
11
|
+
if (typeof input === 'string') {
|
|
12
|
+
// File path
|
|
13
|
+
const ext = path.extname(input).toLowerCase();
|
|
14
|
+
|
|
15
|
+
// Check by extension first
|
|
16
|
+
if (ext === '.csv') {
|
|
17
|
+
return 'csv';
|
|
18
|
+
} else if (ext === '.xlsx' || ext === '.xls') {
|
|
19
|
+
return 'excel';
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// Fallback to magic number detection
|
|
23
|
+
return await detectByMagicNumber(input);
|
|
24
|
+
} else if (Buffer.isBuffer(input)) {
|
|
25
|
+
// Buffer input
|
|
26
|
+
return detectBufferType(input);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
throw new Error('Input must be a file path or Buffer');
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Detect file type by reading magic numbers (file signature)
|
|
34
|
+
*/
|
|
35
|
+
async function detectByMagicNumber(filePath) {
|
|
36
|
+
try {
|
|
37
|
+
const buffer = await readFileAsync(filePath);
|
|
38
|
+
return detectBufferType(buffer);
|
|
39
|
+
} catch (err) {
|
|
40
|
+
throw new Error(`Failed to read file: ${err.message}`);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Detect buffer type by magic numbers
|
|
46
|
+
*/
|
|
47
|
+
function detectBufferType(buffer) {
|
|
48
|
+
// Check for ZIP signature (Excel .xlsx files are ZIP archives)
|
|
49
|
+
if (buffer[0] === 0x50 && buffer[1] === 0x4B &&
|
|
50
|
+
buffer[2] === 0x03 && buffer[3] === 0x04) {
|
|
51
|
+
return 'excel';
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Check for old Excel .xls signature
|
|
55
|
+
if (buffer[0] === 0xD0 && buffer[1] === 0xCF &&
|
|
56
|
+
buffer[2] === 0x11 && buffer[3] === 0xE0) {
|
|
57
|
+
return 'excel';
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Default to CSV for text-based files
|
|
61
|
+
const text = buffer.slice(0, 1024).toString('utf8');
|
|
62
|
+
if (text.includes(',') || text.includes('\t')) {
|
|
63
|
+
return 'csv';
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
throw new Error('Unable to detect file type');
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Validate file exists and is readable
|
|
71
|
+
*/
|
|
72
|
+
function validateFile(filePath) {
|
|
73
|
+
if (!fs.existsSync(filePath)) {
|
|
74
|
+
throw new Error(`File not found: ${filePath}`);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
try {
|
|
78
|
+
fs.accessSync(filePath, fs.constants.R_OK);
|
|
79
|
+
} catch (err) {
|
|
80
|
+
throw new Error(`File is not readable: ${filePath}`);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const stats = fs.statSync(filePath);
|
|
84
|
+
return {
|
|
85
|
+
size: stats.size,
|
|
86
|
+
sizeInMB: (stats.size / (1024 * 1024)).toFixed(2)
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
module.exports = {
|
|
91
|
+
detectFileType,
|
|
92
|
+
detectByMagicNumber,
|
|
93
|
+
detectBufferType,
|
|
94
|
+
validateFile
|
|
95
|
+
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
const os = require('os');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Calculate optimal batch size based on available memory
|
|
6
|
+
*/
|
|
7
|
+
function calculateBatchSize(fileSize, options = {}) {
|
|
8
|
+
const freeMemory = os.freemem();
|
|
9
|
+
const totalMemory = os.totalmem();
|
|
10
|
+
const memoryUsagePercent = 1 - (freeMemory / totalMemory);
|
|
11
|
+
|
|
12
|
+
// If memory usage is already high, use smaller batches
|
|
13
|
+
if (memoryUsagePercent > 0.7) {
|
|
14
|
+
return 5000;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Estimate row size (average 200 bytes per row)
|
|
18
|
+
const estimatedRowSize = options.estimatedRowSize || 200;
|
|
19
|
+
const availableForProcessing = freeMemory * 0.3; // Use 30% of free memory
|
|
20
|
+
|
|
21
|
+
const calculatedBatch = Math.floor(availableForProcessing / estimatedRowSize);
|
|
22
|
+
|
|
23
|
+
// Clamp between 1000 and 50000
|
|
24
|
+
return Math.max(1000, Math.min(50000, calculatedBatch));
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Estimate number of rows in a file
|
|
29
|
+
*/
|
|
30
|
+
function estimateRowCount(filePath) {
|
|
31
|
+
const stats = fs.statSync(filePath);
|
|
32
|
+
const fileSize = stats.size;
|
|
33
|
+
|
|
34
|
+
// Read first 100KB to estimate average row size
|
|
35
|
+
const fd = fs.openSync(filePath, 'r');
|
|
36
|
+
const sampleSize = Math.min(100 * 1024, fileSize);
|
|
37
|
+
const buffer = Buffer.alloc(sampleSize);
|
|
38
|
+
fs.readSync(fd, buffer, 0, sampleSize, 0);
|
|
39
|
+
fs.closeSync(fd);
|
|
40
|
+
|
|
41
|
+
const text = buffer.toString('utf8');
|
|
42
|
+
const lines = text.split('\n').length;
|
|
43
|
+
const avgRowSize = sampleSize / lines;
|
|
44
|
+
|
|
45
|
+
return Math.floor(fileSize / avgRowSize);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Check if file should use batch processing
|
|
50
|
+
*/
|
|
51
|
+
function shouldUseBatchProcessing(filePath, threshold = 10000) {
|
|
52
|
+
try {
|
|
53
|
+
const estimatedRows = estimateRowCount(filePath);
|
|
54
|
+
return estimatedRows > threshold;
|
|
55
|
+
} catch (err) {
|
|
56
|
+
// Default to batch processing if estimation fails
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Get memory stats
|
|
63
|
+
*/
|
|
64
|
+
function getMemoryStats() {
|
|
65
|
+
const used = process.memoryUsage();
|
|
66
|
+
const free = os.freemem();
|
|
67
|
+
const total = os.totalmem();
|
|
68
|
+
|
|
69
|
+
return {
|
|
70
|
+
process: {
|
|
71
|
+
heapUsed: (used.heapUsed / 1024 / 1024).toFixed(2) + ' MB',
|
|
72
|
+
heapTotal: (used.heapTotal / 1024 / 1024).toFixed(2) + ' MB',
|
|
73
|
+
rss: (used.rss / 1024 / 1024).toFixed(2) + ' MB',
|
|
74
|
+
external: (used.external / 1024 / 1024).toFixed(2) + ' MB'
|
|
75
|
+
},
|
|
76
|
+
system: {
|
|
77
|
+
free: (free / 1024 / 1024).toFixed(2) + ' MB',
|
|
78
|
+
total: (total / 1024 / 1024).toFixed(2) + ' MB',
|
|
79
|
+
used: ((total - free) / 1024 / 1024).toFixed(2) + ' MB',
|
|
80
|
+
percentUsed: (((total - free) / total) * 100).toFixed(2) + '%'
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Monitor memory usage with callback
|
|
87
|
+
*/
|
|
88
|
+
function monitorMemory(callback, interval = 5000) {
|
|
89
|
+
const intervalId = setInterval(() => {
|
|
90
|
+
const stats = getMemoryStats();
|
|
91
|
+
callback(stats);
|
|
92
|
+
|
|
93
|
+
// Auto-stop if memory usage is critical
|
|
94
|
+
const used = process.memoryUsage().heapUsed;
|
|
95
|
+
const total = process.memoryUsage().heapTotal;
|
|
96
|
+
if (used / total > 0.9) {
|
|
97
|
+
console.warn('Warning: High memory usage detected');
|
|
98
|
+
}
|
|
99
|
+
}, interval);
|
|
100
|
+
|
|
101
|
+
return () => clearInterval(intervalId);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
module.exports = {
|
|
105
|
+
calculateBatchSize,
|
|
106
|
+
estimateRowCount,
|
|
107
|
+
shouldUseBatchProcessing,
|
|
108
|
+
getMemoryStats,
|
|
109
|
+
monitorMemory
|
|
110
|
+
};
|
|
File without changes
|
|
File without changes
|
package/test/test.js
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
const { convertToJSON, convertStream, convertBatch } = require('../src/index');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
|
+
// Test data directory
|
|
6
|
+
const testDir = path.join(__dirname, 'data');
|
|
7
|
+
if (!fs.existsSync(testDir)) {
|
|
8
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
// Create sample CSV file for testing
|
|
12
|
+
function createSampleCSV() {
|
|
13
|
+
const csvPath = path.join(testDir, 'sample.csv');
|
|
14
|
+
const rows = ['Name,Age,City,Email'];
|
|
15
|
+
|
|
16
|
+
for (let i = 1; i <= 100; i++) {
|
|
17
|
+
rows.push(`User${i},${20 + i},City${i % 10},user${i}@example.com`);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
fs.writeFileSync(csvPath, rows.join('\n'));
|
|
21
|
+
console.log(`✓ Created sample CSV: ${csvPath}`);
|
|
22
|
+
return csvPath;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Test 1: Basic CSV to JSON (Batch mode)
|
|
26
|
+
async function test1() {
|
|
27
|
+
console.log('\n--- Test 1: Basic CSV to JSON (Batch) ---');
|
|
28
|
+
const csvPath = createSampleCSV();
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const result = await convertBatch(csvPath);
|
|
32
|
+
console.log(`✓ Converted ${result.length} rows`);
|
|
33
|
+
console.log('First row:', result[0]);
|
|
34
|
+
console.log('Last row:', result[result.length - 1]);
|
|
35
|
+
} catch (err) {
|
|
36
|
+
console.error('✗ Error:', err.message);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Test 2: Streaming mode with output file
|
|
41
|
+
async function test2() {
|
|
42
|
+
console.log('\n--- Test 2: Streaming Mode ---');
|
|
43
|
+
const csvPath = createSampleCSV();
|
|
44
|
+
const outputPath = path.join(testDir, 'output.json');
|
|
45
|
+
|
|
46
|
+
try {
|
|
47
|
+
const result = await convertStream(csvPath, outputPath, {
|
|
48
|
+
outputFormat: 'json',
|
|
49
|
+
onProgress: (progress) => {
|
|
50
|
+
if (progress.completed) {
|
|
51
|
+
console.log(`✓ Completed: ${progress.rowsProcessed} rows`);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
console.log(`✓ Output saved to: ${result.file}`);
|
|
57
|
+
const outputSize = fs.statSync(outputPath).size;
|
|
58
|
+
console.log(`File size: ${(outputSize / 1024).toFixed(2)} KB`);
|
|
59
|
+
} catch (err) {
|
|
60
|
+
console.error('✗ Error:', err.message);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Test 3: With transform and filter
|
|
65
|
+
async function test3() {
|
|
66
|
+
console.log('\n--- Test 3: Transform & Filter ---');
|
|
67
|
+
const csvPath = createSampleCSV();
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
const result = await convertBatch(csvPath, {
|
|
71
|
+
filter: (row) => parseInt(row.Age) > 50,
|
|
72
|
+
transform: (row) => ({
|
|
73
|
+
name: row.Name.toUpperCase(),
|
|
74
|
+
age: parseInt(row.Age),
|
|
75
|
+
location: row.City,
|
|
76
|
+
contact: row.Email
|
|
77
|
+
})
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
console.log(`✓ Filtered to ${result.length} rows (Age > 50)`);
|
|
81
|
+
console.log('Sample transformed row:', result[0]);
|
|
82
|
+
} catch (err) {
|
|
83
|
+
console.error('✗ Error:', err.message);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Test 4: NDJSON output format
|
|
88
|
+
async function test4() {
|
|
89
|
+
console.log('\n--- Test 4: NDJSON Format ---');
|
|
90
|
+
const csvPath = createSampleCSV();
|
|
91
|
+
const outputPath = path.join(testDir, 'output.ndjson');
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
await convertStream(csvPath, outputPath, {
|
|
95
|
+
outputFormat: 'ndjson'
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
console.log(`✓ NDJSON output saved to: ${outputPath}`);
|
|
99
|
+
const lines = fs.readFileSync(outputPath, 'utf8').split('\n').filter(Boolean);
|
|
100
|
+
console.log(`Lines written: ${lines.length}`);
|
|
101
|
+
console.log('First line:', JSON.parse(lines[0]));
|
|
102
|
+
} catch (err) {
|
|
103
|
+
console.error('✗ Error:', err.message);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Test 5: Large file simulation
|
|
108
|
+
async function test5() {
|
|
109
|
+
console.log('\n--- Test 5: Large File (25K rows) ---');
|
|
110
|
+
const largeCsvPath = path.join(testDir, 'large.csv');
|
|
111
|
+
const rows = ['ID,Name,Value,Status,Timestamp'];
|
|
112
|
+
|
|
113
|
+
for (let i = 1; i <= 25000; i++) {
|
|
114
|
+
rows.push(`${i},Item${i},${Math.random() * 1000},active,2024-01-01`);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
fs.writeFileSync(largeCsvPath, rows.join('\n'));
|
|
118
|
+
console.log(`✓ Created large CSV with 25,000 rows`);
|
|
119
|
+
|
|
120
|
+
const startTime = Date.now();
|
|
121
|
+
|
|
122
|
+
try {
|
|
123
|
+
const outputPath = path.join(testDir, 'large_output.json');
|
|
124
|
+
await convertStream(largeCsvPath, outputPath, {
|
|
125
|
+
onProgress: (progress) => {
|
|
126
|
+
if (progress.rowsProcessed % 5000 === 0 || progress.completed) {
|
|
127
|
+
console.log(`Progress: ${progress.rowsProcessed} rows processed`);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
|
|
133
|
+
console.log(`✓ Completed in ${duration}s`);
|
|
134
|
+
|
|
135
|
+
const outputSize = fs.statSync(outputPath).size;
|
|
136
|
+
console.log(`Output file size: ${(outputSize / 1024 / 1024).toFixed(2)} MB`);
|
|
137
|
+
} catch (err) {
|
|
138
|
+
console.error('✗ Error:', err.message);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Run all tests
|
|
143
|
+
async function runAllTests() {
|
|
144
|
+
console.log('=== File to JSON Converter Tests ===');
|
|
145
|
+
|
|
146
|
+
await test1();
|
|
147
|
+
await test2();
|
|
148
|
+
await test3();
|
|
149
|
+
await test4();
|
|
150
|
+
await test5();
|
|
151
|
+
|
|
152
|
+
console.log('\n=== All Tests Completed ===');
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Execute tests
|
|
156
|
+
runAllTests().catch(console.error);
|
package/types/index.d.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
const { convertToJSON, convertStream, convertBatch } = require('../src/index');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
|
|
5
|
+
// Test data directory
|
|
6
|
+
const testDir = path.join(__dirname, 'data');
|
|
7
|
+
if (!fs.existsSync(testDir)) {
|
|
8
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
// Create sample CSV file for testing
|
|
12
|
+
function createSampleCSV() {
|
|
13
|
+
const csvPath = path.join(testDir, 'sample.csv');
|
|
14
|
+
const rows = ['Name,Age,City,Email'];
|
|
15
|
+
|
|
16
|
+
for (let i = 1; i <= 100; i++) {
|
|
17
|
+
rows.push(`User${i},${20 + i},City${i % 10},user${i}@example.com`);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
fs.writeFileSync(csvPath, rows.join('\n'));
|
|
21
|
+
console.log(`✓ Created sample CSV: ${csvPath}`);
|
|
22
|
+
return csvPath;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Test 1: Basic CSV to JSON (Batch mode)
|
|
26
|
+
async function test1() {
|
|
27
|
+
console.log('\n--- Test 1: Basic CSV to JSON (Batch) ---');
|
|
28
|
+
const csvPath = createSampleCSV();
|
|
29
|
+
|
|
30
|
+
try {
|
|
31
|
+
const result = await convertBatch(csvPath);
|
|
32
|
+
console.log(`✓ Converted ${result.length} rows`);
|
|
33
|
+
console.log('First row:', result[0]);
|
|
34
|
+
console.log('Last row:', result[result.length - 1]);
|
|
35
|
+
} catch (err) {
|
|
36
|
+
console.error('✗ Error:', err.message);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Test 2: Streaming mode with output file
|
|
41
|
+
async function test2() {
|
|
42
|
+
console.log('\n--- Test 2: Streaming Mode ---');
|
|
43
|
+
const csvPath = createSampleCSV();
|
|
44
|
+
const outputPath = path.join(testDir, 'output.json');
|
|
45
|
+
|
|
46
|
+
try {
|
|
47
|
+
const result = await convertStream(csvPath, outputPath, {
|
|
48
|
+
outputFormat: 'json',
|
|
49
|
+
onProgress: (progress) => {
|
|
50
|
+
if (progress.completed) {
|
|
51
|
+
console.log(`✓ Completed: ${progress.rowsProcessed} rows`);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
console.log(`✓ Output saved to: ${result.file}`);
|
|
57
|
+
const outputSize = fs.statSync(outputPath).size;
|
|
58
|
+
console.log(`File size: ${(outputSize / 1024).toFixed(2)} KB`);
|
|
59
|
+
} catch (err) {
|
|
60
|
+
console.error('✗ Error:', err.message);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Test 3: With transform and filter
|
|
65
|
+
async function test3() {
|
|
66
|
+
console.log('\n--- Test 3: Transform & Filter ---');
|
|
67
|
+
const csvPath = createSampleCSV();
|
|
68
|
+
|
|
69
|
+
try {
|
|
70
|
+
const result = await convertBatch(csvPath, {
|
|
71
|
+
filter: (row) => parseInt(row.Age) > 50,
|
|
72
|
+
transform: (row) => ({
|
|
73
|
+
name: row.Name.toUpperCase(),
|
|
74
|
+
age: parseInt(row.Age),
|
|
75
|
+
location: row.City,
|
|
76
|
+
contact: row.Email
|
|
77
|
+
})
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
console.log(`✓ Filtered to ${result.length} rows (Age > 50)`);
|
|
81
|
+
console.log('Sample transformed row:', result[0]);
|
|
82
|
+
} catch (err) {
|
|
83
|
+
console.error('✗ Error:', err.message);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Test 4: NDJSON output format
|
|
88
|
+
async function test4() {
|
|
89
|
+
console.log('\n--- Test 4: NDJSON Format ---');
|
|
90
|
+
const csvPath = createSampleCSV();
|
|
91
|
+
const outputPath = path.join(testDir, 'output.ndjson');
|
|
92
|
+
|
|
93
|
+
try {
|
|
94
|
+
await convertStream(csvPath, outputPath, {
|
|
95
|
+
outputFormat: 'ndjson'
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
console.log(`✓ NDJSON output saved to: ${outputPath}`);
|
|
99
|
+
const lines = fs.readFileSync(outputPath, 'utf8').split('\n').filter(Boolean);
|
|
100
|
+
console.log(`Lines written: ${lines.length}`);
|
|
101
|
+
console.log('First line:', JSON.parse(lines[0]));
|
|
102
|
+
} catch (err) {
|
|
103
|
+
console.error('✗ Error:', err.message);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Test 5: Large file simulation
|
|
108
|
+
async function test5() {
|
|
109
|
+
console.log('\n--- Test 5: Large File (25K rows) ---');
|
|
110
|
+
const largeCsvPath = path.join(testDir, 'large.csv');
|
|
111
|
+
const rows = ['ID,Name,Value,Status,Timestamp'];
|
|
112
|
+
|
|
113
|
+
for (let i = 1; i <= 25000; i++) {
|
|
114
|
+
rows.push(`${i},Item${i},${Math.random() * 1000},active,2024-01-01`);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
fs.writeFileSync(largeCsvPath, rows.join('\n'));
|
|
118
|
+
console.log(`✓ Created large CSV with 25,000 rows`);
|
|
119
|
+
|
|
120
|
+
const startTime = Date.now();
|
|
121
|
+
|
|
122
|
+
try {
|
|
123
|
+
const outputPath = path.join(testDir, 'large_output.json');
|
|
124
|
+
await convertStream(largeCsvPath, outputPath, {
|
|
125
|
+
onProgress: (progress) => {
|
|
126
|
+
if (progress.rowsProcessed % 5000 === 0 || progress.completed) {
|
|
127
|
+
console.log(`Progress: ${progress.rowsProcessed} rows processed`);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
|
|
133
|
+
console.log(`✓ Completed in ${duration}s`);
|
|
134
|
+
|
|
135
|
+
const outputSize = fs.statSync(outputPath).size;
|
|
136
|
+
console.log(`Output file size: ${(outputSize / 1024 / 1024).toFixed(2)} MB`);
|
|
137
|
+
} catch (err) {
|
|
138
|
+
console.error('✗ Error:', err.message);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Run all tests
|
|
143
|
+
async function runAllTests() {
|
|
144
|
+
console.log('=== File to JSON Converter Tests ===');
|
|
145
|
+
|
|
146
|
+
await test1();
|
|
147
|
+
await test2();
|
|
148
|
+
await test3();
|
|
149
|
+
await test4();
|
|
150
|
+
await test5();
|
|
151
|
+
|
|
152
|
+
console.log('\n=== All Tests Completed ===');
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Execute tests
|
|
156
|
+
runAllTests().catch(console.error);
|
|
File without changes
|