cisv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +158 -0
- package/.github/workflows/release.yml +167 -0
- package/Dockerfile +63 -0
- package/LICENSE +7 -0
- package/Makefile +160 -0
- package/README.md +249 -0
- package/SIMD_benchmarks.md +658 -0
- package/benchmark/benchmark.js +287 -0
- package/benchmark_cli_reader.sh +236 -0
- package/benchmark_cli_writer.sh +280 -0
- package/binding.gyp +57 -0
- package/debug-addon.js +64 -0
- package/examples/basic-parse.js +65 -0
- package/examples/large-file.js +35 -0
- package/examples/transform.js +152 -0
- package/examples/typescript.ts +38 -0
- package/index.d.ts +336 -0
- package/install_benchmark_deps.sh +156 -0
- package/package.json +47 -0
- package/run_benchmarks.sh +53 -0
- package/src/cisv_addon.cc +614 -0
- package/src/cisv_parser.c +988 -0
- package/src/cisv_parser.h +55 -0
- package/src/cisv_simd.h +53 -0
- package/src/cisv_transformer.c +537 -0
- package/src/cisv_transformer.h +145 -0
- package/src/cisv_writer.c +535 -0
- package/src/cisv_writer.h +60 -0
- package/src/index.ts +2 -0
- package/src/test/typescript.test.ts +43 -0
- package/src/win_getopt.h +100 -0
- package/src/win_sys_time.h +50 -0
- package/test/basic.test.js +104 -0
- package/test_select.sh +92 -0
- package/test_transform.sh +167 -0
- package/test_transform_leak_test.js +94 -0
- package/tsconfig.json +17 -0
- package/types/cisv.d.ts +8 -0
- package/valgrind-node.supp +69 -0
|
#!/bin/bash
#
# CISV Writer Benchmark Script
# Compares CSV writing performance of the cisv CLI against a C fprintf
# baseline, Python's csv module, Ruby's CSV, Node.js fast-csv and awk.
#
# Usage: ./benchmark_cli_writer.sh [--quick]
#   --quick  stop after the "medium" (100k row) size

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}=== CSV Writer Benchmark ===${NC}\n"

# Check if cisv is built
if [ ! -f "./cisv" ]; then
    echo -e "${YELLOW}Building cisv CLI...${NC}"
    make clean
    make cli
fi

# Test file sizes (label -> row count)
declare -A test_sizes=(
    ["small"]=1000
    ["medium"]=100000
    ["large"]=1000000
    ["xlarge"]=10000000
)

# benchmark_write NAME CMD OUTPUT ROWS
# Times CMD, then reports wall time, output size, MB/s and rows/sec.
# The output file is deleted afterwards so runs don't accumulate disk.
benchmark_write() {
    local name="$1"
    local cmd="$2"
    local output="$3"
    local rows="$4"

    echo -e "${BLUE}--- $name ---${NC}"

    # Get time with nanosecond precision
    local start end elapsed
    start=$(date +%s.%N)
    # Run inside 'if' so a failing benchmark is reported instead of
    # silently killing the whole script through 'set -e'.
    if ! eval "$cmd" > /dev/null 2>&1; then
        echo -e "${RED}Error: command failed: $cmd${NC}"
        echo ""
        return 0
    fi
    end=$(date +%s.%N)

    # Calculate elapsed time; clamp to avoid division by zero in bc below.
    elapsed=$(echo "$end - $start" | bc)
    if [ "$(echo "$elapsed <= 0" | bc)" -eq 1 ]; then
        elapsed="0.000001"
    fi

    # Get file size (GNU stat first, BSD stat fallback for macOS)
    if [ -f "$output" ]; then
        local size size_mb throughput rows_per_sec
        size=$(stat -c%s "$output" 2>/dev/null || stat -f%z "$output" 2>/dev/null)
        size_mb=$(echo "scale=2; $size / 1048576" | bc)
        throughput=$(echo "scale=2; $size_mb / $elapsed" | bc)
        rows_per_sec=$(echo "scale=0; $rows / $elapsed" | bc)

        echo "Time: ${elapsed} seconds"
        echo "Size: ${size_mb} MB"
        echo "Throughput: ${throughput} MB/s"
        echo "Rows/sec: ${rows_per_sec}"

        # Cleanup
        rm -f "$output"
    else
        echo "Error: Output file not created"
    fi
    echo ""
}

# Install dependencies if needed
install_deps() {
    echo -e "${YELLOW}Checking/installing dependencies...${NC}"

    # The Python writer below only uses the stdlib 'csv' module, so no
    # pip packages are required -- just check the interpreter exists.
    # (The previous 'pip3 install pandas' pulled in a dependency this
    # benchmark never imports.)
    if ! command -v python3 > /dev/null 2>&1; then
        echo "python3 not found, skipping Python benchmarks"
    fi

    # Check for Ruby
    if ! command -v ruby > /dev/null 2>&1; then
        echo "Ruby not found, skipping Ruby benchmarks"
    fi

    # Build generate_csv tool if needed
    if [ ! -f "./benchmark/generate_csv" ]; then
        echo "Building C comparison tool..."
        mkdir -p benchmark
        cat > benchmark/generate_csv.c << 'EOF'
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(int argc, char *argv[]) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <rows> <output>\n", argv[0]);
        return 1;
    }

    size_t rows = strtoull(argv[1], NULL, 10);
    FILE *out = fopen(argv[2], "w");
    if (!out) {
        perror("fopen");
        return 1;
    }

    // Write header
    fprintf(out, "id,name,email,value,timestamp\n");

    // Write rows
    for (size_t i = 0; i < rows; i++) {
        fprintf(out, "%zu,User_%zu,user%zu@example.com,%.2f,2024-01-01 00:00:00\n",
                i + 1, i, i, i * 1.23);
    }

    fclose(out);
    return 0;
}
EOF
        gcc -O3 -o benchmark/generate_csv benchmark/generate_csv.c
    fi

    echo ""
}

# Python CSV writer
create_python_writer() {
    cat > benchmark/write_csv.py << 'EOF'
import sys
import csv
from datetime import datetime

def generate_csv(rows, output):
    with open(output, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'name', 'email', 'value', 'timestamp'])

        for i in range(rows):
            writer.writerow([
                i + 1,
                f'User_{i}',
                f'user{i}@example.com',
                round(i * 1.23, 2),
                '2024-01-01 00:00:00'
            ])

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print(f"Usage: {sys.argv[0]} <rows> <output>")
        sys.exit(1)

    generate_csv(int(sys.argv[1]), sys.argv[2])
EOF
}

# Ruby CSV writer
create_ruby_writer() {
    cat > benchmark/write_csv.rb << 'EOF'
require 'csv'

if ARGV.length != 2
  puts "Usage: #{$0} <rows> <output>"
  exit 1
end

rows = ARGV[0].to_i
output = ARGV[1]

CSV.open(output, 'w') do |csv|
  csv << ['id', 'name', 'email', 'value', 'timestamp']

  rows.times do |i|
    csv << [
      i + 1,
      "User_#{i}",
      "user#{i}@example.com",
      (i * 1.23).round(2),
      '2024-01-01 00:00:00'
    ]
  end
end
EOF
}

# Node.js CSV writer using fast-csv
create_node_writer() {
    cat > benchmark/write_csv.js << 'EOF'
const fs = require('fs');
const fastcsv = require('fast-csv');

if (process.argv.length !== 4) {
    console.error(`Usage: ${process.argv[1]} <rows> <output>`);
    process.exit(1);
}

const rows = parseInt(process.argv[2]);
const output = process.argv[3];

const ws = fs.createWriteStream(output);
const csvStream = fastcsv.format({ headers: true });

csvStream.pipe(ws);

for (let i = 0; i < rows; i++) {
    csvStream.write({
        id: i + 1,
        name: `User_${i}`,
        email: `user${i}@example.com`,
        value: (i * 1.23).toFixed(2),
        timestamp: '2024-01-01 00:00:00'
    });
}

csvStream.end();
EOF
}

# Setup
install_deps
create_python_writer
create_ruby_writer
create_node_writer

# Run benchmarks for each size
for size_name in small medium large xlarge; do
    rows=${test_sizes[$size_name]}
    echo -e "${GREEN}=== Testing $size_name ($rows rows) ===${NC}\n"

    # CISV writer
    benchmark_write "cisv write" \
        "./cisv write -g $rows -o bench_cisv.csv -b" \
        "bench_cisv.csv" \
        "$rows"

    # C fprintf baseline
    benchmark_write "C fprintf" \
        "./benchmark/generate_csv $rows bench_c.csv" \
        "bench_c.csv" \
        "$rows"

    # Python csv module (skipped when python3 is unavailable)
    if command -v python3 > /dev/null 2>&1; then
        benchmark_write "Python csv" \
            "python3 benchmark/write_csv.py $rows bench_python.csv" \
            "bench_python.csv" \
            "$rows"
    fi

    # Ruby CSV
    if command -v ruby > /dev/null 2>&1; then
        benchmark_write "Ruby CSV" \
            "ruby benchmark/write_csv.rb $rows bench_ruby.csv" \
            "bench_ruby.csv" \
            "$rows"
    fi

    # Node.js fast-csv (if available). Check the package directory, not
    # index.js: fast-csv's entry point lives at build/src/index.js, so
    # the old '-f node_modules/fast-csv/index.js' test never passed.
    if command -v node > /dev/null 2>&1 && [ -d "node_modules/fast-csv" ]; then
        benchmark_write "Node.js fast-csv" \
            "node benchmark/write_csv.js $rows bench_node.csv" \
            "bench_node.csv" \
            "$rows"
    fi

    # awk baseline
    benchmark_write "awk" \
        "awk 'BEGIN{print \"id,name,email,value,timestamp\"; for(i=1;i<=$rows;i++) printf \"%d,User_%d,user%d@example.com,%.2f,2024-01-01 00:00:00\\n\",i,i-1,i-1,(i-1)*1.23}' > bench_awk.csv" \
        "bench_awk.csv" \
        "$rows"

    echo -e "${BLUE}$(printf '=%.0s' {1..60})${NC}\n"

    # Only run small and medium for quick tests
    if [[ "$1" == "--quick" && "$size_name" == "medium" ]]; then
        break
    fi
done

# Cleanup
rm -f benchmark/write_csv.py benchmark/write_csv.rb benchmark/write_csv.js
rm -f benchmark/generate_csv.c benchmark/generate_csv

echo -e "${GREEN}Benchmark complete!${NC}"
package/binding.gyp
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"targets": [
|
|
3
|
+
{
|
|
4
|
+
"target_name": "cisv",
|
|
5
|
+
"sources": [
|
|
6
|
+
"src/cisv_addon.cc",
|
|
7
|
+
"src/cisv_parser.c",
|
|
8
|
+
"src/cisv_transformer.c"
|
|
9
|
+
],
|
|
10
|
+
"include_dirs": [
|
|
11
|
+
"<!@(node -p \"require('node-addon-api').include\")",
|
|
12
|
+
"src/"
|
|
13
|
+
],
|
|
14
|
+
"dependencies": [
|
|
15
|
+
"<!(node -p \"require('node-addon-api').gyp\")"
|
|
16
|
+
],
|
|
17
|
+
"cflags!": [ "-fno-exceptions" ],
|
|
18
|
+
"cflags": ["-O3", "-mavx2"],
|
|
19
|
+
"cflags_cc!": [ "-fno-exceptions" ],
|
|
20
|
+
"defines": [
|
|
21
|
+
"NAPI_DISABLE_CPP_EXCEPTIONS",
|
|
22
|
+
"NAPI_VERSION=6"
|
|
23
|
+
],
|
|
24
|
+
"conditions": [
|
|
25
|
+
["OS=='linux'", {
|
|
26
|
+
"cflags": [
|
|
27
|
+
"-O3",
|
|
28
|
+
"-march=native",
|
|
29
|
+
"-mtune=native",
|
|
30
|
+
"-ffast-math"
|
|
31
|
+
],
|
|
32
|
+
"cflags_cc": [
|
|
33
|
+
"-O3",
|
|
34
|
+
"-march=native",
|
|
35
|
+
"-mtune=native",
|
|
36
|
+
"-ffast-math"
|
|
37
|
+
]
|
|
38
|
+
}],
|
|
39
|
+
["OS=='mac'", {
|
|
40
|
+
"xcode_settings": {
|
|
41
|
+
"GCC_OPTIMIZATION_LEVEL": "3",
|
|
42
|
+
"OTHER_CFLAGS": [
|
|
43
|
+
"-march=native",
|
|
44
|
+
"-mtune=native",
|
|
45
|
+
"-ffast-math"
|
|
46
|
+
],
|
|
47
|
+
"OTHER_CPLUSPLUSFLAGS": [
|
|
48
|
+
"-march=native",
|
|
49
|
+
"-mtune=native",
|
|
50
|
+
"-ffast-math"
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
}]
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
]
|
|
57
|
+
}
|
// debug-addon.js
// Smoke-test for the compiled cisv native addon: load it, instantiate
// the parser, enumerate its methods, and exercise transform() once.
console.log('=== Debugging Node.js Addon ===\n');

try {
    console.log('1. Attempting to load the addon...');
    // Load the compiled binary by name. Requiring the bare
    // './build/Release/' directory always fails: node-gyp emits
    // cisv.node there but no index.js/package.json, so directory
    // resolution cannot succeed. Every other file in this package
    // loads 'build/Release/cisv'; stay consistent with them.
    const addon = require('./build/Release/cisv');
    console.log('✓ Addon loaded successfully');
    console.log('  Addon exports:', Object.keys(addon));

    if (!addon.cisvParser) {
        console.error('✗ cisvParser class not found in exports');
        console.log('  Available exports:', addon);
        process.exit(1);
    }

    console.log('\n2. Creating parser instance...');
    const parser = new addon.cisvParser();
    console.log('✓ Parser instance created');

    console.log('\n3. Checking available methods...');
    const prototype = Object.getPrototypeOf(parser);
    const methods = Object.getOwnPropertyNames(prototype);
    console.log('  Available methods:', methods);

    console.log('\n4. Checking transform method specifically...');
    console.log('  transform exists:', 'transform' in parser);
    console.log('  transform type:', typeof parser.transform);

    if (typeof parser.transform === 'function') {
        console.log('✓ transform method is available and is a function');

        console.log('\n5. Testing basic transform call...');
        try {
            // Test with minimal arguments
            const result = parser.transform(0, 'uppercase');
            console.log('✓ transform method call succeeded');
            console.log('  Result:', result);
        } catch (err) {
            console.error('✗ transform method call failed:', err.message);
            console.error('  Stack:', err.stack);
        }
    } else {
        console.error('✗ transform method is not a function');
        console.log('  Actual type:', typeof parser.transform);
        console.log('  Value:', parser.transform);
    }

    console.log('\n6. Checking other methods...');
    ['parseSync', 'write', 'end', 'getRows', 'transformField', 'transformRow', 'transformSchema'].forEach(method => {
        console.log(`  ${method}: ${typeof parser[method]}`);
    });

} catch (error) {
    console.error('✗ Failed to load or test addon:', error.message);
    console.error('Full error:', error);

    console.log('\nTroubleshooting steps:');
    console.log('1. Check if addon compiled: ls -la build/Release/');
    console.log('2. Try rebuilding: npm rebuild or node-gyp rebuild');
    console.log('3. Check your index.js exports the right module');
    console.log('4. Verify binding.gyp configuration');
}

console.log('\n=== Debug Complete ===');
|
'use strict';
const { cisvParser } = require('../build/Release/cisv');
const fs = require('fs');
const path = require('path');

const dataFilePath = path.join(__dirname, '../fixtures/data.csv');

// Create the fixture on first run so the example is self-contained.
if (!fs.existsSync(dataFilePath)) {
    console.log('Creating a sample data.csv file...');
    const sampleData = `id,name,email,city
1,John Doe,john.doe@email.com,New York
2,Jane Smith,"jane.smith@email.com",Los Angeles
3,Peter Jones,peter.jones@email.com,"San Francisco"
4,Mary Williams,"mary.w@email.com",Chicago`;
    fs.writeFileSync(dataFilePath, sampleData);
    console.log('Sample data.csv created.\n');
}

// --- Synchronous parsing (for comparison) ---
try {
    const blockingParser = new cisvParser();
    const parsed = blockingParser.parseSync(dataFilePath);
    console.log(`Sync parsing successful. Total rows found: ${parsed.length}`);
} catch (err) {
    console.error('Sync parsing failed:', err);
}

console.log('\n' + '-'.repeat(40) + '\n');

// --- Streaming parsing ---
console.log('Starting stream parsing...');
const chunkParser = new cisvParser();

fs.createReadStream(dataFilePath)
    .on('data', (chunk) => {
        try {
            chunkParser.write(chunk);
        } catch (err) {
            console.error('Error during stream write:', err);
        }
    })
    .on('end', () => {
        console.log('Stream finished.');

        // Finalize parsing; flushes any buffered trailing data.
        chunkParser.end();

        // Retrieve everything parsed from the stream.
        const allRows = chunkParser.getRows();

        console.log(`Total rows from stream: ${allRows.length}`);

        if (allRows.length > 2) {
            // Random-access example: grab the 3rd line (index 2).
            const thirdLine = allRows[2];
            console.log('Getting a specific line (line 3):', thirdLine);
        } else {
            console.log('Not enough rows to get the 3rd line.');
        }
    })
    .on('error', (err) => {
        console.error('Stream error:', err);
    });
const { cisvParser } = require('../build/Release/cisv');
const fs = require('fs');
const path = require('path');

// Process large CSV files with progress reporting.
// Streams the file through the native parser and logs progress roughly
// once per megabyte of input consumed.
function processLargeFile(filePath) {
    const parser = new cisvParser();
    const stats = fs.statSync(filePath);
    const totalSize = stats.size;
    let processed = 0;
    // Next byte count at which to emit a progress line. The previous
    // check (processed % 1MB === 0) essentially never fired because
    // stream chunk lengths are not exact multiples of 1 MB.
    let nextReport = 1024 * 1024;

    const stream = fs.createReadStream(filePath);
    const startTime = process.hrtime();

    stream.on('data', (chunk) => {
        processed += chunk.length;
        parser.write(chunk);

        // Report progress every MB
        if (processed >= nextReport) {
            nextReport = processed + 1024 * 1024;
            const percent = (processed / totalSize * 100).toFixed(1);
            const [seconds] = process.hrtime(startTime);
            console.log(`Processed: ${percent}% (${seconds}s)`);
        }
    });

    stream.on('end', () => {
        parser.end();
        const rows = parser.getRows();
        const [seconds] = process.hrtime(startTime);
        console.log(`Completed! Processed ${rows.length} rows in ${seconds}s`);
    });

    // Without this handler a read failure (e.g. missing file) raises an
    // unhandled 'error' event and crashes the process.
    stream.on('error', (err) => {
        console.error(`Failed to read ${filePath}:`, err);
    });
}

processLargeFile(process.argv[2] || path.join(__dirname, '../fixtures/large.csv'));
const { cisvParser, TransformType } = require('../build/Release/cisv');
const fs = require('fs');
const path = require('path');

// Fixture used by most of the examples below; created once on demand.
const testFile = path.join(__dirname, 'transform_test.csv');
if (!fs.existsSync(testFile)) {
    fs.writeFileSync(testFile, `id,name,email,amount,date
1,john doe,JOHN@EXAMPLE.COM,1234.56,2024-01-01
2, jane smith ,jane@EXAMPLE.com,2345.67,2024-01-02
3,bob johnson,BOB@EXAMPLE.COM,3456.78,2024-01-03`);
}

console.log('=== Native C Transform Examples ===\n');
console.log('Available transform types:', TransformType);
console.log('');

// Example 1: chain several native transforms on one parser.
console.log('1. Native C transforms:');
const chainedParser = new cisvParser()
    .transform(1, 'uppercase')   // name -> upper case (C implementation)
    .transform(2, 'lowercase')   // email -> lower case (C implementation)
    .transform(3, 'to_float');   // amount -> float (C implementation)
console.log('Transformed with C code:', chainedParser.parseSync(testFile)[1]);

// Example 2: whitespace trimming in C.
console.log('\n2. Native trim:');
const trimParser = new cisvParser();
trimParser.transform(1, 'trim');
const trimmedRows = trimParser.parseSync(testFile);
console.log('Original: " jane smith "');
console.log('Trimmed with C:', `"${trimmedRows[2][1]}"`);

// Example 3: SHA256 hashing of a column.
console.log('\n3. Native SHA256:');
const hashParser = new cisvParser();
hashParser.transform(2, 'hash_sha256');
console.log('Hashed email (C implementation):', hashParser.parseSync(testFile)[1][2]);

// Example 4: Base64 encoding of a column.
console.log('\n4. Native Base64:');
const b64Parser = new cisvParser();
b64Parser.transform(1, 'base64_encode');
const encodedRows = b64Parser.parseSync(testFile);
console.log('Base64 encoded names (C):', encodedRows.slice(1).map(row => row[1]));

// Example 5: several transforms spread across different fields.
console.log('\n5. Multiple native transforms:');
const multiParser = new cisvParser()
    .transform(0, 'to_int')      // ID to int
    .transform(1, 'trim')        // Trim name
    .transform(1, 'uppercase')   // Then uppercase
    .transform(2, 'lowercase')   // Email lowercase
    .transform(3, 'to_float');   // Amount to float
console.log('Multiple transforms:', multiParser.parseSync(testFile)[1]);

// Example 6: timing the parser with and without transforms attached.
console.log('\n6. Performance test (native C transforms):');

const largeCsv = 'large_native_test.csv';
const rowCount = 100000;
console.log(`Generating ${rowCount} rows...`);

const lines = ['id,name,email,amount'];
for (let i = 0; i < rowCount; i++) {
    lines.push(`${i}, user ${i} ,USER${i}@EXAMPLE.COM,${i * 1.23}`);
}
fs.writeFileSync(largeCsv, lines.join('\n') + '\n');

console.time('No transforms');
const plainParser = new cisvParser();
plainParser.parseSync(largeCsv);
console.timeEnd('No transforms');

console.time('With C transforms');
const timedParser = new cisvParser()
    .transform(1, 'trim')
    .transform(1, 'uppercase')
    .transform(2, 'lowercase')
    .transform(3, 'to_float');
timedParser.parseSync(largeCsv);
console.timeEnd('With C transforms');

console.log('Parse stats:', timedParser.getStats());

// Example 7: parsing an in-memory string rather than a file.
console.log('\n7. Parse string with transforms:');
const stringParser = new cisvParser()
    .transform(0, 'uppercase')
    .transform(1, 'to_int');
const stringRows = stringParser.parseString(`name,value
test one,123
TEST TWO,456`);
console.log('Parsed string:', stringRows);

// Example 8: incremental (streaming) input with a transform attached.
console.log('\n8. Streaming with transforms:');
const streamingParser = new cisvParser();
streamingParser.transform(0, 'uppercase');

for (const piece of ['name,value\n', 'test,123\n', 'another,456\n']) {
    streamingParser.write(piece);
}
streamingParser.end();

console.log('Streamed rows:', streamingParser.getRows());

// Example 9: static row-count helper (no instance required).
console.log('\n9. Count rows:');
console.log(`File has ${cisvParser.countRows(testFile)} rows`);

// Example 10: clearing state and swapping transforms between parses.
console.log('\n10. Reuse parser:');
const reusableParser = new cisvParser();
reusableParser.transform(0, 'uppercase');

const firstParse = reusableParser.parseSync(testFile);
console.log('First parse rows:', firstParse.length);

reusableParser.clear();
reusableParser.clearTransforms();
reusableParser.transform(1, 'lowercase');

const secondParse = reusableParser.parseSync(testFile);
console.log('Second parse rows:', secondParse.length);

// Remove the generated performance fixture.
fs.unlinkSync(largeCsv);

console.log('\n=== Native Transform Examples Complete ===');
console.log('All transforms executed using optimized C code!');
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { join } from 'path';
|
|
2
|
+
import { createReadStream } from 'fs';
|
|
3
|
+
|
|
4
|
+
declare module '../build/Release/cisv' {
|
|
5
|
+
export class cisvParser {
|
|
6
|
+
parseSync(path: string): string[][];
|
|
7
|
+
write(chunk: Buffer): void;
|
|
8
|
+
end(): void;
|
|
9
|
+
getRows(): string[][];
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
const { cisvParser } = require('../build/Release/cisv') as {
|
|
14
|
+
cisvParser: typeof cisvParser
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
async function processCSV(filePath: string) {
|
|
18
|
+
const parser = new cisvParser();
|
|
19
|
+
const stream = createReadStream(filePath);
|
|
20
|
+
|
|
21
|
+
return new Promise<string[][]>((resolve, reject) => {
|
|
22
|
+
stream.on('data', chunk => parser.write(chunk));
|
|
23
|
+
stream.on('end', () => {
|
|
24
|
+
parser.end();
|
|
25
|
+
resolve(parser.getRows());
|
|
26
|
+
});
|
|
27
|
+
stream.on('error', reject);
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
(async () => {
|
|
32
|
+
try {
|
|
33
|
+
const rows = await processCSV(join(__dirname, '../fixtures/data.csv'));
|
|
34
|
+
console.log(`Processed ${rows.length} rows`);
|
|
35
|
+
} catch (err) {
|
|
36
|
+
console.error('Error:', err);
|
|
37
|
+
}
|
|
38
|
+
})();
|