cisv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +158 -0
- package/.github/workflows/release.yml +167 -0
- package/Dockerfile +63 -0
- package/LICENSE +7 -0
- package/Makefile +160 -0
- package/README.md +249 -0
- package/SIMD_benchmarks.md +658 -0
- package/benchmark/benchmark.js +287 -0
- package/benchmark_cli_reader.sh +236 -0
- package/benchmark_cli_writer.sh +280 -0
- package/binding.gyp +57 -0
- package/debug-addon.js +64 -0
- package/examples/basic-parse.js +65 -0
- package/examples/large-file.js +35 -0
- package/examples/transform.js +152 -0
- package/examples/typescript.ts +38 -0
- package/index.d.ts +336 -0
- package/install_benchmark_deps.sh +156 -0
- package/package.json +47 -0
- package/run_benchmarks.sh +53 -0
- package/src/cisv_addon.cc +614 -0
- package/src/cisv_parser.c +988 -0
- package/src/cisv_parser.h +55 -0
- package/src/cisv_simd.h +53 -0
- package/src/cisv_transformer.c +537 -0
- package/src/cisv_transformer.h +145 -0
- package/src/cisv_writer.c +535 -0
- package/src/cisv_writer.h +60 -0
- package/src/index.ts +2 -0
- package/src/test/typescript.test.ts +43 -0
- package/src/win_getopt.h +100 -0
- package/src/win_sys_time.h +50 -0
- package/test/basic.test.js +104 -0
- package/test_select.sh +92 -0
- package/test_transform.sh +167 -0
- package/test_transform_leak_test.js +94 -0
- package/tsconfig.json +17 -0
- package/types/cisv.d.ts +8 -0
- package/valgrind-node.supp +69 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ${{ matrix.os }}
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
os: [ubuntu-latest] # macos-latest, windows-latest # skipped for now
|
|
11
|
+
node-version: [23.x, 24.x]
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v3
|
|
14
|
+
- name: Use Node.js ${{ matrix.node-version }}
|
|
15
|
+
uses: actions/setup-node@v3
|
|
16
|
+
with:
|
|
17
|
+
node-version: ${{ matrix.node-version }}
|
|
18
|
+
- run: npm install
|
|
19
|
+
- run: npm test
|
|
20
|
+
- name: Build
|
|
21
|
+
run: |
|
|
22
|
+
npm run build
|
|
23
|
+
npm run test:build
|
|
24
|
+
|
|
25
|
+
cli-build:
|
|
26
|
+
runs-on: ${{ matrix.os }}
|
|
27
|
+
strategy:
|
|
28
|
+
matrix:
|
|
29
|
+
os: [ubuntu-latest] # macos-latest ignored for now
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/checkout@v3
|
|
32
|
+
- name: Build CLI
|
|
33
|
+
run: |
|
|
34
|
+
npm install -g node-gyp
|
|
35
|
+
make clean
|
|
36
|
+
make build
|
|
37
|
+
make cli
|
|
38
|
+
- name: Test CLI basic functionality
|
|
39
|
+
run: |
|
|
40
|
+
./cisv --version
|
|
41
|
+
./cisv --help
|
|
42
|
+
echo "id,name,value" > test.csv
|
|
43
|
+
echo "1,test,100" >> test.csv
|
|
44
|
+
echo "2,demo,200" >> test.csv
|
|
45
|
+
./cisv -c test.csv | grep -q "3" # including headers
|
|
46
|
+
./cisv test.csv
|
|
47
|
+
./cisv -s 0,2 test.csv
|
|
48
|
+
./cisv --head 1 test.csv
|
|
49
|
+
- name: Upload CLI binary
|
|
50
|
+
uses: actions/upload-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: cisv-${{ matrix.os }}
|
|
53
|
+
path: cisv
|
|
54
|
+
|
|
55
|
+
memory-check:
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
needs: cli-build
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/checkout@v3
|
|
60
|
+
- name: Install memory checking tools
|
|
61
|
+
run: |
|
|
62
|
+
sudo apt-get update
|
|
63
|
+
sudo apt-get install -y valgrind cppcheck clang-tools
|
|
64
|
+
- name: Build CLI with debug symbols
|
|
65
|
+
run: |
|
|
66
|
+
npm install -g node-gyp
|
|
67
|
+
make clean
|
|
68
|
+
make build
|
|
69
|
+
CFLAGS="-g -O0" LDFLAGS="" make cli  # empty LDFLAGS overrides the Makefile default "-flto -s", whose -s would strip the debug symbols
|
|
70
|
+
|
|
71
|
+
- name: Run memory leak tests with Valgrind
|
|
72
|
+
run: |
|
|
73
|
+
# Run with valgrind leak check
|
|
74
|
+
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose ./cisv --version 2>&1 | tee valgrind-version.log
|
|
75
|
+
# Test basic CSV parsing
|
|
76
|
+
echo "id,name,value" > memtest.csv
|
|
77
|
+
echo "1,test,100" >> memtest.csv
|
|
78
|
+
echo "2,demo,200" >> memtest.csv
|
|
79
|
+
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv memtest.csv 2>&1 | tee valgrind-basic.log
|
|
80
|
+
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv -c memtest.csv 2>&1 | tee valgrind-count.log
|
|
81
|
+
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv -s 0,2 memtest.csv 2>&1 | tee valgrind-select.log
|
|
82
|
+
# Check for memory leaks
|
|
83
|
+
if grep -E "definitely lost: [1-9]" valgrind-*.log; then exit 1; fi  # explicit exit: a '!'-inverted command never trips 'set -e'; only non-zero byte counts are leaks
|
|
84
|
+
if grep -E "indirectly lost: [1-9]" valgrind-*.log; then exit 1; fi  # the leak summary also prints "indirectly lost: 0 bytes", which must not fail the job
|
|
85
|
+
|
|
86
|
+
- name: Run memory deep leak tests on reader and writer
|
|
87
|
+
run: |
|
|
88
|
+
chmod +x test_select.sh
|
|
89
|
+
bash ./test_select.sh
|
|
90
|
+
|
|
91
|
+
- name: Run memory deep leak tests on transform
|
|
92
|
+
run: |
|
|
93
|
+
chmod +x test_transform.sh
|
|
94
|
+
bash ./test_transform.sh
|
|
95
|
+
|
|
96
|
+
- name: Run AddressSanitizer tests
|
|
97
|
+
run: |
|
|
98
|
+
make clean
|
|
99
|
+
CC=clang CFLAGS="-g -O0 -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-fsanitize=address" make cli
|
|
100
|
+
./cisv --version
|
|
101
|
+
echo "id,name,value,x" > test_x.csv
|
|
102
|
+
echo "1,test,100,a" >> test_x.csv
|
|
103
|
+
echo "2,demo,200,b" >> test_x.csv
|
|
104
|
+
./cisv -c test_x.csv
|
|
105
|
+
./cisv -s 0,2,3 test_x.csv
|
|
106
|
+
- name: Upload memory check results
|
|
107
|
+
if: failure()
|
|
108
|
+
uses: actions/upload-artifact@v4
|
|
109
|
+
with:
|
|
110
|
+
name: memory-check-logs
|
|
111
|
+
path: valgrind-*.log
|
|
112
|
+
|
|
113
|
+
#benchmark:
|
|
114
|
+
# runs-on: ubuntu-latest
|
|
115
|
+
# needs: [build, cli-build, memory-check]
|
|
116
|
+
# steps:
|
|
117
|
+
# - uses: actions/checkout@v3
|
|
118
|
+
# - uses: actions/setup-node@v3
|
|
119
|
+
# with:
|
|
120
|
+
# node-version: 23.x
|
|
121
|
+
# - name: Install benchmark dependencies
|
|
122
|
+
# run: |
|
|
123
|
+
# sudo apt-get update
|
|
124
|
+
# sudo apt-get install -y miller bc time
|
|
125
|
+
# pip3 install --upgrade pip
|
|
126
|
+
# pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0'
|
|
127
|
+
# - name: Install Rust tools
|
|
128
|
+
# uses: actions-rs/toolchain@v1
|
|
129
|
+
# with:
|
|
130
|
+
# toolchain: stable
|
|
131
|
+
# override: true
|
|
132
|
+
# - name: Install xsv and qsv
|
|
133
|
+
# run: |
|
|
134
|
+
# cargo install xsv
|
|
135
|
+
# cargo install qsv
|
|
136
|
+
# - name: Build cisv CLI
|
|
137
|
+
# run: |
|
|
138
|
+
# make clean
|
|
139
|
+
# make cli
|
|
140
|
+
# - name: Run Node.js benchmarks
|
|
141
|
+
# run: |
|
|
142
|
+
# npm install
|
|
143
|
+
# npm run benchmark
|
|
144
|
+
# - name: Run CLI benchmarks
|
|
145
|
+
# run: |
|
|
146
|
+
# make benchmark-cli
|
|
147
|
+
# - name: Generate benchmark report
|
|
148
|
+
# run: |
|
|
149
|
+
# echo "## Benchmark Results" > benchmark-report.md
|
|
150
|
+
# echo "Date: $(date)" >> benchmark-report.md
|
|
151
|
+
# echo "Commit: ${{ github.sha }}" >> benchmark-report.md
|
|
152
|
+
# echo "" >> benchmark-report.md
|
|
153
|
+
# make benchmark-cli >> benchmark-report.md 2>&1
|
|
154
|
+
# - name: Upload benchmark results
|
|
155
|
+
# uses: actions/upload-artifact@v3
|
|
156
|
+
# with:
|
|
157
|
+
# name: benchmark-results
|
|
158
|
+
# path: benchmark-report.md
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
packages: read
|
|
11
|
+
env:
|
|
12
|
+
VERSION: ${{ github.ref_name }}
|
|
13
|
+
jobs:
|
|
14
|
+
build:
|
|
15
|
+
runs-on: ${{ matrix.os }}
|
|
16
|
+
strategy:
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest] # , macos-latest
|
|
19
|
+
arch: [amd64] # arm64 disabled: ubuntu-latest is an x86-64 runner, so that leg only produced an x86-64 build labelled arm64
|
|
20
|
+
include:
|
|
21
|
+
- os: ubuntu-latest
|
|
22
|
+
pkg_os: ubuntu
|
|
23
|
+
pkg_type: deb
|
|
24
|
+
# - os: macos-latest
|
|
25
|
+
# pkg_os: macos
|
|
26
|
+
# pkg_type: tar.gz
|
|
27
|
+
|
|
28
|
+
steps:
|
|
29
|
+
- name: Checkout
|
|
30
|
+
uses: actions/checkout@v4
|
|
31
|
+
with:
|
|
32
|
+
fetch-depth: 0
|
|
33
|
+
|
|
34
|
+
- name: Setup Node.js
|
|
35
|
+
uses: actions/setup-node@v3
|
|
36
|
+
with:
|
|
37
|
+
node-version: '22.x'
|
|
38
|
+
|
|
39
|
+
- name: Install build dependencies
|
|
40
|
+
shell: bash
|
|
41
|
+
run: |
|
|
42
|
+
npm install -g node-gyp node-addon-api
|
|
43
|
+
npm install
|
|
44
|
+
|
|
45
|
+
- name: Build CLI binary
|
|
46
|
+
shell: bash
|
|
47
|
+
run: |
|
|
48
|
+
# Set platform-specific variables
|
|
49
|
+
OS_NAME=$(echo "${{ matrix.os }}" | sed 's/-latest//')
|
|
50
|
+
ARCH="${{ matrix.arch }}"
|
|
51
|
+
EXT=""
|
|
52
|
+
|
|
53
|
+
# Build cisv with version info
|
|
54
|
+
npm install -g node-gyp
|
|
55
|
+
make clean
|
|
56
|
+
make build
|
|
57
|
+
CFLAGS="-DCISV_VERSION=\"${VERSION}\" -O3 -march=native" make cli
|
|
58
|
+
|
|
59
|
+
# Rename output file
|
|
60
|
+
OUTPUT="cisv-${{ matrix.pkg_os }}-${ARCH}${EXT}"
|
|
61
|
+
mv cisv${EXT} "$OUTPUT"
|
|
62
|
+
echo "OUTPUT_FILE=$OUTPUT" >> $GITHUB_ENV
|
|
63
|
+
|
|
64
|
+
- name: Build Node.js addon
|
|
65
|
+
shell: bash
|
|
66
|
+
run: |
|
|
67
|
+
npm run build
|
|
68
|
+
# Package node addon
|
|
69
|
+
ADDON_OUTPUT="cisv-node-${{ matrix.pkg_os }}-${VERSION}-${{ matrix.arch }}.node"  # use matrix.arch: the ARCH shell variable from the previous step is not set in this step
|
|
70
|
+
cp build/Release/cisv.node "$ADDON_OUTPUT"
|
|
71
|
+
echo "ADDON_FILE=$ADDON_OUTPUT" >> $GITHUB_ENV
|
|
72
|
+
|
|
73
|
+
- name: Package artifact
|
|
74
|
+
shell: bash
|
|
75
|
+
run: |
|
|
76
|
+
if [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
|
|
77
|
+
sudo apt-get update && sudo apt-get install -y ruby-dev rubygems
|
|
78
|
+
sudo gem install fpm
|
|
79
|
+
|
|
80
|
+
# Create directory structure for package
|
|
81
|
+
mkdir -p package/usr/local/bin
|
|
82
|
+
mkdir -p package/usr/local/lib/node_modules/cisv
|
|
83
|
+
|
|
84
|
+
cp "$OUTPUT_FILE" package/usr/local/bin/cisv
|
|
85
|
+
chmod +x package/usr/local/bin/cisv
|
|
86
|
+
|
|
87
|
+
# Package with fpm
|
|
88
|
+
fpm -s dir -t deb -n cisv -v ${VERSION#v} \
|
|
89
|
+
--architecture ${{ matrix.arch }} \
|
|
90
|
+
--description "The fastest CSV parser" \
|
|
91
|
+
--url "https://github.com/${{ github.repository }}" \
|
|
92
|
+
--maintainer "${{ github.repository_owner }}" \
|
|
93
|
+
-C package \
|
|
94
|
+
usr/local/bin
|
|
95
|
+
|
|
96
|
+
PACKAGE=$(ls cisv_*_${{ matrix.arch }}.deb)
|
|
97
|
+
else
|
|
98
|
+
# Create tarball with both CLI and Node addon
|
|
99
|
+
tar czf cisv-${{ matrix.pkg_os }}-${{ matrix.arch }}.tar.gz "$OUTPUT_FILE" "$ADDON_FILE" README.md LICENSE
|
|
100
|
+
PACKAGE="cisv-${{ matrix.pkg_os }}-${{ matrix.arch }}.tar.gz"
|
|
101
|
+
fi
|
|
102
|
+
echo "PACKAGE_FILE=$PACKAGE" >> $GITHUB_ENV
|
|
103
|
+
|
|
104
|
+
- name: Test binary
|
|
105
|
+
shell: bash
|
|
106
|
+
run: |
|
|
107
|
+
# Basic smoke test
|
|
108
|
+
./${{ env.OUTPUT_FILE }} --version
|
|
109
|
+
./${{ env.OUTPUT_FILE }} --help
|
|
110
|
+
|
|
111
|
+
# Create test CSV
|
|
112
|
+
echo "id,name,value" > test.csv
|
|
113
|
+
echo "1,test,100" >> test.csv
|
|
114
|
+
echo "2,demo,200" >> test.csv
|
|
115
|
+
|
|
116
|
+
# Test basic functionality
|
|
117
|
+
./${{ env.OUTPUT_FILE }} -c test.csv
|
|
118
|
+
./${{ env.OUTPUT_FILE }} test.csv
|
|
119
|
+
|
|
120
|
+
- name: Upload Release Assets
|
|
121
|
+
uses: softprops/action-gh-release@v1
|
|
122
|
+
with:
|
|
123
|
+
token: ${{ secrets.GITHUB_TOKEN }}
|
|
124
|
+
files: |
|
|
125
|
+
${{ env.OUTPUT_FILE }}
|
|
126
|
+
${{ env.ADDON_FILE }}
|
|
127
|
+
${{ env.PACKAGE_FILE }}
|
|
128
|
+
generate_release_notes: true
|
|
129
|
+
|
|
130
|
+
- name: Update Latest Tag
|
|
131
|
+
if: github.repository == 'Sanix-Darker/cisv'
|
|
132
|
+
uses: softprops/action-gh-release@v1
|
|
133
|
+
with:
|
|
134
|
+
token: ${{ secrets.GITHUB_TOKEN }}
|
|
135
|
+
tag_name: latest
|
|
136
|
+
name: latest
|
|
137
|
+
body: "latest release: ${{ env.VERSION }}"
|
|
138
|
+
files: |
|
|
139
|
+
${{ env.OUTPUT_FILE }}
|
|
140
|
+
${{ env.ADDON_FILE }}
|
|
141
|
+
${{ env.PACKAGE_FILE }}
|
|
142
|
+
|
|
143
|
+
# TODO: to do later
|
|
144
|
+
publish-npm:
|
|
145
|
+
needs: build
|
|
146
|
+
runs-on: ubuntu-latest
|
|
147
|
+
steps:
|
|
148
|
+
- uses: actions/checkout@v4
|
|
149
|
+
- uses: actions/setup-node@v3
|
|
150
|
+
with:
|
|
151
|
+
node-version: '23.x'
|
|
152
|
+
registry-url: 'https://registry.npmjs.org'
|
|
153
|
+
|
|
154
|
+
#- name: Update package version
|
|
155
|
+
# run: |
|
|
156
|
+
# npm version ${VERSION#v} --no-git-tag-version
|
|
157
|
+
|
|
158
|
+
- name: Build and test
|
|
159
|
+
run: |
|
|
160
|
+
npm install
|
|
161
|
+
npm run build
|
|
162
|
+
npm test
|
|
163
|
+
|
|
164
|
+
- name: Publish to npm
|
|
165
|
+
run: npm publish --access public
|
|
166
|
+
env:
|
|
167
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/Dockerfile
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Dockerfile for CISV benchmarking with CPU/RAM isolation
|
|
2
|
+
FROM ubuntu:22.04
|
|
3
|
+
|
|
4
|
+
# Prevent interactive prompts
|
|
5
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
6
|
+
|
|
7
|
+
# Install system dependencies
|
|
8
|
+
RUN apt-get update && apt-get install -y \
|
|
9
|
+
build-essential \
|
|
10
|
+
git \
|
|
11
|
+
curl \
|
|
12
|
+
wget \
|
|
13
|
+
python3 \
|
|
14
|
+
python3-pip \
|
|
15
|
+
bc \
|
|
16
|
+
time \
|
|
17
|
+
valgrind \
|
|
18
|
+
clang \
|
|
19
|
+
libc6-dev \
|
|
20
|
+
miller \
|
|
21
|
+
ruby \
|
|
22
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
23
|
+
|
|
24
|
+
# Install Node.js 20.x
|
|
25
|
+
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
|
26
|
+
&& apt-get install -y nodejs
|
|
27
|
+
|
|
28
|
+
# Install Rust
|
|
29
|
+
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
|
30
|
+
ENV PATH="/root/.cargo/bin:${PATH}"
|
|
31
|
+
|
|
32
|
+
# Install Rust CSV tools
|
|
33
|
+
RUN cargo install xsv qsv
|
|
34
|
+
|
|
35
|
+
# Install Python CSV tools
|
|
36
|
+
RUN pip3 install --upgrade pip && \
|
|
37
|
+
pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0' pandas
|
|
38
|
+
|
|
39
|
+
# Install Node.js CSV tools (with legacy peer deps for compatibility)
|
|
40
|
+
RUN npm install -g node-gyp node-addon-api fast-csv --legacy-peer-deps
|
|
41
|
+
|
|
42
|
+
# Create benchmark user and directory
|
|
43
|
+
RUN useradd -m -s /bin/bash benchmark
|
|
44
|
+
WORKDIR /home/benchmark
|
|
45
|
+
|
|
46
|
+
# Copy the project
|
|
47
|
+
COPY --chown=benchmark:benchmark . /home/benchmark/cisv
|
|
48
|
+
|
|
49
|
+
# Build cisv and dependencies
|
|
50
|
+
WORKDIR /home/benchmark/cisv
|
|
51
|
+
# Run install-benchmark-deps as well: it builds the rust-csv benchmark tool and (re)installs csvkit
|
|
52
|
+
RUN make clean && make install-benchmark-deps && make cli
|
|
53
|
+
|
|
54
|
+
# Create main benchmark runner script
|
|
55
|
+
COPY ./benchmark_cli_writer.sh ./benchmark_cli_reader.sh ./run_benchmarks.sh /home/benchmark/
|
|
56
|
+
RUN chmod +x /home/benchmark/run_benchmarks.sh
|
|
57
|
+
|
|
58
|
+
# Switch to benchmark user
|
|
59
|
+
USER benchmark
|
|
60
|
+
WORKDIR /home/benchmark/cisv
|
|
61
|
+
|
|
62
|
+
# Default command runs all benchmarks
|
|
63
|
+
CMD ["/home/benchmark/run_benchmarks.sh", "all"]
|
package/LICENSE
ADDED
package/Makefile
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
CC ?= gcc
|
|
2
|
+
CFLAGS ?= -O3 -march=native -pipe -fomit-frame-pointer -Wall -Wextra -std=c11 -flto -ffast-math -funroll-loops
|
|
3
|
+
LDFLAGS ?= -flto -s
|
|
4
|
+
NODE_GYP ?= node-gyp
|
|
5
|
+
|
|
6
|
+
# CLI binary name
|
|
7
|
+
CLI_BIN = cisv
|
|
8
|
+
# Include both parser and writer
|
|
9
|
+
CLI_SRC = src/cisv_parser.c src/cisv_writer.c src/cisv_transformer.c
|
|
10
|
+
CLI_OBJ = $(CLI_SRC:.c=.o)
|
|
11
|
+
|
|
12
|
+
# Build targets
|
|
13
|
+
all: build cli
|
|
14
|
+
|
|
15
|
+
install:
|
|
16
|
+
npm install -g node-gyp
|
|
17
|
+
npm install -g node-addon-api
|
|
18
|
+
|
|
19
|
+
build:
|
|
20
|
+
npm install
|
|
21
|
+
$(NODE_GYP) configure build CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)"
|
|
22
|
+
|
|
23
|
+
# Build CLI tool (cisv_parser.c contains main when CISV_CLI is defined)
|
|
24
|
+
cli: $(CLI_BIN)
|
|
25
|
+
|
|
26
|
+
$(CLI_BIN): $(CLI_OBJ)
|
|
27
|
+
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
|
28
|
+
|
|
29
|
+
# Compile with CISV_CLI defined to include CLI code
|
|
30
|
+
src/cisv_parser.o: src/cisv_parser.c
|
|
31
|
+
$(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_parser.o src/cisv_parser.c
|
|
32
|
+
|
|
33
|
+
src/cisv_writer.o: src/cisv_writer.c
|
|
34
|
+
$(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_writer.o src/cisv_writer.c
|
|
35
|
+
|
|
36
|
+
src/cisv_transformer.o: src/cisv_transformer.c
|
|
37
|
+
$(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_transformer.o src/cisv_transformer.c
|
|
38
|
+
|
|
39
|
+
# Install CLI tool to /usr/local/bin
|
|
40
|
+
install-cli: cli
|
|
41
|
+
install -m 755 $(CLI_BIN) /usr/local/bin/$(CLI_BIN)
|
|
42
|
+
|
|
43
|
+
# Install benchmark dependencies
|
|
44
|
+
install-benchmark-deps:
|
|
45
|
+
@echo "Installing benchmark dependencies..."
|
|
46
|
+
@# Install Rust if not present
|
|
47
|
+
@if ! command -v cargo > /dev/null; then \
|
|
48
|
+
echo "Installing Rust..."; \
|
|
49
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
|
|
50
|
+
. $$HOME/.cargo/env; \
|
|
51
|
+
fi
|
|
52
|
+
#@# Install xsv
|
|
53
|
+
#@if ! command -v xsv > /dev/null; then \
|
|
54
|
+
# echo "Installing xsv..."; \
|
|
55
|
+
# cargo install xsv; \
|
|
56
|
+
#fi
|
|
57
|
+
@# Install qsv (faster fork of xsv)
|
|
58
|
+
#@if ! command -v qsv > /dev/null; then \
|
|
59
|
+
# echo "Installing qsv..."; \
|
|
60
|
+
# cargo install qsv; \
|
|
61
|
+
#fi
|
|
62
|
+
@# Build rust-csv example
|
|
63
|
+
@echo "Building rust-csv benchmark tool..."
|
|
64
|
+
@mkdir -p benchmark/rust-csv-bench
|
|
65
|
+
@cd benchmark/rust-csv-bench && \
|
|
66
|
+
if [ ! -f Cargo.toml ]; then \
|
|
67
|
+
cargo init --name csv-bench; \
|
|
68
|
+
echo 'csv = "1.3"' >> Cargo.toml; \
|
|
69
|
+
fi && \
|
|
70
|
+
printf 'use std::env;\nuse std::error::Error;\nuse csv::ReaderBuilder;\n\nfn main() -> Result<(), Box<dyn Error>> {\n let args: Vec<String> = env::args().collect();\n if args.len() < 2 { eprintln!("Usage: {} <file>", args[0]); std::process::exit(1); }\n let mut rdr = ReaderBuilder::new().has_headers(true).from_path(&args[1])?;\n let count = rdr.records().count();\n println!("{}", count);\n Ok(())\n}\n' > src/main.rs && \
|
|
71
|
+
cargo build --release
|
|
72
|
+
@# Fix csvkit Python 3.12 compatibility issue and install
|
|
73
|
+
@if command -v pip3 > /dev/null; then \
|
|
74
|
+
echo "Installing/fixing csvkit..."; \
|
|
75
|
+
pip3 install --upgrade pip; \
|
|
76
|
+
pip3 uninstall -y csvkit babel || true; \
|
|
77
|
+
pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0'; \
|
|
78
|
+
fi
|
|
79
|
+
|
|
80
|
+
debug:
|
|
81
|
+
$(NODE_GYP) configure build --debug
|
|
82
|
+
|
|
83
|
+
clean:
|
|
84
|
+
$(NODE_GYP) clean
|
|
85
|
+
rm -rf build
|
|
86
|
+
rm -f $(CLI_BIN) $(CLI_OBJ)
|
|
87
|
+
rm -f src/cisv_cli.o # Clean old object file if exists
|
|
88
|
+
|
|
89
|
+
test: build
|
|
90
|
+
npm test
|
|
91
|
+
|
|
92
|
+
# Benchmark comparing cisv CLI with other tools
|
|
93
|
+
benchmark-cli: cli
|
|
94
|
+
@echo "=== Benchmarking CSV CLI tools ==="
|
|
95
|
+
@echo "Preparing test file..."
|
|
96
|
+
@python3 -c "import csv; w=csv.writer(open('bench_test.csv','w')); [w.writerow([i,f'name_{i}',f'email_{i}@test.com',f'city_{i}']) for i in range(1000000)]"
|
|
97
|
+
@echo ""
|
|
98
|
+
@echo "--- cisv (this project) ---"
|
|
99
|
+
@bash -c "TIMEFORMAT='real %3R'; time ./$(CLI_BIN) -c bench_test.csv 2>&1"
|
|
100
|
+
@if [ -f benchmark/rust-csv-bench/target/release/csv-bench ]; then \
|
|
101
|
+
echo ""; \
|
|
102
|
+
echo "--- rust-csv (Rust library) ---"; \
|
|
103
|
+
bash -c "TIMEFORMAT='real %3R'; time ./benchmark/rust-csv-bench/target/release/csv-bench bench_test.csv 2>&1"; \
|
|
104
|
+
fi
|
|
105
|
+
@if command -v xsv > /dev/null 2>&1; then \
|
|
106
|
+
echo ""; \
|
|
107
|
+
echo "--- xsv (Rust CLI) ---"; \
|
|
108
|
+
bash -c "TIMEFORMAT='real %3R'; time xsv count bench_test.csv 2>&1"; \
|
|
109
|
+
fi
|
|
110
|
+
@if command -v qsv > /dev/null 2>&1; then \
|
|
111
|
+
echo ""; \
|
|
112
|
+
echo "--- qsv (Rust CLI - faster xsv fork) ---"; \
|
|
113
|
+
bash -c "TIMEFORMAT='real %3R'; time qsv count bench_test.csv 2>&1"; \
|
|
114
|
+
fi
|
|
115
|
+
@if command -v wc > /dev/null 2>&1; then \
|
|
116
|
+
echo ""; \
|
|
117
|
+
echo "--- wc -l (baseline) ---"; \
|
|
118
|
+
bash -c "TIMEFORMAT='real %3R'; time wc -l bench_test.csv 2>&1"; \
|
|
119
|
+
fi
|
|
120
|
+
@if command -v csvstat > /dev/null 2>&1; then \
|
|
121
|
+
echo ""; \
|
|
122
|
+
echo "--- csvkit (Python) ---"; \
|
|
123
|
+
bash -c "TIMEFORMAT='real %3R'; time csvstat --count bench_test.csv 2>&1" || echo "csvkit failed - may need: pip3 install --upgrade babel csvkit"; \
|
|
124
|
+
fi
|
|
125
|
+
@if command -v mlr > /dev/null 2>&1; then \
|
|
126
|
+
echo ""; \
|
|
127
|
+
echo "--- Miller ---"; \
|
|
128
|
+
bash -c "TIMEFORMAT='real %3R'; time mlr --csv count bench_test.csv 2>&1"; \
|
|
129
|
+
fi
|
|
130
|
+
@echo ""
|
|
131
|
+
@echo "File size: " && ls -lh bench_test.csv | awk '{print $$5}'
|
|
132
|
+
@rm -f bench_test.csv
|
|
133
|
+
|
|
134
|
+
benchmark: build
|
|
135
|
+
node benchmark/benchmark.js $(SAMPLE)
|
|
136
|
+
|
|
137
|
+
perf: build
|
|
138
|
+
node test/performance.test.js
|
|
139
|
+
|
|
140
|
+
coverage:
|
|
141
|
+
$(NODE_GYP) configure --coverage
|
|
142
|
+
$(MAKE) test
|
|
143
|
+
|
|
144
|
+
package: clean build test
|
|
145
|
+
npm pack
|
|
146
|
+
|
|
147
|
+
# Test writer functionality
|
|
148
|
+
test-writer: cli
|
|
149
|
+
chmod +x test_writer.sh
|
|
150
|
+
./test_writer.sh
|
|
151
|
+
|
|
152
|
+
# Benchmark writer performance
|
|
153
|
+
benchmark-writer: cli
|
|
154
|
+
chmod +x benchmark_cli_writer.sh
|
|
155
|
+
./benchmark_cli_writer.sh
|
|
156
|
+
|
|
157
|
+
# Run all benchmarks
|
|
158
|
+
benchmark-all: benchmark-cli benchmark-writer
|
|
159
|
+
|
|
160
|
+
.PHONY: all install build cli debug clean test benchmark benchmark-cli benchmark-writer benchmark-all perf coverage package install-cli install-benchmark-deps test-writer
|