cisv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,158 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ${{ matrix.os }}
8
+ strategy:
9
+ matrix:
10
+ os: [ubuntu-latest] # macos-latest, windows-latest # skipped for now
11
+ node-version: [23.x, 24.x]
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ - name: Use Node.js ${{ matrix.node-version }}
15
+ uses: actions/setup-node@v3
16
+ with:
17
+ node-version: ${{ matrix.node-version }}
18
+ - run: npm install
19
+ - run: npm test
20
+ - name: Build
21
+ run: |
22
+ npm run build
23
+ npm run test:build
24
+
25
+ cli-build:
26
+ runs-on: ${{ matrix.os }}
27
+ strategy:
28
+ matrix:
29
+ os: [ubuntu-latest] # macos-latest ignored for now
30
+ steps:
31
+ - uses: actions/checkout@v3
32
+ - name: Build CLI
33
+ run: |
34
+ npm install -g node-gyp
35
+ make clean
36
+ make build
37
+ make cli
38
+ - name: Test CLI basic functionality
39
+ run: |
40
+ ./cisv --version
41
+ ./cisv --help
42
+ echo "id,name,value" > test.csv
43
+ echo "1,test,100" >> test.csv
44
+ echo "2,demo,200" >> test.csv
45
+ ./cisv -c test.csv | grep -q "3" # including headers
46
+ ./cisv test.csv
47
+ ./cisv -s 0,2 test.csv
48
+ ./cisv --head 1 test.csv
49
+ - name: Upload CLI binary
50
+ uses: actions/upload-artifact@v4
51
+ with:
52
+ name: cisv-${{ matrix.os }}
53
+ path: cisv
54
+
55
+ memory-check:
56
+ runs-on: ubuntu-latest
57
+ needs: cli-build
58
+ steps:
59
+ - uses: actions/checkout@v3
60
+ - name: Install memory checking tools
61
+ run: |
62
+ sudo apt-get update
63
+ sudo apt-get install -y valgrind cppcheck clang-tools
64
+ - name: Build CLI with debug symbols
65
+ run: |
66
+ npm install -g node-gyp
67
+ make clean
68
+ make build
69
+ CFLAGS="-g -O0" make cli
70
+
71
+ - name: Run memory leak tests with Valgrind
72
+ run: |
73
+ # Run with valgrind leak check
74
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose ./cisv --version 2>&1 | tee valgrind-version.log
75
+ # Test basic CSV parsing
76
+ echo "id,name,value" > memtest.csv
77
+ echo "1,test,100" >> memtest.csv
78
+ echo "2,demo,200" >> memtest.csv
79
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv memtest.csv 2>&1 | tee valgrind-basic.log
80
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv -c memtest.csv 2>&1 | tee valgrind-count.log
81
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cisv -s 0,2 memtest.csv 2>&1 | tee valgrind-select.log
82
+ # Fail only on non-zero leak totals: a bare "! grep" is exempt from errexit and also matches valgrind's "0 bytes" summary lines
83
+ if grep -E "definitely lost: [1-9]" valgrind-*.log; then exit 1; fi
84
+ if grep -E "indirectly lost: [1-9]" valgrind-*.log; then exit 1; fi
85
+
86
+ - name: Run memory deep leak tests on reader and writer
87
+ run: |
88
+ chmod +x test_select.sh
89
+ bash ./test_select.sh
90
+
91
+ - name: Run memory deep leak tests on transform
92
+ run: |
93
+ chmod +x test_transform.sh
94
+ bash ./test_transform.sh
95
+
96
+ - name: Run AddressSanitizer tests
97
+ run: |
98
+ make clean
99
+ CC=clang CFLAGS="-g -O0 -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-fsanitize=address" make cli
100
+ ./cisv --version
101
+ echo "id,name,value,x" > test_x.csv
102
+ echo "1,test,100,a" >> test_x.csv
103
+ echo "2,demo,200,b" >> test_x.csv
104
+ ./cisv -c test_x.csv
105
+ ./cisv -s 0,2,3 test_x.csv
106
+ - name: Upload memory check results
107
+ if: failure()
108
+ uses: actions/upload-artifact@v4
109
+ with:
110
+ name: memory-check-logs
111
+ path: valgrind-*.log
112
+
113
+ #benchmark:
114
+ # runs-on: ubuntu-latest
115
+ # needs: [build, cli-build, memory-check]
116
+ # steps:
117
+ # - uses: actions/checkout@v3
118
+ # - uses: actions/setup-node@v3
119
+ # with:
120
+ # node-version: 23.x
121
+ # - name: Install benchmark dependencies
122
+ # run: |
123
+ # sudo apt-get update
124
+ # sudo apt-get install -y miller bc time
125
+ # pip3 install --upgrade pip
126
+ # pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0'
127
+ # - name: Install Rust tools
128
+ # uses: actions-rs/toolchain@v1
129
+ # with:
130
+ # toolchain: stable
131
+ # override: true
132
+ # - name: Install xsv and qsv
133
+ # run: |
134
+ # cargo install xsv
135
+ # cargo install qsv
136
+ # - name: Build cisv CLI
137
+ # run: |
138
+ # make clean
139
+ # make cli
140
+ # - name: Run Node.js benchmarks
141
+ # run: |
142
+ # npm install
143
+ # npm run benchmark
144
+ # - name: Run CLI benchmarks
145
+ # run: |
146
+ # make benchmark-cli
147
+ # - name: Generate benchmark report
148
+ # run: |
149
+ # echo "## Benchmark Results" > benchmark-report.md
150
+ # echo "Date: $(date)" >> benchmark-report.md
151
+ # echo "Commit: ${{ github.sha }}" >> benchmark-report.md
152
+ # echo "" >> benchmark-report.md
153
+ # make benchmark-cli >> benchmark-report.md 2>&1
154
+ # - name: Upload benchmark results
155
+ # uses: actions/upload-artifact@v3
156
+ # with:
157
+ # name: benchmark-results
158
+ # path: benchmark-report.md
@@ -0,0 +1,167 @@
1
+ name: release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+
8
+ permissions:
9
+ contents: write
10
+ packages: read
11
+ env:
12
+ VERSION: ${{ github.ref_name }}
13
+ jobs:
14
+ build:
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ matrix:
18
+ os: [ubuntu-latest] # , macos-latest
19
+ arch: [amd64, arm64]
20
+ include:
21
+ - os: ubuntu-latest
22
+ pkg_os: ubuntu
23
+ pkg_type: deb
24
+ # - os: macos-latest
25
+ # pkg_os: macos
26
+ # pkg_type: tar.gz
27
+
28
+ steps:
29
+ - name: Checkout
30
+ uses: actions/checkout@v4
31
+ with:
32
+ fetch-depth: 0
33
+
34
+ - name: Setup Node.js
35
+ uses: actions/setup-node@v3
36
+ with:
37
+ node-version: '22.x'
38
+
39
+ - name: Install build dependencies
40
+ shell: bash
41
+ run: |
42
+ npm install -g node-gyp node-addon-api
43
+ npm install
44
+
45
+ - name: Build CLI binary
46
+ shell: bash
47
+ run: |
48
+ # Set platform-specific variables
49
+ OS_NAME=$(echo "${{ matrix.os }}" | sed 's/-latest//')
50
+ ARCH="${{ matrix.arch }}"
51
+ EXT=""
52
+
53
+ # Build cisv with version info (no -march=native: a released binary must run on CPUs other than the build runner's)
54
+ npm install -g node-gyp
55
+ make clean
56
+ make build
57
+ CFLAGS="-DCISV_VERSION=\"${VERSION}\" -O3" make cli
58
+
59
+ # Rename output file
60
+ OUTPUT="cisv-${{ matrix.pkg_os }}-${ARCH}${EXT}"
61
+ mv cisv${EXT} "$OUTPUT"
62
+ echo "OUTPUT_FILE=$OUTPUT" >> $GITHUB_ENV
63
+
64
+ - name: Build Node.js addon
65
+ shell: bash
66
+ run: |
67
+ npm run build
68
+ # Package node addon; use matrix.arch because the ARCH shell variable set in the previous step does not persist across steps
69
+ ADDON_OUTPUT="cisv-node-${{ matrix.pkg_os }}-${VERSION}-${{ matrix.arch }}.node"
70
+ cp build/Release/cisv.node "$ADDON_OUTPUT"
71
+ echo "ADDON_FILE=$ADDON_OUTPUT" >> $GITHUB_ENV
72
+
73
+ - name: Package artifact
74
+ shell: bash
75
+ run: |
76
+ if [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
77
+ sudo apt-get update && sudo apt-get install -y ruby-dev rubygems
78
+ sudo gem install fpm
79
+
80
+ # Create directory structure for package
81
+ mkdir -p package/usr/local/bin
82
+ mkdir -p package/usr/local/lib/node_modules/cisv
83
+
84
+ cp "$OUTPUT_FILE" package/usr/local/bin/cisv
85
+ chmod +x package/usr/local/bin/cisv
86
+
87
+ # Package with fpm
88
+ fpm -s dir -t deb -n cisv -v ${VERSION#v} \
89
+ --architecture ${{ matrix.arch }} \
90
+ --description "The fastest CSV parser" \
91
+ --url "https://github.com/${{ github.repository }}" \
92
+ --maintainer "${{ github.repository_owner }}" \
93
+ -C package \
94
+ usr/local/bin
95
+
96
+ PACKAGE=$(ls cisv_*_${{ matrix.arch }}.deb)
97
+ else
98
+ # Create tarball with both CLI and Node addon
99
+ tar czf cisv-${{ matrix.pkg_os }}-${{ matrix.arch }}.tar.gz "$OUTPUT_FILE" "$ADDON_FILE" README.md LICENSE
100
+ PACKAGE="cisv-${{ matrix.pkg_os }}-${{ matrix.arch }}.tar.gz"
101
+ fi
102
+ echo "PACKAGE_FILE=$PACKAGE" >> $GITHUB_ENV
103
+
104
+ - name: Test binary
105
+ shell: bash
106
+ run: |
107
+ # Basic smoke test
108
+ ./${{ env.OUTPUT_FILE }} --version
109
+ ./${{ env.OUTPUT_FILE }} --help
110
+
111
+ # Create test CSV
112
+ echo "id,name,value" > test.csv
113
+ echo "1,test,100" >> test.csv
114
+ echo "2,demo,200" >> test.csv
115
+
116
+ # Test basic functionality
117
+ ./${{ env.OUTPUT_FILE }} -c test.csv
118
+ ./${{ env.OUTPUT_FILE }} test.csv
119
+
120
+ - name: Upload Release Assets
121
+ uses: softprops/action-gh-release@v1
122
+ with:
123
+ token: ${{ secrets.GITHUB_TOKEN }}
124
+ files: |
125
+ ${{ env.OUTPUT_FILE }}
126
+ ${{ env.ADDON_FILE }}
127
+ ${{ env.PACKAGE_FILE }}
128
+ generate_release_notes: true
129
+
130
+ - name: Update Latest Tag
131
+ if: github.repository == 'Sanix-Darker/cisv'
132
+ uses: softprops/action-gh-release@v1
133
+ with:
134
+ token: ${{ secrets.GITHUB_TOKEN }}
135
+ tag_name: latest
136
+ name: latest
137
+ body: "latest release: ${{ env.VERSION }}"
138
+ files: |
139
+ ${{ env.OUTPUT_FILE }}
140
+ ${{ env.ADDON_FILE }}
141
+ ${{ env.PACKAGE_FILE }}
142
+
143
+ # TODO: to do later
144
+ publish-npm:
145
+ needs: build
146
+ runs-on: ubuntu-latest
147
+ steps:
148
+ - uses: actions/checkout@v4
149
+ - uses: actions/setup-node@v3
150
+ with:
151
+ node-version: '23.x'
152
+ registry-url: 'https://registry.npmjs.org'
153
+
154
+ #- name: Update package version
155
+ # run: |
156
+ # npm version ${VERSION#v} --no-git-tag-version
157
+
158
+ - name: Build and test
159
+ run: |
160
+ npm install
161
+ npm run build
162
+ npm test
163
+
164
+ - name: Publish to npm
165
+ run: npm publish --access public
166
+ env:
167
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
package/Dockerfile ADDED
@@ -0,0 +1,63 @@
1
+ # Dockerfile for CISV benchmarking with CPU/RAM isolation
2
+ FROM ubuntu:22.04
3
+
4
+ # Prevent interactive prompts
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ build-essential \
10
+ git \
11
+ curl \
12
+ wget \
13
+ python3 \
14
+ python3-pip \
15
+ bc \
16
+ time \
17
+ valgrind \
18
+ clang \
19
+ libc6-dev \
20
+ miller \
21
+ ruby \
22
+ && rm -rf /var/lib/apt/lists/*
23
+
24
+ # Install Node.js 20.x
25
+ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
26
+ && apt-get install -y nodejs
27
+
28
+ # Install Rust
29
+ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
30
+ ENV PATH="/root/.cargo/bin:${PATH}"
31
+
32
+ # Install Rust CSV tools
33
+ RUN cargo install xsv qsv
34
+
35
+ # Install Python CSV tools
36
+ RUN pip3 install --upgrade pip && \
37
+ pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0' pandas
38
+
39
+ # Install Node.js CSV tools (with legacy peer deps for compatibility)
40
+ RUN npm install -g node-gyp node-addon-api fast-csv --legacy-peer-deps
41
+
42
+ # Create benchmark user and directory
43
+ RUN useradd -m -s /bin/bash benchmark
44
+ WORKDIR /home/benchmark
45
+
46
+ # Copy the project
47
+ COPY --chown=benchmark:benchmark . /home/benchmark/cisv
48
+
49
+ # Build cisv and dependencies
50
+ WORKDIR /home/benchmark/cisv
51
+ # NOTE: install-benchmark-deps is still run below (it builds the rust-csv benchmark tool), even though most dependencies were already installed above
52
+ RUN make clean && cargo install qsv && make install-benchmark-deps && make cli
53
+
54
+ # Create main benchmark runner script
55
+ COPY ./benchmark_cli_writer.sh ./benchmark_cli_reader.sh ./run_benchmarks.sh /home/benchmark/
56
+ RUN chmod +x /home/benchmark/run_benchmarks.sh
57
+
58
+ # Switch to benchmark user
59
+ USER benchmark
60
+ WORKDIR /home/benchmark/cisv
61
+
62
+ # Default command runs all benchmarks
63
+ CMD ["/home/benchmark/run_benchmarks.sh", "all"]
package/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, 29 Jul 2025
3
+
4
+ Sanix-Darker CISV Project
5
+
6
+ This project is licensed under the GNU General Public License, Version 2.0 (GPL-2.0).
7
+ You may obtain a copy of the License at https://www.gnu.org/licenses/gpl-2.0.html.
package/Makefile ADDED
@@ -0,0 +1,160 @@
1
+ CC ?= gcc
2
+ CFLAGS ?= -O3 -march=native -pipe -fomit-frame-pointer -Wall -Wextra -std=c11 -flto -ffast-math -funroll-loops
3
+ LDFLAGS ?= -flto -s
4
+ NODE_GYP ?= node-gyp
5
+
6
+ # CLI binary name
7
+ CLI_BIN = cisv
8
+ # Include both parser and writer
9
+ CLI_SRC = src/cisv_parser.c src/cisv_writer.c src/cisv_transformer.c
10
+ CLI_OBJ = $(CLI_SRC:.c=.o)
11
+
12
+ # Build targets
13
+ all: build cli
14
+
15
+ install:
16
+ npm install -g node-gyp
17
+ npm install -g node-addon-api
18
+
19
+ build:
20
+ npm install
21
+ $(NODE_GYP) configure build CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)"
22
+
23
+ # Build CLI tool (cisv_parser.c contains main when CISV_CLI is defined)
24
+ cli: $(CLI_BIN)
25
+
26
+ $(CLI_BIN): $(CLI_OBJ)
27
+ $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
28
+
29
+ # Compile with CISV_CLI defined to include CLI code
30
+ src/cisv_parser.o: src/cisv_parser.c
31
+ $(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_parser.o src/cisv_parser.c
32
+
33
+ src/cisv_writer.o: src/cisv_writer.c
34
+ $(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_writer.o src/cisv_writer.c
35
+
36
+ src/cisv_transformer.o: src/cisv_transformer.c
37
+ $(CC) $(CFLAGS) -DCISV_CLI -c -o src/cisv_transformer.o src/cisv_transformer.c
38
+
39
+ # Install CLI tool to /usr/local/bin
40
+ install-cli: cli
41
+ install -m 755 $(CLI_BIN) /usr/local/bin/$(CLI_BIN)
42
+
43
+ # Install benchmark dependencies
44
+ install-benchmark-deps:
45
+ @echo "Installing benchmark dependencies..."
46
+ @# Install Rust if not present
47
+ @if ! command -v cargo > /dev/null; then \
48
+ echo "Installing Rust..."; \
49
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
50
+ . $$HOME/.cargo/env; \
51
+ fi
52
+ #@# Install xsv
53
+ #@if ! command -v xsv > /dev/null; then \
54
+ # echo "Installing xsv..."; \
55
+ # cargo install xsv; \
56
+ #fi
57
+ @# Install qsv (faster fork of xsv)
58
+ #@if ! command -v qsv > /dev/null; then \
59
+ # echo "Installing qsv..."; \
60
+ # cargo install qsv; \
61
+ #fi
62
+ @# Build rust-csv example
63
+ @echo "Building rust-csv benchmark tool..."
64
+ @mkdir -p benchmark/rust-csv-bench
65
+ @cd benchmark/rust-csv-bench && \
66
+ if [ ! -f Cargo.toml ]; then \
67
+ cargo init --name csv-bench; \
68
+ echo 'csv = "1.3"' >> Cargo.toml; \
69
+ fi && \
70
+ echo 'use std::env;\nuse std::error::Error;\nuse csv::ReaderBuilder;\n\nfn main() -> Result<(), Box<dyn Error>> {\n let args: Vec<String> = env::args().collect();\n if args.len() < 2 { eprintln!("Usage: {} <file>", args[0]); std::process::exit(1); }\n let mut rdr = ReaderBuilder::new().has_headers(true).from_path(&args[1])?;\n let count = rdr.records().count();\n println!("{}", count);\n Ok(())\n}' > src/main.rs && \
71
+ cargo build --release
72
+ @# Fix csvkit Python 3.12 compatibility issue and install
73
+ @if command -v pip3 > /dev/null; then \
74
+ echo "Installing/fixing csvkit..."; \
75
+ pip3 install --upgrade pip; \
76
+ pip3 uninstall -y csvkit babel || true; \
77
+ pip3 install 'babel>=2.9.0' 'csvkit>=1.1.0'; \
78
+ fi
79
+
80
+ debug:
81
+ $(NODE_GYP) configure build --debug
82
+
83
+ clean:
84
+ $(NODE_GYP) clean
85
+ rm -rf build
86
+ rm -f $(CLI_BIN) $(CLI_OBJ)
87
+ rm -f src/cisv_cli.o # Clean old object file if exists
88
+
89
+ test: build
90
+ npm test
91
+
92
+ # Benchmark comparing cisv CLI with other tools
93
+ benchmark-cli: cli
94
+ @echo "=== Benchmarking CSV CLI tools ==="
95
+ @echo "Preparing test file..."
96
+ @python3 -c "import csv; w=csv.writer(open('bench_test.csv','w')); [w.writerow([i,f'name_{i}',f'email_{i}@test.com',f'city_{i}']) for i in range(1000000)]"
97
+ @echo ""
98
+ @echo "--- cisv (this project) ---"
99
+ @bash -c "TIMEFORMAT='real %3R'; time ./$(CLI_BIN) -c bench_test.csv 2>&1"
100
+ @if [ -f benchmark/rust-csv-bench/target/release/csv-bench ]; then \
101
+ echo ""; \
102
+ echo "--- rust-csv (Rust library) ---"; \
103
+ bash -c "TIMEFORMAT='real %3R'; time ./benchmark/rust-csv-bench/target/release/csv-bench bench_test.csv 2>&1"; \
104
+ fi
105
+ @if command -v xsv > /dev/null 2>&1; then \
106
+ echo ""; \
107
+ echo "--- xsv (Rust CLI) ---"; \
108
+ bash -c "TIMEFORMAT='real %3R'; time xsv count bench_test.csv 2>&1"; \
109
+ fi
110
+ @if command -v qsv > /dev/null 2>&1; then \
111
+ echo ""; \
112
+ echo "--- qsv (Rust CLI - faster xsv fork) ---"; \
113
+ bash -c "TIMEFORMAT='real %3R'; time qsv count bench_test.csv 2>&1"; \
114
+ fi
115
+ @if command -v wc > /dev/null 2>&1; then \
116
+ echo ""; \
117
+ echo "--- wc -l (baseline) ---"; \
118
+ bash -c "TIMEFORMAT='real %3R'; time wc -l bench_test.csv 2>&1"; \
119
+ fi
120
+ @if command -v csvstat > /dev/null 2>&1; then \
121
+ echo ""; \
122
+ echo "--- csvkit (Python) ---"; \
123
+ bash -c "TIMEFORMAT='real %3R'; time csvstat --count bench_test.csv 2>&1" || echo "csvkit failed - may need: pip3 install --upgrade babel csvkit"; \
124
+ fi
125
+ @if command -v mlr > /dev/null 2>&1; then \
126
+ echo ""; \
127
+ echo "--- Miller ---"; \
128
+ bash -c "TIMEFORMAT='real %3R'; time mlr --csv count bench_test.csv 2>&1"; \
129
+ fi
130
+ @echo ""
131
+ @echo "File size: " && ls -lh bench_test.csv | awk '{print $$5}'
132
+ @rm -f bench_test.csv
133
+
134
+ benchmark: build
135
+ node benchmark/benchmark.js $(SAMPLE)
136
+
137
+ perf: build
138
+ node test/performance.test.js
139
+
140
+ coverage:
141
+ $(NODE_GYP) configure --coverage
142
+ $(MAKE) test
143
+
144
+ package: clean build test
145
+ npm pack
146
+
147
+ # Test writer functionality
148
+ test-writer: cli
149
+ chmod +x test_writer.sh
150
+ ./test_writer.sh
151
+
152
+ # Benchmark writer performance
153
+ benchmark-writer: cli
154
+ chmod +x benchmark_cli_writer.sh
155
+ ./benchmark_cli_writer.sh
156
+
157
+ # Run all benchmarks
158
+ benchmark-all: benchmark-cli benchmark-writer
159
+
160
+ .PHONY: all install build cli debug clean test benchmark benchmark-cli benchmark-writer benchmark-all perf coverage package install-cli install-benchmark-deps test-writer