zlight_csv 0.1.1-x86_64-linux → 0.2.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +84 -286
- data/VERSION +1 -1
- data/lib/zlight_csv/3.0/zlight_csv.so +0 -0
- data/lib/zlight_csv/3.1/zlight_csv.so +0 -0
- data/lib/zlight_csv/3.2/zlight_csv.so +0 -0
- data/lib/zlight_csv/3.3/zlight_csv.so +0 -0
- data/lib/zlight_csv.rb +205 -6
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 10f44395049178fc02ebe7b3865d5e433acf630d2d2b8888e4cd2158f35bab2d
|
|
4
|
+
data.tar.gz: 85e21971fc15f1e33a183bd38345222c3ce85c0898fe986ad725bd7d67de4b26
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6cf0b8406557de41eb4a9ec7c65dfa9493f22d2d9ce217ebd00dac3cc4edcda0045108b23ceeed3e510187df167789644129cf0f66f49cb11e918e97343ea239
|
|
7
|
+
data.tar.gz: 9731a24e552c0e1dbfc8636720170e28b31435cbdf66bcc0dcf8b6a7deadd57ac943bd407e89f19e1117d3bf445c4119d51b86295fd76aa414d5fa1c7247e0da
|
data/README.md
CHANGED
|
@@ -1,346 +1,144 @@
|
|
|
1
|
-
# ZLight
|
|
1
|
+
# ZLight CSV
|
|
2
2
|
|
|
3
3
|
[](https://badge.fury.io/rb/zlight_csv)
|
|
4
|
-
[](https://github.com/yourusername/zlight-csv/actions/workflows/ci.yml)
|
|
5
4
|
[](https://opensource.org/licenses/MIT)
|
|
6
5
|
|
|
7
|
-
A
|
|
6
|
+
A fast CSV parser for Ruby, powered by Rust.
|
|
8
7
|
|
|
9
|
-
##
|
|
8
|
+
## Why ZLight?
|
|
10
9
|
|
|
11
|
-
- **
|
|
12
|
-
- **Simple API**: Just `ZLight.parse(csv_string)`
|
|
13
|
-
- **Automatic Type Conversion**: Optional numeric conversion for integer and float fields
|
|
14
|
-
- **Symbol Headers**: Automatically converts headers to symbols
|
|
15
|
-
- **Cross-Platform**: Pre-built binaries for Linux, macOS, and Windows
|
|
10
|
+
Ruby's built-in CSV library is slow. ZLight parses CSV files **up to 30x faster** by using Rust under the hood.
|
|
16
11
|
|
|
17
|
-
|
|
12
|
+
### Benchmark Results
|
|
18
13
|
|
|
19
|
-
Parsing
|
|
14
|
+
Parsing with headers and numeric conversion (Apple M1):
|
|
20
15
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
16
|
+
| Dataset | Ruby CSV | ZLight | Speedup |
|
|
17
|
+
|---------|----------|--------|---------|
|
|
18
|
+
| 1K rows | 12.6ms | 0.3ms | **42x faster** |
|
|
19
|
+
| 10K rows | 133ms | 4.4ms | **30x faster** |
|
|
20
|
+
| 100K rows | 1,458ms | 78ms | **19x faster** |
|
|
26
21
|
|
|
27
|
-
|
|
22
|
+
File reading comparison (100K rows):
|
|
28
23
|
|
|
29
|
-
|
|
24
|
+
| Method | Ruby CSV | ZLight | Speedup |
|
|
25
|
+
|--------|----------|--------|---------|
|
|
26
|
+
| Read all | 1,596ms | 58ms | **27x faster** |
|
|
27
|
+
| Streaming | 1,104ms | 74ms | **15x faster** |
|
|
30
28
|
|
|
31
|
-
|
|
29
|
+
Iterations per second (10K rows):
|
|
32
30
|
|
|
33
|
-
```ruby
|
|
34
|
-
gem 'zlight_csv'
|
|
35
31
|
```
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
```bash
|
|
40
|
-
bundle install
|
|
32
|
+
ZLight.parse: 253 i/s
|
|
33
|
+
ZLight.stream: 159 i/s
|
|
34
|
+
Ruby CSV: 8 i/s — 33x slower
|
|
41
35
|
```
|
|
42
36
|
|
|
43
|
-
|
|
37
|
+
## Installation
|
|
44
38
|
|
|
45
|
-
```
|
|
46
|
-
gem
|
|
39
|
+
```ruby
|
|
40
|
+
gem 'zlight_csv'
|
|
47
41
|
```
|
|
48
42
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
ZLight ships **prebuilt native gems** for the following platforms:
|
|
52
|
-
|
|
53
|
-
| Platform | Architecture |
|
|
54
|
-
|----------|--------------|
|
|
55
|
-
| Linux | x86_64, aarch64 |
|
|
56
|
-
| Linux (musl/Alpine) | x86_64 |
|
|
57
|
-
| macOS | x86_64 (Intel), arm64 (Apple Silicon) |
|
|
58
|
-
| Windows | x64 (UCRT) |
|
|
59
|
-
|
|
60
|
-
When you run `gem install zlight_csv`, RubyGems automatically downloads the correct prebuilt binary for your platform. **No Rust toolchain required!**
|
|
61
|
-
|
|
62
|
-
### Requirements
|
|
63
|
-
|
|
64
|
-
- Ruby >= 3.0.0 (3.0, 3.1, 3.2, 3.3)
|
|
65
|
-
- Supported platforms: Linux, macOS, Windows (see above)
|
|
43
|
+
No Rust toolchain required — prebuilt binaries are available for Linux, macOS, and Windows.
|
|
66
44
|
|
|
67
45
|
## Usage
|
|
68
46
|
|
|
69
|
-
### Basic Parsing
|
|
70
|
-
|
|
71
47
|
```ruby
|
|
72
48
|
require 'zlight_csv'
|
|
73
49
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
Bob,25,London
|
|
78
|
-
Charlie,35,Paris
|
|
79
|
-
CSV
|
|
50
|
+
# Parse a CSV string
|
|
51
|
+
data = ZLight.parse("name,age\nAlice,30\nBob,25")
|
|
52
|
+
# => [{:name=>"Alice", :age=>"30"}, {:name=>"Bob", :age=>"25"}]
|
|
80
53
|
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
# => [{:name=>"Alice", :age=>
|
|
54
|
+
# With automatic numeric conversion
|
|
55
|
+
data = ZLight.parse(csv_string, converters: :numeric)
|
|
56
|
+
# => [{:name=>"Alice", :age=>30}, {:name=>"Bob", :age=>25}]
|
|
84
57
|
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
### Numeric Conversion
|
|
90
|
-
|
|
91
|
-
```ruby
|
|
92
|
-
# Automatically convert numeric strings to integers/floats
|
|
93
|
-
result = ZLight.parse(csv_data, headers: true, converters: :numeric)
|
|
94
|
-
# => [{:name=>"Alice", :age=>30, :city=>"New York"}, ...]
|
|
95
|
-
```
|
|
58
|
+
# Read from a file
|
|
59
|
+
data = ZLight.read("users.csv")
|
|
96
60
|
|
|
97
|
-
### Without Headers
|
|
98
|
-
|
|
99
|
-
```ruby
|
|
100
|
-
csv_data = "Alice,30,New York\nBob,25,London"
|
|
101
|
-
|
|
102
|
-
result = ZLight.parse(csv_data, headers: false)
|
|
103
|
-
# => [["Alice", "30", "New York"], ["Bob", "25", "London"]]
|
|
104
|
-
|
|
105
|
-
# With numeric conversion
|
|
106
|
-
result = ZLight.parse(csv_data, headers: false, converters: :numeric)
|
|
107
|
-
# => [["Alice", 30, "New York"], ["Bob", 25, "London"]]
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
### Reading Files
|
|
111
|
-
|
|
112
|
-
```ruby
|
|
113
|
-
# Read and parse a CSV file
|
|
114
|
-
result = ZLight.read("path/to/file.csv", headers: true, converters: :numeric)
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Iteration
|
|
118
|
-
|
|
119
|
-
```ruby
|
|
120
61
|
# Iterate over rows
|
|
121
|
-
ZLight.foreach(
|
|
62
|
+
ZLight.foreach(csv_string) do |row|
|
|
122
63
|
puts row[:name]
|
|
123
64
|
end
|
|
124
|
-
|
|
125
|
-
# With Enumerator
|
|
126
|
-
enum = ZLight.foreach(csv_data, headers: true)
|
|
127
|
-
enum.map { |row| row[:name].upcase }
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
### Custom Delimiters
|
|
131
|
-
|
|
132
|
-
```ruby
|
|
133
|
-
# Tab-separated values
|
|
134
|
-
tsv_data = "name\tage\nAlice\t30"
|
|
135
|
-
result = ZLight.parse(tsv_data, headers: true, col_sep: "\t")
|
|
136
|
-
|
|
137
|
-
# Semicolon-separated (common in European locales)
|
|
138
|
-
result = ZLight.parse(data, headers: true, col_sep: ";")
|
|
139
65
|
```
|
|
140
66
|
|
|
141
|
-
###
|
|
142
|
-
|
|
143
|
-
| Option | Type | Default | Description |
|
|
144
|
-
|--------|------|---------|-------------|
|
|
145
|
-
| `headers` | Boolean | `true` | Treat first row as headers |
|
|
146
|
-
| `converters` | Symbol | `nil` | Set to `:numeric` for auto-conversion |
|
|
147
|
-
| `col_sep` | String | `","` | Column separator character |
|
|
148
|
-
| `quote_char` | String | `"` | Quote character |
|
|
149
|
-
| `flexible` | Boolean | `true` | Allow variable length records |
|
|
150
|
-
|
|
151
|
-
## Development
|
|
67
|
+
### Streaming Large Files
|
|
152
68
|
|
|
153
|
-
|
|
69
|
+
For large files, use streaming to process rows one at a time without loading everything into memory:
|
|
154
70
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
```bash
|
|
164
|
-
# Clone the repository
|
|
165
|
-
git clone https://github.com/yourusername/zlight-csv.git
|
|
166
|
-
cd zlight-csv
|
|
167
|
-
|
|
168
|
-
# Install dependencies
|
|
169
|
-
bundle install
|
|
170
|
-
|
|
171
|
-
# Compile the native extension (requires Rust)
|
|
172
|
-
bundle exec rake compile
|
|
173
|
-
|
|
174
|
-
# Run tests
|
|
175
|
-
bundle exec rake spec
|
|
71
|
+
```ruby
|
|
72
|
+
# Stream from a file (auto-closes when block exits)
|
|
73
|
+
ZLight.open("large_file.csv") do |reader|
|
|
74
|
+
reader.each do |row|
|
|
75
|
+
process(row)
|
|
76
|
+
end
|
|
77
|
+
end
|
|
176
78
|
|
|
177
|
-
#
|
|
178
|
-
|
|
79
|
+
# Lazy enumeration — stop early without loading remaining rows
|
|
80
|
+
ZLight.open("huge_file.csv", converters: :numeric) do |reader|
|
|
81
|
+
high_scores = reader.lazy
|
|
82
|
+
.select { |row| row[:score] > 90 }
|
|
83
|
+
.first(100)
|
|
84
|
+
end
|
|
179
85
|
|
|
180
|
-
#
|
|
181
|
-
|
|
86
|
+
# Manual control
|
|
87
|
+
reader = ZLight.stream_file("data.csv", converters: :numeric)
|
|
88
|
+
while row = reader.next_row
|
|
89
|
+
break if row[:id] > 1000
|
|
90
|
+
process(row)
|
|
91
|
+
end
|
|
92
|
+
reader.close
|
|
182
93
|
```
|
|
183
94
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
### Automated Release (Recommended)
|
|
95
|
+
**Streaming is especially efficient for partial reads:**
|
|
187
96
|
|
|
188
|
-
Releases are fully automated via GitHub Actions. When you push a version tag:
|
|
189
|
-
|
|
190
|
-
1. Builds prebuilt native gems for all 6 platforms
|
|
191
|
-
2. Publishes all gems to RubyGems
|
|
192
|
-
3. Creates a GitHub Release with all gem files attached
|
|
193
|
-
|
|
194
|
-
**To release:**
|
|
195
|
-
|
|
196
|
-
```bash
|
|
197
|
-
# 1. Update version
|
|
198
|
-
vim VERSION
|
|
199
|
-
|
|
200
|
-
# 2. Update changelog
|
|
201
|
-
vim CHANGELOG.md
|
|
202
|
-
|
|
203
|
-
# 3. Commit and tag
|
|
204
|
-
git add -A
|
|
205
|
-
git commit -m "Release v0.1.0"
|
|
206
|
-
git tag v0.1.0
|
|
207
|
-
git push origin main --tags
|
|
208
97
|
```
|
|
98
|
+
Finding first 100 rows from 100K dataset:
|
|
209
99
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
Publishing uses [Trusted Publishing](https://guides.rubygems.org/trusted-publishing/) (OIDC from GitHub Actions — no API key secret).
|
|
215
|
-
|
|
216
|
-
Because `zlight_csv` is not on RubyGems yet, register a **pending** trusted publisher:
|
|
217
|
-
|
|
218
|
-
1. Sign in at [rubygems.org](https://rubygems.org/)
|
|
219
|
-
2. Open [Pending trusted publishers](https://rubygems.org/profile/oidc/pending_trusted_publishers) → **Create**
|
|
220
|
-
3. Set:
|
|
221
|
-
- **Gem name:** `zlight_csv`
|
|
222
|
-
- **Repository owner:** `zaidanch`
|
|
223
|
-
- **Repository name:** `zlight`
|
|
224
|
-
- **Workflow filename:** `release.yml`
|
|
225
|
-
4. Save. The first successful workflow run will create the gem and add you as owner.
|
|
226
|
-
|
|
227
|
-
If publish fails with an auth error, confirm the workflow filename is exactly `release.yml` and matches what you registered on RubyGems.
|
|
228
|
-
|
|
229
|
-
**Important:** Trusted publishing only works after the updated `release.yml` is on GitHub (`main`). Older workflow runs used a `RUBYGEMS_API_KEY` secret and ignored your pending publisher.
|
|
230
|
-
|
|
231
|
-
If you set **Environment** to `release` on RubyGems, uncomment `environment: release` under the `release` job in `.github/workflows/release.yml`.
|
|
232
|
-
|
|
233
|
-
### Re-run after pushing the workflow fix
|
|
234
|
-
|
|
235
|
-
```bash
|
|
236
|
-
git push origin main
|
|
237
|
-
# Actions → Release → Run workflow (uses main; no tag required)
|
|
238
|
-
# or move the tag to the latest commit and push the tag again
|
|
100
|
+
Ruby CSV (full parse): 1,696ms
|
|
101
|
+
ZLight.parse (full): 73ms
|
|
102
|
+
ZLight.stream (lazy): 0.3ms ← 5,600x faster!
|
|
239
103
|
```
|
|
240
104
|
|
|
241
|
-
###
|
|
242
|
-
|
|
243
|
-
Build locally with Docker:
|
|
105
|
+
### Options
|
|
244
106
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
107
|
+
| Option | Default | Description |
|
|
108
|
+
|--------|---------|-------------|
|
|
109
|
+
| `headers` | `true` | Use first row as headers (returns hashes). Set `false` for arrays. |
|
|
110
|
+
| `converters` | `nil` | Set to `:numeric` to convert numbers automatically |
|
|
111
|
+
| `col_sep` | `","` | Column separator (`"\t"` for TSV, `";"` for European CSV) |
|
|
112
|
+
| `quote_char` | `"` | Quote character |
|
|
113
|
+
| `flexible` | `true` | Allow rows with varying column counts |
|
|
248
114
|
|
|
249
|
-
|
|
250
|
-
rake dock:arm64-darwin
|
|
115
|
+
## Compatibility
|
|
251
116
|
|
|
252
|
-
|
|
253
|
-
rake dock:all
|
|
117
|
+
Works as a drop-in replacement for common `CSV.parse` patterns:
|
|
254
118
|
|
|
255
|
-
|
|
256
|
-
#
|
|
257
|
-
|
|
258
|
-
# zlight_csv-0.1.0-x64-mingw-ucrt.gem
|
|
259
|
-
# ... etc
|
|
260
|
-
|
|
261
|
-
# Push all gems to RubyGems
|
|
262
|
-
rake release:push
|
|
263
|
-
```
|
|
264
|
-
|
|
265
|
-
## Project Structure
|
|
119
|
+
```ruby
|
|
120
|
+
# Before
|
|
121
|
+
CSV.parse(data, headers: true, header_converters: :symbol, converters: :numeric)
|
|
266
122
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
├── ext/
|
|
270
|
-
│ └── zlight_csv/ # Rust native extension
|
|
271
|
-
│ ├── src/
|
|
272
|
-
│ │ ├── lib.rs # Entry point & Ruby bindings
|
|
273
|
-
│ │ ├── error.rs # Error types (enum-based)
|
|
274
|
-
│ │ ├── options.rs # Parse options handling
|
|
275
|
-
│ │ ├── parser.rs # CSV parsing logic
|
|
276
|
-
│ │ └── converter.rs # Type conversion utilities
|
|
277
|
-
│ ├── Cargo.toml # Rust dependencies
|
|
278
|
-
│ └── extconf.rb # Extension configuration
|
|
279
|
-
├── lib/
|
|
280
|
-
│ ├── zlight_csv.rb # Main Ruby entry point
|
|
281
|
-
│ └── zlight_csv/
|
|
282
|
-
│ └── version.rb # Reads VERSION from gem root
|
|
283
|
-
├── spec/ # RSpec tests
|
|
284
|
-
├── benchmark/ # Performance benchmarks
|
|
285
|
-
├── Gemfile # Ruby dependencies
|
|
286
|
-
├── Rakefile # Build tasks
|
|
287
|
-
├── zlight_csv.gemspec # Gem specification
|
|
288
|
-
├── VERSION # Single source of truth for release version
|
|
289
|
-
└── README.md
|
|
123
|
+
# After
|
|
124
|
+
ZLight.parse(data, converters: :numeric)
|
|
290
125
|
```
|
|
291
126
|
|
|
292
|
-
##
|
|
127
|
+
## Requirements
|
|
293
128
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
| `CSV.parse(str, headers: true)` | `ZLight.parse(str, headers: true)` | ✅ |
|
|
297
|
-
| `CSV.parse(str, header_converters: :symbol)` | Automatic | ✅ |
|
|
298
|
-
| `CSV.parse(str, converters: :numeric)` | `ZLight.parse(str, converters: :numeric)` | ✅ |
|
|
299
|
-
| `CSV.read(path)` | `ZLight.read(path)` | ✅ |
|
|
300
|
-
| `CSV.foreach(str) { }` | `ZLight.foreach(str) { }` | ✅ |
|
|
301
|
-
| `CSV.parse(str, col_sep: "\t")` | `ZLight.parse(str, col_sep: "\t")` | ✅ |
|
|
302
|
-
| Streaming/lazy parsing | Not yet supported | 🚧 |
|
|
303
|
-
| Writing CSV | Not supported | ❌ |
|
|
129
|
+
- Ruby 3.0+
|
|
130
|
+
- Linux (x86_64, aarch64), macOS (Intel, Apple Silicon), or Windows (x64)
|
|
304
131
|
|
|
305
|
-
##
|
|
132
|
+
## Roadmap
|
|
306
133
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
begin
|
|
311
|
-
ZLight.parse(invalid_csv)
|
|
312
|
-
rescue => e
|
|
313
|
-
puts e.class # RuntimeError, ArgumentError, EncodingError, etc.
|
|
314
|
-
puts e.message # Descriptive error message
|
|
315
|
-
end
|
|
316
|
-
```
|
|
317
|
-
|
|
318
|
-
Error types:
|
|
319
|
-
- `ArgumentError` - Invalid arguments or options
|
|
320
|
-
- `EncodingError` - Invalid UTF-8 in headers
|
|
321
|
-
- `RuntimeError` - CSV parsing errors
|
|
322
|
-
- `IOError` - File reading errors
|
|
134
|
+
- [x] Streaming/lazy parsing for large files
|
|
135
|
+
- [ ] CSV writing support
|
|
136
|
+
- [ ] Custom converter procs
|
|
323
137
|
|
|
324
138
|
## Contributing
|
|
325
139
|
|
|
326
|
-
|
|
327
|
-
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
328
|
-
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
329
|
-
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
330
|
-
5. Open a Pull Request
|
|
331
|
-
|
|
332
|
-
### Running Tests
|
|
333
|
-
|
|
334
|
-
```bash
|
|
335
|
-
bundle exec rake spec
|
|
336
|
-
```
|
|
140
|
+
Bug reports and pull requests are welcome on [GitHub](https://github.com/zaidanch/zlight).
|
|
337
141
|
|
|
338
142
|
## License
|
|
339
143
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
## Acknowledgments
|
|
343
|
-
|
|
344
|
-
- [csv](https://crates.io/crates/csv) - Rust CSV parsing library by BurntSushi
|
|
345
|
-
- [magnus](https://crates.io/crates/magnus) - Ruby bindings for Rust
|
|
346
|
-
- [rb-sys](https://github.com/oxidize-rb/rb-sys) - Ruby build system integration
|
|
144
|
+
MIT
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.
|
|
1
|
+
0.2.0
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/zlight_csv.rb
CHANGED
|
@@ -10,31 +10,230 @@ rescue LoadError
|
|
|
10
10
|
require_relative 'zlight_csv/zlight_csv'
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
+
# ZLight is a high-performance CSV parser for Ruby, powered by Rust.
|
|
14
|
+
#
|
|
15
|
+
# It provides a simple, Ruby CSV-compatible API for parsing CSV files
|
|
16
|
+
# up to 30x faster than the standard library.
|
|
17
|
+
#
|
|
18
|
+
# @example Basic parsing with headers
|
|
19
|
+
# result = ZLight.parse("name,age\nAlice,30\nBob,25")
|
|
20
|
+
# # => [{:name=>"Alice", :age=>"30"}, {:name=>"Bob", :age=>"25"}]
|
|
21
|
+
#
|
|
22
|
+
# @example Parsing with numeric conversion
|
|
23
|
+
# result = ZLight.parse("name,age\nAlice,30", converters: :numeric)
|
|
24
|
+
# # => [{:name=>"Alice", :age=>30}]
|
|
25
|
+
#
|
|
26
|
+
# @example Parsing without headers
|
|
27
|
+
# result = ZLight.parse("Alice,30\nBob,25", headers: false)
|
|
28
|
+
# # => [["Alice", "30"], ["Bob", "25"]]
|
|
29
|
+
#
|
|
30
|
+
# @example Reading from a file
|
|
31
|
+
# result = ZLight.read("path/to/file.csv", headers: true)
|
|
32
|
+
#
|
|
33
|
+
# @example Streaming large files
|
|
34
|
+
#   ZLight.open("large.csv") do |reader|
|
|
35
|
+
# reader.each { |row| process(row) }
|
|
36
|
+
# end
|
|
37
|
+
#
|
|
38
|
+
# @example Lazy iteration with Enumerator
|
|
39
|
+
#   reader = ZLight.stream_file("large.csv", converters: :numeric)
|
|
40
|
+
# reader.lazy.select { |row| row[:age] > 30 }.first(10)
|
|
41
|
+
# reader.close
|
|
42
|
+
#
|
|
43
|
+
# @see https://github.com/zaidanch/zlight
|
|
13
44
|
module ZLight
|
|
45
|
+
# Base error class for all ZLight errors
|
|
14
46
|
class Error < StandardError; end
|
|
47
|
+
|
|
48
|
+
# Raised when CSV parsing fails due to malformed data
|
|
15
49
|
class ParseError < Error; end
|
|
50
|
+
|
|
51
|
+
# Raised when CSV contains invalid UTF-8 encoding
|
|
16
52
|
class EncodingError < Error; end
|
|
17
53
|
|
|
54
|
+
# Raised when attempting to use a closed stream reader
|
|
55
|
+
class StreamClosedError < Error; end
|
|
56
|
+
|
|
57
|
+
# Parses a CSV string and returns an array of rows.
|
|
58
|
+
#
|
|
59
|
+
# This method is implemented as a native Rust extension for maximum performance.
|
|
60
|
+
#
|
|
61
|
+
# @param csv_string [String] The CSV data to parse
|
|
62
|
+
# @param headers [Boolean] Treat first row as headers (default: true)
|
|
63
|
+
# @param converters [Symbol, nil] Set to :numeric for auto-conversion of integers/floats
|
|
64
|
+
# @param col_sep [String] Column separator character (default: ",")
|
|
65
|
+
# @param quote_char [String] Quote character for escaping (default: '"')
|
|
66
|
+
# @param flexible [Boolean] Allow variable-length records (default: true)
|
|
67
|
+
#
|
|
68
|
+
# @return [Array<Hash>] When headers: true, returns array of Hashes with Symbol keys
|
|
69
|
+
# @return [Array<Array>] When headers: false, returns array of Arrays
|
|
70
|
+
#
|
|
71
|
+
# @raise [ArgumentError] If options are invalid
|
|
72
|
+
# @raise [ZLight::ParseError] If CSV data is malformed
|
|
73
|
+
# @raise [ZLight::EncodingError] If headers contain invalid UTF-8
|
|
74
|
+
#
|
|
75
|
+
# @example Parse with headers (default)
|
|
76
|
+
# ZLight.parse("name,age\nAlice,30")
|
|
77
|
+
# # => [{:name=>"Alice", :age=>"30"}]
|
|
78
|
+
#
|
|
79
|
+
# @example Parse without headers
|
|
80
|
+
# ZLight.parse("Alice,30", headers: false)
|
|
81
|
+
# # => [["Alice", "30"]]
|
|
82
|
+
#
|
|
83
|
+
# @example Parse with numeric conversion
|
|
84
|
+
# ZLight.parse("name,age\nAlice,30", converters: :numeric)
|
|
85
|
+
# # => [{:name=>"Alice", :age=>30}]
|
|
86
|
+
#
|
|
87
|
+
# @example Parse TSV (tab-separated values)
|
|
88
|
+
# ZLight.parse("name\tage\nAlice\t30", col_sep: "\t")
|
|
89
|
+
# # => [{:name=>"Alice", :age=>"30"}]
|
|
90
|
+
#
|
|
91
|
+
# @note The parse method is defined in the Rust native extension.
|
|
92
|
+
# See ext/zlight_csv/src/lib.rs for implementation.
|
|
93
|
+
|
|
94
|
+
# StreamReader is defined in the Rust extension.
|
|
95
|
+
# It provides lazy/streaming CSV parsing for large files.
|
|
96
|
+
#
|
|
97
|
+
# @see ZLight.stream
|
|
98
|
+
# @see ZLight.stream_file
|
|
99
|
+
|
|
18
100
|
class << self
|
|
19
|
-
#
|
|
101
|
+
# Reads and parses a CSV file from disk.
|
|
102
|
+
#
|
|
103
|
+
# Convenience method that reads the file contents and passes them to {.parse}.
|
|
104
|
+
# The file is read with UTF-8 encoding.
|
|
20
105
|
#
|
|
21
106
|
# @param path [String] Path to the CSV file
|
|
22
|
-
# @param
|
|
23
|
-
# @
|
|
107
|
+
# @param headers [Boolean] Treat first row as headers (default: true)
|
|
108
|
+
# @param converters [Symbol, nil] Set to :numeric for auto-conversion
|
|
109
|
+
# @param col_sep [String] Column separator character (default: ",")
|
|
110
|
+
# @param quote_char [String] Quote character for escaping (default: '"')
|
|
111
|
+
# @param flexible [Boolean] Allow variable-length records (default: true)
|
|
112
|
+
#
|
|
113
|
+
# @return [Array<Hash>] When headers: true, returns array of Hashes with Symbol keys
|
|
114
|
+
# @return [Array<Array>] When headers: false, returns array of Arrays
|
|
115
|
+
#
|
|
116
|
+
# @raise [Errno::ENOENT] If the file does not exist
|
|
117
|
+
# @raise [ArgumentError] If options are invalid
|
|
118
|
+
# @raise [ZLight::ParseError] If CSV data is malformed
|
|
119
|
+
#
|
|
120
|
+
# @example Read a CSV file with headers
|
|
121
|
+
# ZLight.read("users.csv")
|
|
122
|
+
# # => [{:name=>"Alice", :age=>"30"}, ...]
|
|
123
|
+
#
|
|
124
|
+
# @example Read with numeric conversion
|
|
125
|
+
# ZLight.read("users.csv", converters: :numeric)
|
|
126
|
+
# # => [{:name=>"Alice", :age=>30}, ...]
|
|
127
|
+
#
|
|
128
|
+
# @example Read a TSV file
|
|
129
|
+
# ZLight.read("data.tsv", col_sep: "\t")
|
|
130
|
+
#
|
|
131
|
+
# @see .parse
|
|
24
132
|
def read(path, **options)
|
|
25
133
|
parse(File.read(path, encoding: 'UTF-8'), **options)
|
|
26
134
|
end
|
|
27
135
|
|
|
28
136
|
# Iterates over each row in the CSV string.
|
|
29
137
|
#
|
|
138
|
+
# When called with a block, yields each row and returns the Array of parsed rows.
|
|
139
|
+
# When called without a block, returns an Enumerator; the whole string is still parsed at once when enumeration begins.
|
|
140
|
+
#
|
|
30
141
|
# @param csv_string [String] The CSV data to parse
|
|
31
|
-
# @param
|
|
32
|
-
# @
|
|
33
|
-
# @
|
|
142
|
+
# @param headers [Boolean] Treat first row as headers (default: true)
|
|
143
|
+
# @param converters [Symbol, nil] Set to :numeric for auto-conversion
|
|
144
|
+
# @param col_sep [String] Column separator character (default: ",")
|
|
145
|
+
# @param quote_char [String] Quote character for escaping (default: '"')
|
|
146
|
+
# @param flexible [Boolean] Allow variable-length records (default: true)
|
|
147
|
+
#
|
|
148
|
+
# @yield [row] Yields each row to the block
|
|
149
|
+
# @yieldparam row [Hash, Array] A single row (Hash with headers, Array without)
|
|
150
|
+
#
|
|
151
|
+
# @return [Array<Hash>, Array<Array>] The parsed rows, when a block is given
|
|
152
|
+
# @return [Enumerator] When no block is given (parsing itself is not lazy; use {.open} to stream)
|
|
153
|
+
#
|
|
154
|
+
# @example Iterate with a block
|
|
155
|
+
# ZLight.foreach("name,age\nAlice,30\nBob,25") do |row|
|
|
156
|
+
# puts row[:name]
|
|
157
|
+
# end
|
|
158
|
+
# # Output:
|
|
159
|
+
# # Alice
|
|
160
|
+
# # Bob
|
|
161
|
+
#
|
|
162
|
+
# @example Use as Enumerator
|
|
163
|
+
# names = ZLight.foreach("name,age\nAlice,30").map { |row| row[:name] }
|
|
164
|
+
# # => ["Alice"]
|
|
165
|
+
#
|
|
166
|
+
# @example Chain with Enumerable methods
|
|
167
|
+
# adults = ZLight.foreach(csv_data, converters: :numeric)
|
|
168
|
+
# .select { |row| row[:age] >= 18 }
|
|
169
|
+
# .map { |row| row[:name] }
|
|
170
|
+
#
|
|
171
|
+
# @see .parse
|
|
34
172
|
def foreach(csv_string, **options, &block)
|
|
35
173
|
return to_enum(:foreach, csv_string, **options) unless block_given?
|
|
36
174
|
|
|
37
175
|
parse(csv_string, **options).each(&block)
|
|
38
176
|
end
|
|
177
|
+
|
|
178
|
+
# Creates a streaming reader for a CSV file with automatic resource management.
|
|
179
|
+
#
|
|
180
|
+
# This is the most memory-efficient way to process large CSV files.
|
|
181
|
+
# Rows are read one at a time directly from disk.
|
|
182
|
+
#
|
|
183
|
+
# When called with a block, automatically closes the reader after
|
|
184
|
+
# the block completes (even if an exception is raised).
|
|
185
|
+
#
|
|
186
|
+
# @param path [String] Path to the CSV file
|
|
187
|
+
# @param headers [Boolean] Treat first row as headers (default: true)
|
|
188
|
+
# @param converters [Symbol, nil] Set to :numeric for auto-conversion
|
|
189
|
+
# @param col_sep [String] Column separator character (default: ",")
|
|
190
|
+
# @param quote_char [String] Quote character (default: '"')
|
|
191
|
+
# @param flexible [Boolean] Allow variable-length records (default: true)
|
|
192
|
+
#
|
|
193
|
+
# @yield [reader] If a block is given, yields the reader and auto-closes
|
|
194
|
+
# @yieldparam reader [ZLight::StreamReader] The streaming reader
|
|
195
|
+
#
|
|
196
|
+
# @return [ZLight::StreamReader] The streaming reader (if no block)
|
|
197
|
+
# @return [Object] The block's return value (if block given)
|
|
198
|
+
#
|
|
199
|
+
# @raise [Errno::ENOENT] If the file does not exist
|
|
200
|
+
# @raise [Errno::EACCES] If the file is not readable
|
|
201
|
+
#
|
|
202
|
+
# @example Process a large file row by row
|
|
203
|
+
# ZLight.open("users.csv") do |reader|
|
|
204
|
+
# reader.each do |row|
|
|
205
|
+
# User.create!(name: row[:name], email: row[:email])
|
|
206
|
+
# end
|
|
207
|
+
# end
|
|
208
|
+
#
|
|
209
|
+
# @example Get first 100 matching rows lazily
|
|
210
|
+
# ZLight.open("data.csv", converters: :numeric) do |reader|
|
|
211
|
+
# reader.lazy
|
|
212
|
+
# .select { |row| row[:score] > 90 }
|
|
213
|
+
# .first(100)
|
|
214
|
+
# end
|
|
215
|
+
#
|
|
216
|
+
# @example Manual resource management
|
|
217
|
+
# reader = ZLight.open("large.csv")
|
|
218
|
+
# begin
|
|
219
|
+
# reader.each { |row| process(row) }
|
|
220
|
+
# ensure
|
|
221
|
+
# reader.close
|
|
222
|
+
# end
|
|
223
|
+
#
|
|
224
|
+
# @see ZLight::StreamReader
|
|
225
|
+
def open(path, **options)
|
|
226
|
+
reader = stream_file(path, **options)
|
|
227
|
+
|
|
228
|
+
if block_given?
|
|
229
|
+
begin
|
|
230
|
+
yield reader
|
|
231
|
+
ensure
|
|
232
|
+
reader.close unless reader.closed?
|
|
233
|
+
end
|
|
234
|
+
else
|
|
235
|
+
reader
|
|
236
|
+
end
|
|
237
|
+
end
|
|
39
238
|
end
|
|
40
239
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zlight_csv
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Zaidan Chaudhary
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
ZLight is a blazing-fast CSV parser that provides a drop-in replacement
|