osv 0.3.12 → 0.3.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/osv/version.rb +1 -1
- metadata +14 -218
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e44a6bcd337573fcfd8586b5a6989940a8e0e5497f7376a85177eed59223d62
|
4
|
+
data.tar.gz: a577f31562aa3d5806222f2deb4b86f6594ace07bcb7079d80b812449437abbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 346984e4dfd2a01943f874848920aaa0f63d2e59db72d0307e192e3bc2679a60bba1749020b357021ba4285cccba86480b6a1a13f8d2723803df765009126c10
|
7
|
+
data.tar.gz: 71b5a79edf20b0377748ff78ebc9f6d11b077f7f8cdc70248d917df108e4e57ef9e5e1f611524b533ec300db5009a02fde3631ab6eaa739bf45aa705e131ebde
|
data/lib/osv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: osv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.12
|
4
|
+
version: 0.3.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Jaremko
|
@@ -39,223 +39,15 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.2.0
|
41
41
|
description: |
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
## Installation
|
53
|
-
|
54
|
-
Add this line to your application's Gemfile:
|
55
|
-
|
56
|
-
```ruby
|
57
|
-
gem 'osv'
|
58
|
-
```
|
59
|
-
|
60
|
-
And then execute:
|
61
|
-
|
62
|
-
```bash
|
63
|
-
bundle install
|
64
|
-
```
|
65
|
-
|
66
|
-
Or install it directly:
|
67
|
-
|
68
|
-
```bash
|
69
|
-
gem install osv
|
70
|
-
```
|
71
|
-
|
72
|
-
## Usage
|
73
|
-
|
74
|
-
### Reading CSV Files
|
75
|
-
|
76
|
-
```ruby
|
77
|
-
require 'osv'
|
78
|
-
|
79
|
-
# Basic usage - each row as a hash
|
80
|
-
OSV.for_each("data.csv") do |row|
|
81
|
-
puts row["name"] # => "John"
|
82
|
-
puts row["age"] # => "25"
|
83
|
-
end
|
84
|
-
|
85
|
-
# Return an enumerator instead of using a block
|
86
|
-
rows = OSV.for_each("data.csv")
|
87
|
-
rows.each { |row| puts row["name"] }
|
88
|
-
|
89
|
-
# High-performance array mode
|
90
|
-
OSV.for_each("data.csv", result_type: :array) do |row|
|
91
|
-
puts row[0] # First column
|
92
|
-
puts row[1] # Second column
|
93
|
-
end
|
94
|
-
```
|
95
|
-
|
96
|
-
### Input Sources
|
97
|
-
|
98
|
-
```ruby
|
99
|
-
# From a file path
|
100
|
-
OSV.for_each("data.csv") { |row| puts row["name"] }
|
101
|
-
|
102
|
-
# From a gzipped file path
|
103
|
-
OSV.for_each("data.csv.gz") { |row| puts row["name"] }
|
104
|
-
|
105
|
-
# From an IO object
|
106
|
-
File.open("data.csv") { |file| OSV.for_each(file) { |row| puts row["name"] } }
|
107
|
-
|
108
|
-
# From a string
|
109
|
-
data = StringIO.new("name,age\nJohn,25")
|
110
|
-
OSV.for_each(data) { |row| puts row["name"] }
|
111
|
-
```
|
112
|
-
|
113
|
-
### Configuration Options
|
114
|
-
|
115
|
-
```ruby
|
116
|
-
OSV.for_each("data.csv",
|
117
|
-
# Input formatting
|
118
|
-
has_headers: true, # First row contains headers (default: true)
|
119
|
-
col_sep: ",", # Column separator (default: ",")
|
120
|
-
quote_char: '"', # Quote character (default: '"')
|
121
|
-
|
122
|
-
# Output formatting
|
123
|
-
result_type: :hash, # :hash or :array (hash is default)
|
124
|
-
nil_string: nil, # String to interpret as nil when parsing (default: nil)
|
125
|
-
|
126
|
-
# Parsing behavior
|
127
|
-
flexible: false, # Allow varying number of fields (default: false)
|
128
|
-
flexible_default: nil, # Default value for missing fields. If unset, we ignore missing fields.
|
129
|
-
# Implicitly enables flexible mode if set.
|
130
|
-
trim: :all, # Whether to trim whitespace. Options are :all, :headers, or :fields (default: nil)
|
131
|
-
buffer_size: 1024, # Number of rows to buffer in memory (default: 1024)
|
132
|
-
)
|
133
|
-
```
|
134
|
-
|
135
|
-
#### Available Options
|
136
|
-
|
137
|
-
- `has_headers`: Boolean indicating if the first row contains headers (default: true)
|
138
|
-
- `col_sep`: String specifying the field separator (default: ",")
|
139
|
-
- `quote_char`: String specifying the quote character (default: "\"")
|
140
|
-
- `nil_string`: String that should be interpreted as nil
|
141
|
-
- by default, empty strings are interpreted as empty strings
|
142
|
-
- if you want to interpret empty strings as nil, set this to an empty string
|
143
|
-
- `buffer_size`: Integer specifying the number of rows to buffer in memory (default: 1024)
|
144
|
-
- `result_type`: String specifying the output format ("hash" or "array" or :hash or :array)
|
145
|
-
- `flexible`: Boolean specifying if the parser should be flexible (default: false)
|
146
|
-
- `flexible_default`: String specifying the default value for missing fields. Implicitly enables flexible mode if set. (default: `nil`)
|
147
|
-
- `trim`: String specifying the trim mode ("all" or "headers" or "fields" or :all or :headers or :fields)
|
148
|
-
|
149
|
-
When `has_headers` is false, hash keys will be generated as `"c0"`, `"c1"`, etc.
|
150
|
-
|
151
|
-
## Requirements
|
152
|
-
|
153
|
-
- Ruby >= 3.1.0
|
154
|
-
- Rust toolchain (for installation from source)
|
155
|
-
|
156
|
-
## Performance
|
157
|
-
|
158
|
-
This library is faster than the standard Ruby CSV library, and is comparable to the fastest CSV parser gems I've used.
|
159
|
-
|
160
|
-
Here are some unscientific benchmarks. You can find the code in the [benchmark/comparison_benchmark.rb](benchmark/comparison_benchmark.rb) file.
|
161
|
-
|
162
|
-
### 100,000 lines
|
163
|
-
|
164
|
-
```
|
165
|
-
Benchmarking with 100001 lines of data
|
166
|
-
|
167
|
-
ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
|
168
|
-
Warming up --------------------------------------
|
169
|
-
OSV - Hash output 1.000 i/100ms
|
170
|
-
CSV - Hash output 1.000 i/100ms
|
171
|
-
OSV - Array output 1.000 i/100ms
|
172
|
-
OSV - Direct Open Array output
|
173
|
-
12.719M i/100ms
|
174
|
-
CSV - Array output 1.000 i/100ms
|
175
|
-
FastCSV - Array output
|
176
|
-
1.000 i/100ms
|
177
|
-
OSV - StringIO 1.000 i/100ms
|
178
|
-
CSV - StringIO 1.000 i/100ms
|
179
|
-
FastCSV - StringIO 1.000 i/100ms
|
180
|
-
OSV - Gzipped 1.000 i/100ms
|
181
|
-
CSV - Gzipped 1.000 i/100ms
|
182
|
-
Calculating -------------------------------------
|
183
|
-
OSV - Hash output 6.722 (±14.9%) i/s (148.77 ms/i) - 59.000 in 10.074753s
|
184
|
-
CSV - Hash output 1.223 (± 0.0%) i/s (817.62 ms/i) - 13.000 in 10.788284s
|
185
|
-
OSV - Array output 17.284 (±11.6%) i/s (57.86 ms/i) - 171.000 in 10.007321s
|
186
|
-
OSV - Direct Open Array output
|
187
|
-
213.629M (±13.5%) i/s (4.68 ns/i) - 1.921B in 10.005506s
|
188
|
-
CSV - Array output 2.193 (± 0.0%) i/s (455.93 ms/i) - 22.000 in 10.052607s
|
189
|
-
FastCSV - Array output
|
190
|
-
7.993 (± 0.0%) i/s (125.11 ms/i) - 80.000 in 10.053729s
|
191
|
-
OSV - StringIO 6.626 (±15.1%) i/s (150.91 ms/i) - 66.000 in 10.103646s
|
192
|
-
CSV - StringIO 1.478 (± 0.0%) i/s (676.78 ms/i) - 15.000 in 10.158640s
|
193
|
-
FastCSV - StringIO 17.074 (± 5.9%) i/s (58.57 ms/i) - 171.000 in 10.059266s
|
194
|
-
OSV - Gzipped 5.639 (± 0.0%) i/s (177.32 ms/i) - 57.000 in 10.152487s
|
195
|
-
CSV - Gzipped 1.176 (± 0.0%) i/s (850.19 ms/i) - 12.000 in 10.233398s
|
196
|
-
|
197
|
-
Comparison:
|
198
|
-
OSV - Direct Open Array output: 213629268.6 i/s
|
199
|
-
OSV - Array output: 17.3 i/s - 12360250.79x slower
|
200
|
-
FastCSV - StringIO: 17.1 i/s - 12511956.50x slower
|
201
|
-
FastCSV - Array output: 8.0 i/s - 26727225.72x slower
|
202
|
-
OSV - Hash output: 6.7 i/s - 31780615.83x slower
|
203
|
-
OSV - StringIO: 6.6 i/s - 32239620.60x slower
|
204
|
-
OSV - Gzipped: 5.6 i/s - 37881517.48x slower
|
205
|
-
CSV - Array output: 2.2 i/s - 97400427.87x slower
|
206
|
-
CSV - StringIO: 1.5 i/s - 144580048.04x slower
|
207
|
-
CSV - Hash output: 1.2 i/s - 174666591.31x slower
|
208
|
-
CSV - Gzipped: 1.2 i/s - 181626018.23x slower
|
209
|
-
```
|
210
|
-
|
211
|
-
### 1,000,000 lines
|
212
|
-
|
213
|
-
```
|
214
|
-
Benchmarking with 1000001 lines of data
|
215
|
-
|
216
|
-
ruby 3.3.6 (2024-11-05 revision 75015d4c1f) [arm64-darwin24]
|
217
|
-
Warming up --------------------------------------
|
218
|
-
OSV - Hash output 1.000 i/100ms
|
219
|
-
CSV - Hash output 1.000 i/100ms
|
220
|
-
OSV - Array output 1.000 i/100ms
|
221
|
-
OSV - Direct Open Array output
|
222
|
-
1.000 i/100ms
|
223
|
-
CSV - Array output 1.000 i/100ms
|
224
|
-
FastCSV - Array output
|
225
|
-
1.000 i/100ms
|
226
|
-
OSV - StringIO 1.000 i/100ms
|
227
|
-
CSV - StringIO 1.000 i/100ms
|
228
|
-
FastCSV - StringIO 1.000 i/100ms
|
229
|
-
OSV - Gzipped 1.000 i/100ms
|
230
|
-
CSV - Gzipped 1.000 i/100ms
|
231
|
-
Calculating -------------------------------------
|
232
|
-
OSV - Hash output 0.492 (± 0.0%) i/s (2.03 s/i) - 5.000 in 10.463278s
|
233
|
-
CSV - Hash output 0.114 (± 0.0%) i/s (8.75 s/i) - 2.000 in 17.573877s
|
234
|
-
OSV - Array output 1.502 (± 0.0%) i/s (665.58 ms/i) - 14.000 in 10.217551s
|
235
|
-
OSV - Direct Open Array output
|
236
|
-
1.626 (± 0.0%) i/s (614.90 ms/i) - 16.000 in 10.190323s
|
237
|
-
CSV - Array output 0.183 (± 0.0%) i/s (5.46 s/i) - 2.000 in 10.951943s
|
238
|
-
FastCSV - Array output
|
239
|
-
0.326 (± 0.0%) i/s (3.07 s/i) - 4.000 in 12.340605s
|
240
|
-
OSV - StringIO 0.567 (± 0.0%) i/s (1.76 s/i) - 6.000 in 10.698027s
|
241
|
-
CSV - StringIO 0.141 (± 0.0%) i/s (7.10 s/i) - 2.000 in 14.237144s
|
242
|
-
FastCSV - StringIO 0.923 (± 0.0%) i/s (1.08 s/i) - 10.000 in 11.567775s
|
243
|
-
OSV - Gzipped 0.437 (± 0.0%) i/s (2.29 s/i) - 5.000 in 11.452764s
|
244
|
-
CSV - Gzipped 0.104 (± 0.0%) i/s (9.64 s/i) - 2.000 in 19.373423s
|
245
|
-
|
246
|
-
Comparison:
|
247
|
-
OSV - Direct Open Array output: 1.6 i/s
|
248
|
-
OSV - Array output: 1.5 i/s - 1.08x slower
|
249
|
-
FastCSV - StringIO: 0.9 i/s - 1.76x slower
|
250
|
-
OSV - StringIO: 0.6 i/s - 2.87x slower
|
251
|
-
OSV - Hash output: 0.5 i/s - 3.30x slower
|
252
|
-
OSV - Gzipped: 0.4 i/s - 3.72x slower
|
253
|
-
FastCSV - Array output: 0.3 i/s - 4.99x slower
|
254
|
-
CSV - Array output: 0.2 i/s - 8.88x slower
|
255
|
-
CSV - StringIO: 0.1 i/s - 11.55x slower
|
256
|
-
CSV - Hash output: 0.1 i/s - 14.24x slower
|
257
|
-
CSV - Gzipped: 0.1 i/s - 15.68x slower
|
258
|
-
```
|
42
|
+
OSV is a high-performance CSV parser for Ruby, implemented in Rust. It wraps BurntSushi's csv-rs crate to provide fast CSV parsing with support for both hash-based and array-based row formats.
|
43
|
+
|
44
|
+
Features include:
|
45
|
+
- Flexible input sources (file paths, gzipped files, IO objects, strings)
|
46
|
+
- Configurable parsing options (headers, separators, quote chars)
|
47
|
+
- Support for both hash and array output formats
|
48
|
+
- Whitespace trimming options
|
49
|
+
- Strict or flexible parsing modes
|
50
|
+
- Significantly faster than Ruby's standard CSV library
|
259
51
|
email:
|
260
52
|
- nathan@jaremko.ca
|
261
53
|
executables: []
|
@@ -290,6 +82,10 @@ licenses:
|
|
290
82
|
metadata:
|
291
83
|
homepage_uri: https://github.com/njaremko/osv
|
292
84
|
source_code_uri: https://github.com/njaremko/osv
|
85
|
+
readme_uri: https://github.com/njaremko/osv/blob/main/README.md
|
86
|
+
changelog_uri: https://github.com/njaremko/osv/blob/main/CHANGELOG.md
|
87
|
+
rubygems_mfa_required: 'true'
|
88
|
+
documentation_uri: https://www.rubydoc.info/gems/osv
|
293
89
|
post_install_message:
|
294
90
|
rdoc_options: []
|
295
91
|
require_paths:
|