rbcsv 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +1 -1
- data/DEVELOPMENT.md +121 -11
- data/README.md +89 -67
- data/docs/exe_upgrade_version.md +124 -0
- data/docs/release_process_v0.1.8.md +298 -0
- data/docs/special_character_bug_fix.md +257 -0
- data/docs/write_functionality_implementation.md +197 -0
- data/examples/README.md +221 -0
- data/{test.rb → examples/basic/basic_usage.rb} +2 -1
- data/{test_fixed.rb → examples/basic/test_fixed.rb} +1 -1
- data/examples/benchmarks/benchmark.rb +372 -0
- data/{output_comparison.rb → examples/benchmarks/output_comparison.rb} +41 -26
- data/examples/benchmarks/sample.csv +1001 -0
- data/examples/features/test_typed_functionality.rb +109 -0
- data/{test_write_functionality.rb → examples/features/test_write_functionality.rb} +1 -1
- data/ext/rbcsv/Cargo.toml +1 -1
- data/ext/rbcsv/src/error.rs +2 -2
- data/ext/rbcsv/src/lib.rs +8 -1
- data/ext/rbcsv/src/parser.rs +74 -15
- data/ext/rbcsv/src/ruby_api.rs +101 -2
- data/ext/rbcsv/src/value.rs +87 -0
- data/lib/rbcsv/version.rb +1 -1
- metadata +16 -8
- data/benchmark.rb +0 -190
- /data/{quick_test.rb → examples/basic/quick_test.rb} +0 -0
- /data/{test_install.rb → examples/basic/test_install.rb} +0 -0
data/benchmark.rb
DELETED
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
# encoding: utf-8
|
|
3
|
-
|
|
4
|
-
require 'csv'
|
|
5
|
-
require 'benchmark'
|
|
6
|
-
require 'date'
|
|
7
|
-
require_relative 'lib/r_csv'
|
|
8
|
-
|
|
9
|
-
# CSV file path
|
|
10
|
-
CSV_FILE = 'sample.csv'
|
|
11
|
-
|
|
12
|
-
puts "=== CSV Library Benchmark Comparison ==="
|
|
13
|
-
puts "File: #{CSV_FILE}"
|
|
14
|
-
puts "Ruby version: #{RUBY_VERSION}"
|
|
15
|
-
puts
|
|
16
|
-
|
|
17
|
-
# Read CSV content once for string-based parsing
|
|
18
|
-
csv_content = File.read(CSV_FILE)
|
|
19
|
-
puts "File size: #{csv_content.bytesize} bytes"
|
|
20
|
-
puts "Records: #{CSV.read(CSV_FILE, headers: true).length}"
|
|
21
|
-
puts
|
|
22
|
-
|
|
23
|
-
puts "=== Parse Performance Comparison ==="
|
|
24
|
-
Benchmark.bm(35) do |x|
|
|
25
|
-
|
|
26
|
-
# Built-in CSV.read - bulk read with headers
|
|
27
|
-
x.report("CSV.read (headers: true)") do
|
|
28
|
-
1000.times do
|
|
29
|
-
data = CSV.read(CSV_FILE, headers: true)
|
|
30
|
-
end
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Built-in CSV.parse - from string with headers
|
|
34
|
-
x.report("CSV.parse (headers: true)") do
|
|
35
|
-
1000.times do
|
|
36
|
-
data = CSV.parse(csv_content, headers: true)
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# Built-in CSV.parse - raw parsing
|
|
41
|
-
x.report("CSV.parse (raw)") do
|
|
42
|
-
1000.times do
|
|
43
|
-
data = CSV.parse(csv_content)
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# r_csv - Rust extension parsing
|
|
48
|
-
x.report("RCsv.parse (Rust)") do
|
|
49
|
-
1000.times do
|
|
50
|
-
data = RCsv.parse(csv_content)
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
puts "\n=== Memory Usage Comparison ==="
|
|
57
|
-
|
|
58
|
-
# Helper to measure memory usage
|
|
59
|
-
def memory_usage
|
|
60
|
-
`ps -o rss= -p #{Process.pid}`.to_i
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
puts "Initial memory usage: #{memory_usage} KB"
|
|
64
|
-
|
|
65
|
-
# CSV.read
|
|
66
|
-
before_read = memory_usage
|
|
67
|
-
data_read = CSV.read(CSV_FILE, headers: true)
|
|
68
|
-
after_read = memory_usage
|
|
69
|
-
puts "After CSV.read: #{after_read} KB (diff: #{after_read - before_read} KB)"
|
|
70
|
-
|
|
71
|
-
# CSV.parse
|
|
72
|
-
before_parse = memory_usage
|
|
73
|
-
data_parse = CSV.parse(csv_content)
|
|
74
|
-
after_parse = memory_usage
|
|
75
|
-
puts "After CSV.parse: #{after_parse} KB (diff: #{after_parse - before_parse} KB)"
|
|
76
|
-
|
|
77
|
-
# r_csv
|
|
78
|
-
before_rcv = memory_usage
|
|
79
|
-
data_rcv = RCsv.parse(csv_content)
|
|
80
|
-
after_rcv = memory_usage
|
|
81
|
-
puts "After RCsv.parse: #{after_rcv} KB (diff: #{after_rcv - before_parse} KB)"
|
|
82
|
-
|
|
83
|
-
puts "\n=== Data Accuracy Verification ==="
|
|
84
|
-
puts "CSV.read rows: #{data_read.length}"
|
|
85
|
-
puts "CSV.parse rows: #{data_parse.length}"
|
|
86
|
-
puts "RCsv.parse rows: #{data_rcv.length}"
|
|
87
|
-
|
|
88
|
-
# Verify first row data
|
|
89
|
-
if data_rcv.length > 0
|
|
90
|
-
puts "\nFirst row comparison:"
|
|
91
|
-
puts "CSV.read: #{data_read.first.fields}"
|
|
92
|
-
puts "CSV.parse: #{data_parse[1]}" # Skip header
|
|
93
|
-
puts "RCsv.parse: #{data_rcv[0]}"
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
puts "\n=== Large Data Simulation ==="
|
|
97
|
-
puts "Generating 10,000 records for benchmark..."
|
|
98
|
-
|
|
99
|
-
# Generate large data
|
|
100
|
-
large_csv_file = 'large_sample.csv'
|
|
101
|
-
CSV.open(large_csv_file, "w") do |csv|
|
|
102
|
-
# Write header
|
|
103
|
-
csv << data_read.first.headers
|
|
104
|
-
|
|
105
|
-
# Duplicate original data 100 times
|
|
106
|
-
100.times do |batch|
|
|
107
|
-
data_read.each_with_index do |row, index|
|
|
108
|
-
new_row = row.fields.dup
|
|
109
|
-
new_row[0] = (batch * 100 + index + 1).to_s # Update ID
|
|
110
|
-
csv << new_row
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
large_csv_content = File.read(large_csv_file)
|
|
116
|
-
puts "Large data file created: #{File.size(large_csv_file)} bytes"
|
|
117
|
-
|
|
118
|
-
# Large data benchmark
|
|
119
|
-
puts "\n=== Large Data Performance Test ==="
|
|
120
|
-
Benchmark.bm(35) do |x|
|
|
121
|
-
|
|
122
|
-
x.report("CSV.read (large, 10 times)") do
|
|
123
|
-
10.times do
|
|
124
|
-
large_data = CSV.read(large_csv_file, headers: true)
|
|
125
|
-
end
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
x.report("CSV.parse (large, 10 times)") do
|
|
129
|
-
10.times do
|
|
130
|
-
large_data = CSV.parse(large_csv_content, headers: true)
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
x.report("RCsv.parse (large, 10 times)") do
|
|
135
|
-
10.times do
|
|
136
|
-
large_data = RCsv.parse(large_csv_content)
|
|
137
|
-
end
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
puts "\n=== Processing Speed Comparison ==="
|
|
143
|
-
csv_data = CSV.parse(csv_content, headers: true)
|
|
144
|
-
rcv_data = RCsv.parse(csv_content)
|
|
145
|
-
|
|
146
|
-
Benchmark.bm(35) do |x|
|
|
147
|
-
|
|
148
|
-
# Search by category with CSV data
|
|
149
|
-
x.report("CSV search 'tech' (1000x)") do
|
|
150
|
-
1000.times do
|
|
151
|
-
csv_data.select { |row| row['category'] == 'tech' }
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
# Search by category with r_csv data (need to implement indexing)
|
|
156
|
-
x.report("RCsv search 'tech' (1000x)") do
|
|
157
|
-
1000.times do
|
|
158
|
-
rcv_data.select { |row| row[3] == 'tech' } # category is 4th column (index 3)
|
|
159
|
-
end
|
|
160
|
-
end
|
|
161
|
-
|
|
162
|
-
# Complex filtering with CSV
|
|
163
|
-
x.report("CSV complex filter (1000x)") do
|
|
164
|
-
1000.times do
|
|
165
|
-
csv_data.select { |row|
|
|
166
|
-
row['category'] == 'tech' && row['status'] == 'published'
|
|
167
|
-
}
|
|
168
|
-
end
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Complex filtering with r_csv
|
|
172
|
-
x.report("RCsv complex filter (1000x)") do
|
|
173
|
-
1000.times do
|
|
174
|
-
rcv_data.select { |row|
|
|
175
|
-
row[3] == 'tech' && row[4] == 'published'
|
|
176
|
-
}
|
|
177
|
-
end
|
|
178
|
-
end
|
|
179
|
-
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
# Cleanup
|
|
183
|
-
File.delete(large_csv_file)
|
|
184
|
-
|
|
185
|
-
puts "\n=== Performance Summary ==="
|
|
186
|
-
puts "r_csv provides raw array data (faster for pure parsing)"
|
|
187
|
-
puts "CSV provides structured data with headers (better for development)"
|
|
188
|
-
puts "Choose based on your use case: speed vs convenience"
|
|
189
|
-
puts "\n=== Benchmark Complete ==="
|
|
190
|
-
puts "Execution time: #{Time.now}"
|
|
File without changes
|
|
File without changes
|