rbcsv 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/benchmark.rb DELETED
@@ -1,190 +0,0 @@
- #!/usr/bin/env ruby
- # encoding: utf-8
-
- require 'csv'
- require 'benchmark'
- require 'date'
- require_relative 'lib/r_csv'
-
- # CSV file path
- CSV_FILE = 'sample.csv'
-
- puts "=== CSV Library Benchmark Comparison ==="
- puts "File: #{CSV_FILE}"
- puts "Ruby version: #{RUBY_VERSION}"
- puts
-
- # Read CSV content once for string-based parsing
- csv_content = File.read(CSV_FILE)
- puts "File size: #{csv_content.bytesize} bytes"
- puts "Records: #{CSV.read(CSV_FILE, headers: true).length}"
- puts
-
- puts "=== Parse Performance Comparison ==="
- Benchmark.bm(35) do |x|
-
-   # Built-in CSV.read - bulk read with headers
-   x.report("CSV.read (headers: true)") do
-     1000.times do
-       data = CSV.read(CSV_FILE, headers: true)
-     end
-   end
-
-   # Built-in CSV.parse - from string with headers
-   x.report("CSV.parse (headers: true)") do
-     1000.times do
-       data = CSV.parse(csv_content, headers: true)
-     end
-   end
-
-   # Built-in CSV.parse - raw parsing
-   x.report("CSV.parse (raw)") do
-     1000.times do
-       data = CSV.parse(csv_content)
-     end
-   end
-
-   # r_csv - Rust extension parsing
-   x.report("RCsv.parse (Rust)") do
-     1000.times do
-       data = RCsv.parse(csv_content)
-     end
-   end
-
- end
-
- puts "\n=== Memory Usage Comparison ==="
-
- # Helper to measure memory usage
- def memory_usage
-   `ps -o rss= -p #{Process.pid}`.to_i
- end
-
- puts "Initial memory usage: #{memory_usage} KB"
-
- # CSV.read
- before_read = memory_usage
- data_read = CSV.read(CSV_FILE, headers: true)
- after_read = memory_usage
- puts "After CSV.read: #{after_read} KB (diff: #{after_read - before_read} KB)"
-
- # CSV.parse
- before_parse = memory_usage
- data_parse = CSV.parse(csv_content)
- after_parse = memory_usage
- puts "After CSV.parse: #{after_parse} KB (diff: #{after_parse - before_parse} KB)"
-
- # r_csv
- before_rcv = memory_usage
- data_rcv = RCsv.parse(csv_content)
- after_rcv = memory_usage
- puts "After RCsv.parse: #{after_rcv} KB (diff: #{after_rcv - before_rcv} KB)"
-
- puts "\n=== Data Accuracy Verification ==="
- puts "CSV.read rows: #{data_read.length}"
- puts "CSV.parse rows: #{data_parse.length}"
- puts "RCsv.parse rows: #{data_rcv.length}"
-
- # Verify first row data
- if data_rcv.length > 0
-   puts "\nFirst row comparison:"
-   puts "CSV.read: #{data_read.first.fields}"
-   puts "CSV.parse: #{data_parse[1]}" # Skip header
-   puts "RCsv.parse: #{data_rcv[0]}"
- end
-
- puts "\n=== Large Data Simulation ==="
- puts "Generating 10,000 records for benchmark..."
-
- # Generate large data
- large_csv_file = 'large_sample.csv'
- CSV.open(large_csv_file, "w") do |csv|
-   # Write header
-   csv << data_read.first.headers
-
-   # Duplicate original data 100 times
-   100.times do |batch|
-     data_read.each_with_index do |row, index|
-       new_row = row.fields.dup
-       new_row[0] = (batch * 100 + index + 1).to_s # Update ID
-       csv << new_row
-     end
-   end
- end
-
- large_csv_content = File.read(large_csv_file)
- puts "Large data file created: #{File.size(large_csv_file)} bytes"
-
- # Large data benchmark
- puts "\n=== Large Data Performance Test ==="
- Benchmark.bm(35) do |x|
-
-   x.report("CSV.read (large, 10 times)") do
-     10.times do
-       large_data = CSV.read(large_csv_file, headers: true)
-     end
-   end
-
-   x.report("CSV.parse (large, 10 times)") do
-     10.times do
-       large_data = CSV.parse(large_csv_content, headers: true)
-     end
-   end
-
-   x.report("RCsv.parse (large, 10 times)") do
-     10.times do
-       large_data = RCsv.parse(large_csv_content)
-     end
-   end
-
- end
-
- puts "\n=== Processing Speed Comparison ==="
- csv_data = CSV.parse(csv_content, headers: true)
- rcv_data = RCsv.parse(csv_content)
-
- Benchmark.bm(35) do |x|
-
-   # Search by category with CSV data
-   x.report("CSV search 'tech' (1000x)") do
-     1000.times do
-       csv_data.select { |row| row['category'] == 'tech' }
-     end
-   end
-
-   # Search by category with r_csv data (need to implement indexing)
-   x.report("RCsv search 'tech' (1000x)") do
-     1000.times do
-       rcv_data.select { |row| row[3] == 'tech' } # category is 4th column (index 3)
-     end
-   end
-
-   # Complex filtering with CSV
-   x.report("CSV complex filter (1000x)") do
-     1000.times do
-       csv_data.select { |row|
-         row['category'] == 'tech' && row['status'] == 'published'
-       }
-     end
-   end
-
-   # Complex filtering with r_csv
-   x.report("RCsv complex filter (1000x)") do
-     1000.times do
-       rcv_data.select { |row|
-         row[3] == 'tech' && row[4] == 'published'
-       }
-     end
-   end
-
- end
-
- # Cleanup
- File.delete(large_csv_file)
-
- puts "\n=== Performance Summary ==="
- puts "r_csv provides raw array data (faster for pure parsing)"
- puts "CSV provides structured data with headers (better for development)"
- puts "Choose based on your use case: speed vs convenience"
- puts "\n=== Benchmark Complete ==="
- puts "Execution time: #{Time.now}"
File without changes
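
The deleted benchmark's closing summary contrasts RCsv.parse, which returns raw row arrays, with the standard CSV library, which returns header-aware rows. As a minimal sketch (not part of the package, and assuming the parsed output's first row holds the headers, which this diff does not confirm), the array output can be given CSV-style named access:

  # Hypothetical sketch: convert array rows from RCsv.parse into header-keyed hashes.
  rows = RCsv.parse(File.read('sample.csv'))
  headers, *body = rows                              # assumes row 0 is the header row
  records = body.map { |row| headers.zip(row).to_h } # e.g. { 'category' => 'tech', ... }
  records.select { |r| r['category'] == 'tech' && r['status'] == 'published' }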