rbcsv 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,372 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'csv'
5
+ require 'benchmark'
6
+ require 'fileutils'
7
+ require 'time'
8
+ require_relative '../../lib/rbcsv'
9
+
10
# Benchmark configuration: iteration counts and the CSV fixture file names
# used by the benchmark methods below.
ITERATIONS = 1000
LARGE_ITERATIONS = 10
CSV_FILE = 'sample.csv'
LARGE_CSV_FILE = 'large_sample.csv'

# Print a banner with the Ruby version, RbCsv version and current time,
# followed by one blank line.
banner_rule = "=" * 60
puts <<~BANNER
  #{banner_rule}
  RbCsv vs Ruby CSV ベンチマーク比較
  #{banner_rule}
  Ruby version: #{RUBY_VERSION}
  RbCsv version: #{RbCsv::VERSION}
  Date: #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}

BANNER
23
+
24
+ class BenchmarkRunner
25
# Starts with no CSV payload loaded; @csv_content is filled by
# setup_sample_data and @large_csv_content by create_large_sample_data.
def initialize
  @csv_content = @large_csv_content = nil
end
29
+
30
+ def setup_sample_data
31
+ # サンプルデータが存在しない場合は作成
32
+ unless File.exist?(CSV_FILE)
33
+ create_sample_csv_file
34
+ end
35
+
36
+ @csv_content = File.read(CSV_FILE)
37
+ puts "使用ファイル: #{CSV_FILE}"
38
+ puts "ファイルサイズ: #{@csv_content.bytesize} bytes"
39
+
40
+ # レコード数を確認
41
+ records_count = CSV.read(CSV_FILE).length - 1 # ヘッダーを除く
42
+ puts "レコード数: #{records_count}"
43
+ puts
44
+ end
45
+
46
# Writes CSV_FILE with a header row followed by 1000 records whose age, score,
# department, salary and active_date columns are randomly generated.
def create_sample_csv_file
  puts "サンプルCSVファイルを作成中..."

  header = %w[id name age score department salary active_date]
  departments = %w[Engineering Sales Marketing HR]

  CSV.open(CSV_FILE, "w") do |out|
    out << header

    # One record per ID from 1 to 1000; random values are drawn in column order.
    (1..1000).each do |id|
      age = rand(20..65)
      score = rand(60.0..100.0).round(2)
      department = departments[rand(4)]
      salary = rand(40000..120000)
      active_date = (Date.today - rand(365)).to_s

      out << [id, "User#{id}", age, score, department, salary, active_date]
    end
  end

  puts "サンプルファイル作成完了: #{CSV_FILE}"
end
67
+
68
# Makes sure LARGE_CSV_FILE exists and loads it into @large_csv_content.
#
# When the file is missing it is built from CSV_FILE: the header is copied once
# and the data rows are repeated 50 times, with the first column rewritten to a
# running ID. An existing file is reused unchanged.
def create_large_sample_data
  if File.exist?(LARGE_CSV_FILE)
    puts "既存のファイルを使用: #{LARGE_CSV_FILE}"
  else
    puts "テストデータを作成中..."
    header, *data_rows = CSV.read(CSV_FILE)
    # Offset each batch by the real number of source rows instead of a fixed
    # 1000, so IDs stay unique and contiguous for any sample size.
    rows_per_batch = data_rows.length

    CSV.open(LARGE_CSV_FILE, "w") do |csv|
      # An empty source file has no header row to copy.
      csv << header if header

      50.times do |batch|
        data_rows.each_with_index do |row, index|
          new_row = row.dup
          new_row[0] = (batch * rows_per_batch + index + 1).to_s # renumber the ID column
          csv << new_row
        end
      end
    end

    puts "ファイル作成完了: #{LARGE_CSV_FILE} (#{File.size(LARGE_CSV_FILE)} bytes)"
  end

  # Load the content whether the file was reused or freshly generated.
  @large_csv_content = File.read(LARGE_CSV_FILE)
  puts "データサイズ: #{@large_csv_content.bytesize} bytes"
  puts
end
95
+
96
# Times parsing of the in-memory sample CSV (@csv_content) with Ruby's CSV and
# with each RbCsv parse variant; every candidate runs ITERATIONS times.
def run_basic_parsing_benchmark
  puts "🚀 基本パース性能比較 (#{ITERATIONS}回実行)"
  puts "-" * 50

  # Report label => work done once per iteration, reported in insertion order.
  candidates = {
    # Ruby standard CSV
    "Ruby CSV.parse" => proc { CSV.parse(@csv_content) },
    "Ruby CSV.parse (headers: true)" => proc { CSV.parse(@csv_content, headers: true) },
    # RbCsv basic parsing
    "RbCsv.parse" => proc { RbCsv.parse(@csv_content) },
    "RbCsv.parse! (with trim)" => proc { RbCsv.parse!(@csv_content) },
    # RbCsv type-aware parsing
    "RbCsv.parse_typed" => proc { RbCsv.parse_typed(@csv_content) },
    "RbCsv.parse_typed! (typed + trim)" => proc { RbCsv.parse_typed!(@csv_content) }
  }

  Benchmark.bm(40) do |bench|
    candidates.each do |label, work|
      bench.report(label) { ITERATIONS.times(&work) }
    end
  end
  puts
end
142
+
143
# Times reading CSV_FILE from disk with Ruby's CSV and with each RbCsv read
# variant; every candidate runs ITERATIONS times.
def run_file_reading_benchmark
  puts "📁 ファイル読み込み性能比較 (#{ITERATIONS}回実行)"
  puts "-" * 50

  # [label, work] pairs, reported in this order.
  candidates = [
    # Ruby standard CSV
    ["Ruby CSV.read", proc { CSV.read(CSV_FILE) }],
    ["Ruby CSV.read (headers: true)", proc { CSV.read(CSV_FILE, headers: true) }],
    # RbCsv
    ["RbCsv.read", proc { RbCsv.read(CSV_FILE) }],
    ["RbCsv.read! (with trim)", proc { RbCsv.read!(CSV_FILE) }],
    ["RbCsv.read_typed", proc { RbCsv.read_typed(CSV_FILE) }],
    ["RbCsv.read_typed! (typed + trim)", proc { RbCsv.read_typed!(CSV_FILE) }]
  ]

  Benchmark.bm(40) do |bench|
    candidates.each do |label, work|
      bench.report(label) { ITERATIONS.times(&work) }
    end
  end
  puts
end
188
+
189
# Prepares the large CSV fixture via create_large_sample_data, then times
# parsing @large_csv_content and reading LARGE_CSV_FILE with Ruby's CSV and
# RbCsv; every candidate runs LARGE_ITERATIONS times. Prints an error and
# returns early when no large content is loaded.
def run_large_data_benchmark
  create_large_sample_data

  puts "💪 データ性能比較 (#{LARGE_ITERATIONS}回実行)"
  puts "-" * 50

  # Guard: nothing to measure when the payload is nil or empty.
  if @large_csv_content.to_s.empty?
    puts "エラー: データが読み込まれていません"
    return
  end

  # [label, work] pairs, reported in this order.
  candidates = [
    # In-memory parsing
    ["Ruby CSV.parse (large)", proc { CSV.parse(@large_csv_content) }],
    ["RbCsv.parse (large)", proc { RbCsv.parse(@large_csv_content) }],
    ["RbCsv.parse_typed (large)", proc { RbCsv.parse_typed(@large_csv_content) }],
    # Reading from disk
    ["Ruby CSV.read (large file)", proc { CSV.read(LARGE_CSV_FILE) }],
    ["RbCsv.read (large file)", proc { RbCsv.read(LARGE_CSV_FILE) }],
    ["RbCsv.read_typed (large file)", proc { RbCsv.read_typed(LARGE_CSV_FILE) }]
  ]

  Benchmark.bm(40) do |bench|
    candidates.each do |label, work|
      bench.report(label) { LARGE_ITERATIONS.times(&work) }
    end
  end
  puts
end
242
+
243
# Times writing the same 100-row data set (plus header) with Ruby's CSV.open
# and with RbCsv.write; each candidate runs ITERATIONS times.
#
# The output files are created in the current working directory and are
# removed afterwards, including when a benchmark raises.
def run_writing_benchmark
  puts "✏️ ファイル書き込み性能比較 (#{ITERATIONS}回実行)"
  puts "-" * 50

  # Absolute paths of the scratch output files.
  csv_out = File.join(Dir.pwd, 'benchmark_csv_output.csv')
  rbcsv_out = File.join(Dir.pwd, 'benchmark_rbcsv_output.csv')

  header = %w[id name age score department salary active_date]
  departments = %w[Engineering Sales Marketing HR]

  # Test rows, with every cell already converted to a string.
  test_data = Array.new(100) do |i|
    [
      (i + 1).to_s,
      "TestUser#{i + 1}",
      rand(20..65).to_s,
      rand(60.0..100.0).round(2).to_s,
      departments[rand(4)],
      rand(40000..120000).to_s,
      (Date.today - rand(365)).to_s
    ]
  end

  # Built once, outside the timed loops, so array construction is not charged
  # to either writer.
  write_data = [header] + test_data

  begin
    Benchmark.bm(40) do |x|
      # Ruby standard CSV
      x.report("Ruby CSV.open (write)") do
        ITERATIONS.times do
          CSV.open(csv_out, "w") do |csv|
            csv << header
            test_data.each { |row| csv << row }
          end
        end
      end

      # RbCsv
      x.report("RbCsv.write") do
        ITERATIONS.times { RbCsv.write(rbcsv_out, write_data) }
      end
    end
  ensure
    # Remove the scratch files even when a benchmark aborts.
    [csv_out, rbcsv_out].each do |file|
      File.delete(file) if File.exist?(file)
    end
  end
  puts
end
291
+
292
+
293
# Compares hand-written per-row type conversion of already-parsed data (from
# CSV.parse and from RbCsv.parse) against data returned by RbCsv.parse_typed;
# each candidate runs ITERATIONS times.
#
# The manual conversion tolerates rows with fewer than 11 columns. The sample
# written by create_sample_csv_file has only 7, which previously made
# Time.parse receive nil and raise TypeError.
def run_type_conversion_comparison
  puts "🔢 型変換処理の比較 (#{ITERATIONS}回実行)"
  puts "-" * 50

  csv_data = CSV.parse(@csv_content)
  rbcsv_data = RbCsv.parse(@csv_content)
  rbcsv_typed_data = RbCsv.parse_typed(@csv_content)

  Benchmark.bm(40) do |x|
    # Manual conversion vs. automatic conversion
    x.report("Manual conversion (CSV)") do
      ITERATIONS.times { manually_convert_rows(csv_data) }
    end

    x.report("Manual conversion (RbCsv)") do
      ITERATIONS.times { manually_convert_rows(rbcsv_data) }
    end

    # NOTE(review): this entry only slices data parsed before the timer
    # started, so it does not include the cost of parse_typed itself.
    x.report("Automatic conversion (RbCsv typed)") do
      ITERATIONS.times do
        rbcsv_typed_data[1..-1] # already type-converted
      end
    end
  end
  puts
end

# Converts every row after the header: columns 0 and 8 to Integer, columns 6,
# 7, 9 and 10 to Time, columns 1-5 left as strings. The layout presumably
# follows an 11-column event schema (id, title, ..., updated_at) — TODO confirm.
private def manually_convert_rows(rows)
  rows.drop(1).map do |row|
    [
      row[0].to_i,
      row[1],
      row[2],
      row[3],
      row[4],
      row[5],
      parse_time_cell(row[6]),
      parse_time_cell(row[7]),
      row[8].to_i,
      parse_time_cell(row[9]),
      parse_time_cell(row[10])
    ]
  end
end

# Parses one cell as a Time, returning nil for a missing or empty cell.
private def parse_time_cell(value)
  return nil if value.nil? || value.empty?

  Time.parse(value)
end
351
+
352
# Deletes the generated large CSV fixture when it is present.
def cleanup
  generated_files = [LARGE_CSV_FILE]
  generated_files.each do |path|
    next unless File.exist?(path)

    File.delete(path)
  end
end
357
# Runs the whole suite in order: sample setup, parsing, file reading, writing,
# large-data and type-conversion benchmarks.
#
# cleanup runs in an ensure clause so the generated large CSV file is removed
# even when one of the benchmarks raises; the error still propagates.
def run_all_benchmarks
  setup_sample_data
  run_basic_parsing_benchmark
  run_file_reading_benchmark
  run_writing_benchmark
  run_large_data_benchmark
  run_type_conversion_comparison
ensure
  cleanup
end
366
+ end
367
+
368
+ # ベンチマーク実行
369
# Run the suite only when this file is executed directly, not when it is loaded
# by another script.
BenchmarkRunner.new.run_all_benchmarks if __FILE__ == $PROGRAM_NAME
@@ -2,7 +2,7 @@
2
2
  # encoding: utf-8
3
3
 
4
4
  require 'csv'
5
- require_relative 'lib/r_csv'
5
+ require_relative '../../lib/rbcsv'
6
6
 
7
7
  CSV_FILE = 'sample.csv'
8
8
  csv_content = File.read(CSV_FILE)
@@ -48,8 +48,8 @@ puts "Data:"
48
48
  csv_table.each_with_index { |row, i| puts " [#{i}] #{row.to_h}" }
49
49
  puts
50
50
 
51
- puts "=== 4. RCsv.parse (current - raw arrays) ==="
52
- rcv_parse = RCsv.parse(small_csv)
51
+ puts "=== 4. RbCsv.parse (current - raw arrays) ==="
52
+ rcv_parse = RbCsv.parse(small_csv)
53
53
  puts "Type: #{rcv_parse.class}"
54
54
  puts "Rows: #{rcv_parse.length}"
55
55
  puts "Data:"
@@ -73,11 +73,11 @@ puts " - Excludes header from data rows"
73
73
  puts " - Example: #<CSV::Row id:\"1\" name:\"Alice\" age:\"30\">"
74
74
  puts
75
75
 
76
- puts "3. RCsv.parse (current):"
76
+ puts "3. RbCsv.parse (current):"
77
77
  puts " - Returns: Array<Array<String>>"
78
- puts " - Excludes header row (data only)"
78
+ puts " - Includes header row as first element"
79
79
  puts " - Raw string arrays"
80
- puts " - Example: [[\"1\", \"Alice\", \"30\"], [\"2\", \"Bob\", \"25\"]]"
80
+ puts " - Example: [[\"id\", \"name\", \"age\"], [\"1\", \"Alice\", \"30\"]]"
81
81
  puts
82
82
 
83
83
  puts "=== Key Differences ==="
@@ -86,54 +86,69 @@ puts
86
86
  puts "Header handling:"
87
87
  puts " CSV.parse (default): Includes header as row[0]"
88
88
  puts " CSV.parse (headers=true): Excludes header, provides row['column'] access"
89
- puts " RCsv.parse: Excludes header, raw arrays only"
89
+ puts " RbCsv.parse: Includes header as row[0] (same as CSV.parse default)"
90
90
  puts
91
91
 
92
92
  puts "Data structure:"
93
93
  puts " CSV.parse: Can return CSV::Row objects with named access"
94
- puts " RCsv.parse: Always returns raw Array<String>"
94
+ puts " RbCsv.parse: Always returns raw Array<String>"
95
95
  puts
96
96
 
97
97
  puts "Row count difference:"
98
98
  csv_default = CSV.parse(small_csv)
99
99
  csv_headers = CSV.parse(small_csv, headers: true)
100
- rcv_data = RCsv.parse(small_csv)
100
+ rcv_data = RbCsv.parse(small_csv)
101
101
 
102
102
  puts " CSV.parse (default): #{csv_default.length} rows (includes header)"
103
103
  puts " CSV.parse (headers=true): #{csv_headers.length} rows (data only)"
104
- puts " RCsv.parse: #{rcv_data.length} rows (data only)"
104
+ puts " RbCsv.parse: #{rcv_data.length} rows (includes header)"
105
105
  puts
106
106
 
107
107
  puts "=== Compatibility Recommendations ==="
108
108
  puts
109
109
 
110
- puts "To match CSV.parse (default behavior):"
111
- puts " - RCsv should include header row as first element"
112
- puts " - Return format: [[\"id\", \"name\"], [\"1\", \"Alice\"], [\"2\", \"Bob\"]]"
110
+ puts "RbCsv already matches CSV.parse (default behavior):"
111
+ puts " - Both include header row as first element"
112
+ puts " - Both return format: [[\"id\", \"name\"], [\"1\", \"Alice\"], [\"2\", \"Bob\"]]"
113
113
  puts
114
114
 
115
- puts "To match CSV.parse (headers: true):"
116
- puts " - More complex: need CSV::Row-like objects"
115
+ puts "To add CSV.parse (headers: true) compatibility:"
116
+ puts " - Would need CSV::Row-like objects"
117
117
  puts " - Alternative: return {headers: [...], data: [[...], [...]]} structure"
118
118
  puts
119
119
 
120
- puts "Current RCsv matches:"
121
- puts " - CSV.parse(content, headers: true) # data rows only"
122
- puts " - But returns raw arrays instead of CSV::Row objects"
120
+ puts "Current RbCsv compatibility:"
121
+ puts " - Matches CSV.parse(content) exactly - both return Array<Array<String>> with headers"
122
+ puts " - Does not yet support CSV.parse(content, headers: true) which returns CSV::Table"
123
123
  puts
124
124
 
125
125
  puts "=== Specific Implementation Suggestions ==="
126
126
  puts
127
127
 
128
- puts "Option 1: Match CSV.parse default (simplest):"
129
- puts " RCsv.parse(content) → includes header row"
128
+ puts "Option 1: Keep current behavior (already compatible):"
129
+ puts " RbCsv.parse(content) → already includes header row"
130
130
  puts
131
131
 
132
- puts "Option 2: Add options parameter:"
133
- puts " RCsv.parse(content, headers: false) → includes header"
134
- puts " RCsv.parse(content, headers: true) → excludes header"
132
+ puts "Option 2: Add options parameter for structured data:"
133
+ puts " RbCsv.parse(content, headers: false) → current behavior (default)"
134
+ puts " RbCsv.parse(content, headers: true) → future: structured data like CSV::Table"
135
135
  puts
136
136
 
137
- puts "Option 3: Multiple methods:"
138
- puts " RCsv.parse_raw(content) → raw arrays with header"
139
- puts " RCsv.parse(content) → structured data"
137
+ puts "Option 3: Add convenience methods:"
138
+ puts " RbCsv.table(content) → future: structured data with named access"
139
+ puts " RbCsv.foreach(file) → future: streaming with block"
140
+ puts
141
+ puts "=== Actual Compatibility Status ==="
142
+ puts
143
+ puts "✅ RbCsv.parse(content) == CSV.parse(content)"
144
+ puts " Both return: Array<Array<String>> with header row included"
145
+ puts
146
+ puts "❌ RbCsv does not yet support:"
147
+ puts " - CSV.parse(content, headers: true) → CSV::Table with CSV::Row objects"
148
+ puts " - Named column access like row['name']"
149
+ puts " - Header converters (e.g., :symbol)"
150
+ puts
151
+ puts "Future improvements could add:"
152
+ puts " - RbCsv.parse(content, headers: true) for structured data"
153
+ puts " - RbCsv::Row and RbCsv::Table classes for compatibility"
154
+ puts " - Integration with streaming API for large files"