RubyGems - rbcsv - Versions diffs - 0.1.8 → 0.2.0 - Mend

rbcsv 0.1.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +23 -0
data/Cargo.lock +1 -1
data/DEVELOPMENT.md +121 -11
data/README.md +89 -67
data/docs/exe_upgrade_version.md +124 -0
data/docs/release_process_v0.1.8.md +298 -0
data/docs/special_character_bug_fix.md +257 -0
data/docs/write_functionality_implementation.md +197 -0
data/examples/README.md +221 -0
data/{test.rb → examples/basic/basic_usage.rb} +2 -1
data/{test_fixed.rb → examples/basic/test_fixed.rb} +1 -1
data/examples/benchmarks/benchmark.rb +372 -0
data/{output_comparison.rb → examples/benchmarks/output_comparison.rb} +41 -26
data/examples/benchmarks/sample.csv +1001 -0
data/examples/features/test_typed_functionality.rb +109 -0
data/{test_write_functionality.rb → examples/features/test_write_functionality.rb} +1 -1
data/ext/rbcsv/Cargo.toml +1 -1
data/ext/rbcsv/src/error.rs +2 -2
data/ext/rbcsv/src/lib.rs +8 -1
data/ext/rbcsv/src/parser.rs +74 -15
data/ext/rbcsv/src/ruby_api.rs +101 -2
data/ext/rbcsv/src/value.rs +87 -0
data/lib/rbcsv/version.rb +1 -1
metadata +16 -8
data/benchmark.rb +0 -190
/data/{quick_test.rb → examples/basic/quick_test.rb} +0 -0
/data/{test_install.rb → examples/basic/test_install.rb} +0 -0

data/examples/benchmarks/benchmark.rb ADDED Viewed

@@ -0,0 +1,372 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+require 'csv'
+require 'benchmark'
+require 'fileutils'
+require 'time'
+require_relative '../../lib/rbcsv'
+# Benchmark settings
+# Number of repetitions per report in the regular-size benchmarks.
+ITERATIONS = 1000
+# Number of repetitions per report in the large-data benchmark.
+LARGE_ITERATIONS = 10
+# Sample input file. The path is relative, so it is resolved against the
+# current working directory of the process.
+CSV_FILE = 'sample.csv'
+# Larger input file used by the large-data benchmark; it is created on demand
+# from CSV_FILE and deleted again by BenchmarkRunner#cleanup.
+LARGE_CSV_FILE = 'large_sample.csv'
+# Print a banner with the Ruby version, the RbCsv version and a timestamp.
+puts "=" * 60
+puts "RbCsv vs Ruby CSV ベンチマーク比較"
+puts "=" * 60
+puts "Ruby version: #{RUBY_VERSION}"
+puts "RbCsv version: #{RbCsv::VERSION}"
+puts "Date: #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}"
+puts
+class BenchmarkRunner
+  def initialize
+    @csv_content = nil
+    @large_csv_content = nil
+  end
+  def setup_sample_data
+    # サンプルデータが存在しない場合は作成
+    unless File.exist?(CSV_FILE)
+      create_sample_csv_file
+    end
+    @csv_content = File.read(CSV_FILE)
+    puts "使用ファイル: #{CSV_FILE}"
+    puts "ファイルサイズ: #{@csv_content.bytesize} bytes"
+    # レコード数を確認
+    records_count = CSV.read(CSV_FILE).length - 1  # ヘッダーを除く
+    puts "レコード数: #{records_count}"
+    puts
+  end
+  def create_sample_csv_file
+    puts "サンプルCSVファイルを作成中..."
+    CSV.open(CSV_FILE, "w") do |csv|
+      # ヘッダー
+      csv << %w[id name age score department salary active_date]
+      # データ行を生成（1000レコード）
+      1000.times do |i|
+        csv << [
+          i + 1,
+          "User#{i + 1}",
+          rand(20..65),
+          rand(60.0..100.0).round(2),
+          %w[Engineering Sales Marketing HR][rand(4)],
+          rand(40000..120000),
+          (Date.today - rand(365)).to_s
+        ]
+      end
+    end
+    puts "サンプルファイル作成完了: #{CSV_FILE}"
+  end
+  def create_large_sample_data
+    if File.exist?(LARGE_CSV_FILE)
+      puts "既存のファイルを使用: #{LARGE_CSV_FILE}"
+    else
+      puts "テストデータを作成中..."
+      original_data = CSV.read(CSV_FILE)
+      CSV.open(LARGE_CSV_FILE, "w") do |csv|
+        csv << original_data.first
+        50.times do |batch|
+          original_data[1..-1].each_with_index do |row, index|
+            new_row = row.dup
+            new_row[0] = (batch * 1000 + index + 1).to_s  # IDを更新
+            csv << new_row
+          end
+        end
+      end
+      puts "ファイル作成完了: #{LARGE_CSV_FILE} (#{File.size(LARGE_CSV_FILE)} bytes)"
+    end
+    # 既存または新規に関わらず、コンテンツを読み込み
+    @large_csv_content = File.read(LARGE_CSV_FILE)
+    puts "データサイズ: #{@large_csv_content.bytesize} bytes"
+    puts
+  end
+  def run_basic_parsing_benchmark
+    puts "🚀 基本パース性能比較 (#{ITERATIONS}回実行)"
+    puts "-" * 50
+    Benchmark.bm(40) do |x|
+      # Ruby標準CSV
+      x.report("Ruby CSV.parse") do
+        ITERATIONS.times do
+          CSV.parse(@csv_content)
+        end
+      end
+      x.report("Ruby CSV.parse (headers: true)") do
+        ITERATIONS.times do
+          CSV.parse(@csv_content, headers: true)
+        end
+      end
+      # RbCsv 基本機能
+      x.report("RbCsv.parse") do
+        ITERATIONS.times do
+          RbCsv.parse(@csv_content)
+        end
+      end
+      x.report("RbCsv.parse! (with trim)") do
+        ITERATIONS.times do
+          RbCsv.parse!(@csv_content)
+        end
+      end
+      # RbCsv 型認識機能
+      x.report("RbCsv.parse_typed") do
+        ITERATIONS.times do
+          RbCsv.parse_typed(@csv_content)
+        end
+      end
+      x.report("RbCsv.parse_typed! (typed + trim)") do
+        ITERATIONS.times do
+          RbCsv.parse_typed!(@csv_content)
+        end
+      end
+    end
+    puts
+  end
+  def run_file_reading_benchmark
+    puts "📁 ファイル読み込み性能比較 (#{ITERATIONS}回実行)"
+    puts "-" * 50
+    Benchmark.bm(40) do |x|
+      # Ruby標準CSV
+      x.report("Ruby CSV.read") do
+        ITERATIONS.times do
+          CSV.read(CSV_FILE)
+        end
+      end
+      x.report("Ruby CSV.read (headers: true)") do
+        ITERATIONS.times do
+          CSV.read(CSV_FILE, headers: true)
+        end
+      end
+      # RbCsv
+      x.report("RbCsv.read") do
+        ITERATIONS.times do
+          RbCsv.read(CSV_FILE)
+        end
+      end
+      x.report("RbCsv.read! (with trim)") do
+        ITERATIONS.times do
+          RbCsv.read!(CSV_FILE)
+        end
+      end
+      x.report("RbCsv.read_typed") do
+        ITERATIONS.times do
+          RbCsv.read_typed(CSV_FILE)
+        end
+      end
+      x.report("RbCsv.read_typed! (typed + trim)") do
+        ITERATIONS.times do
+          RbCsv.read_typed!(CSV_FILE)
+        end
+      end
+    end
+    puts
+  end
+  def run_large_data_benchmark
+    create_large_sample_data
+    puts "💪 データ性能比較 (#{LARGE_ITERATIONS}回実行)"
+    puts "-" * 50
+    # データが正しく読み込まれているかチェック
+    if @large_csv_content.nil? || @large_csv_content.empty?
+      puts "エラー: データが読み込まれていません"
+      return
+    end
+    Benchmark.bm(40) do |x|
+      # パース性能比較
+      x.report("Ruby CSV.parse (large)") do
+        LARGE_ITERATIONS.times do
+          CSV.parse(@large_csv_content)
+        end
+      end
+      x.report("RbCsv.parse (large)") do
+        LARGE_ITERATIONS.times do
+          RbCsv.parse(@large_csv_content)
+        end
+      end
+      x.report("RbCsv.parse_typed (large)") do
+        LARGE_ITERATIONS.times do
+          RbCsv.parse_typed(@large_csv_content)
+        end
+      end
+      # ファイル読み込み性能比較
+      x.report("Ruby CSV.read (large file)") do
+        LARGE_ITERATIONS.times do
+          CSV.read(LARGE_CSV_FILE)
+        end
+      end
+      x.report("RbCsv.read (large file)") do
+        LARGE_ITERATIONS.times do
+          RbCsv.read(LARGE_CSV_FILE)
+        end
+      end
+      x.report("RbCsv.read_typed (large file)") do
+        LARGE_ITERATIONS.times do
+          RbCsv.read_typed(LARGE_CSV_FILE)
+        end
+      end
+    end
+    puts
+  end
+  def run_writing_benchmark
+    puts "✏️ ファイル書き込み性能比較 (#{ITERATIONS}回実行)"
+    puts "-" * 50
+    # テスト用の出力ファイル名（絶対パスに修正）
+    csv_out = File.join(Dir.pwd, 'benchmark_csv_output.csv')
+    rbcsv_out = File.join(Dir.pwd, 'benchmark_rbcsv_output.csv')
+    # 書き込み用のテストデータを準備（文字列に変換）
+    test_data = []
+    100.times do |i|
+      test_data << [
+        (i + 1).to_s,
+        "TestUser#{i + 1}",
+        rand(20..65).to_s,
+        rand(60.0..100.0).round(2).to_s,
+        %w[Engineering Sales Marketing HR][rand(4)],
+        rand(40000..120000).to_s,
+        (Date.today - rand(365)).to_s
+      ]
+    end
+    Benchmark.bm(40) do |x|
+      # Ruby標準CSV書き込み
+      x.report("Ruby CSV.open (write)") do
+        ITERATIONS.times do
+          CSV.open(csv_out, "w") do |csv|
+            csv << %w[id name age score department salary active_date]
+            test_data.each { |row| csv << row }
+          end
+        end
+      end
+      # RbCsv書き込み
+      x.report("RbCsv.write") do
+        ITERATIONS.times do
+          write_data = [%w[id name age score department salary active_date]] + test_data
+          RbCsv.write(rbcsv_out, write_data)
+        end
+      end
+    end
+    # テストファイルをクリーンアップ
+    [csv_out, rbcsv_out].each do |file|
+      File.delete(file) if File.exist?(file)
+    end
+    puts
+  end
+  def run_type_conversion_comparison
+    puts "🔢 型変換処理の比較 (#{ITERATIONS}回実行)"
+    puts "-" * 50
+    csv_data = CSV.parse(@csv_content)
+    rbcsv_data = RbCsv.parse(@csv_content)
+    rbcsv_typed_data = RbCsv.parse_typed(@csv_content)
+    Benchmark.bm(40) do |x|
+      # 手動型変換 vs 自動型変換
+      x.report("Manual conversion (CSV)") do
+        ITERATIONS.times do
+          csv_data[1..-1].map do |row|
+            [
+              row[0].to_i,      # id to integer
+              row[1],           # title (keep as string)
+              row[2],           # description (keep as string)
+              row[3],           # category (keep as string)
+              row[4],           # status (keep as string)
+              row[5],           # location (keep as string)
+              Time.parse(row[6]), # start_date to time
+              Time.parse(row[7]), # end_date to time
+              row[8].to_i,      # max_participants to integer
+              Time.parse(row[9]), # created_at to time
+              Time.parse(row[10]) # updated_at to time
+            ]
+          end
+        end
+      end
+      x.report("Manual conversion (RbCsv)") do
+        ITERATIONS.times do
+          rbcsv_data[1..-1].map do |row|
+            [
+              row[0].to_i,      # id to integer
+              row[1],           # title (keep as string)
+              row[2],           # description (keep as string)
+              row[3],           # category (keep as string)
+              row[4],           # status (keep as string)
+              row[5],           # location (keep as string)
+              Time.parse(row[6]), # start_date to time
+              Time.parse(row[7]), # end_date to time
+              row[8].to_i,      # max_participants to integer
+              Time.parse(row[9]), # created_at to time
+              Time.parse(row[10]) # updated_at to time
+            ]
+          end
+        end
+      end
+      x.report("Automatic conversion (RbCsv typed)") do
+        ITERATIONS.times do
+          rbcsv_typed_data[1..-1]  # すでに型変換済み
+        end
+      end
+    end
+    puts
+  end
+  def cleanup
+    [LARGE_CSV_FILE].each do |file|
+      File.delete(file) if File.exist?(file)
+    end
+  end
+  def run_all_benchmarks
+    setup_sample_data
+    run_basic_parsing_benchmark
+    run_file_reading_benchmark
+    run_writing_benchmark
+    run_large_data_benchmark
+    run_type_conversion_comparison
+    cleanup
+  end
+end
+# Run the benchmarks only when this file is executed directly, not when it is
+# loaded by another script.
+BenchmarkRunner.new.run_all_benchmarks if __FILE__ == $PROGRAM_NAME

data/{output_comparison.rb → examples/benchmarks/output_comparison.rb} RENAMED Viewed

@@ -2,7 +2,7 @@
 # encoding: utf-8
 require 'csv'
-require_relative 'lib/r_csv'
+require_relative '../../lib/rbcsv'
 CSV_FILE = 'sample.csv'
 csv_content = File.read(CSV_FILE)
@@ -48,8 +48,8 @@ puts "Data:"
 csv_table.each_with_index { |row, i| puts "  [#{i}] #{row.to_h}" }
 puts
-puts "=== 4. RCsv.parse (current - raw arrays) ==="
-rcv_parse = RCsv.parse(small_csv)
+puts "=== 4. RbCsv.parse (current - raw arrays) ==="
+rcv_parse = RbCsv.parse(small_csv)
 puts "Type: #{rcv_parse.class}"
 puts "Rows: #{rcv_parse.length}"
 puts "Data:"
@@ -73,11 +73,11 @@ puts "   - Excludes header from data rows"
 puts "   - Example: #<CSV::Row id:\"1\" name:\"Alice\" age:\"30\">"
 puts
-puts "3. RCsv.parse (current):"
+puts "3. RbCsv.parse (current):"
 puts "   - Returns: Array<Array<String>>"
-puts "   - Excludes header row (data only)"
+puts "   - Includes header row as first element"
 puts "   - Raw string arrays"
-puts "   - Example: [[\"1\", \"Alice\", \"30\"], [\"2\", \"Bob\", \"25\"]]"
+puts "   - Example: [[\"id\", \"name\", \"age\"], [\"1\", \"Alice\", \"30\"]]"
 puts
 puts "=== Key Differences ==="
@@ -86,54 +86,69 @@ puts
 puts "Header handling:"
 puts "  CSV.parse (default):     Includes header as row[0]"
 puts "  CSV.parse (headers=true): Excludes header, provides row['column'] access"
-puts "  RCsv.parse:              Excludes header, raw arrays only"
+puts "  RbCsv.parse:              Includes header as row[0] (same as CSV.parse default)"
 puts
 puts "Data structure:"
 puts "  CSV.parse:               Can return CSV::Row objects with named access"
-puts "  RCsv.parse:              Always returns raw Array<String>"
+puts "  RbCsv.parse:              Always returns raw Array<String>"
 puts
 puts "Row count difference:"
 csv_default = CSV.parse(small_csv)
 csv_headers = CSV.parse(small_csv, headers: true)
-rcv_data = RCsv.parse(small_csv)
+rcv_data = RbCsv.parse(small_csv)
 puts "  CSV.parse (default):     #{csv_default.length} rows (includes header)"
 puts "  CSV.parse (headers=true): #{csv_headers.length} rows (data only)"
-puts "  RCsv.parse:              #{rcv_data.length} rows (data only)"
+puts "  RbCsv.parse:              #{rcv_data.length} rows (includes header)"
 puts
 puts "=== Compatibility Recommendations ==="
 puts
-puts "To match CSV.parse (default behavior):"
-puts "  - RCsv should include header row as first element"
-puts "  - Return format: [[\"id\", \"name\"], [\"1\", \"Alice\"], [\"2\", \"Bob\"]]"
+puts "RbCsv already matches CSV.parse (default behavior):"
+puts "  - Both include header row as first element"
+puts "  - Both return format: [[\"id\", \"name\"], [\"1\", \"Alice\"], [\"2\", \"Bob\"]]"
 puts
-puts "To match CSV.parse (headers: true):"
-puts "  - More complex: need CSV::Row-like objects"
+puts "To add CSV.parse (headers: true) compatibility:"
+puts "  - Would need CSV::Row-like objects"
 puts "  - Alternative: return {headers: [...], data: [[...], [...]]} structure"
 puts
-puts "Current RCsv matches:"
-puts "  - CSV.parse(content, headers: true) # data rows only"
-puts "  - But returns raw arrays instead of CSV::Row objects"
+puts "Current RbCsv compatibility:"
+puts "  - Matches CSV.parse(content) exactly - both return Array<Array<String>> with headers"
+puts "  - Does not yet support CSV.parse(content, headers: true) which returns CSV::Table"
 puts
 puts "=== Specific Implementation Suggestions ==="
 puts
-puts "Option 1: Match CSV.parse default (simplest):"
-puts "  RCsv.parse(content) → includes header row"
+puts "Option 1: Keep current behavior (already compatible):"
+puts "  RbCsv.parse(content) → already includes header row ✅"
 puts
-puts "Option 2: Add options parameter:"
-puts "  RCsv.parse(content, headers: false) → includes header"
-puts "  RCsv.parse(content, headers: true) → excludes header"
+puts "Option 2: Add options parameter for structured data:"
+puts "  RbCsv.parse(content, headers: false) → current behavior (default)"
+puts "  RbCsv.parse(content, headers: true) → future: structured data like CSV::Table"
 puts
-puts "Option 3: Multiple methods:"
-puts "  RCsv.parse_raw(content) → raw arrays with header"
-puts "  RCsv.parse(content) → structured data"
+puts "Option 3: Add convenience methods:"
+puts "  RbCsv.table(content) → future: structured data with named access"
+puts "  RbCsv.foreach(file) → future: streaming with block"
+puts
+puts "=== Actual Compatibility Status ==="
+puts
+puts "✅ RbCsv.parse(content) == CSV.parse(content)"
+puts "   Both return: Array<Array<String>> with header row included"
+puts
+puts "❌ RbCsv does not yet support:"
+puts "   - CSV.parse(content, headers: true) → CSV::Table with CSV::Row objects"
+puts "   - Named column access like row['name']"
+puts "   - Header converters (e.g., :symbol)"
+puts
+puts "Future improvements could add:"
+puts "   - RbCsv.parse(content, headers: true) for structured data"
+puts "   - RbCsv::Row and RbCsv::Table classes for compatibility"
+puts "   - Integration with streaming API for large files"