RubyGems - csv-utils - Versions diffs - 0.3.25 → 0.5.0 - Mend

csv-utils 0.3.25 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/.github/workflows/ci.yml +53 -0
data/.rubocop.yml +81 -0
data/ARCHITECTURE.md +154 -0
data/CLAUDE.md +63 -0
data/Gemfile +2 -1
data/Gemfile.lock +5 -0
data/README.md +238 -16
data/bin/csv-diff +3 -3
data/bin/csv-duplicate-finder +1 -1
data/bin/csv-grep +3 -3
data/bin/csv-readline +4 -5
data/bin/csv-splitter +1 -1
data/bin/csv-validator +38 -36
data/csv-utils.gemspec +6 -5
data/lib/csv-utils.rb +3 -0
data/lib/csv_utils/csv_compare.rb +77 -71
data/lib/csv_utils/csv_extender.rb +45 -41
data/lib/csv_utils/csv_iterator.rb +90 -75
data/lib/csv_utils/csv_options.rb +11 -11
data/lib/csv_utils/csv_report.rb +5 -2
data/lib/csv_utils/csv_row.rb +3 -1
data/lib/csv_utils/csv_row_matcher.rb +34 -0
data/lib/csv_utils/csv_sort.rb +110 -96
data/lib/csv_utils/csv_transformer.rb +95 -92
data/lib/csv_utils/csv_wrapper.rb +40 -36
metadata +13 -6
data/docs/ARCHITECTURE.md +0 -134

data/lib/csv_utils/csv_transformer.rb CHANGED

@@ -1,119 +1,122 @@
+# frozen_string_literal: true
 # Transforms a CSV given a series of steps
-class CSVUtils::CSVTransformer
-  attr_reader :headers
+module CSVUtils
+  class CSVTransformer
+    attr_reader :headers
-  def initialize(src_csv, dest_csv, csv_options = {})
-    @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
-    @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
-  end
+    def initialize(src_csv, dest_csv, csv_options = {})
+      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
+      @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
+    end
-  def read_headers
-    @headers = @src_csv.shift
-    self
-  end
+    def read_headers
+      @headers = @src_csv.shift
+      self
+    end
-  def additional_data(&block)
-    steps << [:additional_data, @headers, block]
-    self
-  end
+    def additional_data(&block)
+      steps << [:additional_data, @headers, block]
+      self
+    end
-  def select(&block)
-    steps << [:select, @headers, block]
-    self
-  end
+    def select(&block)
+      steps << [:select, @headers, block]
+      self
+    end
-  def reject(&block)
-    steps << [:reject, @headers, block]
-    self
-  end
+    def reject(&block)
+      steps << [:reject, @headers, block]
+      self
+    end
-  def map(new_headers, &block)
-    steps << [:map, @headers, block]
-    @headers = new_headers
-    self
-  end
+    def map(new_headers, &block)
+      steps << [:map, @headers, block]
+      @headers = new_headers
+      self
+    end
-  def append(additional_headers, &block)
-    steps << [:append, @headers, block]
+    def append(additional_headers, &block)
+      steps << [:append, @headers, block]
-    if additional_headers
-      @headers += additional_headers
-    else
-      @headers = nil
-    end
+      if additional_headers
+        @headers += additional_headers
+      else
+        @headers = nil
+      end
-    self
-  end
+      self
+    end
-  def each(&block)
-    steps << [:each, @headers, block]
-    self
-  end
+    def each(&block)
+      steps << [:each, @headers, block]
+      self
+    end
-  def set_headers(headers)
-    @headers = headers
-    self
-  end
+    def set_headers(headers)
+      @headers = headers
+      self
+    end
-  def process(batch_size = 10_000, &block)
-    batch = []
+    def process(batch_size = 10_000)
+      batch = []
-    @dest_csv << @headers if @headers
+      @dest_csv << @headers if @headers
-    steps_proc = Proc.new do
-      steps.each do |step_type, current_headers, proc|
-        batch = process_step(step_type, current_headers, batch, &proc)
-      end
+      steps_proc = proc do
+        steps.each do |step_type, current_headers, proc|
+          batch = process_step(step_type, current_headers, batch, &proc)
+        end
-      batch.each { |row| @dest_csv << row }
+        batch.each { |row| @dest_csv << row }
-      batch = []
-    end
-    while (row = @src_csv.shift)
-      batch << row
-      steps_proc.call if batch.size >= batch_size
-    end
+        batch = []
+      end
-    steps_proc.call if batch.size > 0
+      while (row = @src_csv.shift)
+        batch << row
+        steps_proc.call if batch.size >= batch_size
+      end
-    @src_csv.close
-    @dest_csv.close
-  end
+      steps_proc.call if batch.size.positive?
-  private
+      @src_csv.close
+      @dest_csv.close
+    end
-  def steps
-    @steps ||= []
-  end
+    private
+    def steps
+      @steps ||= []
+    end
-  def process_step(step_type, current_headers, batch, &block)
-    case step_type
-    when :select
-      batch.select! do |row|
-        block.call row, current_headers, @additional_data
-      end
-    when :reject
-      batch.reject! do |row|
-        block.call row, current_headers, @additional_data
-      end
-    when :map
-      batch.map! do |row|
-        block.call row, current_headers, @additional_data
+    def process_step(step_type, current_headers, batch, &block)
+      case step_type
+      when :select
+        batch.select! do |row|
+          block.call row, current_headers, @additional_data
+        end
+      when :reject
+        batch.reject! do |row|
+          block.call row, current_headers, @additional_data
+        end
+      when :map
+        batch.map! do |row|
+          block.call row, current_headers, @additional_data
+        end
+      when :append
+        batch.map! do |row|
+          row + block.call(row, current_headers, @additional_data)
+        end
+      when :additional_data
+        @additional_data = block.call(batch, current_headers)
+      when :each
+        batch.each do |row|
+          block.call(row, current_headers, @additional_data)
+        end
       end
-    when :append
-      batch.map! do |row|
-        row + block.call(row, current_headers, @additional_data)
-      end
-    when :additional_data
-      @additional_data = block.call(batch, current_headers)
-    when :each
-      batch.each do |row|
-        block.call(row, current_headers, @additional_data)
-      end
-    end
-    batch
+      batch
+    end
   end
 end

data/lib/csv_utils/csv_wrapper.rb CHANGED

@@ -1,51 +1,55 @@
+# frozen_string_literal: true
 # Wraps a CSV object, if wrapper opens the csv file it will close it
-class CSVUtils::CSVWrapper
-  attr_reader :csv
+module CSVUtils
+  class CSVWrapper
+    attr_reader :csv
-  def initialize(csv, mode, csv_options)
-    open(csv, mode, csv_options)
-  end
+    def initialize(csv, mode, csv_options)
+      open(csv, mode, csv_options)
+    end
-  def self.open(file, mode, csv_options = {})
-    csv = new(file, mode, csv_options)
+    def self.open(file, mode, csv_options = {})
+      csv = new(file, mode, csv_options)
-    if block_given?
-      yield csv
-      csv.close
-    else
-      csv
+      if block_given?
+        yield csv
+        csv.close
+      else
+        csv
+      end
     end
-  end
-  def open(csv, mode, csv_options)
-    if csv.is_a?(String)
-      @close_when_done = true
-      @csv = CSV.open(csv, mode, **csv_options)
-    else
-      @close_when_done = false
-      @csv = csv
+    def open(csv, mode, csv_options)
+      if csv.is_a?(String)
+        @close_when_done = true
+        @csv = CSV.open(csv, mode, **csv_options)
+      else
+        @close_when_done = false
+        @csv = csv
+      end
     end
-  end
-  def <<(row)
-    csv << row
-  end
+    def <<(row)
+      csv << row
+    end
-  def shift
-    csv.shift
-  end
+    def shift
+      csv.shift
+    end
-  def rewind
-    csv.rewind
-  end
+    def rewind
+      csv.rewind
+    end
-  def close
-    csv.close if close_when_done?
-  end
+    def close
+      csv.close if close_when_done?
+    end
-  private
+    private
-  def close_when_done?
-    @close_when_done
+    def close_when_done?
+      @close_when_done
+    end
   end
 end

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: csv-utils
 version: !ruby/object:Gem::Version
-  version: 0.3.25
+  version: 0.5.0
 platform: ruby
 authors:
 - Doug Youch
 bindir: bin
 cert_chain: []
-date: 2025-07-03 00:00:00.000000000 Z
+date: 2026-01-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: csv
@@ -37,7 +37,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: Tools for debugging malformed CSV files
+description: A Ruby library for CSV file processing featuring comparison, transformation,
+  sorting, and validation. Includes CLI tools for debugging malformed CSVs, auto-detection
+  of encodings and separators, and efficient handling of large files.
 email: dougyouch@gmail.com
 executables:
 - csv-change-eol
@@ -52,9 +54,13 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
+- ".github/workflows/ci.yml"
 - ".gitignore"
+- ".rubocop.yml"
 - ".ruby-gemset"
 - ".ruby-version"
+- ARCHITECTURE.md
+- CLAUDE.md
 - Gemfile
 - Gemfile.lock
 - LICENSE
@@ -69,7 +75,6 @@ files:
 - bin/csv-splitter
 - bin/csv-validator
 - csv-utils.gemspec
-- docs/ARCHITECTURE.md
 - lib/csv-utils.rb
 - lib/csv_utils/csv_compare.rb
 - lib/csv_utils/csv_extender.rb
@@ -77,6 +82,7 @@ files:
 - lib/csv_utils/csv_options.rb
 - lib/csv_utils/csv_report.rb
 - lib/csv_utils/csv_row.rb
+- lib/csv_utils/csv_row_matcher.rb
 - lib/csv_utils/csv_sort.rb
 - lib/csv_utils/csv_transformer.rb
 - lib/csv_utils/csv_wrapper.rb
@@ -84,7 +90,8 @@ files:
 homepage: https://github.com/dougyouch/csv-utils
 licenses:
 - MIT
-metadata: {}
+metadata:
+  rubygems_mfa_required: 'true'
 rdoc_options: []
 require_paths:
 - lib
@@ -101,5 +108,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubygems_version: 3.6.2
 specification_version: 4
-summary: CSV Utils
+summary: Comprehensive CSV manipulation and debugging utilities for Ruby
 test_files: []

data/docs/ARCHITECTURE.md DELETED

@@ -1,134 +0,0 @@
-# CSV Utils Architecture
-## Overview
-CSV Utils is a Ruby library designed to provide a comprehensive set of tools for CSV file manipulation. The architecture follows a modular design pattern, with each component handling a specific aspect of CSV processing.
-## Core Components
-### 1. CSVCompare
-- **Purpose**: Compares two CSV files to identify differences
-- **Key Features**:
-  - Identifies creates, updates, and deletes between files
-  - Supports custom comparison logic
-  - Handles BOM (Byte Order Mark) stripping
-  - Memory-efficient streaming comparison
-- **Dependencies**: None (uses standard Ruby CSV library)
-### 2. CSVTransformer
-- **Purpose**: Transforms CSV data according to custom rules
-- **Key Features**:
-  - Row-by-row transformation
-  - Custom transformation blocks
-  - Maintains header structure
-- **Dependencies**: None
-### 3. CSVSort
-- **Purpose**: Sorts CSV files based on specified columns
-- **Key Features**:
-  - Multi-column sorting
-  - Memory-efficient sorting
-  - Preserves header row
-- **Dependencies**: None
-### 4. CSVReport
-- **Purpose**: Generates reports from CSV data
-- **Key Features**:
-  - Custom report formatting
-  - Data aggregation
-  - Summary statistics
-- **Dependencies**: None
-### 5. CSVIterator
-- **Purpose**: Provides efficient iteration over CSV files
-- **Key Features**:
-  - Memory-efficient streaming
-  - Custom iteration blocks
-  - Header handling
-- **Dependencies**: None
-### 6. CSVExtender
-- **Purpose**: Extends CSV files with additional data
-- **Key Features**:
-  - Column addition
-  - Data enrichment
-  - Custom extension logic
-- **Dependencies**: None
-### 7. CSVWrapper
-- **Purpose**: Provides a convenient wrapper for CSV operations
-- **Key Features**:
-  - Simplified CSV access
-  - Common operation shortcuts
-  - Error handling
-- **Dependencies**: None
-## Design Principles
-1. **Modularity**: Each component is self-contained and focused on a single responsibility
-2. **Memory Efficiency**: Components are designed to handle large files through streaming
-3. **Extensibility**: Custom logic can be injected through blocks and callbacks
-4. **Error Handling**: Robust error handling and validation
-5. **Performance**: Optimized for large file processing
-## Data Flow
-1. **Input Processing**:
-   - Files are read using Ruby's CSV library
-   - BOM stripping is handled automatically
-   - Headers are preserved and validated
-2. **Processing**:
-   - Each component processes data in a streaming fashion
-   - Custom logic can be injected at various points
-   - Memory usage is optimized for large files
-3. **Output Generation**:
-   - Results are written to new files or returned as data structures
-   - Headers are preserved in output files
-   - Error states are properly handled
-## Error Handling
-- File not found errors
-- Invalid CSV format
-- Missing required columns
-- Permission issues
-- Memory constraints
-## Performance Considerations
-1. **Memory Usage**:
-   - Streaming processing for large files
-   - Minimal in-memory data storage
-   - Efficient data structures
-2. **Processing Speed**:
-   - Optimized comparison algorithms
-   - Efficient sorting mechanisms
-   - Minimal file I/O operations
-## Future Considerations
-1. **Potential Enhancements**:
-   - Parallel processing support
-   - Additional data format support
-   - Enhanced reporting capabilities
-   - Caching mechanisms
-2. **Scalability**:
-   - Support for distributed processing
-   - Cloud storage integration
-   - Batch processing capabilities
-## Testing Strategy
-1. **Unit Tests**:
-   - Individual component testing
-   - Edge case coverage
-   - Performance benchmarks
-2. **Integration Tests**:
-   - Component interaction testing
-   - End-to-end workflows
-   - Error scenario coverage