csv-utils 0.3.25 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -1,119 +1,122 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Transforms a CSV given a series of steps
2
- class CSVUtils::CSVTransformer
3
- attr_reader :headers
4
+ module CSVUtils
5
+ class CSVTransformer
6
+ attr_reader :headers
4
7
 
5
- def initialize(src_csv, dest_csv, csv_options = {})
6
- @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
7
- @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
8
- end
8
+ def initialize(src_csv, dest_csv, csv_options = {})
9
+ @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
10
+ @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
11
+ end
9
12
 
10
- def read_headers
11
- @headers = @src_csv.shift
12
- self
13
- end
13
+ def read_headers
14
+ @headers = @src_csv.shift
15
+ self
16
+ end
14
17
 
15
- def additional_data(&block)
16
- steps << [:additional_data, @headers, block]
17
- self
18
- end
18
+ def additional_data(&block)
19
+ steps << [:additional_data, @headers, block]
20
+ self
21
+ end
19
22
 
20
- def select(&block)
21
- steps << [:select, @headers, block]
22
- self
23
- end
23
+ def select(&block)
24
+ steps << [:select, @headers, block]
25
+ self
26
+ end
24
27
 
25
- def reject(&block)
26
- steps << [:reject, @headers, block]
27
- self
28
- end
28
+ def reject(&block)
29
+ steps << [:reject, @headers, block]
30
+ self
31
+ end
29
32
 
30
- def map(new_headers, &block)
31
- steps << [:map, @headers, block]
32
- @headers = new_headers
33
- self
34
- end
33
+ def map(new_headers, &block)
34
+ steps << [:map, @headers, block]
35
+ @headers = new_headers
36
+ self
37
+ end
35
38
 
36
- def append(additional_headers, &block)
37
- steps << [:append, @headers, block]
39
+ def append(additional_headers, &block)
40
+ steps << [:append, @headers, block]
38
41
 
39
- if additional_headers
40
- @headers += additional_headers
41
- else
42
- @headers = nil
43
- end
42
+ if additional_headers
43
+ @headers += additional_headers
44
+ else
45
+ @headers = nil
46
+ end
44
47
 
45
- self
46
- end
48
+ self
49
+ end
47
50
 
48
- def each(&block)
49
- steps << [:each, @headers, block]
50
- self
51
- end
51
+ def each(&block)
52
+ steps << [:each, @headers, block]
53
+ self
54
+ end
52
55
 
53
- def set_headers(headers)
54
- @headers = headers
55
- self
56
- end
56
+ def set_headers(headers)
57
+ @headers = headers
58
+ self
59
+ end
57
60
 
58
- def process(batch_size = 10_000, &block)
59
- batch = []
61
+ def process(batch_size = 10_000)
62
+ batch = []
60
63
 
61
- @dest_csv << @headers if @headers
64
+ @dest_csv << @headers if @headers
62
65
 
63
- steps_proc = Proc.new do
64
- steps.each do |step_type, current_headers, proc|
65
- batch = process_step(step_type, current_headers, batch, &proc)
66
- end
66
+ steps_proc = proc do
67
+ steps.each do |step_type, current_headers, proc|
68
+ batch = process_step(step_type, current_headers, batch, &proc)
69
+ end
67
70
 
68
- batch.each { |row| @dest_csv << row }
71
+ batch.each { |row| @dest_csv << row }
69
72
 
70
- batch = []
71
- end
72
-
73
- while (row = @src_csv.shift)
74
- batch << row
75
- steps_proc.call if batch.size >= batch_size
76
- end
73
+ batch = []
74
+ end
77
75
 
78
- steps_proc.call if batch.size > 0
76
+ while (row = @src_csv.shift)
77
+ batch << row
78
+ steps_proc.call if batch.size >= batch_size
79
+ end
79
80
 
80
- @src_csv.close
81
- @dest_csv.close
82
- end
81
+ steps_proc.call if batch.size.positive?
83
82
 
84
- private
83
+ @src_csv.close
84
+ @dest_csv.close
85
+ end
85
86
 
86
- def steps
87
- @steps ||= []
88
- end
87
+ private
89
88
 
89
+ def steps
90
+ @steps ||= []
91
+ end
90
92
 
91
- def process_step(step_type, current_headers, batch, &block)
92
- case step_type
93
- when :select
94
- batch.select! do |row|
95
- block.call row, current_headers, @additional_data
96
- end
97
- when :reject
98
- batch.reject! do |row|
99
- block.call row, current_headers, @additional_data
100
- end
101
- when :map
102
- batch.map! do |row|
103
- block.call row, current_headers, @additional_data
93
+ def process_step(step_type, current_headers, batch, &block)
94
+ case step_type
95
+ when :select
96
+ batch.select! do |row|
97
+ block.call row, current_headers, @additional_data
98
+ end
99
+ when :reject
100
+ batch.reject! do |row|
101
+ block.call row, current_headers, @additional_data
102
+ end
103
+ when :map
104
+ batch.map! do |row|
105
+ block.call row, current_headers, @additional_data
106
+ end
107
+ when :append
108
+ batch.map! do |row|
109
+ row + block.call(row, current_headers, @additional_data)
110
+ end
111
+ when :additional_data
112
+ @additional_data = block.call(batch, current_headers)
113
+ when :each
114
+ batch.each do |row|
115
+ block.call(row, current_headers, @additional_data)
116
+ end
104
117
  end
105
- when :append
106
- batch.map! do |row|
107
- row + block.call(row, current_headers, @additional_data)
108
- end
109
- when :additional_data
110
- @additional_data = block.call(batch, current_headers)
111
- when :each
112
- batch.each do |row|
113
- block.call(row, current_headers, @additional_data)
114
- end
115
- end
116
118
 
117
- batch
119
+ batch
120
+ end
118
121
  end
119
122
  end
@@ -1,51 +1,55 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Wraps a CSV object, if wrapper opens the csv file it will close it
2
- class CSVUtils::CSVWrapper
3
- attr_reader :csv
4
+ module CSVUtils
5
+ class CSVWrapper
6
+ attr_reader :csv
4
7
 
5
- def initialize(csv, mode, csv_options)
6
- open(csv, mode, csv_options)
7
- end
8
+ def initialize(csv, mode, csv_options)
9
+ open(csv, mode, csv_options)
10
+ end
8
11
 
9
- def self.open(file, mode, csv_options = {})
10
- csv = new(file, mode, csv_options)
12
+ def self.open(file, mode, csv_options = {})
13
+ csv = new(file, mode, csv_options)
11
14
 
12
- if block_given?
13
- yield csv
14
- csv.close
15
- else
16
- csv
15
+ if block_given?
16
+ yield csv
17
+ csv.close
18
+ else
19
+ csv
20
+ end
17
21
  end
18
- end
19
22
 
20
- def open(csv, mode, csv_options)
21
- if csv.is_a?(String)
22
- @close_when_done = true
23
- @csv = CSV.open(csv, mode, **csv_options)
24
- else
25
- @close_when_done = false
26
- @csv = csv
23
+ def open(csv, mode, csv_options)
24
+ if csv.is_a?(String)
25
+ @close_when_done = true
26
+ @csv = CSV.open(csv, mode, **csv_options)
27
+ else
28
+ @close_when_done = false
29
+ @csv = csv
30
+ end
27
31
  end
28
- end
29
32
 
30
- def <<(row)
31
- csv << row
32
- end
33
+ def <<(row)
34
+ csv << row
35
+ end
33
36
 
34
- def shift
35
- csv.shift
36
- end
37
+ def shift
38
+ csv.shift
39
+ end
37
40
 
38
- def rewind
39
- csv.rewind
40
- end
41
+ def rewind
42
+ csv.rewind
43
+ end
41
44
 
42
- def close
43
- csv.close if close_when_done?
44
- end
45
+ def close
46
+ csv.close if close_when_done?
47
+ end
45
48
 
46
- private
49
+ private
47
50
 
48
- def close_when_done?
49
- @close_when_done
51
+ def close_when_done?
52
+ @close_when_done
53
+ end
50
54
  end
51
55
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.25
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-07-03 00:00:00.000000000 Z
10
+ date: 2026-01-31 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: csv
@@ -37,7 +37,9 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- description: Tools for debugging malformed CSV files
40
+ description: A Ruby library for CSV file processing featuring comparison, transformation,
41
+ sorting, and validation. Includes CLI tools for debugging malformed CSVs, auto-detection
42
+ of encodings and separators, and efficient handling of large files.
41
43
  email: dougyouch@gmail.com
42
44
  executables:
43
45
  - csv-change-eol
@@ -52,9 +54,13 @@ executables:
52
54
  extensions: []
53
55
  extra_rdoc_files: []
54
56
  files:
57
+ - ".github/workflows/ci.yml"
55
58
  - ".gitignore"
59
+ - ".rubocop.yml"
56
60
  - ".ruby-gemset"
57
61
  - ".ruby-version"
62
+ - ARCHITECTURE.md
63
+ - CLAUDE.md
58
64
  - Gemfile
59
65
  - Gemfile.lock
60
66
  - LICENSE
@@ -69,7 +75,6 @@ files:
69
75
  - bin/csv-splitter
70
76
  - bin/csv-validator
71
77
  - csv-utils.gemspec
72
- - docs/ARCHITECTURE.md
73
78
  - lib/csv-utils.rb
74
79
  - lib/csv_utils/csv_compare.rb
75
80
  - lib/csv_utils/csv_extender.rb
@@ -77,6 +82,7 @@ files:
77
82
  - lib/csv_utils/csv_options.rb
78
83
  - lib/csv_utils/csv_report.rb
79
84
  - lib/csv_utils/csv_row.rb
85
+ - lib/csv_utils/csv_row_matcher.rb
80
86
  - lib/csv_utils/csv_sort.rb
81
87
  - lib/csv_utils/csv_transformer.rb
82
88
  - lib/csv_utils/csv_wrapper.rb
@@ -84,7 +90,8 @@ files:
84
90
  homepage: https://github.com/dougyouch/csv-utils
85
91
  licenses:
86
92
  - MIT
87
- metadata: {}
93
+ metadata:
94
+ rubygems_mfa_required: 'true'
88
95
  rdoc_options: []
89
96
  require_paths:
90
97
  - lib
@@ -101,5 +108,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
101
108
  requirements: []
102
109
  rubygems_version: 3.6.2
103
110
  specification_version: 4
104
- summary: CSV Utils
111
+ summary: Comprehensive CSV manipulation and debugging utilities for Ruby
105
112
  test_files: []
data/docs/ARCHITECTURE.md DELETED
@@ -1,134 +0,0 @@
1
- # CSV Utils Architecture
2
-
3
- ## Overview
4
-
5
- CSV Utils is a Ruby library designed to provide a comprehensive set of tools for CSV file manipulation. The architecture follows a modular design pattern, with each component handling a specific aspect of CSV processing.
6
-
7
- ## Core Components
8
-
9
- ### 1. CSVCompare
10
- - **Purpose**: Compares two CSV files to identify differences
11
- - **Key Features**:
12
- - Identifies creates, updates, and deletes between files
13
- - Supports custom comparison logic
14
- - Handles BOM (Byte Order Mark) stripping
15
- - Memory-efficient streaming comparison
16
- - **Dependencies**: None (uses standard Ruby CSV library)
17
-
18
- ### 2. CSVTransformer
19
- - **Purpose**: Transforms CSV data according to custom rules
20
- - **Key Features**:
21
- - Row-by-row transformation
22
- - Custom transformation blocks
23
- - Maintains header structure
24
- - **Dependencies**: None
25
-
26
- ### 3. CSVSort
27
- - **Purpose**: Sorts CSV files based on specified columns
28
- - **Key Features**:
29
- - Multi-column sorting
30
- - Memory-efficient sorting
31
- - Preserves header row
32
- - **Dependencies**: None
33
-
34
- ### 4. CSVReport
35
- - **Purpose**: Generates reports from CSV data
36
- - **Key Features**:
37
- - Custom report formatting
38
- - Data aggregation
39
- - Summary statistics
40
- - **Dependencies**: None
41
-
42
- ### 5. CSVIterator
43
- - **Purpose**: Provides efficient iteration over CSV files
44
- - **Key Features**:
45
- - Memory-efficient streaming
46
- - Custom iteration blocks
47
- - Header handling
48
- - **Dependencies**: None
49
-
50
- ### 6. CSVExtender
51
- - **Purpose**: Extends CSV files with additional data
52
- - **Key Features**:
53
- - Column addition
54
- - Data enrichment
55
- - Custom extension logic
56
- - **Dependencies**: None
57
-
58
- ### 7. CSVWrapper
59
- - **Purpose**: Provides a convenient wrapper for CSV operations
60
- - **Key Features**:
61
- - Simplified CSV access
62
- - Common operation shortcuts
63
- - Error handling
64
- - **Dependencies**: None
65
-
66
- ## Design Principles
67
-
68
- 1. **Modularity**: Each component is self-contained and focused on a single responsibility
69
- 2. **Memory Efficiency**: Components are designed to handle large files through streaming
70
- 3. **Extensibility**: Custom logic can be injected through blocks and callbacks
71
- 4. **Error Handling**: Robust error handling and validation
72
- 5. **Performance**: Optimized for large file processing
73
-
74
- ## Data Flow
75
-
76
- 1. **Input Processing**:
77
- - Files are read using Ruby's CSV library
78
- - BOM stripping is handled automatically
79
- - Headers are preserved and validated
80
-
81
- 2. **Processing**:
82
- - Each component processes data in a streaming fashion
83
- - Custom logic can be injected at various points
84
- - Memory usage is optimized for large files
85
-
86
- 3. **Output Generation**:
87
- - Results are written to new files or returned as data structures
88
- - Headers are preserved in output files
89
- - Error states are properly handled
90
-
91
- ## Error Handling
92
-
93
- - File not found errors
94
- - Invalid CSV format
95
- - Missing required columns
96
- - Permission issues
97
- - Memory constraints
98
-
99
- ## Performance Considerations
100
-
101
- 1. **Memory Usage**:
102
- - Streaming processing for large files
103
- - Minimal in-memory data storage
104
- - Efficient data structures
105
-
106
- 2. **Processing Speed**:
107
- - Optimized comparison algorithms
108
- - Efficient sorting mechanisms
109
- - Minimal file I/O operations
110
-
111
- ## Future Considerations
112
-
113
- 1. **Potential Enhancements**:
114
- - Parallel processing support
115
- - Additional data format support
116
- - Enhanced reporting capabilities
117
- - Caching mechanisms
118
-
119
- 2. **Scalability**:
120
- - Support for distributed processing
121
- - Cloud storage integration
122
- - Batch processing capabilities
123
-
124
- ## Testing Strategy
125
-
126
- 1. **Unit Tests**:
127
- - Individual component testing
128
- - Edge case coverage
129
- - Performance benchmarks
130
-
131
- 2. **Integration Tests**:
132
- - Component interaction testing
133
- - End-to-end workflows
134
- - Error scenario coverage