csv-utils 0.3.24 → 0.3.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56d81ad0c7a53bc4a738cc4f9643884fa301b6c2add83f9b961263dfdae1e896
4
- data.tar.gz: 5db7f72a3df30a2933ffdafa8599a7ce46ae763124b4c0d6d88b775f2238422b
3
+ metadata.gz: 9b30f6d77e44b6d4abca4ee199142b4efa6cb73895df87ba630b82f50b964fbc
4
+ data.tar.gz: 10337ebbb358c3e60f5f4d47756297577468b45bbaeaf4e1b0b355b12c9083a4
5
5
  SHA512:
6
- metadata.gz: bf3878abd3a7f0c5dfbdcd9cdcee6773dc20d43308464c3d0a984ee2fe147c055ba64627043478f8099604f8824b3e8630eabfe8f91ffc562d54fabed98b0535
7
- data.tar.gz: 68feef79f89b7b415c40b37f9ac03b2feb6c800552af70c22006fa56a92d987a0753e0fc00b6449b8edca04463dfeaaefc4b647fbf46ce14227ccdde927a95dd
6
+ metadata.gz: 84432b88f8fc0aee4422fe46a36bab6e99cca1d604469608e2e4ced97c1f748be66796bc7f0c161b660d31e6424f5b155b38bfb034e9cee234fd8c71459bb2d3
7
+ data.tar.gz: ad864bb734d1c9f7d6a99d97aa2567811c66f8bf317571a947d56196776c61abf229d3d3aad0f5485f1a39b64d257e5be234ffa19dcc9c8a975a045fffcd95b1
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.1.0
1
+ 3.4.2
data/Gemfile CHANGED
@@ -2,14 +2,12 @@
2
2
 
3
3
  source 'http://rubygems.org'
4
4
 
5
+ gem 'csv'
5
6
  gem 'inheritance-helper'
6
7
 
7
8
  group :development do
8
9
  gem 'rake'
9
10
  gem 'rubocop'
10
- end
11
-
12
- group :spec do
13
11
  gem 'rspec'
14
12
  gem 'simplecov'
15
13
  end
data/Gemfile.lock CHANGED
@@ -1,54 +1,67 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.2)
5
- diff-lcs (1.5.0)
6
- docile (1.4.0)
4
+ ast (2.4.3)
5
+ csv (3.3.4)
6
+ diff-lcs (1.6.2)
7
+ docile (1.4.1)
7
8
  inheritance-helper (0.2.5)
8
- parallel (1.22.1)
9
- parser (3.1.1.0)
9
+ json (2.12.0)
10
+ language_server-protocol (3.17.0.5)
11
+ lint_roller (1.1.0)
12
+ parallel (1.27.0)
13
+ parser (3.3.8.0)
10
14
  ast (~> 2.4.1)
15
+ racc
16
+ prism (1.4.0)
17
+ racc (1.8.1)
11
18
  rainbow (3.1.1)
12
- rake (13.0.6)
13
- regexp_parser (2.2.1)
14
- rexml (3.2.5)
15
- rspec (3.11.0)
16
- rspec-core (~> 3.11.0)
17
- rspec-expectations (~> 3.11.0)
18
- rspec-mocks (~> 3.11.0)
19
- rspec-core (3.11.0)
20
- rspec-support (~> 3.11.0)
21
- rspec-expectations (3.11.0)
19
+ rake (13.2.1)
20
+ regexp_parser (2.10.0)
21
+ rspec (3.13.0)
22
+ rspec-core (~> 3.13.0)
23
+ rspec-expectations (~> 3.13.0)
24
+ rspec-mocks (~> 3.13.0)
25
+ rspec-core (3.13.3)
26
+ rspec-support (~> 3.13.0)
27
+ rspec-expectations (3.13.4)
22
28
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.11.0)
24
- rspec-mocks (3.11.0)
29
+ rspec-support (~> 3.13.0)
30
+ rspec-mocks (3.13.4)
25
31
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.11.0)
27
- rspec-support (3.11.0)
28
- rubocop (1.26.1)
32
+ rspec-support (~> 3.13.0)
33
+ rspec-support (3.13.3)
34
+ rubocop (1.75.6)
35
+ json (~> 2.3)
36
+ language_server-protocol (~> 3.17.0.2)
37
+ lint_roller (~> 1.1.0)
29
38
  parallel (~> 1.10)
30
- parser (>= 3.1.0.0)
39
+ parser (>= 3.3.0.2)
31
40
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.8, < 3.0)
33
- rexml
34
- rubocop-ast (>= 1.16.0, < 2.0)
41
+ regexp_parser (>= 2.9.3, < 3.0)
42
+ rubocop-ast (>= 1.44.0, < 2.0)
35
43
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 3.0)
37
- rubocop-ast (1.16.0)
38
- parser (>= 3.1.1.0)
39
- ruby-progressbar (1.11.0)
40
- simplecov (0.21.2)
44
+ unicode-display_width (>= 2.4.0, < 4.0)
45
+ rubocop-ast (1.44.1)
46
+ parser (>= 3.3.7.2)
47
+ prism (~> 1.4)
48
+ ruby-progressbar (1.13.0)
49
+ simplecov (0.22.0)
41
50
  docile (~> 1.1)
42
51
  simplecov-html (~> 0.11)
43
52
  simplecov_json_formatter (~> 0.1)
44
- simplecov-html (0.12.3)
53
+ simplecov-html (0.13.1)
45
54
  simplecov_json_formatter (0.1.4)
46
- unicode-display_width (2.1.0)
55
+ unicode-display_width (3.1.4)
56
+ unicode-emoji (~> 4.0, >= 4.0.4)
57
+ unicode-emoji (4.0.4)
47
58
 
48
59
  PLATFORMS
49
- x86_64-darwin-21
60
+ ruby
61
+ x86_64-darwin-24
50
62
 
51
63
  DEPENDENCIES
64
+ csv
52
65
  inheritance-helper
53
66
  rake
54
67
  rspec
@@ -56,4 +69,4 @@ DEPENDENCIES
56
69
  simplecov
57
70
 
58
71
  BUNDLED WITH
59
- 2.3.3
72
+ 2.6.2
data/README.md CHANGED
@@ -1,2 +1,86 @@
1
- # csv-utils
2
- CSV Utilities for manipulating csv files in code
1
+ # CSV Utils
2
+
3
+ A Ruby library providing a comprehensive set of utilities for manipulating and processing CSV files. This library offers a robust set of tools for comparing, transforming, sorting, and managing CSV data efficiently.
4
+
5
+ ## Features
6
+
7
+ - **CSV Comparison**: Compare two CSV files and identify differences (creates, updates, and deletes)
8
+ - **CSV Transformation**: Transform CSV data with customizable rules
9
+ - **CSV Sorting**: Sort CSV files based on specified columns
10
+ - **CSV Reporting**: Generate reports from CSV data
11
+ - **CSV Iteration**: Efficient iteration over CSV files
12
+ - **CSV Extension**: Extend CSV files with additional data
13
+ - **CSV Wrapper**: Convenient wrapper for CSV operations
14
+
15
+ ## Installation
16
+
17
+ Add this line to your application's Gemfile:
18
+
19
+ ```ruby
20
+ gem 'csv-utils'
21
+ ```
22
+
23
+ And then execute:
24
+
25
+ ```bash
26
+ $ bundle install
27
+ ```
28
+
29
+ Or install it yourself as:
30
+
31
+ ```bash
32
+ $ gem install csv-utils
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### Comparing CSV Files
38
+
39
+ ```ruby
40
+ require 'csv-utils'
41
+
42
+ comparator = CSVUtils::CSVCompare.new('primary.csv', ['updated_at']) do |src, dest|
43
+ src['id'] <=> dest['id']
44
+ end
45
+
46
+ comparator.compare('secondary.csv') do |action, record|
47
+ case action
48
+ when :create
49
+ puts "Create: #{record}"
50
+ when :update
51
+ puts "Update: #{record}"
52
+ when :delete
53
+ puts "Delete: #{record}"
54
+ end
55
+ end
56
+ ```
57
+
58
+ ### Sorting CSV Files
59
+
60
+ ```ruby
61
+ sorter = CSVUtils::CSVSort.new('input.csv')
62
+ sorter.sort('output.csv', ['id', 'name'])
63
+ ```
64
+
65
+ ### Transforming CSV Data
66
+
67
+ ```ruby
68
+ transformer = CSVUtils::CSVTransformer.new('input.csv')
69
+ transformer.transform('output.csv') do |row|
70
+ # Transform row data
71
+ row['new_column'] = row['old_column'].upcase
72
+ row
73
+ end
74
+ ```
75
+
76
+ ## Development
77
+
78
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests.
79
+
80
+ ## Contributing
81
+
82
+ Bug reports and pull requests are welcome on GitHub at https://github.com/dougyouch/csv-utils.
83
+
84
+ ## License
85
+
86
+ The gem is available as open source under the terms of the MIT License.
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.24'
5
+ s.version = '0.3.25'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -0,0 +1,134 @@
1
+ # CSV Utils Architecture
2
+
3
+ ## Overview
4
+
5
+ CSV Utils is a Ruby library designed to provide a comprehensive set of tools for CSV file manipulation. The architecture follows a modular design pattern, with each component handling a specific aspect of CSV processing.
6
+
7
+ ## Core Components
8
+
9
+ ### 1. CSVCompare
10
+ - **Purpose**: Compares two CSV files to identify differences
11
+ - **Key Features**:
12
+ - Identifies creates, updates, and deletes between files
13
+ - Supports custom comparison logic
14
+ - Handles BOM (Byte Order Mark) stripping
15
+ - Memory-efficient streaming comparison
16
+ - **Dependencies**: the `csv` gem (Ruby's CSV library, declared as a gem dependency)
17
+
18
+ ### 2. CSVTransformer
19
+ - **Purpose**: Transforms CSV data according to custom rules
20
+ - **Key Features**:
21
+ - Row-by-row transformation
22
+ - Custom transformation blocks
23
+ - Maintains header structure
24
+ - **Dependencies**: None
25
+
26
+ ### 3. CSVSort
27
+ - **Purpose**: Sorts CSV files based on specified columns
28
+ - **Key Features**:
29
+ - Multi-column sorting
30
+ - Memory-efficient sorting
31
+ - Preserves header row
32
+ - **Dependencies**: None
33
+
34
+ ### 4. CSVReport
35
+ - **Purpose**: Generates reports from CSV data
36
+ - **Key Features**:
37
+ - Custom report formatting
38
+ - Data aggregation
39
+ - Summary statistics
40
+ - **Dependencies**: None
41
+
42
+ ### 5. CSVIterator
43
+ - **Purpose**: Provides efficient iteration over CSV files
44
+ - **Key Features**:
45
+ - Memory-efficient streaming
46
+ - Custom iteration blocks
47
+ - Header handling
48
+ - **Dependencies**: None
49
+
50
+ ### 6. CSVExtender
51
+ - **Purpose**: Extends CSV files with additional data
52
+ - **Key Features**:
53
+ - Column addition
54
+ - Data enrichment
55
+ - Custom extension logic
56
+ - **Dependencies**: None
57
+
58
+ ### 7. CSVWrapper
59
+ - **Purpose**: Provides a convenient wrapper for CSV operations
60
+ - **Key Features**:
61
+ - Simplified CSV access
62
+ - Common operation shortcuts
63
+ - Error handling
64
+ - **Dependencies**: None
65
+
66
+ ## Design Principles
67
+
68
+ 1. **Modularity**: Each component is self-contained and focused on a single responsibility
69
+ 2. **Memory Efficiency**: Components are designed to handle large files through streaming
70
+ 3. **Extensibility**: Custom logic can be injected through blocks and callbacks
71
+ 4. **Error Handling**: Robust error handling and validation
72
+ 5. **Performance**: Optimized for large file processing
73
+
74
+ ## Data Flow
75
+
76
+ 1. **Input Processing**:
77
+ - Files are read using Ruby's CSV library
78
+ - BOM stripping is handled automatically
79
+ - Headers are preserved and validated
80
+
81
+ 2. **Processing**:
82
+ - Each component processes data in a streaming fashion
83
+ - Custom logic can be injected at various points
84
+ - Memory usage is optimized for large files
85
+
86
+ 3. **Output Generation**:
87
+ - Results are written to new files or returned as data structures
88
+ - Headers are preserved in output files
89
+ - Error states are properly handled
90
+
91
+ ## Error Handling
92
+
93
+ - File not found errors
94
+ - Invalid CSV format
95
+ - Missing required columns
96
+ - Permission issues
97
+ - Memory constraints
98
+
99
+ ## Performance Considerations
100
+
101
+ 1. **Memory Usage**:
102
+ - Streaming processing for large files
103
+ - Minimal in-memory data storage
104
+ - Efficient data structures
105
+
106
+ 2. **Processing Speed**:
107
+ - Optimized comparison algorithms
108
+ - Efficient sorting mechanisms
109
+ - Minimal file I/O operations
110
+
111
+ ## Future Considerations
112
+
113
+ 1. **Potential Enhancements**:
114
+ - Parallel processing support
115
+ - Additional data format support
116
+ - Enhanced reporting capabilities
117
+ - Caching mechanisms
118
+
119
+ 2. **Scalability**:
120
+ - Support for distributed processing
121
+ - Cloud storage integration
122
+ - Batch processing capabilities
123
+
124
+ ## Testing Strategy
125
+
126
+ 1. **Unit Tests**:
127
+ - Individual component testing
128
+ - Edge case coverage
129
+ - Performance benchmarks
130
+
131
+ 2. **Integration Tests**:
132
+ - Component interaction testing
133
+ - End-to-end workflows
134
+ - Error scenario coverage
@@ -21,8 +21,8 @@ class CSVUtils::CSVIterator
21
21
  end
22
22
  end
23
23
 
24
- def initialize(src_csv, csv_options = {})
25
- @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
24
+ def initialize(src_csv, csv_options = {}, mode = 'rb')
25
+ @src_csv = CSVUtils::CSVWrapper.new(src_csv, mode, csv_options)
26
26
  end
27
27
 
28
28
  def each(headers = nil)
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.24
4
+ version: 0.3.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-03-21 00:00:00.000000000 Z
10
+ date: 2025-07-03 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: csv
@@ -70,6 +69,7 @@ files:
70
69
  - bin/csv-splitter
71
70
  - bin/csv-validator
72
71
  - csv-utils.gemspec
72
+ - docs/ARCHITECTURE.md
73
73
  - lib/csv-utils.rb
74
74
  - lib/csv_utils/csv_compare.rb
75
75
  - lib/csv_utils/csv_extender.rb
@@ -85,7 +85,6 @@ homepage: https://github.com/dougyouch/csv-utils
85
85
  licenses:
86
86
  - MIT
87
87
  metadata: {}
88
- post_install_message:
89
88
  rdoc_options: []
90
89
  require_paths:
91
90
  - lib
@@ -100,8 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
99
  - !ruby/object:Gem::Version
101
100
  version: '0'
102
101
  requirements: []
103
- rubygems_version: 3.3.3
104
- signing_key:
102
+ rubygems_version: 3.6.2
105
103
  specification_version: 4
106
104
  summary: CSV Utils
107
105
  test_files: []