csv-utils 0.3.24 → 0.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile +1 -3
- data/Gemfile.lock +46 -33
- data/README.md +86 -2
- data/csv-utils.gemspec +1 -1
- data/docs/ARCHITECTURE.md +134 -0
- data/lib/csv_utils/csv_iterator.rb +2 -2
- metadata +4 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b30f6d77e44b6d4abca4ee199142b4efa6cb73895df87ba630b82f50b964fbc
|
4
|
+
data.tar.gz: 10337ebbb358c3e60f5f4d47756297577468b45bbaeaf4e1b0b355b12c9083a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 84432b88f8fc0aee4422fe46a36bab6e99cca1d604469608e2e4ced97c1f748be66796bc7f0c161b660d31e6424f5b155b38bfb034e9cee234fd8c71459bb2d3
|
7
|
+
data.tar.gz: ad864bb734d1c9f7d6a99d97aa2567811c66f8bf317571a947d56196776c61abf229d3d3aad0f5485f1a39b64d257e5be234ffa19dcc9c8a975a045fffcd95b1
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.4.2
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,54 +1,67 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
ast (2.4.
|
5
|
-
|
6
|
-
|
4
|
+
ast (2.4.3)
|
5
|
+
csv (3.3.4)
|
6
|
+
diff-lcs (1.6.2)
|
7
|
+
docile (1.4.1)
|
7
8
|
inheritance-helper (0.2.5)
|
8
|
-
|
9
|
-
|
9
|
+
json (2.12.0)
|
10
|
+
language_server-protocol (3.17.0.5)
|
11
|
+
lint_roller (1.1.0)
|
12
|
+
parallel (1.27.0)
|
13
|
+
parser (3.3.8.0)
|
10
14
|
ast (~> 2.4.1)
|
15
|
+
racc
|
16
|
+
prism (1.4.0)
|
17
|
+
racc (1.8.1)
|
11
18
|
rainbow (3.1.1)
|
12
|
-
rake (13.
|
13
|
-
regexp_parser (2.
|
14
|
-
|
15
|
-
|
16
|
-
rspec-
|
17
|
-
rspec-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
rspec-expectations (3.11.0)
|
19
|
+
rake (13.2.1)
|
20
|
+
regexp_parser (2.10.0)
|
21
|
+
rspec (3.13.0)
|
22
|
+
rspec-core (~> 3.13.0)
|
23
|
+
rspec-expectations (~> 3.13.0)
|
24
|
+
rspec-mocks (~> 3.13.0)
|
25
|
+
rspec-core (3.13.3)
|
26
|
+
rspec-support (~> 3.13.0)
|
27
|
+
rspec-expectations (3.13.4)
|
22
28
|
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
-
rspec-support (~> 3.
|
24
|
-
rspec-mocks (3.
|
29
|
+
rspec-support (~> 3.13.0)
|
30
|
+
rspec-mocks (3.13.4)
|
25
31
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
-
rspec-support (~> 3.
|
27
|
-
rspec-support (3.
|
28
|
-
rubocop (1.
|
32
|
+
rspec-support (~> 3.13.0)
|
33
|
+
rspec-support (3.13.3)
|
34
|
+
rubocop (1.75.6)
|
35
|
+
json (~> 2.3)
|
36
|
+
language_server-protocol (~> 3.17.0.2)
|
37
|
+
lint_roller (~> 1.1.0)
|
29
38
|
parallel (~> 1.10)
|
30
|
-
parser (>= 3.
|
39
|
+
parser (>= 3.3.0.2)
|
31
40
|
rainbow (>= 2.2.2, < 4.0)
|
32
|
-
regexp_parser (>=
|
33
|
-
|
34
|
-
rubocop-ast (>= 1.16.0, < 2.0)
|
41
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
42
|
+
rubocop-ast (>= 1.44.0, < 2.0)
|
35
43
|
ruby-progressbar (~> 1.7)
|
36
|
-
unicode-display_width (>=
|
37
|
-
rubocop-ast (1.
|
38
|
-
parser (>= 3.
|
39
|
-
|
40
|
-
|
44
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
45
|
+
rubocop-ast (1.44.1)
|
46
|
+
parser (>= 3.3.7.2)
|
47
|
+
prism (~> 1.4)
|
48
|
+
ruby-progressbar (1.13.0)
|
49
|
+
simplecov (0.22.0)
|
41
50
|
docile (~> 1.1)
|
42
51
|
simplecov-html (~> 0.11)
|
43
52
|
simplecov_json_formatter (~> 0.1)
|
44
|
-
simplecov-html (0.
|
53
|
+
simplecov-html (0.13.1)
|
45
54
|
simplecov_json_formatter (0.1.4)
|
46
|
-
unicode-display_width (
|
55
|
+
unicode-display_width (3.1.4)
|
56
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
57
|
+
unicode-emoji (4.0.4)
|
47
58
|
|
48
59
|
PLATFORMS
|
49
|
-
|
60
|
+
ruby
|
61
|
+
x86_64-darwin-24
|
50
62
|
|
51
63
|
DEPENDENCIES
|
64
|
+
csv
|
52
65
|
inheritance-helper
|
53
66
|
rake
|
54
67
|
rspec
|
@@ -56,4 +69,4 @@ DEPENDENCIES
|
|
56
69
|
simplecov
|
57
70
|
|
58
71
|
BUNDLED WITH
|
59
|
-
2.
|
72
|
+
2.6.2
|
data/README.md
CHANGED
@@ -1,2 +1,86 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# CSV Utils
|
2
|
+
|
3
|
+
A Ruby library providing a comprehensive set of utilities for manipulating and processing CSV files. This library offers a robust set of tools for comparing, transforming, sorting, and managing CSV data efficiently.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
- **CSV Comparison**: Compare two CSV files and identify differences (creates, updates, and deletes)
|
8
|
+
- **CSV Transformation**: Transform CSV data with customizable rules
|
9
|
+
- **CSV Sorting**: Sort CSV files based on specified columns
|
10
|
+
- **CSV Reporting**: Generate reports from CSV data
|
11
|
+
- **CSV Iteration**: Efficient iteration over CSV files
|
12
|
+
- **CSV Extension**: Extend CSV files with additional data
|
13
|
+
- **CSV Wrapper**: Convenient wrapper for CSV operations
|
14
|
+
|
15
|
+
## Installation
|
16
|
+
|
17
|
+
Add this line to your application's Gemfile:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
gem 'csv-utils'
|
21
|
+
```
|
22
|
+
|
23
|
+
And then execute:
|
24
|
+
|
25
|
+
```bash
|
26
|
+
$ bundle install
|
27
|
+
```
|
28
|
+
|
29
|
+
Or install it yourself as:
|
30
|
+
|
31
|
+
```bash
|
32
|
+
$ gem install csv-utils
|
33
|
+
```
|
34
|
+
|
35
|
+
## Usage
|
36
|
+
|
37
|
+
### Comparing CSV Files
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
require 'csv_utils'
|
41
|
+
|
42
|
+
comparator = CSVUtils::CSVCompare.new('primary.csv', ['updated_at']) do |src, dest|
|
43
|
+
src['id'] <=> dest['id']
|
44
|
+
end
|
45
|
+
|
46
|
+
comparator.compare('secondary.csv') do |action, record|
|
47
|
+
case action
|
48
|
+
when :create
|
49
|
+
puts "Create: #{record}"
|
50
|
+
when :update
|
51
|
+
puts "Update: #{record}"
|
52
|
+
when :delete
|
53
|
+
puts "Delete: #{record}"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
### Sorting CSV Files
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
sorter = CSVUtils::CSVSort.new('input.csv')
|
62
|
+
sorter.sort('output.csv', ['id', 'name'])
|
63
|
+
```
|
64
|
+
|
65
|
+
### Transforming CSV Data
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
transformer = CSVUtils::CSVTransformer.new('input.csv')
|
69
|
+
transformer.transform('output.csv') do |row|
|
70
|
+
# Transform row data
|
71
|
+
row['new_column'] = row['old_column'].upcase
|
72
|
+
row
|
73
|
+
end
|
74
|
+
```
|
75
|
+
|
76
|
+
## Development
|
77
|
+
|
78
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests.
|
79
|
+
|
80
|
+
## Contributing
|
81
|
+
|
82
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/yourusername/csv-utils.
|
83
|
+
|
84
|
+
## License
|
85
|
+
|
86
|
+
The gem is available as open source under the terms of the MIT License.
|
data/csv-utils.gemspec
CHANGED
data/docs/ARCHITECTURE.md
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
# CSV Utils Architecture
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
CSV Utils is a Ruby library designed to provide a comprehensive set of tools for CSV file manipulation. The architecture follows a modular design pattern, with each component handling a specific aspect of CSV processing.
|
6
|
+
|
7
|
+
## Core Components
|
8
|
+
|
9
|
+
### 1. CSVCompare
|
10
|
+
- **Purpose**: Compares two CSV files to identify differences
|
11
|
+
- **Key Features**:
|
12
|
+
- Identifies creates, updates, and deletes between files
|
13
|
+
- Supports custom comparison logic
|
14
|
+
- Handles BOM (Byte Order Mark) stripping
|
15
|
+
- Memory-efficient streaming comparison
|
16
|
+
- **Dependencies**: None (uses standard Ruby CSV library)
|
17
|
+
|
18
|
+
### 2. CSVTransformer
|
19
|
+
- **Purpose**: Transforms CSV data according to custom rules
|
20
|
+
- **Key Features**:
|
21
|
+
- Row-by-row transformation
|
22
|
+
- Custom transformation blocks
|
23
|
+
- Maintains header structure
|
24
|
+
- **Dependencies**: None
|
25
|
+
|
26
|
+
### 3. CSVSort
|
27
|
+
- **Purpose**: Sorts CSV files based on specified columns
|
28
|
+
- **Key Features**:
|
29
|
+
- Multi-column sorting
|
30
|
+
- Memory-efficient sorting
|
31
|
+
- Preserves header row
|
32
|
+
- **Dependencies**: None
|
33
|
+
|
34
|
+
### 4. CSVReport
|
35
|
+
- **Purpose**: Generates reports from CSV data
|
36
|
+
- **Key Features**:
|
37
|
+
- Custom report formatting
|
38
|
+
- Data aggregation
|
39
|
+
- Summary statistics
|
40
|
+
- **Dependencies**: None
|
41
|
+
|
42
|
+
### 5. CSVIterator
|
43
|
+
- **Purpose**: Provides efficient iteration over CSV files
|
44
|
+
- **Key Features**:
|
45
|
+
- Memory-efficient streaming
|
46
|
+
- Custom iteration blocks
|
47
|
+
- Header handling
|
48
|
+
- **Dependencies**: None
|
49
|
+
|
50
|
+
### 6. CSVExtender
|
51
|
+
- **Purpose**: Extends CSV files with additional data
|
52
|
+
- **Key Features**:
|
53
|
+
- Column addition
|
54
|
+
- Data enrichment
|
55
|
+
- Custom extension logic
|
56
|
+
- **Dependencies**: None
|
57
|
+
|
58
|
+
### 7. CSVWrapper
|
59
|
+
- **Purpose**: Provides a convenient wrapper for CSV operations
|
60
|
+
- **Key Features**:
|
61
|
+
- Simplified CSV access
|
62
|
+
- Common operation shortcuts
|
63
|
+
- Error handling
|
64
|
+
- **Dependencies**: None
|
65
|
+
|
66
|
+
## Design Principles
|
67
|
+
|
68
|
+
1. **Modularity**: Each component is self-contained and focused on a single responsibility
|
69
|
+
2. **Memory Efficiency**: Components are designed to handle large files through streaming
|
70
|
+
3. **Extensibility**: Custom logic can be injected through blocks and callbacks
|
71
|
+
4. **Error Handling**: Robust error handling and validation
|
72
|
+
5. **Performance**: Optimized for large file processing
|
73
|
+
|
74
|
+
## Data Flow
|
75
|
+
|
76
|
+
1. **Input Processing**:
|
77
|
+
- Files are read using Ruby's CSV library
|
78
|
+
- BOM stripping is handled automatically
|
79
|
+
- Headers are preserved and validated
|
80
|
+
|
81
|
+
2. **Processing**:
|
82
|
+
- Each component processes data in a streaming fashion
|
83
|
+
- Custom logic can be injected at various points
|
84
|
+
- Memory usage is optimized for large files
|
85
|
+
|
86
|
+
3. **Output Generation**:
|
87
|
+
- Results are written to new files or returned as data structures
|
88
|
+
- Headers are preserved in output files
|
89
|
+
- Error states are properly handled
|
90
|
+
|
91
|
+
## Error Handling
|
92
|
+
|
93
|
+
- File not found errors
|
94
|
+
- Invalid CSV format
|
95
|
+
- Missing required columns
|
96
|
+
- Permission issues
|
97
|
+
- Memory constraints
|
98
|
+
|
99
|
+
## Performance Considerations
|
100
|
+
|
101
|
+
1. **Memory Usage**:
|
102
|
+
- Streaming processing for large files
|
103
|
+
- Minimal in-memory data storage
|
104
|
+
- Efficient data structures
|
105
|
+
|
106
|
+
2. **Processing Speed**:
|
107
|
+
- Optimized comparison algorithms
|
108
|
+
- Efficient sorting mechanisms
|
109
|
+
- Minimal file I/O operations
|
110
|
+
|
111
|
+
## Future Considerations
|
112
|
+
|
113
|
+
1. **Potential Enhancements**:
|
114
|
+
- Parallel processing support
|
115
|
+
- Additional data format support
|
116
|
+
- Enhanced reporting capabilities
|
117
|
+
- Caching mechanisms
|
118
|
+
|
119
|
+
2. **Scalability**:
|
120
|
+
- Support for distributed processing
|
121
|
+
- Cloud storage integration
|
122
|
+
- Batch processing capabilities
|
123
|
+
|
124
|
+
## Testing Strategy
|
125
|
+
|
126
|
+
1. **Unit Tests**:
|
127
|
+
- Individual component testing
|
128
|
+
- Edge case coverage
|
129
|
+
- Performance benchmarks
|
130
|
+
|
131
|
+
2. **Integration Tests**:
|
132
|
+
- Component interaction testing
|
133
|
+
- End-to-end workflows
|
134
|
+
- Error scenario coverage
|
data/lib/csv_utils/csv_iterator.rb
CHANGED
@@ -21,8 +21,8 @@ class CSVUtils::CSVIterator
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def initialize(src_csv, csv_options = {})
|
25
|
-
@src_csv = CSVUtils::CSVWrapper.new(src_csv,
|
24
|
+
def initialize(src_csv, csv_options = {}, mode = 'rb')
|
25
|
+
@src_csv = CSVUtils::CSVWrapper.new(src_csv, mode, csv_options)
|
26
26
|
end
|
27
27
|
|
28
28
|
def each(headers = nil)
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-07-03 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: csv
|
@@ -70,6 +69,7 @@ files:
|
|
70
69
|
- bin/csv-splitter
|
71
70
|
- bin/csv-validator
|
72
71
|
- csv-utils.gemspec
|
72
|
+
- docs/ARCHITECTURE.md
|
73
73
|
- lib/csv-utils.rb
|
74
74
|
- lib/csv_utils/csv_compare.rb
|
75
75
|
- lib/csv_utils/csv_extender.rb
|
@@ -85,7 +85,6 @@ homepage: https://github.com/dougyouch/csv-utils
|
|
85
85
|
licenses:
|
86
86
|
- MIT
|
87
87
|
metadata: {}
|
88
|
-
post_install_message:
|
89
88
|
rdoc_options: []
|
90
89
|
require_paths:
|
91
90
|
- lib
|
@@ -100,8 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
99
|
- !ruby/object:Gem::Version
|
101
100
|
version: '0'
|
102
101
|
requirements: []
|
103
|
-
rubygems_version: 3.
|
104
|
-
signing_key:
|
102
|
+
rubygems_version: 3.6.2
|
105
103
|
specification_version: 4
|
106
104
|
summary: CSV Utils
|
107
105
|
test_files: []
|