philiprehberger-csv_kit 0.1.2 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -3
- data/README.md +61 -1
- data/lib/philiprehberger/csv_kit/callbacks.rb +39 -0
- data/lib/philiprehberger/csv_kit/error_handler.rb +58 -0
- data/lib/philiprehberger/csv_kit/processor.rb +30 -25
- data/lib/philiprehberger/csv_kit/row.rb +16 -0
- data/lib/philiprehberger/csv_kit/version.rb +1 -1
- data/lib/philiprehberger/csv_kit/writer.rb +46 -0
- data/lib/philiprehberger/csv_kit.rb +3 -0
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5102ddea76ed2da14ae72a63b471103facb61636ba548e7d0f00fa985b53cbf1
|
|
4
|
+
data.tar.gz: e193e57fac4958f2ef09faca2f5b3811c2561932bc83c785f8dd044bf390267d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aaeeece43096c75e90321a44dde352f1822f5d9685ebacb89240554d6c782076c1bb467b43b8899c274dcadd6a221873c8cdf126144e589ad1af256bacfa73ff
|
|
7
|
+
data.tar.gz: 7781416688fa5abde89f73acf7891209511ea590ed10147565887db513fc114d81b116606b8f3bd2e5579699c617722d82d95c5bbd245a5fe75dd3113d266e51
|
data/CHANGELOG.md
CHANGED
|
@@ -1,15 +1,32 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## 0.
|
|
3
|
+
## [0.2.2]
|
|
4
4
|
|
|
5
|
-
-
|
|
6
|
-
|
|
5
|
+
- Revert gemspec to single-quoted strings per RuboCop default configuration
|
|
6
|
+
|
|
7
|
+
## [0.2.1]
|
|
8
|
+
|
|
9
|
+
- Fix RuboCop Style/StringLiterals violations in gemspec
|
|
7
10
|
|
|
8
11
|
All notable changes to this gem will be documented in this file.
|
|
9
12
|
|
|
10
13
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
11
14
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
12
15
|
|
|
16
|
+
## [0.2.0] - 2026-03-17
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- CSV writing via `Writer` class — generate CSV from arrays/hashes, write to string or IO
|
|
20
|
+
- Configurable per-row error handling with `on_error` — return `:skip` or `:abort`
|
|
21
|
+
- Max error tracking with `max_errors(n)` — stop processing after N errors
|
|
22
|
+
- Column aliasing with `rename(:from, :to)` — rename columns during processing
|
|
23
|
+
- Row callbacks with `after_each` — hook after each row is fully transformed
|
|
24
|
+
|
|
25
|
+
## [0.1.2]
|
|
26
|
+
|
|
27
|
+
- Add License badge to README
|
|
28
|
+
- Add bug_tracker_uri to gemspec
|
|
29
|
+
|
|
13
30
|
## [Unreleased]
|
|
14
31
|
|
|
15
32
|
## [0.1.0] - 2026-03-15
|
data/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
[RubyGems](https://rubygems.org/gems/philiprehberger-csv_kit)
|
|
5
5
|
[License](LICENSE)
|
|
6
6
|
|
|
7
|
-
Streaming CSV processor with type coercion and
|
|
7
|
+
Streaming CSV processor with type coercion, validation, writing, and error recovery.
|
|
8
8
|
|
|
9
9
|
## Requirements
|
|
10
10
|
|
|
@@ -69,6 +69,58 @@ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
|
69
69
|
end
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
+
### Writing CSV
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
writer = Philiprehberger::CsvKit::Writer.new(headers: [:name, :age])
|
|
76
|
+
csv_string = writer.write([{ name: "Alice", age: 30 }, { name: "Bob", age: 25 }])
|
|
77
|
+
|
|
78
|
+
# Write to a file
|
|
79
|
+
File.open('output.csv', 'w') do |f|
|
|
80
|
+
writer.write_to([{ name: "Alice", age: 30 }], f)
|
|
81
|
+
end
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Error Recovery
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
88
|
+
p.on_error { |row, err| :skip } # or :abort
|
|
89
|
+
p.transform(:age) { |v| Integer(v) }
|
|
90
|
+
end
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Max Errors
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
processor = Philiprehberger::CsvKit::Processor.new('data.csv')
|
|
97
|
+
processor.max_errors(10)
|
|
98
|
+
processor.on_error { |row, err| :skip }
|
|
99
|
+
processor.transform(:age) { |v| Integer(v) }
|
|
100
|
+
|
|
101
|
+
begin
|
|
102
|
+
processor.run
|
|
103
|
+
rescue Philiprehberger::CsvKit::Error
|
|
104
|
+
puts processor.errors.length # collected errors
|
|
105
|
+
end
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Column Aliasing
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
112
|
+
p.rename(:raw_col, :clean_col)
|
|
113
|
+
end
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Row Callbacks
|
|
117
|
+
|
|
118
|
+
```ruby
|
|
119
|
+
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
120
|
+
p.after_each { |row| puts row.to_h }
|
|
121
|
+
end
|
|
122
|
+
```
|
|
123
|
+
|
|
72
124
|
### Delimiter Detection
|
|
73
125
|
|
|
74
126
|
```ruby
|
|
@@ -89,6 +141,14 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
|
|
|
89
141
|
| `Processor#validate(key, &block)` | Register column validation (skip invalid) |
|
|
90
142
|
| `Processor#reject(&block)` | Reject rows matching predicate |
|
|
91
143
|
| `Processor#each(&block)` | Callback for each processed row |
|
|
144
|
+
| `Processor#on_error(&block)` | Per-row error handler (return `:skip` or `:abort`) |
|
|
145
|
+
| `Processor#max_errors(n)` | Raise `Error` once N errors have been collected |
|
|
146
|
+
| `Processor#errors` | Collected errors from last run |
|
|
147
|
+
| `Processor#rename(from, to)` | Rename column during processing |
|
|
148
|
+
| `Processor#after_each(&block)` | Callback after each row is fully processed |
|
|
149
|
+
| `Writer.new(headers:)` | Create a CSV writer with given headers |
|
|
150
|
+
| `Writer#write(rows)` | Generate CSV string from rows |
|
|
151
|
+
| `Writer#write_to(rows, io)` | Write CSV to an IO object |
|
|
92
152
|
| `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
|
|
93
153
|
| `Row#[](key)` | Access value by symbol key |
|
|
94
154
|
| `Row#to_h` | Convert row to plain hash |
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Philiprehberger
|
|
4
|
+
module CsvKit
|
|
5
|
+
# Mixin for column aliasing and row callbacks.
#
# The including class must call +init_callbacks+ (private) before +rename+
# is used, because +rename+ writes into the hash that method creates.
module Callbacks
  # Register a callback to run after each row is processed.
  # Registering again replaces the previously stored callback.
  #
  # @yield [row] the processed row
  # @return [Proc, nil] the stored block
  def after_each(&block)
    @after_each_block = block
  end

  # Rename a column during processing.
  #
  # @param from [Symbol, String] original column name (converted with +to_sym+)
  # @param to [Symbol, String] new column name (converted with +to_sym+)
  # @return [Symbol] the new column name
  def rename(from, to)
    @renames[from.to_sym] = to.to_sym
  end

  private

  # Reset callback state: no after-each hook and no renames.
  def init_callbacks
    @after_each_block = nil
    @renames = {}
  end

  # Apply every registered rename to +row+ in place.
  #
  # +row+ only needs to respond to +key?+, +[]+, +[]=+ and +delete+.
  # Columns that are absent from the row are left untouched.
  #
  # @param row [#key?, #[], #[]=, #delete] the row to mutate
  def apply_renames!(row)
    @renames.each do |from, to|
      # Renaming a column to itself must be a no-op. Without this guard the
      # copy-then-delete below would remove the column entirely.
      next if from == to
      next unless row.key?(from)

      row[to] = row[from]
      row.delete(from)
    end
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Philiprehberger
|
|
4
|
+
module CsvKit
|
|
5
|
+
# Mixin for per-row error handling and max-error tracking.
#
# Failures are recorded in +errors+; an optional handler decides what to do
# with each failing row and an optional limit aborts once too many rows fail.
module ErrorHandler
  # Configure a per-row error handler. Registering again replaces the
  # previous handler.
  #
  # The handler receives the row object exactly as it was handed to the
  # error machinery (not the hash snapshot stored in +errors+), followed by
  # the error.
  #
  # @yield [row, err] the failing row and the error
  # @yieldreturn [:skip, :abort] action to take
  # @return [Proc, nil] the stored block
  def on_error(&block)
    @error_handler = block
  end

  # Set a maximum number of errors before aborting.
  #
  # @param limit [Integer, nil] max errors allowed; nil disables the limit
  # @return [self]
  def max_errors(limit)
    @max_errors = limit
    self
  end

  # Returns the collected errors.
  #
  # @return [Array<Hash>] entries of the form +{ row: Hash, error: Exception }+
  def errors
    @errors ||= []
  end

  private

  # Reset handler, limit and collected errors.
  def init_error_handler
    @error_handler = nil
    @max_errors = nil
    @errors = []
  end

  # Record a failure, enforce the error limit, then ask the handler what to do.
  #
  # @param row [#to_h] the failing row (+nil+ is tolerated: +nil.to_h+ is +{}+)
  # @param err [StandardError] the error raised while processing the row
  # @return [Object] the handler's return value, or +:skip+ without a handler
  # @raise [Error] when the configured error limit has been reached
  def handle_row_error(row, err)
    # Go through the lazily-initialising accessor so the mixin also works
    # when the includer has not called init_error_handler yet.
    errors << { row: row.to_h, error: err }
    check_max_errors!
    resolve_error_action(row, err)
  end

  # Raise once the number of collected errors reaches the configured limit.
  # The check runs before the handler is consulted for the current row.
  def check_max_errors!
    return unless @max_errors && errors.length >= @max_errors

    raise Error, "Max errors (#{@max_errors}) reached"
  end

  # Without a handler, failing rows are skipped.
  def resolve_error_action(row, err)
    return :skip unless @error_handler

    @error_handler.call(row, err)
  end
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -4,6 +4,9 @@ module Philiprehberger
|
|
|
4
4
|
module CsvKit
|
|
5
5
|
# Streaming CSV processor with a DSL for transforms, validations, and filtering.
|
|
6
6
|
class Processor
|
|
7
|
+
include ErrorHandler
|
|
8
|
+
include Callbacks
|
|
9
|
+
|
|
7
10
|
def initialize(path_or_io)
|
|
8
11
|
@path_or_io = path_or_io
|
|
9
12
|
@transforms = {}
|
|
@@ -11,41 +14,31 @@ module Philiprehberger
|
|
|
11
14
|
@reject_block = nil
|
|
12
15
|
@each_block = nil
|
|
13
16
|
@header_names = nil
|
|
17
|
+
init_error_handler
|
|
18
|
+
init_callbacks
|
|
14
19
|
end
|
|
15
20
|
|
|
16
21
|
# Override header names used for symbolized keys.
|
|
17
|
-
#
|
|
18
|
-
# @param names [Array<Symbol>] header names
|
|
19
22
|
def headers(*names)
|
|
20
23
|
@header_names = names.map(&:to_sym)
|
|
21
24
|
end
|
|
22
25
|
|
|
23
26
|
# Register a transform for a specific column.
|
|
24
|
-
#
|
|
25
|
-
# @param key [Symbol] column name
|
|
26
|
-
# @yield [String] raw cell value
|
|
27
27
|
def transform(key, &block)
|
|
28
28
|
@transforms[key] = block
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
-
# Register a validation for a specific column.
|
|
32
|
-
#
|
|
33
|
-
# @param key [Symbol] column name
|
|
34
|
-
# @yield [String] cell value
|
|
31
|
+
# Register a validation for a specific column.
|
|
35
32
|
def validate(key, &block)
|
|
36
33
|
@validations[key] = block
|
|
37
34
|
end
|
|
38
35
|
|
|
39
|
-
# Register a reject predicate.
|
|
40
|
-
#
|
|
41
|
-
# @yield [Row] the row
|
|
36
|
+
# Register a reject predicate.
|
|
42
37
|
def reject(&block)
|
|
43
38
|
@reject_block = block
|
|
44
39
|
end
|
|
45
40
|
|
|
46
41
|
# Register a callback for each processed row.
|
|
47
|
-
#
|
|
48
|
-
# @yield [Row] the row
|
|
49
42
|
def each(&block)
|
|
50
43
|
@each_block = block
|
|
51
44
|
end
|
|
@@ -54,6 +47,7 @@ module Philiprehberger
|
|
|
54
47
|
#
|
|
55
48
|
# @return [Array<Row>] collected rows
|
|
56
49
|
def run
  # Start every run with an empty error list so that #errors reflects only
  # this run and the max_errors limit is not tripped by failures left over
  # from an earlier run. (The previous assignment targeted
  # @collected_errors, which nothing reads.)
  @errors = []
  open_csv { |csv| process_rows(csv) }
end
|
|
59
53
|
|
|
@@ -61,30 +55,41 @@ module Philiprehberger
|
|
|
61
55
|
|
|
62
56
|
# Feed every record of +csv+ through the row pipeline and return the rows
# that made it into the result list.
#
# @param csv [#each] source of parsed CSV records
# @return [Array] collected rows
def process_rows(csv)
  collected = []
  csv.each { |record| process_single_row(record, collected) }
  collected
end
|
|
73
61
|
|
|
62
|
+
# Run one CSV record through the pipeline: build the row, drop it when it is
# invalid or rejected, otherwise transform, rename, fire both callbacks and
# append it to +results+. Any StandardError raised along the way is passed
# to +handle_error_for_row+; if building the row itself failed, +row+ is
# still nil at that point.
def process_single_row(csv_row, results)
  row = build_row(csv_row)
  if valid?(row) && !rejected?(row)
    apply_transforms!(row)
    apply_renames!(row)
    @each_block&.call(row)
    @after_each_block&.call(row)
    results << row
  end
rescue StandardError => e
  handle_error_for_row(row, e, results)
end
|
|
75
|
+
|
|
76
|
+
# Record the failure via +handle_row_error+ and abort the whole run when the
# resulting action is +:abort+; any other action leaves the row skipped.
#
# @raise [Error] when the action is +:abort+
def handle_error_for_row(row, err, _results)
  return unless handle_row_error(row, err) == :abort

  raise Error, "Aborted: #{err.message}"
end
|
|
80
|
+
|
|
74
81
|
# Yield a header-aware CSV reader for the configured source. A String source
# is opened with CSV.open; anything else is wrapped with CSV.new.
#
# @return [Object] whatever the block returns
def open_csv(&block)
  return CSV.open(@path_or_io, headers: true, &block) if @path_or_io.is_a?(String)

  block.call(CSV.new(@path_or_io, headers: true))
end
|
|
82
88
|
|
|
83
89
|
def build_row(csv_row)
|
|
84
90
|
data = csv_row.to_h
|
|
85
91
|
if @header_names
|
|
86
|
-
|
|
87
|
-
mapped = @header_names.zip(values).to_h
|
|
92
|
+
mapped = @header_names.zip(data.values).to_h
|
|
88
93
|
Row.new(mapped)
|
|
89
94
|
else
|
|
90
95
|
Row.new(data.transform_keys(&:to_sym))
|
|
@@ -25,6 +25,22 @@ module Philiprehberger
|
|
|
25
25
|
@data[key] = value
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
# Check if a key exists.
|
|
29
|
+
#
|
|
30
|
+
# @param key [Symbol] column name
|
|
31
|
+
# @return [Boolean]
|
|
32
|
+
def key?(key)
|
|
33
|
+
@data.key?(key)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Delete a key from the row.
|
|
37
|
+
#
|
|
38
|
+
# @param key [Symbol] column name
|
|
39
|
+
# @return [Object, nil] removed value
|
|
40
|
+
def delete(key)
|
|
41
|
+
@data.delete(key)
|
|
42
|
+
end
|
|
43
|
+
|
|
28
44
|
# Return the row as a plain hash.
|
|
29
45
|
#
|
|
30
46
|
# @return [Hash{Symbol => Object}]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Philiprehberger
|
|
4
|
+
module CsvKit
|
|
5
|
+
# Generates CSV output from arrays of hashes or arrays.
#
# A header line is always written first, followed by one line per row.
class Writer
  # @param headers [Array<Symbol, String>] column headers (stored as symbols)
  def initialize(headers:)
    @headers = headers.map(&:to_sym)
  end

  # Write rows to a CSV string.
  #
  # @param rows [Array<Hash, Array>] data rows
  # @return [String] CSV string
  def write(rows)
    buffer = StringIO.new
    generate_csv(rows, buffer)
    buffer.string
  end

  # Write rows to an IO object.
  #
  # @param rows [Array<Hash, Array>] data rows
  # @param io [IO] writable IO
  # @return [IO] the IO object
  def write_to(rows, io)
    generate_csv(rows, io)
    io
  end

  private

  # Emit the header line and then every row onto +io+.
  #
  # @return [CSV] the CSV wrapper around +io+
  def generate_csv(rows, io)
    csv = CSV.new(io)
    csv << @headers
    rows.each { |row| csv << row_values(row) }
    csv
  end

  # Hash rows are projected onto the configured headers, in header order;
  # missing columns become empty cells. Any other row is written as given.
  def row_values(row)
    return row unless row.is_a?(Hash)

    @headers.map { |header| row[lookup_key(row, header)] }
  end

  # Headers are stored as symbols, but hashes often arrive with string keys.
  # Use the string form only when the symbol key is absent and the string
  # key is present, so symbol-keyed rows behave exactly as before.
  def lookup_key(row, header)
    name = header.to_s
    !row.key?(header) && row.key?(name) ? name : header
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -4,7 +4,10 @@ require 'csv'
|
|
|
4
4
|
require_relative 'csv_kit/version'
|
|
5
5
|
require_relative 'csv_kit/detector'
|
|
6
6
|
require_relative 'csv_kit/row'
|
|
7
|
+
require_relative 'csv_kit/error_handler'
|
|
8
|
+
require_relative 'csv_kit/callbacks'
|
|
7
9
|
require_relative 'csv_kit/processor'
|
|
10
|
+
require_relative 'csv_kit/writer'
|
|
8
11
|
|
|
9
12
|
module Philiprehberger
|
|
10
13
|
module CsvKit
|
metadata
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-csv_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Streaming CSV processor with row-by-row transforms, validations, column
|
|
14
|
-
plucking, filtering, and automatic delimiter detection.
|
|
14
|
+
plucking, filtering, writing, error recovery, and automatic delimiter detection.
|
|
15
15
|
email:
|
|
16
16
|
- me@philiprehberger.com
|
|
17
17
|
executables: []
|
|
@@ -22,10 +22,13 @@ files:
|
|
|
22
22
|
- LICENSE
|
|
23
23
|
- README.md
|
|
24
24
|
- lib/philiprehberger/csv_kit.rb
|
|
25
|
+
- lib/philiprehberger/csv_kit/callbacks.rb
|
|
25
26
|
- lib/philiprehberger/csv_kit/detector.rb
|
|
27
|
+
- lib/philiprehberger/csv_kit/error_handler.rb
|
|
26
28
|
- lib/philiprehberger/csv_kit/processor.rb
|
|
27
29
|
- lib/philiprehberger/csv_kit/row.rb
|
|
28
30
|
- lib/philiprehberger/csv_kit/version.rb
|
|
31
|
+
- lib/philiprehberger/csv_kit/writer.rb
|
|
29
32
|
homepage: https://github.com/philiprehberger/rb-csv-kit
|
|
30
33
|
licenses:
|
|
31
34
|
- MIT
|