philiprehberger-csv_kit 0.1.2 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec0358461e5ad62f3a288a5e040be44f7b68b1e6fe42ad067f7a5eea5a26d304
4
- data.tar.gz: 39e0e6cdd8fda4b74200a5ba9f02019ee845b3b2a95bf251019a6e97cd5c73f6
3
+ metadata.gz: 5102ddea76ed2da14ae72a63b471103facb61636ba548e7d0f00fa985b53cbf1
4
+ data.tar.gz: e193e57fac4958f2ef09faca2f5b3811c2561932bc83c785f8dd044bf390267d
5
5
  SHA512:
6
- metadata.gz: c3b78d43a8678bad749b381387091f783a533a7dbaebef06f4cbab09a9b01009b3e172eb1be2daa0519d8bdddfb80a668804645def75a109989e0f581a29dc7c
7
- data.tar.gz: 62ae27c26356c622e149e4cb42195c59e54a8bd97774d2cb21289c0f177b3135dc24fee0ed67f0075cd98f0596e3228e356674476d2bf678d0b554a9a77e7f75
6
+ metadata.gz: aaeeece43096c75e90321a44dde352f1822f5d9685ebacb89240554d6c782076c1bb467b43b8899c274dcadd6a221873c8cdf126144e589ad1af256bacfa73ff
7
+ data.tar.gz: 7781416688fa5abde89f73acf7891209511ea590ed10147565887db513fc114d81b116606b8f3bd2e5579699c617722d82d95c5bbd245a5fe75dd3113d266e51
data/CHANGELOG.md CHANGED
@@ -1,15 +1,32 @@
1
1
  # Changelog
2
2
 
3
- ## 0.1.2
3
+ ## [0.2.2]
4
4
 
5
- - Add License badge to README
6
- - Add bug_tracker_uri to gemspec
5
+ - Revert gemspec to single-quoted strings per RuboCop default configuration
6
+
7
+ ## [0.2.1]
8
+
9
+ - Fix RuboCop Style/StringLiterals violations in gemspec
7
10
 
8
11
  All notable changes to this gem will be documented in this file.
9
12
 
10
13
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
11
14
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
12
15
 
16
+ ## [0.2.0] - 2026-03-17
17
+
18
+ ### Added
19
+ - CSV writing via `Writer` class — generate CSV from arrays/hashes, write to string or IO
20
+ - Configurable per-row error handling with `on_error` — return `:skip` or `:abort`
21
+ - Max error tracking with `max_errors(n)` — stop processing after N errors
22
+ - Column aliasing with `rename(:from, :to)` — rename columns during processing
23
+ - Row callbacks with `after_each` — hook after each row is fully transformed
24
+
25
+ ## [0.1.2]
26
+
27
+ - Add License badge to README
28
+ - Add bug_tracker_uri to gemspec
29
+
13
30
  ## [Unreleased]
14
31
 
15
32
  ## [0.1.0] - 2026-03-15
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Gem Version](https://badge.fury.io/rb/philiprehberger-csv_kit.svg)](https://rubygems.org/gems/philiprehberger-csv_kit)
5
5
  [![License](https://img.shields.io/github/license/philiprehberger/rb-csv-kit)](LICENSE)
6
6
 
7
- Streaming CSV processor with type coercion and validation.
7
+ Streaming CSV processor with type coercion, validation, writing, and error recovery.
8
8
 
9
9
  ## Requirements
10
10
 
@@ -69,6 +69,58 @@ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
69
69
  end
70
70
  ```
71
71
 
72
+ ### Writing CSV
73
+
74
+ ```ruby
75
+ writer = Philiprehberger::CsvKit::Writer.new(headers: [:name, :age])
76
+ csv_string = writer.write([{ name: "Alice", age: 30 }, { name: "Bob", age: 25 }])
77
+
78
+ # Write to a file
79
+ File.open('output.csv', 'w') do |f|
80
+ writer.write_to([{ name: "Alice", age: 30 }], f)
81
+ end
82
+ ```
83
+
84
+ ### Error Recovery
85
+
86
+ ```ruby
87
+ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
88
+ p.on_error { |row, err| :skip } # or :abort
89
+ p.transform(:age) { |v| Integer(v) }
90
+ end
91
+ ```
92
+
93
+ ### Max Errors
94
+
95
+ ```ruby
96
+ processor = Philiprehberger::CsvKit::Processor.new('data.csv')
97
+ processor.max_errors(10)
98
+ processor.on_error { |row, err| :skip }
99
+ processor.transform(:age) { |v| Integer(v) }
100
+
101
+ begin
102
+ processor.run
103
+ rescue Philiprehberger::CsvKit::Error
104
+ puts processor.errors.length # collected errors
105
+ end
106
+ ```
107
+
108
+ ### Column Aliasing
109
+
110
+ ```ruby
111
+ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
112
+ p.rename(:raw_col, :clean_col)
113
+ end
114
+ ```
115
+
116
+ ### Row Callbacks
117
+
118
+ ```ruby
119
+ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
120
+ p.after_each { |row| puts row.to_h }
121
+ end
122
+ ```
123
+
72
124
  ### Delimiter Detection
73
125
 
74
126
  ```ruby
@@ -89,6 +141,14 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
89
141
  | `Processor#validate(key, &block)` | Register column validation (skip invalid) |
90
142
  | `Processor#reject(&block)` | Reject rows matching predicate |
91
143
  | `Processor#each(&block)` | Callback for each processed row |
144
+ | `Processor#on_error(&block)` | Per-row error handler (return `:skip` or `:abort`) |
145
+ | `Processor#max_errors(n)` | Stop after N errors |
146
+ | `Processor#errors` | Collected errors from last run |
147
+ | `Processor#rename(from, to)` | Rename column during processing |
148
+ | `Processor#after_each(&block)` | Callback after each row is fully processed |
149
+ | `Writer.new(headers:)` | Create a CSV writer with given headers |
150
+ | `Writer#write(rows)` | Generate CSV string from rows |
151
+ | `Writer#write_to(rows, io)` | Write CSV to an IO object |
92
152
  | `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
93
153
  | `Row#[](key)` | Access value by symbol key |
94
154
  | `Row#to_h` | Convert row to plain hash |
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Mixin for column aliasing and row callbacks.
    #
    # State lives in two instance variables on the including object:
    # +@after_each_block+ (the stored callback) and +@renames+ (a hash of
    # original column name => new column name). The includer is expected to
    # call +init_callbacks+ when it is constructed and +apply_renames!+ for
    # each row it processes.
    module Callbacks
      # Register a callback to run after each row is processed.
      # Registering again replaces the previously stored block.
      #
      # @yield [row] the processed row
      # @return [Proc, nil] the stored block
      def after_each(&block)
        @after_each_block = block
      end

      # Rename a column during processing. Both names are converted with
      # +to_sym+; registering the same +from+ twice keeps the latest target.
      #
      # @param from [Symbol, String] original column name
      # @param to [Symbol, String] new column name
      # @return [Symbol] the new column name
      def rename(from, to)
        # Lazily create the table so a rename registered before
        # init_callbacks has run does not fail on nil.
        (@renames ||= {})[from.to_sym] = to.to_sym
      end

      private

      # Reset callback and rename state to "nothing registered".
      def init_callbacks
        @after_each_block = nil
        @renames = {}
      end

      # Apply every registered rename to +row+ in place.
      #
      # +row+ must respond to +key?+, +[]+, +[]=+ and +delete+. Renames whose
      # source column is absent are ignored.
      #
      # @param row [#key?, #[], #[]=, #delete] the row to mutate
      def apply_renames!(row)
        (@renames || {}).each do |from, to|
          # Renaming a column to itself must be a no-op: copying and then
          # deleting the same key would drop the column entirely.
          next if from == to
          next unless row.key?(from)

          row[to] = row[from]
          row.delete(from)
        end
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Mixin for per-row error handling and max-error tracking.
    #
    # State lives on the including object: +@error_handler+ (the stored
    # block), +@max_errors+ (the limit, or nil for none) and +@errors+ (the
    # list of recorded failures). The includer is expected to call
    # +init_error_handler+ when constructed and +handle_row_error+ whenever
    # processing a row raises.
    module ErrorHandler
      # Configure a per-row error handler. Registering again replaces the
      # previously stored block.
      #
      # @yield [row, error] the row exactly as it was handed to the error
      #   path (not converted to a Hash) and the StandardError that was raised
      # @yieldreturn [:skip, :abort] action to take
      # @return [Proc, nil] the stored block
      def on_error(&block)
        @error_handler = block
      end

      # Set a maximum number of errors before aborting.
      #
      # The limit is compared against the number of recorded errors, so it
      # must be a real number. Anything else is rejected here instead of
      # failing later, in the middle of row processing, with an unrelated
      # comparison error.
      #
      # @param limit [Integer, nil] max errors allowed; nil or false disables the limit
      # @return [self]
      # @raise [ArgumentError] if limit is neither nil/false nor a real number
      def max_errors(limit)
        acceptable = !limit || (limit.is_a?(Numeric) && limit.real?)
        raise ArgumentError, "max_errors limit must be numeric or nil, got #{limit.inspect}" unless acceptable

        @max_errors = limit
        self
      end

      # Returns the errors recorded so far.
      #
      # @return [Array<Hash>] entries of the form +{ row: Hash, error: StandardError }+
      def errors
        @errors ||= []
      end

      private

      # Reset handler, limit and recorded errors to their empty defaults.
      def init_error_handler
        @error_handler = nil
        @max_errors = nil
        @errors = []
      end

      # Record a failed row, enforce the limit, then ask for the action.
      #
      # @param row [#to_h] the row being processed when the error occurred
      # @param err [StandardError] the raised error
      # @return [Object] :skip when no handler is set, otherwise the handler's result
      # @raise [Error] when the recorded error count reaches the limit
      def handle_row_error(row, err)
        # Go through #errors so a missing init_error_handler call cannot
        # leave us appending to nil.
        errors << { row: row.to_h, error: err }
        check_max_errors!
        resolve_error_action(row, err)
      end

      # Raise once the number of recorded errors has reached the limit.
      # Runs before the handler is consulted, so the handler is not called
      # for the error that hits the limit.
      def check_max_errors!
        return unless @max_errors && errors.length >= @max_errors

        raise Error, "Max errors (#{@max_errors}) reached"
      end

      # Default to :skip when no handler is registered; otherwise return
      # whatever the handler block returns.
      def resolve_error_action(row, err)
        return :skip unless @error_handler

        @error_handler.call(row, err)
      end
    end
  end
end
@@ -4,6 +4,9 @@ module Philiprehberger
4
4
  module CsvKit
5
5
  # Streaming CSV processor with a DSL for transforms, validations, and filtering.
6
6
  class Processor
7
+ include ErrorHandler
8
+ include Callbacks
9
+
7
10
  def initialize(path_or_io)
8
11
  @path_or_io = path_or_io
9
12
  @transforms = {}
@@ -11,41 +14,31 @@ module Philiprehberger
11
14
  @reject_block = nil
12
15
  @each_block = nil
13
16
  @header_names = nil
17
+ init_error_handler
18
+ init_callbacks
14
19
  end
15
20
 
16
21
  # Override header names used for symbolized keys.
17
- #
18
- # @param names [Array<Symbol>] header names
19
22
  def headers(*names)
20
23
  @header_names = names.map(&:to_sym)
21
24
  end
22
25
 
23
26
  # Register a transform for a specific column.
24
- #
25
- # @param key [Symbol] column name
26
- # @yield [String] raw cell value
27
27
  def transform(key, &block)
28
28
  @transforms[key] = block
29
29
  end
30
30
 
31
- # Register a validation for a specific column. Rows failing validation are skipped.
32
- #
33
- # @param key [Symbol] column name
34
- # @yield [String] cell value
31
+ # Register a validation for a specific column.
35
32
  def validate(key, &block)
36
33
  @validations[key] = block
37
34
  end
38
35
 
39
- # Register a reject predicate. Rows matching are excluded.
40
- #
41
- # @yield [Row] the row
36
+ # Register a reject predicate.
42
37
  def reject(&block)
43
38
  @reject_block = block
44
39
  end
45
40
 
46
41
  # Register a callback for each processed row.
47
- #
48
- # @yield [Row] the row
49
42
  def each(&block)
50
43
  @each_block = block
51
44
  end
@@ -54,6 +47,7 @@ module Philiprehberger
54
47
  #
55
48
  # @return [Array<Row>] collected rows
56
49
  def run
50
+ @collected_errors = []
57
51
  open_csv { |csv| process_rows(csv) }
58
52
  end
59
53
 
@@ -61,30 +55,41 @@ module Philiprehberger
61
55
 
62
56
  def process_rows(csv)
63
57
  csv.each_with_object([]) do |csv_row, results|
64
- row = build_row(csv_row)
65
- next unless valid?(row)
66
- next if rejected?(row)
67
-
68
- apply_transforms!(row)
69
- @each_block&.call(row)
70
- results << row
58
+ process_single_row(csv_row, results)
71
59
  end
72
60
  end
73
61
 
62
# Build a row from one CSV record and push it through the pipeline.
#
# A row for which valid? is false or rejected? is true is dropped without
# being recorded. Otherwise transforms and renames are applied, the `each`
# and `after_each` blocks are called (when set), and the row is appended to
# +results+. Any StandardError raised along the way, including while
# building the row, is passed to handle_error_for_row.
def process_single_row(csv_row, results)
  row = build_row(csv_row)
  return if !valid?(row) || rejected?(row)

  apply_transforms!(row)
  apply_renames!(row)
  @each_block.call(row) if @each_block
  @after_each_block.call(row) if @after_each_block
  results.push(row)
rescue StandardError => error
  # row is nil here when build_row itself raised.
  handle_error_for_row(row, error, results)
end
75
+
76
# Hand a failed row to the error handler and raise when it answers :abort.
# Any other answer leaves the row out of the results and lets processing
# continue. The third argument is accepted but not used.
def handle_error_for_row(row, err, _results)
  return unless handle_row_error(row, err) == :abort

  raise Error, "Aborted: #{err.message}"
end
80
+
74
81
  def open_csv(&block)
75
82
  if @path_or_io.is_a?(String)
76
83
  CSV.open(@path_or_io, headers: true, &block)
77
84
  else
78
- csv = CSV.new(@path_or_io, headers: true)
79
- block.call(csv)
85
+ block.call(CSV.new(@path_or_io, headers: true))
80
86
  end
81
87
  end
82
88
 
83
89
  def build_row(csv_row)
84
90
  data = csv_row.to_h
85
91
  if @header_names
86
- values = data.values
87
- mapped = @header_names.zip(values).to_h
92
+ mapped = @header_names.zip(data.values).to_h
88
93
  Row.new(mapped)
89
94
  else
90
95
  Row.new(data.transform_keys(&:to_sym))
@@ -25,6 +25,22 @@ module Philiprehberger
25
25
  @data[key] = value
26
26
  end
27
27
 
28
+ # Check whether the row's underlying data has an entry for the given key.
29
+ #
30
+ # @param key [Symbol] column name
31
+ # @return [Boolean] result of the key lookup on the row data
32
+ def key?(key)
33
+ @data.key?(key)
34
+ end
35
+
36
+ # Delete a key from the row's underlying data (mutates the row in place).
37
+ #
38
+ # @param key [Symbol] column name
39
+ # @return [Object, nil] whatever the underlying delete returns (presumably the removed value, nil if absent; confirm @data is a Hash)
40
+ def delete(key)
41
+ @data.delete(key)
42
+ end
43
+
28
44
  # Return the row as a plain hash.
29
45
  #
30
46
  # @return [Hash{Symbol => Object}]
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module CsvKit
5
- VERSION = '0.1.2'
5
+ VERSION = '0.2.2'
6
6
  end
7
7
  end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Generates CSV output from arrays of hashes or arrays.
    #
    # The header row is always written first. Hash rows are projected onto
    # the configured headers, in header order; any other row is handed to
    # CSV unchanged.
    class Writer
      # @param headers [Array<Symbol, String>] column headers, in output
      #   order; each is converted with +to_sym+
      def initialize(headers:)
        @headers = headers.map(&:to_sym)
      end

      # Write rows to a CSV string.
      #
      # @param rows [Array<Hash, Array>] data rows
      # @return [String] CSV string, header line included
      def write(rows)
        buffer = StringIO.new
        generate_csv(rows, buffer)
        # Read the buffer directly rather than relying on CSV forwarding
        # #string to its underlying IO.
        buffer.string
      end

      # Write rows to an IO object.
      #
      # @param rows [Array<Hash, Array>] data rows
      # @param io [IO] writable IO
      # @return [IO] the IO object
      def write_to(rows, io)
        generate_csv(rows, io)
        io
      end

      private

      # Emit the header line followed by one line per row.
      #
      # @return [CSV] the CSV wrapper around +io+
      def generate_csv(rows, io)
        csv = CSV.new(io)
        csv << @headers
        rows.each { |row| csv << row_values(row) }
        csv
      end

      # Turn one input row into the ordered list of cell values.
      # Non-Hash rows are passed through as-is.
      def row_values(row)
        return row unless row.is_a?(Hash)

        @headers.map { |header| hash_field(row, header) }
      end

      # Look up one header in a Hash row. Headers are stored as symbols, so
      # when the symbol key is absent but the equivalent String key exists
      # (e.g. data parsed from JSON), use the String key instead of silently
      # writing an empty cell. In every other case this is plain
      # +row[header]+, so hash defaults behave as before.
      def hash_field(row, header)
        return row[header] if row.key?(header)

        name = header.to_s
        row.key?(name) ? row[name] : row[header]
      end
    end
  end
end
@@ -4,7 +4,10 @@ require 'csv'
4
4
  require_relative 'csv_kit/version'
5
5
  require_relative 'csv_kit/detector'
6
6
  require_relative 'csv_kit/row'
7
+ require_relative 'csv_kit/error_handler'
8
+ require_relative 'csv_kit/callbacks'
7
9
  require_relative 'csv_kit/processor'
10
+ require_relative 'csv_kit/writer'
8
11
 
9
12
  module Philiprehberger
10
13
  module CsvKit
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-csv_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-16 00:00:00.000000000 Z
11
+ date: 2026-03-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Streaming CSV processor with row-by-row transforms, validations, column
14
- plucking, filtering, and automatic delimiter detection.
14
+ plucking, filtering, writing, error recovery, and automatic delimiter detection.
15
15
  email:
16
16
  - me@philiprehberger.com
17
17
  executables: []
@@ -22,10 +22,13 @@ files:
22
22
  - LICENSE
23
23
  - README.md
24
24
  - lib/philiprehberger/csv_kit.rb
25
+ - lib/philiprehberger/csv_kit/callbacks.rb
25
26
  - lib/philiprehberger/csv_kit/detector.rb
27
+ - lib/philiprehberger/csv_kit/error_handler.rb
26
28
  - lib/philiprehberger/csv_kit/processor.rb
27
29
  - lib/philiprehberger/csv_kit/row.rb
28
30
  - lib/philiprehberger/csv_kit/version.rb
31
+ - lib/philiprehberger/csv_kit/writer.rb
29
32
  homepage: https://github.com/philiprehberger/rb-csv-kit
30
33
  licenses:
31
34
  - MIT