philiprehberger-csv_kit 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c35825a1d3ef7d421c3f81a39764ec731b1fdee58cb36233f8814d6bf64204a
4
- data.tar.gz: 9ef07017bee59e872f3553ab00c2477f4e48b98f7667f1a90fa63280cf9454ed
3
+ metadata.gz: 2e1adf3b48028bff09be12a45a992c4a0166654770be8552e4bc5a7d49be7555
4
+ data.tar.gz: 5692fbac3a9152fe49d58b3ecd2a70ada46c3213ed7affd39c89020b4810f352
5
5
  SHA512:
6
- metadata.gz: 63090fc561da814943591e38cb070ffacce6ea9a85104fbfd6a64850834f8ec0047ed3a6b5d9525aa2d1eb930d1883c07376a89f9e2add7859a362abafb8a407
7
- data.tar.gz: 82a690e38566ecec250d8bc74bdbd02581c46dbdf34749cfc017c5dc174321c98bc8634b4e98b5abd0285e447efcf359fb543f1a8da2ee25200145c17c471804
6
+ metadata.gz: 9d735873d9123ad62afdff85ab07039d7b918438776c21a7e6b4a44b17632b9f537cb43f79aaf01daff7e057972697aba060f365964369a25ae893b61adbc76c
7
+ data.tar.gz: 7b16477c9f74e8714b5ed02e466c0c3fed53da9edf1c6d893b528a3d874f614c86c28ebcef9f3a9e389fa289472f8fa2150d37a5bb411ffdf1c7f4fecb1e2454
data/CHANGELOG.md CHANGED
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.0] - 2026-03-29
11
+
12
+ ### Added
13
+
14
+ - CSV dialect support with predefined presets (`:excel`, `:excel_tab`, `:unix`) and custom dialects
15
+ - Date/time type coercions via `Processor#type` — built-in `:date` and `:datetime` types with optional format strings
16
+ - Streaming writer via `Writer.stream(io, headers:) { |w| w << row }` for incremental CSV output
17
+ - Dialect integration into `process()`, `to_hashes()`, `pluck()`, and `filter()` methods
18
+
19
+ ## [0.2.6] - 2026-03-26
20
+
21
+ ### Changed
22
+
23
+ - Add Sponsor badge and fix License link format in README
24
+
25
+ ## [0.2.5] - 2026-03-24
26
+
27
+ ### Changed
28
+ - Expand test coverage to 60+ examples covering edge cases and error paths
29
+
30
+ ## [0.2.4] - 2026-03-24
31
+
32
+ ### Fixed
33
+ - Align README one-liner with gemspec summary
34
+
10
35
  ## [0.2.3] - 2026-03-24
11
36
 
12
37
  ### Fixed
data/README.md CHANGED
@@ -2,9 +2,14 @@
2
2
 
3
3
  [![Tests](https://github.com/philiprehberger/rb-csv-kit/actions/workflows/ci.yml/badge.svg)](https://github.com/philiprehberger/rb-csv-kit/actions/workflows/ci.yml)
4
4
  [![Gem Version](https://badge.fury.io/rb/philiprehberger-csv_kit.svg)](https://rubygems.org/gems/philiprehberger-csv_kit)
5
+ [![GitHub release](https://img.shields.io/github/v/release/philiprehberger/rb-csv-kit)](https://github.com/philiprehberger/rb-csv-kit/releases)
6
+ [![Last updated](https://img.shields.io/github/last-commit/philiprehberger/rb-csv-kit)](https://github.com/philiprehberger/rb-csv-kit/commits/main)
5
7
  [![License](https://img.shields.io/github/license/philiprehberger/rb-csv-kit)](LICENSE)
8
+ [![Bug Reports](https://img.shields.io/github/issues/philiprehberger/rb-csv-kit/bug)](https://github.com/philiprehberger/rb-csv-kit/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
9
+ [![Feature Requests](https://img.shields.io/github/issues/philiprehberger/rb-csv-kit/enhancement)](https://github.com/philiprehberger/rb-csv-kit/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement)
10
+ [![Sponsor](https://img.shields.io/badge/sponsor-GitHub%20Sponsors-ec6cb9)](https://github.com/sponsors/philiprehberger)
6
11
 
7
- Streaming CSV processor with type coercion, validation, writing, and error recovery
12
+ Streaming CSV processor with type coercion and validation
8
13
 
9
14
  ## Requirements
10
15
 
@@ -28,26 +33,22 @@ gem install philiprehberger-csv_kit
28
33
 
29
34
  ```ruby
30
35
  require "philiprehberger/csv_kit"
31
- ```
32
-
33
- ### Quick Load
34
36
 
35
- ```ruby
36
- rows = Philiprehberger::CsvKit.to_hashes('data.csv')
37
+ rows = Philiprehberger::CsvKit.to_hashes("data.csv")
37
38
  # => [{name: "Alice", age: "30"}, ...]
38
39
  ```
39
40
 
40
41
  ### Pluck Columns
41
42
 
42
43
  ```ruby
43
- names = Philiprehberger::CsvKit.pluck('data.csv', :name, :city)
44
+ names = Philiprehberger::CsvKit.pluck("data.csv", :name, :city)
44
45
  # => [{name: "Alice", city: "Berlin"}, ...]
45
46
  ```
46
47
 
47
48
  ### Filter Rows
48
49
 
49
50
  ```ruby
50
- csv_string = Philiprehberger::CsvKit.filter('data.csv') do |row|
51
+ csv_string = Philiprehberger::CsvKit.filter("data.csv") do |row|
51
52
  row[:age].to_i >= 30
52
53
  end
53
54
  ```
@@ -55,70 +56,75 @@ end
55
56
  ### Streaming Processor
56
57
 
57
58
  ```ruby
58
- rows = Philiprehberger::CsvKit.process('data.csv') do |p|
59
+ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
59
60
  p.transform(:age) { |v| v.to_i }
60
61
  p.validate(:age) { |v| v.to_i.positive? }
61
- p.reject { |row| row[:city] == 'Unknown' }
62
+ p.reject { |row| row[:city] == "Unknown" }
62
63
  p.each { |row| puts row[:name] }
63
64
  end
64
65
  ```
65
66
 
67
+ ### Date/Time Type Coercions
68
+
69
+ ```ruby
70
+ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
71
+ p.type(:birthday, :date)
72
+ p.type(:created_at, :datetime, format: "%Y-%m-%dT%H:%M:%S")
73
+ end
74
+ ```
75
+
76
+ ### CSV Dialects
77
+
78
+ ```ruby
79
+ rows = Philiprehberger::CsvKit.to_hashes("data.csv", dialect: :excel)
80
+ rows = Philiprehberger::CsvKit.process("data.csv", dialect: { delimiter: ";", quote: "'" }) do |p|
81
+ p.transform(:age, &:to_i)
82
+ end
83
+ ```
84
+
66
85
  ### Writing CSV
67
86
 
68
87
  ```ruby
69
88
  writer = Philiprehberger::CsvKit::Writer.new(headers: [:name, :age])
70
89
  csv_string = writer.write([{ name: "Alice", age: 30 }, { name: "Bob", age: 25 }])
71
90
 
72
- # Write to a file
73
- File.open('output.csv', 'w') do |f|
91
+ File.open("output.csv", "w") do |f|
74
92
  writer.write_to([{ name: "Alice", age: 30 }], f)
75
93
  end
76
94
  ```
77
95
 
78
- ### Error Recovery
96
+ ### Streaming Writer
79
97
 
80
98
  ```ruby
81
- rows = Philiprehberger::CsvKit.process('data.csv') do |p|
82
- p.on_error { |row, err| :skip } # or :abort
83
- p.transform(:age) { |v| Integer(v) }
99
+ File.open("output.csv", "w") do |f|
100
+ Philiprehberger::CsvKit::Writer.stream(f, headers: [:name, :age]) do |w|
101
+ w << { name: "Alice", age: 30 }
102
+ w << { name: "Bob", age: 25 }
103
+ end
84
104
  end
85
105
  ```
86
106
 
87
- ### Max Errors
107
+ ### Error Recovery
88
108
 
89
109
  ```ruby
90
- processor = Philiprehberger::CsvKit::Processor.new('data.csv')
91
- processor.max_errors(10)
92
- processor.on_error { |row, err| :skip }
93
- processor.transform(:age) { |v| Integer(v) }
94
-
95
- begin
96
- processor.run
97
- rescue Philiprehberger::CsvKit::Error
98
- puts processor.errors.length # collected errors
110
+ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
111
+ p.on_error { |row, err| :skip }
112
+ p.transform(:age) { |v| Integer(v) }
99
113
  end
100
114
  ```
101
115
 
102
116
  ### Column Aliasing
103
117
 
104
118
  ```ruby
105
- rows = Philiprehberger::CsvKit.process('data.csv') do |p|
119
+ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
106
120
  p.rename(:raw_col, :clean_col)
107
121
  end
108
122
  ```
109
123
 
110
- ### Row Callbacks
111
-
112
- ```ruby
113
- rows = Philiprehberger::CsvKit.process('data.csv') do |p|
114
- p.after_each { |row| puts row.to_h }
115
- end
116
- ```
117
-
118
124
  ### Delimiter Detection
119
125
 
120
126
  ```ruby
121
- delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
127
+ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
122
128
  # => "\t"
123
129
  ```
124
130
 
@@ -126,12 +132,13 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
126
132
 
127
133
  | Method / Class | Description |
128
134
  |----------------|-------------|
129
- | `CsvKit.to_hashes(path)` | Load CSV into array of symbolized hashes |
130
- | `CsvKit.pluck(path, *keys)` | Extract specific columns |
131
- | `CsvKit.filter(path, &block)` | Filter rows, return CSV string |
132
- | `CsvKit.process(path_or_io, &block)` | Streaming DSL with transforms and validations |
135
+ | `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
136
+ | `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
137
+ | `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
138
+ | `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
133
139
  | `Processor#headers(*names)` | Override header names |
134
140
  | `Processor#transform(key, &block)` | Register column transform |
141
+ | `Processor#type(key, type, **opts)` | Register built-in type coercion (:integer, :float, :string, :date, :datetime) |
135
142
  | `Processor#validate(key, &block)` | Register column validation (skip invalid) |
136
143
  | `Processor#reject(&block)` | Reject rows matching predicate |
137
144
  | `Processor#each(&block)` | Callback for each processed row |
@@ -143,6 +150,8 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
143
150
  | `Writer.new(headers:)` | Create a CSV writer with given headers |
144
151
  | `Writer#write(rows)` | Generate CSV string from rows |
145
152
  | `Writer#write_to(rows, io)` | Write CSV to an IO object |
153
+ | `Writer.stream(io, headers:, dialect:)` | Stream CSV rows incrementally to an IO |
154
+ | `Dialect.new(name_or_hash)` | Create a dialect from preset or custom hash |
146
155
  | `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
147
156
  | `Row#[](key)` | Access value by symbol key |
148
157
  | `Row#to_h` | Convert row to plain hash |
@@ -155,6 +164,13 @@ bundle exec rspec
155
164
  bundle exec rubocop
156
165
  ```
157
166
 
167
+ ## Support
168
+
169
+ If you find this package useful, consider giving it a star on GitHub — it helps motivate continued maintenance and development.
170
+
171
+ [![LinkedIn](https://img.shields.io/badge/Philip%20Rehberger-LinkedIn-0A66C2?logo=linkedin)](https://www.linkedin.com/in/philiprehberger)
172
+ [![More packages](https://img.shields.io/badge/more-open%20source%20packages-blue)](https://philiprehberger.com/open-source-packages)
173
+
158
174
  ## License
159
175
 
160
- MIT
176
+ [MIT](LICENSE)
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Predefined and custom CSV dialects for controlling parsing and writing behavior.
    #
    # A dialect is a hash of options that gets merged into the options handed
    # to Ruby's CSV library. It is built either from a named preset or from a
    # custom hash that may use the friendlier aliases listed in OPTION_MAP.
    class Dialect
      # Named presets, keyed by dialect name, holding CSV option hashes.
      PRESETS = {
        excel: { col_sep: ',', row_sep: "\r\n", strip: true },
        excel_tab: { col_sep: "\t" },
        unix: { col_sep: ',', row_sep: "\n" }
      }.freeze

      # Friendly option names accepted in custom hashes and the CSV option
      # each one is translated to. Keys not listed here pass through as-is.
      OPTION_MAP = {
        delimiter: :col_sep,
        quote: :quote_char,
        line_ending: :row_sep
      }.freeze

      # @return [Hash{Symbol => Object}] resolved CSV options for this dialect
      attr_reader :options

      # Build a Dialect from a preset name or a custom options hash.
      #
      # @param name_or_hash [Symbol, String, Hash] preset name (:excel, :excel_tab, :unix,
      #   also accepted as a String) or a custom options hash with Symbol or String keys
      # @raise [ArgumentError] if the preset is unknown or the argument has an unsupported type
      # @return [Dialect]
      def initialize(name_or_hash)
        @options = resolve(name_or_hash)
      end

      # Merge dialect options into a base CSV options hash.
      # Dialect options win over entries in +base+; +base+ itself is not modified.
      #
      # @param base [Hash] base CSV options
      # @return [Hash] merged options
      def merge_into(base)
        base.merge(@options)
      end

      private

      # Turn the constructor argument into a CSV options hash.
      def resolve(name_or_hash)
        case name_or_hash
        when Symbol, String
          preset = PRESETS[name_or_hash.to_sym]
          raise ArgumentError, "Unknown dialect: #{name_or_hash}" unless preset

          # Copy so callers mutating #options never touch the shared preset.
          preset.dup
        when Hash
          normalize_hash(name_or_hash)
        else
          raise ArgumentError, "Dialect must be a Symbol or Hash, got #{name_or_hash.class}"
        end
      end

      # Symbolize keys and translate friendly aliases to CSV option names.
      # String keys (e.g. from a YAML/JSON config) are symbolized first so they
      # go through OPTION_MAP and can be splatted as keyword arguments later.
      def normalize_hash(hash)
        hash.each_with_object({}) do |(key, value), opts|
          name = key.respond_to?(:to_sym) ? key.to_sym : key
          opts[OPTION_MAP.fetch(name, name)] = value
        end
      end
    end
  end
end
@@ -7,8 +7,29 @@ module Philiprehberger
7
7
  include ErrorHandler
8
8
  include Callbacks
9
9
 
10
- def initialize(path_or_io)
10
# Built-in coercions, keyed by type name. Each entry is a lambda taking the
# raw cell value and an options hash and returning the converted value.
#
# :integer parses String values strictly as base 10, so zero-padded cells
# such as "010" become 10 (a bare Integer("010") reads them as octal 8) and
# "08" converts to 8 instead of raising. Radix-prefixed strings like "0x1A"
# are rejected with ArgumentError. Non-String values go through Integer()
# without a base because Integer() refuses an explicit base for them.
#
# :date and :datetime use strptime with opts[:format] when it is given and
# fall back to Date.parse / Time.parse otherwise.
TYPE_COERCIONS = {
  integer: ->(v, _opts) { v.is_a?(String) ? Integer(v, 10) : Integer(v) },
  float: ->(v, _opts) { Float(v) },
  string: ->(v, _opts) { v.to_s },
  date: ->(v, opts) { opts[:format] ? Date.strptime(v, opts[:format]) : Date.parse(v) },
  datetime: ->(v, opts) { opts[:format] ? Time.strptime(v, opts[:format]) : Time.parse(v) }
}.freeze
29
+
30
+ def initialize(path_or_io, dialect: nil)
11
31
  @path_or_io = path_or_io
32
+ @dialect = dialect ? Dialect.new(dialect) : nil
12
33
  @transforms = {}
13
34
  @validations = {}
14
35
  @reject_block = nil
@@ -28,6 +49,18 @@ module Philiprehberger
28
49
  @transforms[key] = block
29
50
  end
30
51
 
52
# Install one of the built-in coercions as the transform for a column.
# The coercion replaces any transform previously stored for +key+.
#
# @param key [Symbol] column name
# @param type_name [Symbol] a key of TYPE_COERCIONS (:integer, :float, :string, :date, :datetime)
# @param opts [Hash] extra options forwarded to the coercion (e.g. format: '%Y-%m-%d')
# @raise [ArgumentError] if +type_name+ is not a known type
def type(key, type_name, **opts)
  converter = TYPE_COERCIONS.fetch(type_name) do
    raise ArgumentError, "Unknown type: #{type_name}"
  end

  @transforms[key] = lambda { |value| converter.call(value, opts) }
end
63
+
31
64
  # Register a validation for a specific column.
32
65
  def validate(key, &block)
33
66
  @validations[key] = block
@@ -79,10 +112,13 @@ module Philiprehberger
79
112
  end
80
113
 
81
114
  def open_csv(&block)
115
+ csv_opts = { headers: true }
116
+ csv_opts = @dialect.merge_into(csv_opts) if @dialect
117
+
82
118
  if @path_or_io.is_a?(String)
83
- CSV.open(@path_or_io, headers: true, &block)
119
+ CSV.open(@path_or_io, **csv_opts, &block)
84
120
  else
85
- block.call(CSV.new(@path_or_io, headers: true))
121
+ block.call(CSV.new(@path_or_io, **csv_opts))
86
122
  end
87
123
  end
88
124
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module CsvKit
5
- VERSION = '0.2.3'
5
+ VERSION = '0.3.0'
6
6
  end
7
7
  end
@@ -9,6 +9,19 @@ module Philiprehberger
9
9
  @headers = headers.map(&:to_sym)
10
10
  end
11
11
 
12
# Stream CSV rows incrementally to an IO object.
#
# A StreamWriter is created for +io+ and yielded to the block; rows are
# appended to it with <<. The block is required and is checked first, so
# a call without one fails before a writer is created for +io+.
#
# @param io [IO] writable IO object
# @param headers [Array<Symbol, String>] column headers
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
# @yield [StreamWriter] writer that accepts rows via <<
# @raise [ArgumentError] if no block is given
# @return [IO] the IO object
def self.stream(io, headers:, dialect: nil)
  raise ArgumentError, 'Writer.stream requires a block' unless block_given?

  yield StreamWriter.new(io, headers: headers, dialect: dialect)
  io
end
24
+
12
25
  # Write rows to a CSV string.
13
26
  #
14
27
  # @param rows [Array<Hash, Array>] data rows
@@ -41,6 +54,37 @@ module Philiprehberger
41
54
 
42
55
  row
43
56
  end
57
+
58
# Incremental writer that streams rows to an IO object one at a time.
#
# The header row is written as soon as the writer is constructed; each
# subsequent << writes one data row through the underlying CSV object.
class StreamWriter
  # @param io [IO] writable IO object
  # @param headers [Array<Symbol, String>] column headers (stored as Symbols)
  # @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
  def initialize(io, headers:, dialect: nil)
    @headers = headers.map(&:to_sym)
    csv_opts = dialect ? Dialect.new(dialect).merge_into({}) : {}
    @csv = CSV.new(io, **csv_opts)
    @csv << @headers
  end

  # Append a single row to the CSV output.
  #
  # @param row [Hash, Array] a single data row; Hash rows are ordered by the
  #   headers and may be keyed by Symbols or Strings, other rows are written as given
  # @return [self]
  def <<(row)
    @csv << row_values(row)
    self
  end

  private

  # Order a Hash row by the configured headers; pass other rows through.
  def row_values(row)
    return row unless row.is_a?(Hash)

    @headers.map do |header|
      # Symbol key wins when present (even if its value is nil).
      next row[header] if row.key?(header)

      # Headers are symbolized, so also accept String-keyed rows (such as
      # CSV::Row#to_h output) instead of silently writing empty fields.
      row.fetch(header.to_s) { row[header] }
    end
  end
end
44
88
  end
45
89
  end
46
90
  end
@@ -1,7 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'csv'
4
+ require 'date'
5
+ require 'time'
4
6
  require_relative 'csv_kit/version'
7
+ require_relative 'csv_kit/dialect'
5
8
  require_relative 'csv_kit/detector'
6
9
  require_relative 'csv_kit/row'
7
10
  require_relative 'csv_kit/error_handler'
@@ -16,10 +19,11 @@ module Philiprehberger
16
19
  # Streaming DSL — yields a Processor for configuration, then executes.
17
20
  #
18
21
  # @param path_or_io [String, IO] file path or IO object
22
+ # @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
19
23
  # @yield [Processor] processor to configure transforms and validations
20
24
  # @return [Array<Row>] collected rows
21
- def self.process(path_or_io, &block)
22
- processor = Processor.new(path_or_io)
25
+ def self.process(path_or_io, dialect: nil, &block)
26
+ processor = Processor.new(path_or_io, dialect: dialect)
23
27
  block.call(processor)
24
28
  processor.run
25
29
  end
@@ -27,9 +31,12 @@ module Philiprehberger
27
31
  # Load an entire CSV into an array of symbolized hashes.
28
32
  #
29
33
  # @param path [String] file path
34
+ # @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
30
35
  # @return [Array<Hash{Symbol => String}>]
31
- def self.to_hashes(path)
32
- CSV.foreach(path, headers: true).map do |row|
36
+ def self.to_hashes(path, dialect: nil)
37
+ csv_opts = { headers: true }
38
+ csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
39
+ CSV.foreach(path, **csv_opts).map do |row|
33
40
  row.to_h.transform_keys(&:to_sym)
34
41
  end
35
42
  end
@@ -38,18 +45,20 @@ module Philiprehberger
38
45
  #
39
46
  # @param path [String] file path
40
47
  # @param keys [Array<Symbol>] column names to extract
48
+ # @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
41
49
  # @return [Array<Hash{Symbol => String}>]
42
- def self.pluck(path, *keys)
43
- to_hashes(path).map { |h| h.slice(*keys) }
50
+ def self.pluck(path, *keys, dialect: nil)
51
+ to_hashes(path, dialect: dialect).map { |h| h.slice(*keys) }
44
52
  end
45
53
 
46
54
  # Filter rows and return matching rows as a CSV string.
47
55
  #
48
56
  # @param path [String] file path
57
+ # @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
49
58
  # @yield [Hash{Symbol => String}] each row as a symbolized hash
50
59
  # @return [String] CSV string with headers
51
- def self.filter(path, &)
52
- rows = to_hashes(path).select(&)
60
+ def self.filter(path, dialect: nil, &)
61
+ rows = to_hashes(path, dialect: dialect).select(&)
53
62
  return '' if rows.empty?
54
63
 
55
64
  headers = rows.first.keys
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-csv_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-25 00:00:00.000000000 Z
11
+ date: 2026-03-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Streaming CSV processor with row-by-row transforms, validations, column
14
14
  plucking, filtering, writing, error recovery, and automatic delimiter detection.
@@ -24,6 +24,7 @@ files:
24
24
  - lib/philiprehberger/csv_kit.rb
25
25
  - lib/philiprehberger/csv_kit/callbacks.rb
26
26
  - lib/philiprehberger/csv_kit/detector.rb
27
+ - lib/philiprehberger/csv_kit/dialect.rb
27
28
  - lib/philiprehberger/csv_kit/error_handler.rb
28
29
  - lib/philiprehberger/csv_kit/processor.rb
29
30
  - lib/philiprehberger/csv_kit/row.rb