philiprehberger-csv_kit 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +57 -41
- data/lib/philiprehberger/csv_kit/dialect.rb +61 -0
- data/lib/philiprehberger/csv_kit/processor.rb +39 -3
- data/lib/philiprehberger/csv_kit/version.rb +1 -1
- data/lib/philiprehberger/csv_kit/writer.rb +44 -0
- data/lib/philiprehberger/csv_kit.rb +17 -8
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2e1adf3b48028bff09be12a45a992c4a0166654770be8552e4bc5a7d49be7555
|
|
4
|
+
data.tar.gz: 5692fbac3a9152fe49d58b3ecd2a70ada46c3213ed7affd39c89020b4810f352
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9d735873d9123ad62afdff85ab07039d7b918438776c21a7e6b4a44b17632b9f537cb43f79aaf01daff7e057972697aba060f365964369a25ae893b61adbc76c
|
|
7
|
+
data.tar.gz: 7b16477c9f74e8714b5ed02e466c0c3fed53da9edf1c6d893b528a3d874f614c86c28ebcef9f3a9e389fa289472f8fa2150d37a5bb411ffdf1c7f4fecb1e2454
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.3.0] - 2026-03-29
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- CSV dialect support with predefined presets (`:excel`, `:excel_tab`, `:unix`) and custom dialects
|
|
15
|
+
- Date/time type coercions via `Processor#type` — built-in `:date` and `:datetime` types with optional format strings
|
|
16
|
+
- Streaming writer via `Writer.stream(io, headers:) { |w| w << row }` for incremental CSV output
|
|
17
|
+
- Dialect integration into `process()`, `to_hashes()`, `pluck()`, and `filter()` methods
|
|
18
|
+
|
|
19
|
+
## [0.2.6] - 2026-03-26
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Add Sponsor badge and fix License link format in README
|
|
24
|
+
|
|
25
|
+
## [0.2.5] - 2026-03-24
|
|
26
|
+
|
|
27
|
+
### Changed
|
|
28
|
+
- Expand test coverage to 60+ examples covering edge cases and error paths
|
|
29
|
+
|
|
30
|
+
## [0.2.4] - 2026-03-24
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
- Align README one-liner with gemspec summary
|
|
34
|
+
|
|
10
35
|
## [0.2.3] - 2026-03-24
|
|
11
36
|
|
|
12
37
|
### Fixed
|
data/README.md
CHANGED
|
@@ -2,9 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/philiprehberger/rb-csv-kit/actions/workflows/ci.yml)
|
|
4
4
|
[](https://rubygems.org/gems/philiprehberger-csv_kit)
|
|
5
|
+
[](https://github.com/philiprehberger/rb-csv-kit/releases)
|
|
6
|
+
[](https://github.com/philiprehberger/rb-csv-kit/commits/main)
|
|
5
7
|
[](LICENSE)
|
|
8
|
+
[](https://github.com/philiprehberger/rb-csv-kit/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
|
|
9
|
+
[](https://github.com/philiprehberger/rb-csv-kit/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement)
|
|
10
|
+
[](https://github.com/sponsors/philiprehberger)
|
|
6
11
|
|
|
7
|
-
Streaming CSV processor with type coercion
|
|
12
|
+
Streaming CSV processor with type coercion and validation
|
|
8
13
|
|
|
9
14
|
## Requirements
|
|
10
15
|
|
|
@@ -28,26 +33,22 @@ gem install philiprehberger-csv_kit
|
|
|
28
33
|
|
|
29
34
|
```ruby
|
|
30
35
|
require "philiprehberger/csv_kit"
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
### Quick Load
|
|
34
36
|
|
|
35
|
-
|
|
36
|
-
rows = Philiprehberger::CsvKit.to_hashes('data.csv')
|
|
37
|
+
rows = Philiprehberger::CsvKit.to_hashes("data.csv")
|
|
37
38
|
# => [{name: "Alice", age: "30"}, ...]
|
|
38
39
|
```
|
|
39
40
|
|
|
40
41
|
### Pluck Columns
|
|
41
42
|
|
|
42
43
|
```ruby
|
|
43
|
-
names = Philiprehberger::CsvKit.pluck('data.csv', :name, :city)
|
|
44
|
+
names = Philiprehberger::CsvKit.pluck("data.csv", :name, :city)
|
|
44
45
|
# => [{name: "Alice", city: "Berlin"}, ...]
|
|
45
46
|
```
|
|
46
47
|
|
|
47
48
|
### Filter Rows
|
|
48
49
|
|
|
49
50
|
```ruby
|
|
50
|
-
csv_string = Philiprehberger::CsvKit.filter('data.csv') do |row|
|
|
51
|
+
csv_string = Philiprehberger::CsvKit.filter("data.csv") do |row|
|
|
51
52
|
row[:age].to_i >= 30
|
|
52
53
|
end
|
|
53
54
|
```
|
|
@@ -55,70 +56,75 @@ end
|
|
|
55
56
|
### Streaming Processor
|
|
56
57
|
|
|
57
58
|
```ruby
|
|
58
|
-
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
59
|
+
rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
59
60
|
p.transform(:age) { |v| v.to_i }
|
|
60
61
|
p.validate(:age) { |v| v.to_i.positive? }
|
|
61
|
-
p.reject { |row| row[:city] == 'Unknown' }
|
|
62
|
+
p.reject { |row| row[:city] == "Unknown" }
|
|
62
63
|
p.each { |row| puts row[:name] }
|
|
63
64
|
end
|
|
64
65
|
```
|
|
65
66
|
|
|
67
|
+
### Date/Time Type Coercions
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
71
|
+
p.type(:birthday, :date)
|
|
72
|
+
p.type(:created_at, :datetime, format: "%Y-%m-%dT%H:%M:%S")
|
|
73
|
+
end
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### CSV Dialects
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
rows = Philiprehberger::CsvKit.to_hashes("data.csv", dialect: :excel)
|
|
80
|
+
rows = Philiprehberger::CsvKit.process("data.csv", dialect: { delimiter: ";", quote: "'" }) do |p|
|
|
81
|
+
p.transform(:age, &:to_i)
|
|
82
|
+
end
|
|
83
|
+
```
|
|
84
|
+
|
|
66
85
|
### Writing CSV
|
|
67
86
|
|
|
68
87
|
```ruby
|
|
69
88
|
writer = Philiprehberger::CsvKit::Writer.new(headers: [:name, :age])
|
|
70
89
|
csv_string = writer.write([{ name: "Alice", age: 30 }, { name: "Bob", age: 25 }])
|
|
71
90
|
|
|
72
|
-
|
|
73
|
-
File.open('output.csv', 'w') do |f|
|
|
91
|
+
File.open("output.csv", "w") do |f|
|
|
74
92
|
writer.write_to([{ name: "Alice", age: 30 }], f)
|
|
75
93
|
end
|
|
76
94
|
```
|
|
77
95
|
|
|
78
|
-
###
|
|
96
|
+
### Streaming Writer
|
|
79
97
|
|
|
80
98
|
```ruby
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
99
|
+
File.open("output.csv", "w") do |f|
|
|
100
|
+
Philiprehberger::CsvKit::Writer.stream(f, headers: [:name, :age]) do |w|
|
|
101
|
+
w << { name: "Alice", age: 30 }
|
|
102
|
+
w << { name: "Bob", age: 25 }
|
|
103
|
+
end
|
|
84
104
|
end
|
|
85
105
|
```
|
|
86
106
|
|
|
87
|
-
###
|
|
107
|
+
### Error Recovery
|
|
88
108
|
|
|
89
109
|
```ruby
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
processor.transform(:age) { |v| Integer(v) }
|
|
94
|
-
|
|
95
|
-
begin
|
|
96
|
-
processor.run
|
|
97
|
-
rescue Philiprehberger::CsvKit::Error
|
|
98
|
-
puts processor.errors.length # collected errors
|
|
110
|
+
rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
111
|
+
p.on_error { |row, err| :skip }
|
|
112
|
+
p.transform(:age) { |v| Integer(v) }
|
|
99
113
|
end
|
|
100
114
|
```
|
|
101
115
|
|
|
102
116
|
### Column Aliasing
|
|
103
117
|
|
|
104
118
|
```ruby
|
|
105
|
-
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
119
|
+
rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
106
120
|
p.rename(:raw_col, :clean_col)
|
|
107
121
|
end
|
|
108
122
|
```
|
|
109
123
|
|
|
110
|
-
### Row Callbacks
|
|
111
|
-
|
|
112
|
-
```ruby
|
|
113
|
-
rows = Philiprehberger::CsvKit.process('data.csv') do |p|
|
|
114
|
-
p.after_each { |row| puts row.to_h }
|
|
115
|
-
end
|
|
116
|
-
```
|
|
117
|
-
|
|
118
124
|
### Delimiter Detection
|
|
119
125
|
|
|
120
126
|
```ruby
|
|
121
|
-
delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
|
|
127
|
+
delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
122
128
|
# => "\t"
|
|
123
129
|
```
|
|
124
130
|
|
|
@@ -126,12 +132,13 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
|
|
|
126
132
|
|
|
127
133
|
| Method / Class | Description |
|
|
128
134
|
|----------------|-------------|
|
|
129
|
-
| `CsvKit.to_hashes(path)` | Load CSV into array of symbolized hashes |
|
|
130
|
-
| `CsvKit.pluck(path, *keys)` | Extract specific columns |
|
|
131
|
-
| `CsvKit.filter(path, &block)` | Filter rows, return CSV string |
|
|
132
|
-
| `CsvKit.process(path_or_io, &block)` | Streaming DSL with transforms and validations |
|
|
135
|
+
| `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
|
|
136
|
+
| `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
|
|
137
|
+
| `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
|
|
138
|
+
| `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
|
|
133
139
|
| `Processor#headers(*names)` | Override header names |
|
|
134
140
|
| `Processor#transform(key, &block)` | Register column transform |
|
|
141
|
+
| `Processor#type(key, type, **opts)` | Register built-in type coercion (:integer, :float, :string, :date, :datetime) |
|
|
135
142
|
| `Processor#validate(key, &block)` | Register column validation (skip invalid) |
|
|
136
143
|
| `Processor#reject(&block)` | Reject rows matching predicate |
|
|
137
144
|
| `Processor#each(&block)` | Callback for each processed row |
|
|
@@ -143,6 +150,8 @@ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
|
|
|
143
150
|
| `Writer.new(headers:)` | Create a CSV writer with given headers |
|
|
144
151
|
| `Writer#write(rows)` | Generate CSV string from rows |
|
|
145
152
|
| `Writer#write_to(rows, io)` | Write CSV to an IO object |
|
|
153
|
+
| `Writer.stream(io, headers:, dialect:)` | Stream CSV rows incrementally to an IO |
|
|
154
|
+
| `Dialect.new(name_or_hash)` | Create a dialect from preset or custom hash |
|
|
146
155
|
| `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
|
|
147
156
|
| `Row#[](key)` | Access value by symbol key |
|
|
148
157
|
| `Row#to_h` | Convert row to plain hash |
|
|
@@ -155,6 +164,13 @@ bundle exec rspec
|
|
|
155
164
|
bundle exec rubocop
|
|
156
165
|
```
|
|
157
166
|
|
|
167
|
+
## Support
|
|
168
|
+
|
|
169
|
+
If you find this package useful, consider giving it a star on GitHub — it helps motivate continued maintenance and development.
|
|
170
|
+
|
|
171
|
+
[](https://www.linkedin.com/in/philiprehberger)
|
|
172
|
+
[](https://philiprehberger.com/open-source-packages)
|
|
173
|
+
|
|
158
174
|
## License
|
|
159
175
|
|
|
160
|
-
MIT
|
|
176
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Philiprehberger
|
|
4
|
+
module CsvKit
|
|
5
|
+
# Predefined and custom CSV dialects for controlling parsing and writing behavior.
|
|
6
|
+
class Dialect
|
|
7
|
+
PRESETS = {
|
|
8
|
+
excel: { col_sep: ',', row_sep: "\r\n", strip: true },
|
|
9
|
+
excel_tab: { col_sep: "\t" },
|
|
10
|
+
unix: { col_sep: ',', row_sep: "\n" }
|
|
11
|
+
}.freeze
|
|
12
|
+
|
|
13
|
+
OPTION_MAP = {
|
|
14
|
+
delimiter: :col_sep,
|
|
15
|
+
quote: :quote_char,
|
|
16
|
+
line_ending: :row_sep
|
|
17
|
+
}.freeze
|
|
18
|
+
|
|
19
|
+
attr_reader :options
|
|
20
|
+
|
|
21
|
+
# Build a Dialect from a preset name or a custom options hash.
|
|
22
|
+
#
|
|
23
|
+
# @param name_or_hash [Symbol, Hash] preset name (:excel, :excel_tab, :unix) or custom hash
|
|
24
|
+
# @return [Dialect]
|
|
25
|
+
def initialize(name_or_hash)
|
|
26
|
+
@options = resolve(name_or_hash)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Merge dialect options into a base CSV options hash.
|
|
30
|
+
#
|
|
31
|
+
# @param base [Hash] base CSV options
|
|
32
|
+
# @return [Hash] merged options
|
|
33
|
+
def merge_into(base)
|
|
34
|
+
base.merge(@options)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
private
|
|
38
|
+
|
|
39
|
+
def resolve(name_or_hash)
|
|
40
|
+
case name_or_hash
|
|
41
|
+
when Symbol
|
|
42
|
+
preset = PRESETS[name_or_hash]
|
|
43
|
+
raise ArgumentError, "Unknown dialect: #{name_or_hash}" unless preset
|
|
44
|
+
|
|
45
|
+
preset.dup
|
|
46
|
+
when Hash
|
|
47
|
+
normalize_hash(name_or_hash)
|
|
48
|
+
else
|
|
49
|
+
raise ArgumentError, "Dialect must be a Symbol or Hash, got #{name_or_hash.class}"
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def normalize_hash(hash)
|
|
54
|
+
hash.each_with_object({}) do |(key, value), opts|
|
|
55
|
+
csv_key = OPTION_MAP.fetch(key, key)
|
|
56
|
+
opts[csv_key] = value
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -7,8 +7,29 @@ module Philiprehberger
|
|
|
7
7
|
include ErrorHandler
|
|
8
8
|
include Callbacks
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
TYPE_COERCIONS = {
|
|
11
|
+
integer: ->(v, _opts) { Integer(v) },
|
|
12
|
+
float: ->(v, _opts) { Float(v) },
|
|
13
|
+
string: ->(v, _opts) { v.to_s },
|
|
14
|
+
date: lambda { |v, opts|
|
|
15
|
+
if opts[:format]
|
|
16
|
+
Date.strptime(v, opts[:format])
|
|
17
|
+
else
|
|
18
|
+
Date.parse(v)
|
|
19
|
+
end
|
|
20
|
+
},
|
|
21
|
+
datetime: lambda { |v, opts|
|
|
22
|
+
if opts[:format]
|
|
23
|
+
Time.strptime(v, opts[:format])
|
|
24
|
+
else
|
|
25
|
+
Time.parse(v)
|
|
26
|
+
end
|
|
27
|
+
}
|
|
28
|
+
}.freeze
|
|
29
|
+
|
|
30
|
+
def initialize(path_or_io, dialect: nil)
|
|
11
31
|
@path_or_io = path_or_io
|
|
32
|
+
@dialect = dialect ? Dialect.new(dialect) : nil
|
|
12
33
|
@transforms = {}
|
|
13
34
|
@validations = {}
|
|
14
35
|
@reject_block = nil
|
|
@@ -28,6 +49,18 @@ module Philiprehberger
|
|
|
28
49
|
@transforms[key] = block
|
|
29
50
|
end
|
|
30
51
|
|
|
52
|
+
# Register a built-in type coercion for a column.
|
|
53
|
+
#
|
|
54
|
+
# @param key [Symbol] column name
|
|
55
|
+
# @param type_name [Symbol] one of :integer, :float, :string, :date, :datetime
|
|
56
|
+
# @param opts [Hash] additional options (e.g. format: '%Y-%m-%d')
|
|
57
|
+
def type(key, type_name, **opts)
|
|
58
|
+
coercion = TYPE_COERCIONS[type_name]
|
|
59
|
+
raise ArgumentError, "Unknown type: #{type_name}" unless coercion
|
|
60
|
+
|
|
61
|
+
@transforms[key] = ->(v) { coercion.call(v, opts) }
|
|
62
|
+
end
|
|
63
|
+
|
|
31
64
|
# Register a validation for a specific column.
|
|
32
65
|
def validate(key, &block)
|
|
33
66
|
@validations[key] = block
|
|
@@ -79,10 +112,13 @@ module Philiprehberger
|
|
|
79
112
|
end
|
|
80
113
|
|
|
81
114
|
def open_csv(&block)
|
|
115
|
+
csv_opts = { headers: true }
|
|
116
|
+
csv_opts = @dialect.merge_into(csv_opts) if @dialect
|
|
117
|
+
|
|
82
118
|
if @path_or_io.is_a?(String)
|
|
83
|
-
CSV.open(@path_or_io,
|
|
119
|
+
CSV.open(@path_or_io, **csv_opts, &block)
|
|
84
120
|
else
|
|
85
|
-
block.call(CSV.new(@path_or_io,
|
|
121
|
+
block.call(CSV.new(@path_or_io, **csv_opts))
|
|
86
122
|
end
|
|
87
123
|
end
|
|
88
124
|
|
|
@@ -9,6 +9,19 @@ module Philiprehberger
|
|
|
9
9
|
@headers = headers.map(&:to_sym)
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
+
# Stream CSV rows incrementally to an IO object without buffering.
|
|
13
|
+
#
|
|
14
|
+
# @param io [IO] writable IO object
|
|
15
|
+
# @param headers [Array<Symbol, String>] column headers
|
|
16
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
17
|
+
# @yield [StreamWriter] writer that accepts rows via <<
|
|
18
|
+
# @return [IO] the IO object
|
|
19
|
+
def self.stream(io, headers:, dialect: nil, &block)
|
|
20
|
+
writer = StreamWriter.new(io, headers: headers, dialect: dialect)
|
|
21
|
+
block.call(writer)
|
|
22
|
+
io
|
|
23
|
+
end
|
|
24
|
+
|
|
12
25
|
# Write rows to a CSV string.
|
|
13
26
|
#
|
|
14
27
|
# @param rows [Array<Hash, Array>] data rows
|
|
@@ -41,6 +54,37 @@ module Philiprehberger
|
|
|
41
54
|
|
|
42
55
|
row
|
|
43
56
|
end
|
|
57
|
+
|
|
58
|
+
# Incremental writer that streams rows to an IO object one at a time.
|
|
59
|
+
class StreamWriter
|
|
60
|
+
# @param io [IO] writable IO object
|
|
61
|
+
# @param headers [Array<Symbol, String>] column headers
|
|
62
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
63
|
+
def initialize(io, headers:, dialect: nil)
|
|
64
|
+
@headers = headers.map(&:to_sym)
|
|
65
|
+
csv_opts = {}
|
|
66
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
67
|
+
@csv = CSV.new(io, **csv_opts)
|
|
68
|
+
@csv << @headers
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Append a single row to the CSV output.
|
|
72
|
+
#
|
|
73
|
+
# @param row [Hash, Array] a single data row
|
|
74
|
+
# @return [self]
|
|
75
|
+
def <<(row)
|
|
76
|
+
@csv << row_values(row)
|
|
77
|
+
self
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
def row_values(row)
|
|
83
|
+
return @headers.map { |h| row[h] } if row.is_a?(Hash)
|
|
84
|
+
|
|
85
|
+
row
|
|
86
|
+
end
|
|
87
|
+
end
|
|
44
88
|
end
|
|
45
89
|
end
|
|
46
90
|
end
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'csv'
|
|
4
|
+
require 'date'
|
|
5
|
+
require 'time'
|
|
4
6
|
require_relative 'csv_kit/version'
|
|
7
|
+
require_relative 'csv_kit/dialect'
|
|
5
8
|
require_relative 'csv_kit/detector'
|
|
6
9
|
require_relative 'csv_kit/row'
|
|
7
10
|
require_relative 'csv_kit/error_handler'
|
|
@@ -16,10 +19,11 @@ module Philiprehberger
|
|
|
16
19
|
# Streaming DSL — yields a Processor for configuration, then executes.
|
|
17
20
|
#
|
|
18
21
|
# @param path_or_io [String, IO] file path or IO object
|
|
22
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
19
23
|
# @yield [Processor] processor to configure transforms and validations
|
|
20
24
|
# @return [Array<Row>] collected rows
|
|
21
|
-
def self.process(path_or_io, &block)
|
|
22
|
-
processor = Processor.new(path_or_io)
|
|
25
|
+
def self.process(path_or_io, dialect: nil, &block)
|
|
26
|
+
processor = Processor.new(path_or_io, dialect: dialect)
|
|
23
27
|
block.call(processor)
|
|
24
28
|
processor.run
|
|
25
29
|
end
|
|
@@ -27,9 +31,12 @@ module Philiprehberger
|
|
|
27
31
|
# Load an entire CSV into an array of symbolized hashes.
|
|
28
32
|
#
|
|
29
33
|
# @param path [String] file path
|
|
34
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
30
35
|
# @return [Array<Hash{Symbol => String}>]
|
|
31
|
-
def self.to_hashes(path)
|
|
32
|
-
|
|
36
|
+
def self.to_hashes(path, dialect: nil)
|
|
37
|
+
csv_opts = { headers: true }
|
|
38
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
39
|
+
CSV.foreach(path, **csv_opts).map do |row|
|
|
33
40
|
row.to_h.transform_keys(&:to_sym)
|
|
34
41
|
end
|
|
35
42
|
end
|
|
@@ -38,18 +45,20 @@ module Philiprehberger
|
|
|
38
45
|
#
|
|
39
46
|
# @param path [String] file path
|
|
40
47
|
# @param keys [Array<Symbol>] column names to extract
|
|
48
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
41
49
|
# @return [Array<Hash{Symbol => String}>]
|
|
42
|
-
def self.pluck(path, *keys)
|
|
43
|
-
to_hashes(path).map { |h| h.slice(*keys) }
|
|
50
|
+
def self.pluck(path, *keys, dialect: nil)
|
|
51
|
+
to_hashes(path, dialect: dialect).map { |h| h.slice(*keys) }
|
|
44
52
|
end
|
|
45
53
|
|
|
46
54
|
# Filter rows and return matching rows as a CSV string.
|
|
47
55
|
#
|
|
48
56
|
# @param path [String] file path
|
|
57
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
49
58
|
# @yield [Hash{Symbol => String}] each row as a symbolized hash
|
|
50
59
|
# @return [String] CSV string with headers
|
|
51
|
-
def self.filter(path, &)
|
|
52
|
-
rows = to_hashes(path).select(&)
|
|
60
|
+
def self.filter(path, dialect: nil, &)
|
|
61
|
+
rows = to_hashes(path, dialect: dialect).select(&)
|
|
53
62
|
return '' if rows.empty?
|
|
54
63
|
|
|
55
64
|
headers = rows.first.keys
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-csv_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.3
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Streaming CSV processor with row-by-row transforms, validations, column
|
|
14
14
|
plucking, filtering, writing, error recovery, and automatic delimiter detection.
|
|
@@ -24,6 +24,7 @@ files:
|
|
|
24
24
|
- lib/philiprehberger/csv_kit.rb
|
|
25
25
|
- lib/philiprehberger/csv_kit/callbacks.rb
|
|
26
26
|
- lib/philiprehberger/csv_kit/detector.rb
|
|
27
|
+
- lib/philiprehberger/csv_kit/dialect.rb
|
|
27
28
|
- lib/philiprehberger/csv_kit/error_handler.rb
|
|
28
29
|
- lib/philiprehberger/csv_kit/processor.rb
|
|
29
30
|
- lib/philiprehberger/csv_kit/row.rb
|