philiprehberger-csv_kit 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/README.md +26 -0
- data/lib/philiprehberger/csv_kit/processor.rb +25 -0
- data/lib/philiprehberger/csv_kit/version.rb +1 -1
- data/lib/philiprehberger/csv_kit.rb +14 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2ad7f9f8f543aa7336129a60b131b397f7d46884dc63949aed97ffcdeea41a99
|
|
4
|
+
data.tar.gz: 623d9169efd50e7f1bf3894c7a6b7ffbc99c79085a444d633c280af727f0f8b6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aaee8ffddffaabf495a59a9e5b7b00d2ebfbcc73f06d73229f96c11184f16c7215203ead52e77c1bc109e29d8eb28e2b2b5e728c64d094bb35af953a5c62aad7
|
|
7
|
+
data.tar.gz: a310d1904161f42549cff7b2dfee711d2f483e6b2cb9ff5428267e9118bdc80130af7b701b47ad6e659c56dddb29607f7b6d5eea0af6dd0e667b207e0fc2e765
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.10.0] - 2026-05-07
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `CsvKit.transpose(path_or_io, dialect:)` — returns a column-oriented hash mapping each header to the array of values across all rows. The natural complement to `to_hashes` for column-wise operations.
|
|
14
|
+
|
|
15
|
+
## [0.9.0] - 2026-04-19
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- `Processor#default(key, value)` — fill nil or empty cells at `key` with a default value during transform; chains naturally with `type:` coercion
|
|
19
|
+
|
|
10
20
|
## [0.8.0] - 2026-04-17
|
|
11
21
|
|
|
12
22
|
### Added
|
|
@@ -107,7 +117,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
107
117
|
- Type coercion and row validation
|
|
108
118
|
- Quick load and filtering convenience methods
|
|
109
119
|
|
|
110
|
-
[Unreleased]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.8.0...HEAD
|
|
120
|
+
[Unreleased]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.10.0...HEAD
|
|
121
|
+
[0.10.0]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.9.0...v0.10.0
|
|
122
|
+
[0.9.0]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.8.0...v0.9.0
|
|
111
123
|
[0.8.0]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.7.0...v0.8.0
|
|
112
124
|
[0.7.0]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.6.0...v0.7.0
|
|
113
125
|
[0.6.0]: https://github.com/philiprehberger/rb-csv-kit/compare/v0.5.0...v0.6.0
|
data/README.md
CHANGED
|
@@ -106,6 +106,17 @@ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
|
106
106
|
end
|
|
107
107
|
```
|
|
108
108
|
|
|
109
|
+
### Default Values for Missing Cells
|
|
110
|
+
|
|
111
|
+
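Fill nil or empty-string cells with a default value before any `type` coercion or `transform` block runs. Defaults are applied only to rows that have already passed `validate` checks and the reject filter, so those checks still see the original (possibly empty) cell: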
|
|
112
|
+
|
|
113
|
+
```ruby
|
|
114
|
+
Philiprehberger::CsvKit.process("users.csv") do |p|
|
|
115
|
+
p.default(:country, "US")
|
|
116
|
+
p.type(:age, :integer)
|
|
117
|
+
end
|
|
118
|
+
```
|
|
119
|
+
|
|
109
120
|
### Date/Time Type Coercions
|
|
110
121
|
|
|
111
122
|
```ruby
|
|
@@ -194,11 +205,25 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
|
194
205
|
# => "\t"
|
|
195
206
|
```
|
|
196
207
|
|
|
208
|
+
### Column Transpose
|
|
209
|
+
|
|
210
|
+
```ruby
|
|
211
|
+
require 'philiprehberger/csv_kit'
|
|
212
|
+
|
|
213
|
+
# users.csv:
|
|
214
|
+
# name,age
|
|
215
|
+
# Alice,30
|
|
216
|
+
# Bob,25
|
|
217
|
+
Philiprehberger::CsvKit.transpose('users.csv')
|
|
218
|
+
# => { name: ['Alice', 'Bob'], age: ['30', '25'] }
|
|
219
|
+
```
|
|
220
|
+
|
|
197
221
|
## API
|
|
198
222
|
|
|
199
223
|
| Method / Class | Description |
|
|
200
224
|
|----------------|-------------|
|
|
201
225
|
| `CsvKit.to_hashes(path_or_io, dialect:)` | Load CSV into array of symbolized hashes |
|
|
226
|
+
| `CsvKit.transpose(path_or_io, dialect:)` | Return a column-oriented hash mapping each header to its column of values |
|
|
202
227
|
| `CsvKit.to_csv(rows, headers:, dialect:)` | Serialize an array of hashes to a CSV string |
|
|
203
228
|
| `CsvKit.sample(path_or_io, n, dialect:)` | Return n randomly sampled rows using reservoir sampling (Algorithm R) |
|
|
204
229
|
| `CsvKit.pluck(path_or_io, *keys, dialect:)` | Extract specific columns |
|
|
@@ -211,6 +236,7 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
|
211
236
|
| `Processor#headers(*names)` | Override header names |
|
|
212
237
|
| `Processor#transform(key, &block)` | Register column transform |
|
|
213
238
|
| `Processor#type(key, type, **opts)` | Register built-in type coercion (:integer, :float, :string, :date, :datetime) |
|
|
239
|
+
| `Processor#default(key, value)` | Fill nil or empty cells at `key` with `value` (runs before `type` coercion) |
|
|
214
240
|
| `Processor#validate(key, &block)` | Register column validation (skip invalid) |
|
|
215
241
|
| `Processor#skip(n)` | Skip the first N data rows |
|
|
216
242
|
| `Processor#limit(n)` | Stop after processing N rows |
|
|
data/lib/philiprehberger/csv_kit/processor.rb
CHANGED
|
@@ -31,6 +31,7 @@ module Philiprehberger
|
|
|
31
31
|
@path_or_io = path_or_io
|
|
32
32
|
@dialect = dialect ? Dialect.new(dialect) : nil
|
|
33
33
|
@transforms = {}
|
|
34
|
+
@defaults = {}
|
|
34
35
|
@validations = {}
|
|
35
36
|
@reject_block = nil
|
|
36
37
|
@each_block = nil
|
|
@@ -63,6 +64,22 @@ module Philiprehberger
|
|
|
63
64
|
@transforms[key] = ->(v) { coercion.call(v, opts) }
|
|
64
65
|
end
|
|
65
66
|
|
|
67
|
+
# Register a default value for a column.
|
|
68
|
+
#
|
|
69
|
+
# Cells where the value is `nil` or an empty string are replaced with
|
|
70
|
+
# the provided default during transform. Defaults run BEFORE `type`
|
|
71
|
+
# coercions and `transform` blocks, so callers can default a missing
|
|
72
|
+
# cell to a string and then coerce it (e.g. default to "0" then cast
|
|
73
|
+
# to :integer). Defaults are applied after validation and reject filtering.
|
|
74
|
+
#
|
|
75
|
+
# @param key [Symbol] column name
|
|
76
|
+
# @param value [Object] value to use when the cell is nil or empty
|
|
77
|
+
# @return [self]
|
|
78
|
+
def default(key, value)
|
|
79
|
+
@defaults[key] = value
|
|
80
|
+
self
|
|
81
|
+
end
|
|
82
|
+
|
|
66
83
|
# Register a validation for a specific column.
|
|
67
84
|
def validate(key, &block)
|
|
68
85
|
@validations[key] = block
|
|
@@ -122,6 +139,7 @@ module Philiprehberger
|
|
|
122
139
|
return unless valid?(row)
|
|
123
140
|
return if rejected?(row)
|
|
124
141
|
|
|
142
|
+
apply_defaults!(row)
|
|
125
143
|
apply_transforms!(row)
|
|
126
144
|
apply_renames!(row)
|
|
127
145
|
@each_block&.call(row)
|
|
@@ -165,6 +183,13 @@ module Philiprehberger
|
|
|
165
183
|
@reject_block&.call(row) || false
|
|
166
184
|
end
|
|
167
185
|
|
|
186
|
+
def apply_defaults!(row)
|
|
187
|
+
@defaults.each do |key, value|
|
|
188
|
+
current = row[key]
|
|
189
|
+
row[key] = value if current.nil? || current.to_s.empty?
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
|
|
168
193
|
def apply_transforms!(row)
|
|
169
194
|
@transforms.each { |key, blk| row[key] = blk.call(row[key]) }
|
|
170
195
|
end
|
|
data/lib/philiprehberger/csv_kit.rb
CHANGED
|
@@ -42,6 +42,20 @@ module Philiprehberger
|
|
|
42
42
|
rows
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
+
# Read a CSV and return a hash mapping each header to the column of values.
|
|
46
|
+
#
|
|
47
|
+
# @param path_or_io [String, IO] the path or IO to read from
|
|
48
|
+
# @param dialect [Hash, nil] optional CSV dialect overrides
|
|
49
|
+
# @return [Hash{Symbol => Array}] column-oriented view of the CSV
|
|
50
|
+
def self.transpose(path_or_io, dialect: nil)
|
|
51
|
+
rows = to_hashes(path_or_io, dialect: dialect)
|
|
52
|
+
return {} if rows.empty?
|
|
53
|
+
|
|
54
|
+
rows.first.keys.to_h do |key|
|
|
55
|
+
[key, rows.map { |row| row[key] }]
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
45
59
|
# Serialize an array of hashes to a CSV string.
|
|
46
60
|
#
|
|
47
61
|
# If headers is omitted, the keys of the first hash are used. Empty input
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-csv_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.8.0
|
|
4
|
+
version: 0.10.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-17 00:00:00.000000000 Z
|
|
11
|
+
date: 2026-05-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Streaming CSV processor with row-by-row transforms, validations, column
|
|
14
14
|
plucking, streaming each_hash iteration, filtering, writing, error recovery, and
|