philiprehberger-csv_kit 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +31 -0
- data/lib/philiprehberger/csv_kit/row.rb +39 -0
- data/lib/philiprehberger/csv_kit/version.rb +1 -1
- data/lib/philiprehberger/csv_kit.rb +36 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7a49695d1e89645d30e0d325f7c7967db04b044f0cfe69ecf66cabfcc7e28470
|
|
4
|
+
data.tar.gz: 8a0cd35cd652e1893493ac4a0428251517c4a234cc0dca7b1addc9580b3688c6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 528761ab2269e586d1d01c20885b5113dd12fe89b94b266b4bcb7452224cedd19a626100218d77def4db8b4d087117717dd3a6738aa5fe54c4c05f20b7592b5c
|
|
7
|
+
data.tar.gz: 2838f1cb2c0bb9caa43015b2f8f07e3b6252f80d4ad376160619d2c3dc50c4f1fecb65d6d5c47f9e86ffa2537e6b9ace01623a266aa2bc9cfd0fd4c0e9977512
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.6.0] - 2026-04-15
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `CsvKit.find(path, dialect:, &block)` — return the first row matching a predicate (or `nil` if none match), stopping as soon as a match is found
|
|
14
|
+
|
|
15
|
+
## [0.5.0] - 2026-04-09
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- `CsvKit.each_hash(path, dialect:)` for streaming row-by-row iteration with constant memory; returns Enumerator if no block given
|
|
19
|
+
- `Row` now includes `Enumerable` with `keys`, `values`, `size`, `each`, and `merge` methods
|
|
20
|
+
|
|
10
21
|
## [0.4.0] - 2026-04-09
|
|
11
22
|
|
|
12
23
|
### Added
|
|
@@ -84,3 +95,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
84
95
|
- Auto-detect delimiter
|
|
85
96
|
- Type coercion and row validation
|
|
86
97
|
- Quick load and filtering convenience methods
|
|
98
|
+
|
|
99
|
+
[0.5.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.5.0
|
|
100
|
+
[0.4.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.4.0
|
|
101
|
+
[0.3.1]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.3.1
|
|
102
|
+
[0.3.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.3.0
|
|
103
|
+
[0.2.6]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.6
|
|
104
|
+
[0.2.5]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.5
|
|
105
|
+
[0.2.4]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.4
|
|
106
|
+
[0.2.3]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.3
|
|
107
|
+
[0.2.2]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.2
|
|
108
|
+
[0.2.1]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.1
|
|
109
|
+
[0.2.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.0
|
|
110
|
+
[0.1.2]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.1.2
|
|
111
|
+
[0.1.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.1.0
|
data/README.md
CHANGED
|
@@ -54,6 +54,30 @@ Philiprehberger::CsvKit.count("data.csv")
|
|
|
54
54
|
# => 1000
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
+
### Streaming Row-by-Row
|
|
58
|
+
|
|
59
|
+
Iterate rows with constant memory. Returns an `Enumerator` if no block is given:
|
|
60
|
+
|
|
61
|
+
```ruby
|
|
62
|
+
Philiprehberger::CsvKit.each_hash("large.csv") do |row|
|
|
63
|
+
puts row[:name]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Or compose with Enumerator methods:
|
|
67
|
+
adults = Philiprehberger::CsvKit.each_hash("data.csv")
|
|
68
|
+
.select { |r| r[:age].to_i >= 18 }
|
|
69
|
+
.first(10)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Find First Match
|
|
73
|
+
|
|
74
|
+
Return the first row that matches a predicate, streaming and stopping on the first hit:
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
user = Philiprehberger::CsvKit.find("users.csv") { |row| row[:email] == "a@b.com" }
|
|
78
|
+
# => {email: "a@b.com", name: "Alice"} or nil
|
|
79
|
+
```
|
|
80
|
+
|
|
57
81
|
### Filter Rows
|
|
58
82
|
|
|
59
83
|
```ruby
|
|
@@ -153,8 +177,10 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
|
153
177
|
| `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
|
|
154
178
|
| `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
|
|
155
179
|
| `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
|
|
180
|
+
| `CsvKit.find(path, dialect:, &block)` | Return the first row matching the predicate, or nil |
|
|
156
181
|
| `CsvKit.headers(path, dialect:)` | Return header row as array of symbols |
|
|
157
182
|
| `CsvKit.count(path, dialect:)` | Count data rows without loading into memory |
|
|
183
|
+
| `CsvKit.each_hash(path, dialect:, &block)` | Stream rows as symbolized hashes; returns Enumerator if no block |
|
|
158
184
|
| `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
|
|
159
185
|
| `Processor#headers(*names)` | Override header names |
|
|
160
186
|
| `Processor#transform(key, &block)` | Register column transform |
|
|
@@ -176,6 +202,11 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
|
176
202
|
| `Dialect.new(name_or_hash)` | Create a dialect from preset or custom hash |
|
|
177
203
|
| `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
|
|
178
204
|
| `Row#[](key)` | Access value by symbol key |
|
|
205
|
+
| `Row#keys` | Column names as array of symbols |
|
|
206
|
+
| `Row#values` | Column values as array |
|
|
207
|
+
| `Row#size` | Number of columns |
|
|
208
|
+
| `Row#each { \|k, v\| }` | Iterate key-value pairs (Enumerable) |
|
|
209
|
+
| `Row#merge(other)` | Return new Row with merged data |
|
|
179
210
|
| `Row#to_h` | Convert row to plain hash |
|
|
180
211
|
|
|
181
212
|
## Development
|
|
@@ -4,11 +4,50 @@ module Philiprehberger
|
|
|
4
4
|
module CsvKit
|
|
5
5
|
# Wraps a CSV row as a hash with symbolized keys.
|
|
6
6
|
class Row
|
|
7
|
+
include Enumerable
|
|
8
|
+
|
|
7
9
|
# @param data [Hash{Symbol => String}]
|
|
8
10
|
def initialize(data)
|
|
9
11
|
@data = data
|
|
10
12
|
end
|
|
11
13
|
|
|
14
|
+
# Iterate over key-value pairs.
|
|
15
|
+
#
|
|
16
|
+
# @yield [Symbol, Object] key and value
|
|
17
|
+
def each(&)
|
|
18
|
+
@data.each(&)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Return column names.
|
|
22
|
+
#
|
|
23
|
+
# @return [Array<Symbol>]
|
|
24
|
+
def keys
|
|
25
|
+
@data.keys
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Return column values.
|
|
29
|
+
#
|
|
30
|
+
# @return [Array<Object>]
|
|
31
|
+
def values
|
|
32
|
+
@data.values
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Return the number of columns.
|
|
36
|
+
#
|
|
37
|
+
# @return [Integer]
|
|
38
|
+
def size
|
|
39
|
+
@data.size
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Merge another hash or Row into this row, returning a new Row.
|
|
43
|
+
#
|
|
44
|
+
# @param other [Hash, Row] data to merge
|
|
45
|
+
# @return [Row]
|
|
46
|
+
def merge(other)
|
|
47
|
+
other_data = other.is_a?(Row) ? other.to_h : other
|
|
48
|
+
Row.new(@data.merge(other_data))
|
|
49
|
+
end
|
|
50
|
+
|
|
12
51
|
# Access a value by symbolized key.
|
|
13
52
|
#
|
|
14
53
|
# @param key [Symbol] column name
|
|
@@ -80,6 +80,42 @@ module Philiprehberger
|
|
|
80
80
|
n
|
|
81
81
|
end
|
|
82
82
|
|
|
83
|
+
# Stream rows one at a time as symbolized hashes with constant memory.
|
|
84
|
+
# Returns an Enumerator if no block is given.
|
|
85
|
+
#
|
|
86
|
+
# @param path [String] file path
|
|
87
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
88
|
+
# @yield [Hash{Symbol => String}] each row
|
|
89
|
+
# @return [Enumerator, nil]
|
|
90
|
+
def self.each_hash(path, dialect: nil, &block)
|
|
91
|
+
csv_opts = { headers: true }
|
|
92
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
93
|
+
|
|
94
|
+
enum = Enumerator.new do |yielder|
|
|
95
|
+
CSV.foreach(path, **csv_opts) do |row|
|
|
96
|
+
yielder.yield(row.to_h.transform_keys(&:to_sym))
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
block ? enum.each(&block) : enum
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Find the first row matching a predicate, streaming (stops as soon as a match is found).
|
|
104
|
+
#
|
|
105
|
+
# @param path [String] file path
|
|
106
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
107
|
+
# @yield [Hash{Symbol => String}] each row as a symbolized hash
|
|
108
|
+
# @return [Hash{Symbol => String}, nil] the first matching row or nil
|
|
109
|
+
def self.find(path, dialect: nil, &block)
|
|
110
|
+
csv_opts = { headers: true }
|
|
111
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
112
|
+
CSV.foreach(path, **csv_opts) do |row|
|
|
113
|
+
hash = row.to_h.transform_keys(&:to_sym)
|
|
114
|
+
return hash if block.call(hash)
|
|
115
|
+
end
|
|
116
|
+
nil
|
|
117
|
+
end
|
|
118
|
+
|
|
83
119
|
# Filter rows and return matching rows as a CSV string.
|
|
84
120
|
#
|
|
85
121
|
# @param path [String] file path
|
metadata
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-csv_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-09 00:00:00.000000000 Z
|
|
11
|
+
date: 2026-04-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Streaming CSV processor with row-by-row transforms, validations, column
|
|
14
|
-
plucking, filtering, writing, error recovery, and automatic delimiter detection.
|
|
14
|
+
plucking, streaming each_hash iteration, filtering, writing, error recovery, and
|
|
15
|
+
automatic delimiter detection.
|
|
15
16
|
email:
|
|
16
17
|
- me@philiprehberger.com
|
|
17
18
|
executables: []
|