philiprehberger-csv_kit 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a31a3bf217dace4e2ec6ba7214191c13a4c6123112f660b59faa3bf5819c1ea9
4
- data.tar.gz: e1f0a82cd1f02cae5b80c9cd78426b7fec741facd8504f15fa02922c8dd4c489
3
+ metadata.gz: 7a49695d1e89645d30e0d325f7c7967db04b044f0cfe69ecf66cabfcc7e28470
4
+ data.tar.gz: 8a0cd35cd652e1893493ac4a0428251517c4a234cc0dca7b1addc9580b3688c6
5
5
  SHA512:
6
- metadata.gz: cbe2e1d0ef8ae1294e0fa4d4b51ddf4b614afb3cc2b2b943af7d51969f87f181faf60961da29b2661096865f67ddde39245a617adb6c6f25f130a85cf625ad7b
7
- data.tar.gz: e519bc86bf1a6c6d35ff992e32dd41f6ab7ca89fbcaf2a9eac24e3dac3a9848cbe9b0ceac337ef9fb9cb1a2b2684aa2d5ac4a58d8e61eed642a63accdb14fd67
6
+ metadata.gz: 528761ab2269e586d1d01c20885b5113dd12fe89b94b266b4bcb7452224cedd19a626100218d77def4db8b4d087117717dd3a6738aa5fe54c4c05f20b7592b5c
7
+ data.tar.gz: 2838f1cb2c0bb9caa43015b2f8f07e3b6252f80d4ad376160619d2c3dc50c4f1fecb65d6d5c47f9e86ffa2537e6b9ace01623a266aa2bc9cfd0fd4c0e9977512
data/CHANGELOG.md CHANGED
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.6.0] - 2026-04-15
11
+
12
+ ### Added
13
+ - `CsvKit.find(path, dialect:, &block)` — return the first row matching a predicate (or `nil` if none matches), stopping as soon as a match is found
14
+
15
+ ## [0.5.0] - 2026-04-09
16
+
17
+ ### Added
18
+ - `CsvKit.each_hash(path, dialect:, &block)` for streaming row-by-row iteration with constant memory; returns an `Enumerator` if no block is given
19
+ - `Row` now includes `Enumerable` with `keys`, `values`, `size`, `each`, and `merge` methods
20
+
10
21
  ## [0.4.0] - 2026-04-09
11
22
 
12
23
  ### Added
@@ -84,3 +95,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
84
95
  - Auto-detect delimiter
85
96
  - Type coercion and row validation
86
97
  - Quick load and filtering convenience methods
98
+
99
+ [0.6.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.6.0
+ [0.5.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.5.0
100
+ [0.4.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.4.0
101
+ [0.3.1]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.3.1
102
+ [0.3.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.3.0
103
+ [0.2.6]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.6
104
+ [0.2.5]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.5
105
+ [0.2.4]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.4
106
+ [0.2.3]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.3
107
+ [0.2.2]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.2
108
+ [0.2.1]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.1
109
+ [0.2.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.2.0
110
+ [0.1.2]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.1.2
111
+ [0.1.0]: https://github.com/philiprehberger/rb-csv-kit/releases/tag/v0.1.0
data/README.md CHANGED
@@ -54,6 +54,30 @@ Philiprehberger::CsvKit.count("data.csv")
54
54
  # => 1000
55
55
  ```
56
56
 
57
+ ### Streaming Row-by-Row
58
+
59
+ Iterate rows with constant memory. Returns an `Enumerator` if no block is given:
60
+
61
+ ```ruby
62
+ Philiprehberger::CsvKit.each_hash("large.csv") do |row|
63
+ puts row[:name]
64
+ end
65
+
66
+ # Or compose with Enumerator methods:
67
+ adults = Philiprehberger::CsvKit.each_hash("data.csv")
68
+ .select { |r| r[:age].to_i >= 18 }
69
+ .first(10)
70
+ ```
71
+
72
+ ### Find First Match
73
+
74
+ Return the first row that matches a predicate, streaming and stopping on the first hit:
75
+
76
+ ```ruby
77
+ user = Philiprehberger::CsvKit.find("users.csv") { |row| row[:email] == "a@b.com" }
78
+ # => {email: "a@b.com", name: "Alice"} or nil
79
+ ```
80
+
57
81
  ### Filter Rows
58
82
 
59
83
  ```ruby
@@ -153,8 +177,10 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
153
177
  | `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
154
178
  | `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
155
179
  | `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
180
+ | `CsvKit.find(path, dialect:, &block)` | Return the first row matching the predicate, or nil |
156
181
  | `CsvKit.headers(path, dialect:)` | Return header row as array of symbols |
157
182
  | `CsvKit.count(path, dialect:)` | Count data rows without loading into memory |
183
+ | `CsvKit.each_hash(path, dialect:, &block)` | Stream rows as symbolized hashes; returns Enumerator if no block |
158
184
  | `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
159
185
  | `Processor#headers(*names)` | Override header names |
160
186
  | `Processor#transform(key, &block)` | Register column transform |
@@ -176,6 +202,11 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
176
202
  | `Dialect.new(name_or_hash)` | Create a dialect from preset or custom hash |
177
203
  | `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
178
204
  | `Row#[](key)` | Access value by symbol key |
205
+ | `Row#keys` | Column names as array of symbols |
206
+ | `Row#values` | Column values as array |
207
+ | `Row#size` | Number of columns |
208
+ | `Row#each { \|k, v\| }` | Iterate key-value pairs (Enumerable) |
209
+ | `Row#merge(other)` | Return new Row with merged data |
179
210
  | `Row#to_h` | Convert row to plain hash |
180
211
 
181
212
  ## Development
@@ -4,11 +4,50 @@ module Philiprehberger
4
4
  module CsvKit
5
5
  # Wraps a CSV row as a hash with symbolized keys.
6
6
  class Row
7
+ include Enumerable
8
+
7
9
  # @param data [Hash{Symbol => String}]
8
10
  def initialize(data)
9
11
  @data = data
10
12
  end
11
13
 
14
# Yield each column of the row as a key-value pair.
#
# Delegates directly to the wrapped data's +each+.
#
# @yield [Symbol, Object] column name and its value
def each(&block)
  @data.each(&block)
end
20
+
21
# Column names of this row, taken from the wrapped data.
#
# @return [Array<Symbol>]
def keys = @data.keys
27
+
28
# Column values of this row, taken from the wrapped data.
#
# @return [Array<Object>]
def values = @data.values
34
+
35
# Number of columns held by this row.
#
# @return [Integer]
def size = @data.size
41
+
42
# Build a new Row from this row's data merged with +other+.
#
# A Row argument is converted with +to_h+ first; any other argument is
# passed to the merge as given. The receiver is not modified.
#
# @param other [Hash, Row] data to merge
# @return [Row] a new row containing the merged data
def merge(other)
  incoming = other
  incoming = other.to_h if other.is_a?(Row)
  Row.new(@data.merge(incoming))
end
50
+
12
51
  # Access a value by symbolized key.
13
52
  #
14
53
  # @param key [Symbol] column name
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module CsvKit
5
- VERSION = '0.4.0'
5
+ VERSION = '0.6.0'
6
6
  end
7
7
  end
@@ -80,6 +80,42 @@ module Philiprehberger
80
80
  n
81
81
  end
82
82
 
83
# Stream a CSV file row by row, handing each row over as a hash with
# symbolized header keys.
#
# With a block, every row is yielded to it in file order. Without a block,
# an Enumerator over the same rows is returned.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
# @yield [Hash{Symbol => String}] each row
# @return [Enumerator, nil]
def self.each_hash(path, dialect: nil, &block)
  csv_opts = dialect ? Dialect.new(dialect).merge_into({ headers: true }) : { headers: true }

  rows = Enumerator.new do |yielder|
    CSV.foreach(path, **csv_opts) do |record|
      yielder << record.to_h.transform_keys(&:to_sym)
    end
  end

  return rows unless block

  rows.each(&block)
end
102
+
103
# Find the first row matching a predicate, streaming the file and returning
# as soon as a match is found.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
# @yield [Hash{Symbol => String}] each row as a symbolized hash
# @yieldreturn [Boolean] truthy when the row is a match
# @return [Hash{Symbol => String}, nil] the first matching row or nil
# @raise [ArgumentError] if no block is given
def self.find(path, dialect: nil, &block)
  # Without this guard a missing block only surfaced as NoMethodError on the
  # first data row, and an empty file silently returned nil.
  raise ArgumentError, 'a block is required' unless block

  csv_opts = { headers: true }
  csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
  CSV.foreach(path, **csv_opts) do |row|
    hash = row.to_h.transform_keys(&:to_sym)
    # Returning from inside the block ends the read at the first match.
    return hash if block.call(hash)
  end
  nil
end
118
+
83
119
  # Filter rows and return matching rows as a CSV string.
84
120
  #
85
121
  # @param path [String] file path
metadata CHANGED
@@ -1,17 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-csv_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-10 00:00:00.000000000 Z
11
+ date: 2026-04-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Streaming CSV processor with row-by-row transforms, validations, column
14
- plucking, filtering, writing, error recovery, and automatic delimiter detection.
14
+ plucking, streaming each_hash iteration, filtering, writing, error recovery, and
15
+ automatic delimiter detection.
15
16
  email:
16
17
  - me@philiprehberger.com
17
18
  executables: []