philiprehberger-csv_kit 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +27 -0
- data/lib/philiprehberger/csv_kit/processor.rb +25 -0
- data/lib/philiprehberger/csv_kit/version.rb +1 -1
- data/lib/philiprehberger/csv_kit.rb +29 -0
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a31a3bf217dace4e2ec6ba7214191c13a4c6123112f660b59faa3bf5819c1ea9
|
|
4
|
+
data.tar.gz: e1f0a82cd1f02cae5b80c9cd78426b7fec741facd8504f15fa02922c8dd4c489
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cbe2e1d0ef8ae1294e0fa4d4b51ddf4b614afb3cc2b2b943af7d51969f87f181faf60961da29b2661096865f67ddde39245a617adb6c6f25f130a85cf625ad7b
|
|
7
|
+
data.tar.gz: e519bc86bf1a6c6d35ff992e32dd41f6ab7ca89fbcaf2a9eac24e3dac3a9848cbe9b0ceac337ef9fb9cb1a2b2684aa2d5ac4a58d8e61eed642a63accdb14fd67
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.0] - 2026-04-09
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `CsvKit.headers(path, dialect:)` to inspect header row without loading data
|
|
14
|
+
- `CsvKit.count(path, dialect:)` to count data rows without loading into memory
|
|
15
|
+
- `Processor#skip(n)` to skip the first N data rows
|
|
16
|
+
- `Processor#limit(n)` to stop after processing N rows
|
|
17
|
+
|
|
10
18
|
## [0.3.1] - 2026-03-31
|
|
11
19
|
|
|
12
20
|
### Changed
|
data/README.md
CHANGED
|
@@ -40,6 +40,20 @@ names = Philiprehberger::CsvKit.pluck("data.csv", :name, :city)
|
|
|
40
40
|
# => [{name: "Alice", city: "Berlin"}, ...]
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
+
### Inspect Headers
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
Philiprehberger::CsvKit.headers("data.csv")
|
|
47
|
+
# => [:name, :age, :city]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Count Rows
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
Philiprehberger::CsvKit.count("data.csv")
|
|
54
|
+
# => 1000
|
|
55
|
+
```
|
|
56
|
+
|
|
43
57
|
### Filter Rows
|
|
44
58
|
|
|
45
59
|
```ruby
|
|
@@ -108,6 +122,15 @@ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
|
108
122
|
end
|
|
109
123
|
```
|
|
110
124
|
|
|
125
|
+
### Skip and Limit
|
|
126
|
+
|
|
127
|
+
```ruby
|
|
128
|
+
rows = Philiprehberger::CsvKit.process("data.csv") do |p|
|
|
129
|
+
p.skip(10) # skip first 10 rows
|
|
130
|
+
p.limit(50) # stop after 50 rows
|
|
131
|
+
end
|
|
132
|
+
```
|
|
133
|
+
|
|
111
134
|
### Column Aliasing
|
|
112
135
|
|
|
113
136
|
```ruby
|
|
@@ -130,11 +153,15 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
|
|
|
130
153
|
| `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
|
|
131
154
|
| `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
|
|
132
155
|
| `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
|
|
156
|
+
| `CsvKit.headers(path, dialect:)` | Return header row as array of symbols |
|
|
157
|
+
| `CsvKit.count(path, dialect:)` | Count data rows without loading into memory |
|
|
133
158
|
| `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
|
|
134
159
|
| `Processor#headers(*names)` | Override header names |
|
|
135
160
|
| `Processor#transform(key, &block)` | Register column transform |
|
|
136
161
|
| `Processor#type(key, type, **opts)` | Register built-in type coercion (:integer, :float, :string, :date, :datetime) |
|
|
137
162
|
| `Processor#validate(key, &block)` | Register column validation (skip invalid) |
|
|
163
|
+
| `Processor#skip(n)` | Skip the first N data rows |
|
|
164
|
+
| `Processor#limit(n)` | Stop after processing N rows |
|
|
138
165
|
| `Processor#reject(&block)` | Reject rows matching predicate |
|
|
139
166
|
| `Processor#each(&block)` | Callback for each processed row |
|
|
140
167
|
| `Processor#on_error(&block)` | Per-row error handler (return `:skip` or `:abort`) |
|
|
data/lib/philiprehberger/csv_kit/processor.rb
CHANGED
|
@@ -35,6 +35,8 @@ module Philiprehberger
|
|
|
35
35
|
@reject_block = nil
|
|
36
36
|
@each_block = nil
|
|
37
37
|
@header_names = nil
|
|
38
|
+
@skip_count = nil
|
|
39
|
+
@limit_count = nil
|
|
38
40
|
init_error_handler
|
|
39
41
|
init_callbacks
|
|
40
42
|
end
|
|
@@ -66,6 +68,22 @@ module Philiprehberger
|
|
|
66
68
|
@validations[key] = block
|
|
67
69
|
end
|
|
68
70
|
|
|
71
|
+
# Skip the first N data rows during processing.
|
|
72
|
+
#
|
|
73
|
+
# @param n [Integer] number of rows to skip
|
|
74
|
+
# @return [void]
|
|
75
|
+
def skip(n)
|
|
76
|
+
@skip_count = n
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Stop after processing N rows.
|
|
80
|
+
#
|
|
81
|
+
# @param n [Integer] maximum rows to collect
|
|
82
|
+
# @return [void]
|
|
83
|
+
def limit(n)
|
|
84
|
+
@limit_count = n
|
|
85
|
+
end
|
|
86
|
+
|
|
69
87
|
# Register a reject predicate.
|
|
70
88
|
def reject(&block)
|
|
71
89
|
@reject_block = block
|
|
@@ -87,7 +105,14 @@ module Philiprehberger
|
|
|
87
105
|
private
|
|
88
106
|
|
|
89
107
|
def process_rows(csv)
|
|
108
|
+
skipped = 0
|
|
90
109
|
csv.each_with_object([]) do |csv_row, results|
|
|
110
|
+
if @skip_count && skipped < @skip_count
|
|
111
|
+
skipped += 1
|
|
112
|
+
next
|
|
113
|
+
end
|
|
114
|
+
break results if @limit_count && results.length >= @limit_count
|
|
115
|
+
|
|
91
116
|
process_single_row(csv_row, results)
|
|
92
117
|
end
|
|
93
118
|
end
|
|
data/lib/philiprehberger/csv_kit.rb
CHANGED
|
@@ -51,6 +51,35 @@ module Philiprehberger
|
|
|
51
51
|
to_hashes(path, dialect: dialect).map { |h| h.slice(*keys) }
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
# Return the header row as an array of symbols.
|
|
55
|
+
#
|
|
56
|
+
# @param path [String] file path
|
|
57
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
58
|
+
# @return [Array<Symbol>]
|
|
59
|
+
def self.headers(path, dialect: nil)
|
|
60
|
+
csv_opts = {}
|
|
61
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
62
|
+
CSV.open(path, **csv_opts) do |csv|
|
|
63
|
+
row = csv.shift
|
|
64
|
+
return [] unless row
|
|
65
|
+
|
|
66
|
+
row.map(&:to_sym)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Count data rows without loading them all into memory.
|
|
71
|
+
#
|
|
72
|
+
# @param path [String] file path
|
|
73
|
+
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
|
|
74
|
+
# @return [Integer]
|
|
75
|
+
def self.count(path, dialect: nil)
|
|
76
|
+
csv_opts = { headers: true }
|
|
77
|
+
csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
|
|
78
|
+
n = 0
|
|
79
|
+
CSV.foreach(path, **csv_opts) { |_| n += 1 }
|
|
80
|
+
n
|
|
81
|
+
end
|
|
82
|
+
|
|
54
83
|
# Filter rows and return matching rows as a CSV string.
|
|
55
84
|
#
|
|
56
85
|
# @param path [String] file path
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-csv_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-04-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Streaming CSV processor with row-by-row transforms, validations, column
|
|
14
14
|
plucking, filtering, writing, error recovery, and automatic delimiter detection.
|
|
@@ -30,11 +30,11 @@ files:
|
|
|
30
30
|
- lib/philiprehberger/csv_kit/row.rb
|
|
31
31
|
- lib/philiprehberger/csv_kit/version.rb
|
|
32
32
|
- lib/philiprehberger/csv_kit/writer.rb
|
|
33
|
-
homepage: https://
|
|
33
|
+
homepage: https://philiprehberger.com/open-source-packages/ruby/philiprehberger-csv_kit
|
|
34
34
|
licenses:
|
|
35
35
|
- MIT
|
|
36
36
|
metadata:
|
|
37
|
-
homepage_uri: https://
|
|
37
|
+
homepage_uri: https://philiprehberger.com/open-source-packages/ruby/philiprehberger-csv_kit
|
|
38
38
|
source_code_uri: https://github.com/philiprehberger/rb-csv-kit
|
|
39
39
|
changelog_uri: https://github.com/philiprehberger/rb-csv-kit/blob/main/CHANGELOG.md
|
|
40
40
|
bug_tracker_uri: https://github.com/philiprehberger/rb-csv-kit/issues
|