philiprehberger-csv_kit 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 47d82c7f00d1c8263a9fe0c36db577a9b23d731ec0d0e29a0b937390e3b9ad5e
4
- data.tar.gz: d6a7e13c1e99839d7b1b346a09cddf5997acc23f23b8a38804cb0ff8ff9a123b
3
+ metadata.gz: a31a3bf217dace4e2ec6ba7214191c13a4c6123112f660b59faa3bf5819c1ea9
4
+ data.tar.gz: e1f0a82cd1f02cae5b80c9cd78426b7fec741facd8504f15fa02922c8dd4c489
5
5
  SHA512:
6
- metadata.gz: 05153db33d2ff42f6ba455456decfff96f0f2ecb453f9243e19c30fab41bde4dd48813a23795bb074a01d3a8d642758e881ee538c6044d0689664fc6288b4016
7
- data.tar.gz: 8603bacec3217f03caea385b232c6801040ea084180ba348346c5a6f49dd35bba92b25a0eb7c60b29945f35044c46b007a91a8381853eb747f8429cea1e1fa0e
6
+ metadata.gz: cbe2e1d0ef8ae1294e0fa4d4b51ddf4b614afb3cc2b2b943af7d51969f87f181faf60961da29b2661096865f67ddde39245a617adb6c6f25f130a85cf625ad7b
7
+ data.tar.gz: e519bc86bf1a6c6d35ff992e32dd41f6ab7ca89fbcaf2a9eac24e3dac3a9848cbe9b0ceac337ef9fb9cb1a2b2684aa2d5ac4a58d8e61eed642a63accdb14fd67
data/CHANGELOG.md CHANGED
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.4.0] - 2026-04-09
11
+
12
+ ### Added
13
+ - `CsvKit.headers(path, dialect:)` to inspect header row without loading data
14
+ - `CsvKit.count(path, dialect:)` to count data rows without loading into memory
15
+ - `Processor#skip(n)` to skip the first N data rows
16
+ - `Processor#limit(n)` to stop after processing N rows
17
+
10
18
  ## [0.3.1] - 2026-03-31
11
19
 
12
20
  ### Changed
data/README.md CHANGED
@@ -40,6 +40,20 @@ names = Philiprehberger::CsvKit.pluck("data.csv", :name, :city)
40
40
  # => [{name: "Alice", city: "Berlin"}, ...]
41
41
  ```
42
42
 
43
+ ### Inspect Headers
44
+
45
+ ```ruby
46
+ Philiprehberger::CsvKit.headers("data.csv")
47
+ # => [:name, :age, :city]
48
+ ```
49
+
50
+ ### Count Rows
51
+
52
+ ```ruby
53
+ Philiprehberger::CsvKit.count("data.csv")
54
+ # => 1000
55
+ ```
56
+
43
57
  ### Filter Rows
44
58
 
45
59
  ```ruby
@@ -108,6 +122,15 @@ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
108
122
  end
109
123
  ```
110
124
 
125
+ ### Skip and Limit
126
+
127
+ ```ruby
128
+ rows = Philiprehberger::CsvKit.process("data.csv") do |p|
129
+ p.skip(10) # skip first 10 rows
130
+ p.limit(50) # stop after 50 rows
131
+ end
132
+ ```
133
+
111
134
  ### Column Aliasing
112
135
 
113
136
  ```ruby
@@ -130,11 +153,15 @@ delimiter = Philiprehberger::CsvKit::Detector.detect("data.tsv")
130
153
  | `CsvKit.to_hashes(path, dialect:)` | Load CSV into array of symbolized hashes |
131
154
  | `CsvKit.pluck(path, *keys, dialect:)` | Extract specific columns |
132
155
  | `CsvKit.filter(path, dialect:, &block)` | Filter rows, return CSV string |
156
+ | `CsvKit.headers(path, dialect:)` | Return header row as array of symbols |
157
+ | `CsvKit.count(path, dialect:)` | Count data rows without loading into memory |
133
158
  | `CsvKit.process(path_or_io, dialect:, &block)` | Streaming DSL with transforms and validations |
134
159
  | `Processor#headers(*names)` | Override header names |
135
160
  | `Processor#transform(key, &block)` | Register column transform |
136
161
  | `Processor#type(key, type, **opts)` | Register built-in type coercion (:integer, :float, :string, :date, :datetime) |
137
162
  | `Processor#validate(key, &block)` | Register column validation (skip invalid) |
163
+ | `Processor#skip(n)` | Skip the first N data rows |
164
+ | `Processor#limit(n)` | Stop after processing N rows |
138
165
  | `Processor#reject(&block)` | Reject rows matching predicate |
139
166
  | `Processor#each(&block)` | Callback for each processed row |
140
167
  | `Processor#on_error(&block)` | Per-row error handler (return `:skip` or `:abort`) |
@@ -35,6 +35,8 @@ module Philiprehberger
35
35
  @reject_block = nil
36
36
  @each_block = nil
37
37
  @header_names = nil
38
+ @skip_count = nil
39
+ @limit_count = nil
38
40
  init_error_handler
39
41
  init_callbacks
40
42
  end
@@ -66,6 +68,22 @@ module Philiprehberger
66
68
  @validations[key] = block
67
69
  end
68
70
 
# Skip the first N data rows during processing.
#
# Passing nil clears a previously configured skip count. The value is
# validated here so that a bad argument fails at the call site instead of
# later, when the count is compared against while rows are being read.
#
# @param n [Integer, nil] number of rows to skip; must not be negative
# @return [void]
# @raise [ArgumentError] if n is neither nil nor a non-negative Integer
def skip(n)
  unless n.nil? || (n.is_a?(Integer) && !n.negative?)
    raise ArgumentError, "skip count must be a non-negative Integer, got #{n.inspect}"
  end

  @skip_count = n
end
78
+
# Stop after processing N rows.
#
# Passing nil removes a previously configured limit. A negative limit used
# to be accepted and silently produced an empty result, because the collected
# row count is always >= a negative number; it is rejected here instead.
#
# @param n [Integer, nil] maximum rows to collect; must not be negative
# @return [void]
# @raise [ArgumentError] if n is neither nil nor a non-negative Integer
def limit(n)
  unless n.nil? || (n.is_a?(Integer) && !n.negative?)
    raise ArgumentError, "limit must be a non-negative Integer, got #{n.inspect}"
  end

  @limit_count = n
end
86
+
69
87
  # Register a reject predicate.
70
88
  def reject(&block)
71
89
  @reject_block = block
@@ -87,7 +105,14 @@ module Philiprehberger
87
105
  private
88
106
 
# Iterate over the parsed rows, applying the configured skip and limit, and
# collect whatever process_single_row appends to the result array.
#
# The limit is checked right after a row has been handled, so iteration stops
# as soon as enough rows are collected and no row beyond the limit is pulled
# from the source. A limit of zero (or less) returns before anything is read.
#
# @param csv [#each] source yielding one row per iteration
# @return [Array] rows collected by process_single_row
def process_rows(csv)
  results = []
  return results if @limit_count && @limit_count <= 0

  skipped = 0
  csv.each do |csv_row|
    if @skip_count && skipped < @skip_count
      skipped += 1
      next
    end

    # process_single_row decides whether the row is appended to results,
    # so the limit is compared against the collected count, not rows read.
    process_single_row(csv_row, results)
    break if @limit_count && results.length >= @limit_count
  end
  results
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module CsvKit
5
- VERSION = '0.3.1'
5
+ VERSION = '0.4.0'
6
6
  end
7
7
  end
@@ -51,6 +51,35 @@ module Philiprehberger
51
51
  to_hashes(path, dialect: dialect).map { |h| h.slice(*keys) }
52
52
  end
53
53
 
# Return the header row as an array of symbols.
#
# Only one row is taken from the file (via CSV#shift). A blank header cell,
# which CSV yields as nil, is reported as the empty symbol so the result keeps
# one entry per column instead of raising NoMethodError on nil.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
# @return [Array<Symbol>] header names in file order; empty if the file has no rows
def self.headers(path, dialect: nil)
  csv_opts = {}
  csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
  # The block form closes the file and hands back the block's value.
  first_row = CSV.open(path, **csv_opts, &:shift)
  (first_row || []).map { |name| name.to_s.to_sym }
end
69
+
# Count data rows without loading them all into memory.
#
# The first row is treated as the header (headers: true) and is not counted.
# Rows are tallied one at a time through the enumerator that CSV.foreach
# returns when called without a block.
#
# @param path [String] file path
# @param dialect [Symbol, Hash, nil] CSV dialect preset or custom options
# @return [Integer] number of rows after the header row
def self.count(path, dialect: nil)
  csv_opts = { headers: true }
  csv_opts = Dialect.new(dialect).merge_into(csv_opts) if dialect
  CSV.foreach(path, **csv_opts).count
end
82
+
54
83
  # Filter rows and return matching rows as a CSV string.
55
84
  #
56
85
  # @param path [String] file path
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-csv_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-31 00:00:00.000000000 Z
11
+ date: 2026-04-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Streaming CSV processor with row-by-row transforms, validations, column
14
14
  plucking, filtering, writing, error recovery, and automatic delimiter detection.
@@ -30,11 +30,11 @@ files:
30
30
  - lib/philiprehberger/csv_kit/row.rb
31
31
  - lib/philiprehberger/csv_kit/version.rb
32
32
  - lib/philiprehberger/csv_kit/writer.rb
33
- homepage: https://github.com/philiprehberger/rb-csv-kit
33
+ homepage: https://philiprehberger.com/open-source-packages/ruby/philiprehberger-csv_kit
34
34
  licenses:
35
35
  - MIT
36
36
  metadata:
37
- homepage_uri: https://github.com/philiprehberger/rb-csv-kit
37
+ homepage_uri: https://philiprehberger.com/open-source-packages/ruby/philiprehberger-csv_kit
38
38
  source_code_uri: https://github.com/philiprehberger/rb-csv-kit
39
39
  changelog_uri: https://github.com/philiprehberger/rb-csv-kit/blob/main/CHANGELOG.md
40
40
  bug_tracker_uri: https://github.com/philiprehberger/rb-csv-kit/issues