philiprehberger-csv_builder 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fbaf1a09bda94dd3fff6302860abacf5ad60eb1376520f145f984da9ec8fb7b1
4
- data.tar.gz: 625b0af93c07cc624d51a2aed8239c5bebd465981bea6cdc7b9ef91433a2c645
3
+ metadata.gz: 9a4ffc869b5a686e7e0d33b193f309354328b4ea6aa12bd5421ff1d906aa8cce
4
+ data.tar.gz: 74f124f09a30ff6fd71f7caaaf13364c2bf63032adf8bf03b5d61dc1cf328caa
5
5
  SHA512:
6
- metadata.gz: 562365a1e93a6d9386a31630011021186ecf45029f6764e05fa594ccd872dcbe0a932477897c469dade5a95a2ca02d0d9c7f2a656b48210ff5d4c71c6d7ff9cf
7
- data.tar.gz: 7805ee034ea1c75bf1e8ccbea4854d1bbaab80ddbc0def7b7ba30b0be301280bfec22d8b9450e34da4e43fb9943b627c0a11a1397cd9fe5c1b04ded3a441f843
6
+ metadata.gz: fa82a727dacd0a8047b60a431045eeb40a99331346d96e5e2f9025137fbf83bce1ccbfb92e5bd502331943311e2543d6e7eb68d527ee61a9cc7bb79e9dbe8e36
7
+ data.tar.gz: 753166a7741b788a23133861c93d09441b7c8852b62bf1753d3b87565250aa017e38741c321a0388524d78885fa66a734ca112c63c27d8aba4c8c55b103ec37e
data/CHANGELOG.md CHANGED
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.9.0] - 2026-04-15
11
+
12
+ ### Added
13
+ - `Builder#column_stats` — per-column statistics (count, unique, nil_count, sample) across filtered records
14
+
15
+ ## [0.8.0] - 2026-04-15
16
+
17
+ ### Added
18
+ - `Builder#row_count` — number of data rows the builder will emit (applies filters, sorts, offsets, limits)
19
+
10
20
  ## [0.7.0] - 2026-04-15
11
21
 
12
22
  ### Added
@@ -97,6 +107,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
97
107
  - Support for hash records with symbol and string keys
98
108
  - Proper CSV escaping for values with commas and quotes
99
109
 
110
+ [0.9.0]: https://github.com/philiprehberger/rb-csv-builder/releases/tag/v0.9.0
111
+ [0.8.0]: https://github.com/philiprehberger/rb-csv-builder/releases/tag/v0.8.0
100
112
  [0.7.0]: https://github.com/philiprehberger/rb-csv-builder/releases/tag/v0.7.0
101
113
  [0.6.0]: https://github.com/philiprehberger/rb-csv-builder/releases/tag/v0.6.0
102
114
  [0.5.0]: https://github.com/philiprehberger/rb-csv-builder/releases/tag/v0.5.0
data/README.md CHANGED
@@ -294,6 +294,20 @@ builder = Philiprehberger::CsvBuilder.build(records, encoding: 'ISO-8859-1') do
294
294
  end
295
295
  ```
296
296
 
297
+ ### Row Count
298
+
299
+ Get the number of data rows that will be emitted (header and footer excluded; respects filters, sorts, offsets, and limits) without building the CSV:
300
+
301
+ ```ruby
302
+ builder = Philiprehberger::CsvBuilder.build(records) do
303
+ column :name
304
+ filter { |r| r[:active] }
305
+ limit 50
306
+ end
307
+
308
+ builder.row_count # => 50 (at most; fewer if fewer than 50 records are active)
309
+ ```
310
+
297
311
  ### Streaming
298
312
 
299
313
  ```ruby
@@ -370,6 +384,31 @@ builder.to_a
370
384
  # => [["name", "email"], ["Alice", "alice@example.com"], ["Bob", "bob@example.com"]]
371
385
  ```
372
386
 
387
+ ### Column Statistics
388
+
389
+ Inspect per-column statistics without generating CSV output:
390
+
391
+ ```ruby
392
+ records = [
393
+ { name: 'Alice', email: 'alice@example.com' },
394
+ { name: 'Bob', email: nil },
395
+ { name: 'Alice', email: 'alice@example.com' }
396
+ ]
397
+
398
+ builder = Philiprehberger::CsvBuilder.build(records) do
399
+ column :name
400
+ column :email
401
+ end
402
+
403
+ builder.column_stats
404
+ # => {
405
+ # name: { count: 3, unique: 2, nil_count: 0, sample: ["Alice", "Bob"] },
406
+ # email: { count: 3, unique: 1, nil_count: 1, sample: ["alice@example.com"] }
407
+ # }
408
+ ```
409
+
410
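+ Statistics respect filters, limits, and offsets. The `nil_count` includes both `nil` values and values matching the configured `empty_value`; those values are excluded from `unique` and `sample`, and `sample` holds at most the first three unique values.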
411
+
373
412
  ### Headers
374
413
 
375
414
  ```ruby
@@ -406,6 +445,8 @@ builder.headers # => ["name", "email"]
406
445
  | `Builder#append_to(path)` | Append data rows (no header, no BOM) to an existing CSV file |
407
446
  | `Builder#to_io(io)` | Stream CSV to any IO object |
408
447
  | `Builder#headers` | Return column header names |
448
+ | `Builder#column_stats` | Per-column statistics: count, unique, nil_count, and sample values |
449
+ | `Builder#row_count` | Number of data rows after filters, sorts, offsets, and limits |
409
450
 
410
451
  ## Development
411
452
 
@@ -166,6 +166,33 @@ module Philiprehberger
166
166
  @row_number_header ? [@row_number_header] + base : base
167
167
  end
168
168
 
169
+ # Number of data rows the builder will emit (headers and footer excluded).
170
+ # Applies all configured filters, sorts, offsets, and limits.
171
+ #
172
+ # @return [Integer]
173
+ def row_count
174
+ filtered_records.size
175
+ end
176
+
177
+ # Per-column statistics across filtered records.
178
+ #
179
+ # @return [Hash{Symbol => Hash}] column name mapped to stats hash
180
+ # with keys :count, :unique, :nil_count, :sample
181
+ def column_stats
182
+ recs = filtered_records
183
+ @columns.to_h do |col|
184
+ values = recs.map { |r| col.extract(r, empty_value: @empty_value) }
185
+ nil_count = values.count { |v| v.nil? || v == @empty_value }
186
+ unique_values = values.reject { |v| v.nil? || v == @empty_value }.uniq
187
+ [col.name, {
188
+ count: values.size,
189
+ unique: unique_values.size,
190
+ nil_count: nil_count,
191
+ sample: unique_values.first(3)
192
+ }]
193
+ end
194
+ end
195
+
169
196
  # Return the filtered records
170
197
  #
171
198
  # @return [Array]
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module CsvBuilder
5
- VERSION = '0.7.0'
5
+ VERSION = '0.9.0'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-csv_builder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-15 00:00:00.000000000 Z
11
+ date: 2026-04-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Build CSV files from record collections using a declarative DSL with
14
14
  column definitions, custom transforms, filtering, sorting, pagination via limit/offset,