philiprehberger-csv_kit 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a3c2bc60fa0f4ed93e30547903b2ea5dac2d8fdf2f7fd8eae6519839f5d581f1
4
+ data.tar.gz: 105ed419b40775e09c72c00669e6c59d876684e8462566ce5f82d66cf0957792
5
+ SHA512:
6
+ metadata.gz: 89f1b6c68c8b2ef72bc2e1d7be187605fc3680682fe2e0003819f4c2b85787e581ea9c8a25bd1ed4eb7ef42422e304bd0982b9e1f1d3c1a5c4fa0519e08de6e7
7
+ data.tar.gz: f187ca48f5a86a3bf530dcb715e2da6474c645836a5c9a2b9f872a674a30864f150bcb38bdb98fcd0883babd2d67d25e1e31b74ef0358f85efdf714434bdcc5a
data/CHANGELOG.md ADDED
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to this gem will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2026-03-15
11
+
12
+ ### Added
13
+ - Initial release
14
+ - Streaming CSV processing with constant memory
15
+ - Auto-detect delimiter
16
+ - Type coercion and row validation
17
+ - Quick load and filtering convenience methods
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 philiprehberger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,105 @@
1
+ # philiprehberger-csv_kit
2
+
3
+ [![Tests](https://github.com/philiprehberger/rb-csv-kit/actions/workflows/ci.yml/badge.svg)](https://github.com/philiprehberger/rb-csv-kit/actions/workflows/ci.yml)
4
+ [![Gem Version](https://badge.fury.io/rb/philiprehberger-csv_kit.svg)](https://rubygems.org/gems/philiprehberger-csv_kit)
5
+
6
+ Streaming CSV processor with type coercion and validation.
7
+
8
+ ## Requirements
9
+
10
+ - Ruby >= 3.1
11
+
12
+ ## Installation
13
+
14
+ Add to your Gemfile:
15
+
16
+ ```ruby
17
+ gem 'philiprehberger-csv_kit'
18
+ ```
19
+
20
+ Then run:
21
+
22
+ ```bash
23
+ bundle install
24
+ ```
25
+
26
+ Or install directly:
27
+
28
+ ```bash
29
+ gem install philiprehberger-csv_kit
30
+ ```
31
+
32
+ ## Usage
33
+
34
+ ```ruby
35
+ require 'philiprehberger/csv_kit'
36
+ ```
37
+
38
+ ### Quick Load
39
+
40
+ ```ruby
41
+ rows = Philiprehberger::CsvKit.to_hashes('data.csv')
42
+ # => [{name: "Alice", age: "30"}, ...]
43
+ ```
44
+
45
+ ### Pluck Columns
46
+
47
+ ```ruby
48
+ names = Philiprehberger::CsvKit.pluck('data.csv', :name, :city)
49
+ # => [{name: "Alice", city: "Berlin"}, ...]
50
+ ```
51
+
52
+ ### Filter Rows
53
+
54
+ ```ruby
55
+ csv_string = Philiprehberger::CsvKit.filter('data.csv') do |row|
56
+ row[:age].to_i >= 30
57
+ end
58
+ ```
59
+
60
+ ### Streaming Processor
61
+
62
+ ```ruby
63
+ rows = Philiprehberger::CsvKit.process('data.csv') do |p|
64
+ p.transform(:age) { |v| v.to_i }
65
+ p.validate(:age) { |v| v.to_i.positive? }
66
+ p.reject { |row| row[:city] == 'Unknown' }
67
+ p.each { |row| puts row[:name] }
68
+ end
69
+ ```
70
+
71
+ ### Delimiter Detection
72
+
73
+ ```ruby
74
+ delimiter = Philiprehberger::CsvKit::Detector.detect('data.tsv')
75
+ # => "\t"
76
+ ```
77
+
78
+ ## API
79
+
80
+ | Method / Class | Description |
81
+ |----------------|-------------|
82
+ | `CsvKit.to_hashes(path)` | Load CSV into array of symbolized hashes |
83
+ | `CsvKit.pluck(path, *keys)` | Extract specific columns |
84
+ | `CsvKit.filter(path, &block)` | Filter rows, return CSV string with a header line (empty string when no row matches) |
85
+ | `CsvKit.process(path_or_io, &block)` | Streaming DSL with transforms and validations |
86
+ | `Processor#headers(*names)` | Override header names positionally (the first line of the file is still read as the header row) |
87
+ | `Processor#transform(key, &block)` | Register column transform |
88
+ | `Processor#validate(key, &block)` | Register column validation; runs on the raw value before transforms, and rows that fail are skipped |
89
+ | `Processor#reject(&block)` | Reject rows matching predicate |
90
+ | `Processor#each(&block)` | Callback for each processed row |
91
+ | `Detector.detect(path_or_io)` | Auto-detect CSV delimiter |
92
+ | `Row#[](key)` | Access value by symbol key |
93
+ | `Row#to_h` | Convert row to plain hash |
94
+
95
+ ## Development
96
+
97
+ ```bash
98
+ bundle install
99
+ bundle exec rspec # Run tests
100
+ bundle exec rubocop # Check code style
101
+ ```
102
+
103
+ ## License
104
+
105
+ MIT
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Detects the most likely delimiter for a CSV file by sampling its first lines.
    #
    # A candidate is considered a good delimiter when it occurs the same number
    # of times on every sampled line (low variance). Candidates that never occur
    # are ranked last. When several candidates are equally consistent, the one
    # that occurs most often wins, so a tab-separated file whose cells happen to
    # contain one comma per line is still detected as tab-separated.
    class Detector
      # Candidate delimiters, in order of preference when everything else ties.
      DELIMITERS = [',', "\t", ';', '|'].freeze
      # Number of leading lines inspected.
      SAMPLE_LINES = 5

      # Detect the delimiter used in a file or IO.
      #
      # @param path_or_io [String, IO] file path or IO object
      # @return [String] the detected delimiter (',' when the input is empty
      #   or none of the candidates occurs)
      def self.detect(path_or_io)
        lines = read_sample(path_or_io)
        return ',' if lines.empty?

        # Arrays compare element-wise: variance first, then the negated total
        # so that a higher occurrence count sorts earlier on a variance tie.
        DELIMITERS.min_by do |delimiter|
          [variance(lines, delimiter), -lines.sum { |line| line.count(delimiter) }]
        end
      end

      # Read up to SAMPLE_LINES lines from the source.
      #
      # A String is treated as a file path. Anything else is treated as an IO;
      # it is rewound first when it supports rewinding and is left positioned
      # after the sampled lines.
      #
      # @api private
      # @param path_or_io [String, IO]
      # @return [Array<String>] sampled lines, including line terminators
      def self.read_sample(path_or_io)
        if path_or_io.is_a?(String)
          File.foreach(path_or_io).first(SAMPLE_LINES)
        else
          path_or_io.rewind if path_or_io.respond_to?(:rewind)
          path_or_io.each_line.first(SAMPLE_LINES)
        end
      end

      # Population variance of the per-line occurrence count of +delimiter+.
      #
      # @api private
      # @param lines [Array<String>] sampled lines
      # @param delimiter [String] candidate delimiter
      # @return [Float] variance, or Float::INFINITY when the delimiter never occurs
      def self.variance(lines, delimiter)
        counts = lines.map { |l| l.count(delimiter) }
        return Float::INFINITY if counts.all?(&:zero?)

        mean = counts.sum.to_f / counts.size
        counts.sum { |c| (c - mean)**2 } / counts.size
      end

      private_class_method :read_sample, :variance
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Streaming CSV processor with a DSL for transforms, validations, and filtering.
    #
    # Rows are read one at a time. For every row the registered validations run
    # first (on the raw values), then the reject predicate, then the transforms,
    # and finally the +each+ callback. Rows that survive are collected and
    # returned by #run.
    class Processor
      # @param path_or_io [String, IO] file path (String) or an IO-like object
      def initialize(path_or_io)
        @path_or_io = path_or_io
        @transforms = {}
        @validations = {}
        @reject_block = nil
        @each_block = nil
        @header_names = nil
      end

      # Override header names used for symbolized keys.
      #
      # The first line of the input is still consumed as the header row; the
      # given names are assigned to each row's cells by position.
      #
      # @param names [Array<Symbol>] header names
      def headers(*names)
        @header_names = names.map(&:to_sym)
      end

      # Register a transform for a specific column.
      #
      # @param key [Symbol] column name (symbolized, like #headers)
      # @yield [String] raw cell value
      def transform(key, &block)
        @transforms[key.to_sym] = block
      end

      # Register a validation for a specific column. Rows failing validation are skipped.
      #
      # @param key [Symbol] column name (symbolized, like #headers)
      # @yield [String] cell value
      def validate(key, &block)
        @validations[key.to_sym] = block
      end

      # Register a reject predicate. Rows matching are excluded.
      #
      # @yield [Row] the row
      def reject(&block)
        @reject_block = block
      end

      # Register a callback for each processed row.
      #
      # @yield [Row] the row
      def each(&block)
        @each_block = block
      end

      # Execute the processor, streaming row by row.
      #
      # @return [Array<Row>] collected rows
      def run
        open_csv { |csv| process_rows(csv) }
      end

      private

      # Runs the validate -> reject -> transform -> callback pipeline per row.
      def process_rows(csv)
        csv.each_with_object([]) do |csv_row, results|
          row = build_row(csv_row)
          next unless valid?(row)
          next if rejected?(row)

          apply_transforms!(row)
          @each_block&.call(row)
          results << row
        end
      end

      # Yields a CSV reader; a file opened from a path is closed by CSV.open.
      def open_csv(&block)
        if @path_or_io.is_a?(String)
          CSV.open(@path_or_io, headers: true, &block)
        else
          csv = CSV.new(@path_or_io, headers: true)
          block.call(csv)
        end
      end

      # Wraps a CSV::Row in a Row keyed by symbols.
      def build_row(csv_row)
        # Use positional fields for overrides: CSV::Row#to_h collapses
        # duplicate header names, which would drop cells and shift values.
        return Row.new(@header_names.zip(csv_row.fields).to_h) if @header_names

        Row.new(csv_row.to_h.transform_keys(&:to_sym))
      end

      def valid?(row)
        @validations.all? { |key, blk| blk.call(row[key]) }
      end

      def rejected?(row)
        @reject_block&.call(row) || false
      end

      def apply_transforms!(row)
        @transforms.each { |key, blk| row[key] = blk.call(row[key]) }
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # A single CSV record exposed through symbol keys.
    #
    # The row keeps a reference to the hash it was built from, so writes
    # through #[]= are visible in that hash; #to_h hands out a shallow copy.
    class Row
      # @param data [Hash{Symbol => String}] backing hash for this row
      def initialize(data)
        @data = data
      end

      # Look up the cell stored under +key+.
      #
      # @param key [Symbol] column name
      # @return [Object] the stored value, or whatever the backing hash
      #   yields for an unknown key
      def [](key)
        data[key]
      end

      # Store +value+ under +key+, replacing any previous cell.
      #
      # @param key [Symbol] column name
      # @param value [Object] new value
      def []=(key, value)
        data.store(key, value)
      end

      # Snapshot of the row as a plain hash.
      #
      # @return [Hash{Symbol => Object}] shallow copy of the backing hash
      def to_h
        data.dup
      end

      private

      attr_reader :data
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Philiprehberger
  module CsvKit
    # Version string of the csv_kit gem.
    VERSION = '0.1.1'
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require_relative 'csv_kit/version'
5
+ require_relative 'csv_kit/detector'
6
+ require_relative 'csv_kit/row'
7
+ require_relative 'csv_kit/processor'
8
+
9
module Philiprehberger
  # Convenience entry points for loading, plucking, filtering and streaming CSV data.
  module CsvKit
    # Base error class for this gem.
    class Error < StandardError; end

    # Streaming DSL — yields a Processor for configuration, then executes.
    #
    # The block is optional; without one the processor runs unconfigured and
    # returns every row.
    #
    # @param path_or_io [String, IO] file path or IO object
    # @yield [Processor] processor to configure transforms and validations
    # @return [Array<Row>] collected rows
    def self.process(path_or_io, &block)
      processor = Processor.new(path_or_io)
      block&.call(processor)
      processor.run
    end

    # Load an entire CSV into an array of symbolized hashes.
    #
    # The first line of the file is used as the header row.
    #
    # @param path [String] file path
    # @return [Array<Hash{Symbol => String}>]
    def self.to_hashes(path)
      CSV.foreach(path, headers: true).map do |row|
        row.to_h.transform_keys(&:to_sym)
      end
    end

    # Extract specific columns from a CSV.
    #
    # @param path [String] file path
    # @param keys [Array<Symbol>] column names to extract (Strings are
    #   symbolized so they match the symbolized headers)
    # @return [Array<Hash{Symbol => String}>]
    def self.pluck(path, *keys)
      wanted = keys.map(&:to_sym)
      to_hashes(path).map { |h| h.slice(*wanted) }
    end

    # Filter rows and return matching rows as a CSV string.
    #
    # @param path [String] file path
    # @yield [Hash{Symbol => String}] each row as a symbolized hash
    # @return [String] CSV string with headers, or an empty string when no
    #   row matches
    def self.filter(path, &)
      rows = to_hashes(path).select(&)
      return '' if rows.empty?

      # Header order is taken from the first matching row.
      headers = rows.first.keys
      CSV.generate do |csv|
        csv << headers
        rows.each { |row| csv << headers.map { |k| row[k] } }
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: philiprehberger-csv_kit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Philip Rehberger
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-03-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Streaming CSV processor with row-by-row transforms, validations, column
14
+ plucking, filtering, and automatic delimiter detection.
15
+ email:
16
+ - me@philiprehberger.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - CHANGELOG.md
22
+ - LICENSE
23
+ - README.md
24
+ - lib/philiprehberger/csv_kit.rb
25
+ - lib/philiprehberger/csv_kit/detector.rb
26
+ - lib/philiprehberger/csv_kit/processor.rb
27
+ - lib/philiprehberger/csv_kit/row.rb
28
+ - lib/philiprehberger/csv_kit/version.rb
29
+ homepage: https://github.com/philiprehberger/rb-csv-kit
30
+ licenses:
31
+ - MIT
32
+ metadata:
33
+ homepage_uri: https://github.com/philiprehberger/rb-csv-kit
34
+ source_code_uri: https://github.com/philiprehberger/rb-csv-kit
35
+ changelog_uri: https://github.com/philiprehberger/rb-csv-kit/blob/main/CHANGELOG.md
36
+ rubygems_mfa_required: 'true'
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: 3.1.0
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubygems_version: 3.5.22
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: Streaming CSV processor with type coercion and validation
56
+ test_files: []