hasherize_csv 0.0.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -0
- data/README.md +31 -0
- data/lib/hasherize_csv.rb +15 -17
- data/lib/hasherize_csv/version.rb +1 -1
- data/test/test_hasherize_csv.rb +7 -0
- metadata +2 -2
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
Dead simple CSV parsing, with configurable regex selectors if required.
|
5
5
|
Reads line-by-line, so you can parse big CSV files without running out of memory.
|
6
|
+
The first line is always assumed to contain the column headers, which become the keys of each resulting hash.
|
6
7
|
|
7
8
|
### Simple case
|
8
9
|
Given sample_csv.csv
|
@@ -28,3 +29,33 @@ The output will be
|
|
28
29
|
{'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
|
29
30
|
{'Col1' => 'Val4', 'Col2' => 'Val5', 'Col3' => 'Val6'}
|
30
31
|
```
|
32
|
+
|
33
|
+
### Complex case: parse strange line endings and elegantly select quoted fields
|
34
|
+
Given complex_sample.csv
|
35
|
+
```csv
|
36
|
+
"Col1";"Col2";"Col3"\r
|
37
|
+
"Val1";"Val2";"Val3"\r
|
38
|
+
"Val4";"Val5
|
39
|
+
|
40
|
+
Oh my, newlines in the record!";"Val6"\r
|
41
|
+
```
|
42
|
+
|
43
|
+
and
|
44
|
+
```ruby
|
45
|
+
require 'hasherize_csv'
|
46
|
+
@f = File.new("complex_sample.csv")
|
47
|
+
|
48
|
+
#HasherizeCsv yields the value of the first match group in the :value_pattern regex
|
49
|
+
@csv = HasherizeCsv::Csv.new(@f, :separator => "\r", :value_pattern => /\"(.*?)\"/m)
|
50
|
+
|
51
|
+
@csv.each do |hash|
|
52
|
+
puts hash.inspect
|
53
|
+
end
|
54
|
+
```
|
55
|
+
|
56
|
+
The output will be
|
57
|
+
```
|
58
|
+
{'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
|
59
|
+
{'Col1' => 'Val4', 'Col2' => 'Val5\n\nOh my, newlines in the record!', 'Col3' => 'Val6'}
|
60
|
+
```
|
61
|
+
|
data/lib/hasherize_csv.rb
CHANGED
@@ -7,37 +7,35 @@ module HasherizeCsv
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Csv
|
10
|
+
include Enumerable
|
10
11
|
attr_accessor :keys, :file, :separator
|
11
12
|
def initialize file, opts = {}
|
12
13
|
@file = file
|
13
14
|
@separator = opts[:separator] || DefaultOpts::DEFAULT[:separator]
|
14
15
|
@value_pattern = opts[:value_pattern] || DefaultOpts::DEFAULT[:value_pattern]
|
15
16
|
@keys = []
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
def next_item
|
20
|
-
next_line { |l|
|
21
|
-
if l.nil?
|
22
|
-
yield nil
|
23
|
-
else
|
24
|
-
yield hashify_values( values_from_line l )
|
25
|
-
end
|
26
|
-
}
|
17
|
+
@keys = values_from_line next_line
|
27
18
|
end
|
28
19
|
|
29
20
|
def each
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
}
|
21
|
+
return self.to_enum if !block_given?
|
22
|
+
|
23
|
+
until (hash = next_item).nil?
|
24
|
+
yield hash
|
35
25
|
end
|
36
26
|
end
|
37
27
|
|
38
28
|
private
|
29
|
+
def next_item
|
30
|
+
if (l = next_line).nil?
|
31
|
+
return nil
|
32
|
+
else
|
33
|
+
return hashify_values( values_from_line l )
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
39
37
|
def next_line
|
40
|
-
|
38
|
+
@file.gets(@separator) ? $_.chomp : $_
|
41
39
|
end
|
42
40
|
|
43
41
|
def hashify_values values
|
data/test/test_hasherize_csv.rb
CHANGED
@@ -25,6 +25,13 @@ class HasherizeCsvTest < Test::Unit::TestCase
|
|
25
25
|
assert_equal ["Heading1", "Heading2", "H_EA_3__c"], @csv.keys
|
26
26
|
end
|
27
27
|
|
28
|
+
def test_hash_file_enumerator
|
29
|
+
result = []
|
30
|
+
enumerator = @csv.each
|
31
|
+
|
32
|
+
assert_equal({"Heading1"=>"Item1", "Heading2"=>"Item2", "H_EA_3__c"=>"Item3"}, enumerator.next)
|
33
|
+
end
|
34
|
+
|
28
35
|
def test_hash_file
|
29
36
|
result = []
|
30
37
|
@csv.each { |hash|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hasherize_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-10-05 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Turns csv files into hashes without reading the entire csv into memory
|
15
15
|
email:
|