hasherize_csv 0.0.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ rvm:
9
9
  - jruby-head
10
10
  - 1.9.2
11
11
  - 1.9.3
12
+ - 2.0.0
12
13
  gemfile:
13
14
  - Gemfile
14
15
  notifications:
data/README.md CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  Dead simple CSV parsing, with configurable regex selectors if required.
5
5
  Reads line-by-line, so you can parse big CSV files without running out of memory.
6
+ The first line is always assumed to contain the column headers, which become the keys of each resulting hash.
6
7
 
7
8
  ### Simple case
8
9
  Given sample_csv.csv
@@ -28,3 +29,33 @@ The output will be
28
29
  {'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
29
30
  {'Col1' => 'Val4', 'Col2' => 'Val5', 'Col3' => 'Val6'}
30
31
  ```
32
+
33
+ ### Complex case: parse strange line endings and elegantly select quoted fields
34
+ Given complex_sample.csv
35
+ ```csv
36
+ "Col1";"Col2";"Col3"\r
37
+ "Val1";"Val2";"Val3"\r
38
+ "Val4";"Val5
39
+
40
+ Oh my, newlines in the record!";"Val6"\r
41
+ ```
42
+
43
+ and
44
+ ```ruby
45
+ require 'hasherize_csv'
46
+ @f = File.new("complex_sample.csv")
47
+
48
+ # HasherizeCsv yields the first capture group of the :value_pattern regex as each field's value
49
+ @csv = HasherizeCsv::Csv.new(@f, :separator => "\r", :value_pattern => /\"(.*?)\"/m)
50
+
51
+ @csv.each do |hash|
52
+ puts hash.inspect
53
+ end
54
+ ```
55
+
56
+ The output will be
57
+ ```
58
+ {'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
59
+ {'Col1' => 'Val4', 'Col2' => 'Val5\n\nOh my, newlines in the record!', 'Col3' => 'Val6'}
60
+ ```
61
+
@@ -7,37 +7,35 @@ module HasherizeCsv
7
7
  end
8
8
 
9
9
  class Csv
10
+ include Enumerable
10
11
  attr_accessor :keys, :file, :separator
11
12
  def initialize file, opts = {}
12
13
  @file = file
13
14
  @separator = opts[:separator] || DefaultOpts::DEFAULT[:separator]
14
15
  @value_pattern = opts[:value_pattern] || DefaultOpts::DEFAULT[:value_pattern]
15
16
  @keys = []
16
- next_line { |l| @keys = values_from_line l if !l.nil? }
17
- end
18
-
19
- def next_item
20
- next_line { |l|
21
- if l.nil?
22
- yield nil
23
- else
24
- yield hashify_values( values_from_line l )
25
- end
26
- }
17
+ @keys = values_from_line next_line
27
18
  end
28
19
 
29
20
  def each
30
- while(1)
31
- self.next_item { |hash|
32
- return if hash.nil?
33
- yield hash
34
- }
21
+ return self.to_enum if !block_given?
22
+
23
+ until (hash = next_item).nil?
24
+ yield hash
35
25
  end
36
26
  end
37
27
 
38
28
  private
29
+ def next_item
30
+ if (l = next_line).nil?
31
+ return nil
32
+ else
33
+ return hashify_values( values_from_line l )
34
+ end
35
+ end
36
+
39
37
  def next_line
40
- yield(@file.gets(@separator) ? $_.chomp : $_)
38
+ @file.gets(@separator) ? $_.chomp : $_
41
39
  end
42
40
 
43
41
  def hashify_values values
@@ -1,3 +1,3 @@
1
1
  module HasherizeCsv
2
- VERSION = "0.0.7"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -25,6 +25,13 @@ class HasherizeCsvTest < Test::Unit::TestCase
25
25
  assert_equal ["Heading1", "Heading2", "H_EA_3__c"], @csv.keys
26
26
  end
27
27
 
28
+ def test_hash_file_enumerator
29
+ result = []
30
+ enumerator = @csv.each
31
+
32
+ assert_equal({"Heading1"=>"Item1", "Heading2"=>"Item2", "H_EA_3__c"=>"Item3"}, enumerator.next)
33
+ end
34
+
28
35
  def test_hash_file
29
36
  result = []
30
37
  @csv.each { |hash|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hasherize_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-06 00:00:00.000000000 Z
12
+ date: 2013-10-05 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Turns csv files into hashes without reading the entire csv into memory
15
15
  email: