hasherize_csv 0.0.7 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,7 @@ rvm:
9
9
  - jruby-head
10
10
  - 1.9.2
11
11
  - 1.9.3
12
+ - 2.0.0
12
13
  gemfile:
13
14
  - Gemfile
14
15
  notifications:
data/README.md CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  Dead simple CSV parsing, with configurable regex selectors if required.
5
5
  Reads line-by-line, so you can parse big CSV files without running out of memory.
6
+ The first line is always assumed to contain the column headers, which become the keys of each resulting hash.
6
7
 
7
8
  ### Simple case
8
9
  Given sample_csv.csv
@@ -28,3 +29,33 @@ The output will be
28
29
  {'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
29
30
  {'Col1' => 'Val4', 'Col2' => 'Val5', 'Col3' => 'Val6'}
30
31
  ```
32
+
33
+ ### Complex case: parse strange line endings and elegantly select quoted fields
34
+ Given complex_sample.csv
35
+ ```csv
36
+ "Col1";"Col2";"Col3"\r
37
+ "Val1";"Val2";"Val3"\r
38
+ "Val4";"Val5
39
+
40
+ Oh my, newlines in the record!";"Val6"\r
41
+ ```
42
+
43
+ and
44
+ ```ruby
45
+ require 'hasherize_csv'
46
+ @f = File.new("complex_sample.csv")
47
+
48
+ # HasherizeCsv takes each field's value from the first match group of the :value_pattern regex
49
+ @csv = HasherizeCsv::Csv.new(@f, :separator => "\r", :value_pattern => /\"(.*?)\"/m)
50
+
51
+ @csv.each do |hash|
52
+ puts hash.inspect
53
+ end
54
+ ```
55
+
56
+ The output will be
57
+ ```
58
+ {'Col1' => 'Val1', 'Col2' => 'Val2', 'Col3' => 'Val3'}
59
+ {'Col1' => 'Val4', 'Col2' => 'Val5\n\nOh my, newlines in the record!', 'Col3' => 'Val6'}
60
+ ```
61
+
@@ -7,37 +7,35 @@ module HasherizeCsv
7
7
  end
8
8
 
9
9
  class Csv
10
+ include Enumerable
10
11
  attr_accessor :keys, :file, :separator
11
12
  def initialize file, opts = {}
12
13
  @file = file
13
14
  @separator = opts[:separator] || DefaultOpts::DEFAULT[:separator]
14
15
  @value_pattern = opts[:value_pattern] || DefaultOpts::DEFAULT[:value_pattern]
15
16
  @keys = []
16
- next_line { |l| @keys = values_from_line l if !l.nil? }
17
- end
18
-
19
- def next_item
20
- next_line { |l|
21
- if l.nil?
22
- yield nil
23
- else
24
- yield hashify_values( values_from_line l )
25
- end
26
- }
17
+ @keys = values_from_line next_line
27
18
  end
28
19
 
29
20
  def each
30
- while(1)
31
- self.next_item { |hash|
32
- return if hash.nil?
33
- yield hash
34
- }
21
+ return self.to_enum if !block_given?
22
+
23
+ until (hash = next_item).nil?
24
+ yield hash
35
25
  end
36
26
  end
37
27
 
38
28
  private
29
+ def next_item
30
+ if (l = next_line).nil?
31
+ return nil
32
+ else
33
+ return hashify_values( values_from_line l )
34
+ end
35
+ end
36
+
39
37
  def next_line
40
- yield(@file.gets(@separator) ? $_.chomp : $_)
38
+ @file.gets(@separator) ? $_.chomp : $_
41
39
  end
42
40
 
43
41
  def hashify_values values
@@ -1,3 +1,3 @@
1
1
  module HasherizeCsv
2
- VERSION = "0.0.7"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -25,6 +25,13 @@ class HasherizeCsvTest < Test::Unit::TestCase
25
25
  assert_equal ["Heading1", "Heading2", "H_EA_3__c"], @csv.keys
26
26
  end
27
27
 
28
+ def test_hash_file_enumerator
29
+ result = []
30
+ enumerator = @csv.each
31
+
32
+ assert_equal({"Heading1"=>"Item1", "Heading2"=>"Item2", "H_EA_3__c"=>"Item3"}, enumerator.next)
33
+ end
34
+
28
35
  def test_hash_file
29
36
  result = []
30
37
  @csv.each { |hash|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hasherize_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-06 00:00:00.000000000 Z
12
+ date: 2013-10-05 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Turns csv files into hashes without reading the entire csv into memory
15
15
  email: