arc-furnace 0.1.32 → 0.1.33

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 84822796e6d4c84acf30c21a33cb07650dde0a7f
4
- data.tar.gz: 7eae5563c3e5f33593a02134481f2ca1c68ffe1a
3
+ metadata.gz: 56c1cd7c32681828afee985402338a2774d6ebf2
4
+ data.tar.gz: d9d3aa812fa74ea4f49213d6aeb040a37b33ac75
5
5
  SHA512:
6
- metadata.gz: a86c07cb46fd13c789466c51117415c09d90369d4a0621ee42777b5ccd069e5371cd696aa136ef837e6e240aaaaf0f107e0d8fc3900e3121ea558bb98a725b55
7
- data.tar.gz: 94a73440bc74d95612650d0515a079e5db42213f197f14053204372ffacbcba4d12b09b57a76c047d9c795af3a57c7dc27d19fcf1a7d8d3d84f85cc53bca67d9
6
+ metadata.gz: b7b51fe6a3dc7f886e3e6e363fe73eb2e69bb70726e34aaf3d0f3aa88b54ad4ff611d24b5b599e982bc61fe72cadffdeaa5af4a07be6ad71ceb686be8c12d8f6
7
+ data.tar.gz: d7e7f3da2908a4ac93ba9febd6e4b02f32dabf8773b2b1bb42dbc4864ff515a2aa1dfe470cc4cfbb7f9ebafa1d3f119f585895f971c2f1aa94f53755a0126aac
@@ -6,27 +6,57 @@ module ArcFurnace
6
6
  class CSVSource < EnumeratorSource
7
7
  include CSVToHashWithDuplicateHeaders
8
8
 
9
- attr_reader :value, :file, :csv, :delimiter
9
+ attr_reader :value, :file, :csv, :delimiter, :group_by,
10
+ :key_column, :preprocessed_csv
10
11
 
11
12
  COMMA = ','.freeze
12
13
 
13
- def initialize(filename: nil, csv: nil, encoding: 'UTF-8', delimiter: COMMA)
14
+ def initialize(
15
+ filename: nil,
16
+ csv: nil,
17
+ encoding: 'UTF-8',
18
+ delimiter: COMMA,
19
+ group_by: false,
20
+ key_column: nil
21
+ )
14
22
  @file = File.open(filename, encoding: encoding) if filename
15
23
  @csv = csv
16
24
  @delimiter = delimiter
25
+ @preprocessed_csv = []
26
+ @group_by = group_by
27
+ @key_column = key_column
17
28
  super()
18
29
  end
19
30
 
31
+ alias_method :group_by?, :group_by
32
+
33
+ #
34
+ # note that group_by requires the entire file to be
35
+ # read into memory
36
+ #
37
+ def preprocess
38
+ if group_by?
39
+ parse_file { |row| @preprocessed_csv << csv_to_hash_with_duplicates(row) }
40
+ @preprocessed_csv = @preprocessed_csv.group_by { |row| row[key_column] }
41
+ end
42
+ end
43
+
20
44
  def finalize
21
45
  file.close if file
22
46
  end
23
47
 
24
48
  def build_enumerator
25
49
  Enumerator.new do |yielder|
26
- (csv ? csv : CSV.new(file, { headers: true, col_sep: delimiter })).each do |row|
27
- yielder << csv_to_hash_with_duplicates(row)
50
+ if group_by?
51
+ preprocessed_csv.each { |_, array| yielder.yield(array) }
52
+ else
53
+ parse_file { |row| yielder.yield(csv_to_hash_with_duplicates(row)) }
28
54
  end
29
55
  end
30
56
  end
57
+
58
+ def parse_file
59
+ (csv ? csv : CSV.new(file, { headers: true, col_sep: delimiter })).each { |row| yield row }
60
+ end
31
61
  end
32
62
  end
@@ -18,6 +18,10 @@ module ArcFurnace
18
18
 
19
19
  alias_method :group_by?, :group_by
20
20
 
21
+ #
22
+ # note that group_by requires the entire file to be
23
+ # read into memory
24
+ #
21
25
  def preprocess
22
26
  if group_by?
23
27
  build_headers
@@ -1,3 +1,3 @@
1
1
  module ArcFurnace
2
- VERSION = "0.1.32"
2
+ VERSION = "0.1.33"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arc-furnace
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.32
4
+ version: 0.1.33
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Spangenberger
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2018-04-02 00:00:00.000000000 Z
12
+ date: 2018-04-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack