arc-furnace 0.1.14 → 0.1.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c014a227e52695c9f5a91ac292f21ff1850785f6
4
- data.tar.gz: b273460b4eafdf5be12d57d3bc721c98eb4b324f
3
+ metadata.gz: e5dfedc8371c24fad4931ca94741d943a0e696d9
4
+ data.tar.gz: ef7e1108e5f1f1695f235670877f417b51455cbe
5
5
  SHA512:
6
- metadata.gz: 661e36f39e6389337bd70f0b12a618a646345353c271a96087894c3ed2b616520519d10d2c4072b8a139f2174a04ffc63b4ad7d7339f6c22b8730222fda104f0
7
- data.tar.gz: c47fdb6fa20296507f24ff79076816fbca19b06a6c94b9b3b151c3cda6d571049e138a46fe38e6b845ed992a412977c29e2692d697919849c53372f1bb69cec3
6
+ metadata.gz: 4f9303126e3d7b7e0aa0c0f682defc33c2694206eb90ee92cde2d30aa186fdbe056a9e68d7ecfd82622e3419c68f35e3da003700e008ca48c1b4f4afc3bd6000
7
+ data.tar.gz: 29322368c00d734c17497c564d24588eec8a86a0603337642c345a0efde48cb0c72f72118a0611dd1b61d3e7fae4831a9a928f252e9c6d7b8ec5e0c33d10a450
data/.hound.yml ADDED
@@ -0,0 +1,2 @@
1
+ ruby:
2
+ config_file: .ruby-style.yml
data/.ruby-style.yml ADDED
@@ -0,0 +1,13 @@
1
+ Style/StringLiterals:
2
+ EnforcedStyle: single_quotes
3
+ SupportedStyles:
4
+ - single_quotes
5
+ - double_quotes
6
+
7
+ Metrics/LineLength:
8
+ Max: 100
9
+ AllowHeredoc: true
10
+ AllowURI: true
11
+ URISchemes:
12
+ - http
13
+ - https
data/README.md CHANGED
@@ -99,7 +99,12 @@ associated sugar in the `transform` method of `Pipeline` make this very easy (se
99
99
 
100
100
  An `Unfold` acts as a source; however, it takes a source as an input and produces multiple rows for that source as an output.
101
101
  A common case for this is splitting rows into multiple rows depending upon their keys. The `BlockTransform` and associated
102
- sugar in the `unfold` method of `Pipeline` make this fiarly easy (see `pipeline_spec.rb`).
102
+ sugar in the `unfold` method of `Pipeline` make this fairly easy (see `pipeline_spec.rb`).
103
+
104
+ #### Observers
105
+
106
+ An `Observer` acts as a source: it takes a source as an input and serves as a pass-through for the stream. Observers
107
+ are used to observe the data stream, for example to record data for use elsewhere.
103
108
 
104
109
  #### Sinks
105
110
 
@@ -0,0 +1,85 @@
1
+ require 'arc-furnace/source'
2
+ require 'roo'
3
+
4
+ module ArcFurnace
5
+ class MultiExcelSource < Source
6
+
7
+ private_attr_reader :enumerator, :header_row
8
+ attr_reader :value, :excel, :sheets_info_array
9
+
10
+ # sheets_info_array is in the format of:
11
+ # [
12
+ # { filename: 'foo.xlsx', sheet: 'sheet name' },
13
+ # { filename: 'foo2.xlsx', sheet: 'sheet name' }
14
+ # ]
15
+ #
16
+ # The value for the :sheet key points to the sheet that we want to parse.
17
+ # If sheets are not explicitly indicated, they will not be parsed.
18
+
19
+ def initialize(sheets_info_array: [])
20
+ @sheets_info_array = sheets_info_array.reverse
21
+ open_next_file
22
+ end
23
+
24
+ private
25
+
26
+ def preprocess
27
+ enumerator.next
28
+ end
29
+
30
+ def advance
31
+ advance_in_current_file || open_next_file
32
+ end
33
+
34
+ def advance_in_current_file
35
+ @value =
36
+ begin
37
+ enumerator.next if enumerator
38
+ rescue StopIteration
39
+ @enumerator = nil
40
+ nil
41
+ end
42
+ value
43
+ end
44
+
45
+ def open_next_file
46
+ excel.close if excel
47
+ @excel = nil
48
+ @header_row = nil
49
+ if sheets_info_array.empty?
50
+ nil
51
+ else
52
+ sheets_info = sheets_info_array.pop
53
+ @excel = Roo::Excelx.new(sheets_info[:filename])
54
+ @excel.default_sheet = sheets_info[:sheet]
55
+ @enumerator = build_enumerator
56
+ preprocess
57
+ advance
58
+ end
59
+ end
60
+
61
+ def extract_cell_value(cell)
62
+ if cell
63
+ coerced_value = cell.type == :string ? cell.value : cell.excelx_value.try(:to_s).try(:strip)
64
+ coerced_value unless coerced_value.blank?
65
+ end
66
+ end
67
+
68
+ def build_enumerator
69
+ Enumerator.new do |yielder|
70
+ excel.each_row_streaming do |row|
71
+ yielder <<
72
+ if header_row
73
+ row.each_with_object({}) do |cell, result|
74
+ value = extract_cell_value(cell)
75
+ result[header_row[cell.coordinate.column - 1]] = value if value
76
+ end
77
+ else
78
+ # First time, return the header row so we can save it.
79
+ @header_row = row.map { |value| extract_cell_value(value) }
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -1,3 +1,3 @@
1
1
  module ArcFurnace
2
- VERSION = "0.1.14"
2
+ VERSION = "0.1.15"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arc-furnace
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.14
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Spangenberger
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2016-01-25 00:00:00.000000000 Z
12
+ date: 2016-02-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -118,7 +118,9 @@ extensions: []
118
118
  extra_rdoc_files: []
119
119
  files:
120
120
  - ".gitignore"
121
+ - ".hound.yml"
121
122
  - ".rspec"
123
+ - ".ruby-style.yml"
122
124
  - ".travis.yml"
123
125
  - Gemfile
124
126
  - LICENSE.txt
@@ -147,6 +149,7 @@ files:
147
149
  - lib/arc-furnace/logging_error_handler.rb
148
150
  - lib/arc-furnace/merging_hash.rb
149
151
  - lib/arc-furnace/multi_csv_source.rb
152
+ - lib/arc-furnace/multi_excel_source.rb
150
153
  - lib/arc-furnace/node.rb
151
154
  - lib/arc-furnace/nodes.rb
152
155
  - lib/arc-furnace/null_sink.rb