arc-furnace 0.1.14 → 0.1.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.hound.yml +2 -0
- data/.ruby-style.yml +13 -0
- data/README.md +6 -1
- data/lib/arc-furnace/multi_excel_source.rb +85 -0
- data/lib/arc-furnace/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e5dfedc8371c24fad4931ca94741d943a0e696d9
|
4
|
+
data.tar.gz: ef7e1108e5f1f1695f235670877f417b51455cbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f9303126e3d7b7e0aa0c0f682defc33c2694206eb90ee92cde2d30aa186fdbe056a9e68d7ecfd82622e3419c68f35e3da003700e008ca48c1b4f4afc3bd6000
|
7
|
+
data.tar.gz: 29322368c00d734c17497c564d24588eec8a86a0603337642c345a0efde48cb0c72f72118a0611dd1b61d3e7fae4831a9a928f252e9c6d7b8ec5e0c33d10a450
|
data/.hound.yml
ADDED
data/.ruby-style.yml
ADDED
data/README.md
CHANGED
@@ -99,7 +99,12 @@ associated sugar in the `transform` method of `Pipeline` make this very easy (se
|
|
99
99
|
|
100
100
|
An `Unfold` acts as a source; however, it takes a source as an input and produces multiple rows for that source as an output.
|
101
101
|
A common case for this is splitting rows into multiple rows depending upon their keys. The `BlockTransform` and associated
|
102
|
-
sugar in the `unfold` method of `Pipeline` make this
|
102
|
+
sugar in the `unfold` method of `Pipeline` make this fairly easy (see `pipeline_spec.rb`).
|
103
|
+
|
104
|
+
#### Observers
|
105
|
+
|
106
|
+
An `Observer` acts as a source and takes a source as an input and serves as a pass-through for a stream. Observers
|
107
|
+
are used to observe the data stream, for example to record data for use elsewhere.
|
103
108
|
|
104
109
|
#### Sinks
|
105
110
|
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'arc-furnace/source'
|
2
|
+
require 'roo'
|
3
|
+
|
4
|
+
module ArcFurnace
  # A Source that reads rows from a list of Excel (.xlsx) sheets, one file
  # after another. The first row of every sheet is treated as that sheet's
  # header row; each following row is exposed through #value as a Hash keyed
  # by the header cell of the same column. Cells whose extracted value is
  # blank are left out of the Hash.
  class MultiExcelSource < Source

    private_attr_reader :enumerator, :header_row
    attr_reader :value, :excel, :sheets_info_array

    # sheets_info_array is in the format of:
    # [
    #   { filename: 'foo.xlsx', sheet: 'sheet name' },
    #   { filename: 'foo2.xlsx', sheet: 'sheet name' }
    # ]
    #
    # The value for the :sheet key points to the sheet that we want to parse.
    # If sheets are not explicitly indicated, they will not be parsed.
    #
    # The list is stored reversed (as a copy) so that files can be taken off
    # the end with #pop while still being processed in the order given.
    # Constructing the source opens the first sheet and loads its first data
    # row into #value.
    def initialize(sheets_info_array: [])
      @sheets_info_array = sheets_info_array.reverse
      open_next_file
    end

    private

    # Consumes the first row of the current sheet. The enumerator stores that
    # row as the header row as a side effect of producing it.
    #
    # A sheet without any rows makes Enumerator#next raise StopIteration; this
    # is handled like the end of the file (the enumerator is cleared) so the
    # caller can move on to the next sheet instead of failing.
    def preprocess
      enumerator.next
    rescue StopIteration
      @enumerator = nil
    end

    # Moves to the next row, switching to the next file once the current one
    # is exhausted. Returns the new row, or nil when no files are left.
    # NOTE(review): presumably the hook the Source base class relies on to
    # step through rows -- confirm against Source.
    def advance
      advance_in_current_file || open_next_file
    end

    # Reads the next row of the current sheet into @value and returns it.
    # Returns nil (and clears the enumerator) when the sheet has no more rows
    # or when there is no open sheet.
    def advance_in_current_file
      @value =
        begin
          enumerator.next if enumerator
        rescue StopIteration
          @enumerator = nil
          nil
        end
      value
    end

    # Closes the current workbook (if any) and opens the next sheet in the
    # list. Returns the first data row of that sheet, or nil when the list is
    # empty. Sheets without data rows are skipped, because #advance falls
    # through to this method again.
    def open_next_file
      excel.close if excel
      @excel = nil
      @header_row = nil
      return nil if sheets_info_array.empty?

      sheets_info = sheets_info_array.pop
      @excel = Roo::Excelx.new(sheets_info[:filename])
      @excel.default_sheet = sheets_info[:sheet]
      @enumerator = build_enumerator
      preprocess
      advance
    end

    # Returns the cell's content, or nil for a missing cell or blank content.
    # String cells are returned as-is; any other cell type is returned as its
    # raw excelx value converted to a stripped String.
    def extract_cell_value(cell)
      return nil unless cell

      coerced_value = cell.type == :string ? cell.value : cell.excelx_value.try(:to_s).try(:strip)
      coerced_value unless coerced_value.blank?
    end

    # Builds an Enumerator over the rows of the current sheet. The first row
    # produced is the header row (an Array of extracted cell values, which is
    # also remembered in @header_row); every later row is produced as a Hash
    # of header => extracted value.
    def build_enumerator
      Enumerator.new do |yielder|
        excel.each_row_streaming do |row|
          yielder <<
            if header_row
              row.each_with_object({}) do |cell, result|
                cell_value = extract_cell_value(cell)
                # Cell columns are 1-based while the header Array is 0-based.
                result[header_row[cell.coordinate.column - 1]] = cell_value if cell_value
              end
            else
              # First time, return the header row so we can save it.
              @header_row = row.map { |cell| extract_cell_value(cell) }
            end
        end
      end
    end
  end
end
|
data/lib/arc-furnace/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arc-furnace
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Spangenberger
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-01
|
12
|
+
date: 2016-02-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: msgpack
|
@@ -118,7 +118,9 @@ extensions: []
|
|
118
118
|
extra_rdoc_files: []
|
119
119
|
files:
|
120
120
|
- ".gitignore"
|
121
|
+
- ".hound.yml"
|
121
122
|
- ".rspec"
|
123
|
+
- ".ruby-style.yml"
|
122
124
|
- ".travis.yml"
|
123
125
|
- Gemfile
|
124
126
|
- LICENSE.txt
|
@@ -147,6 +149,7 @@ files:
|
|
147
149
|
- lib/arc-furnace/logging_error_handler.rb
|
148
150
|
- lib/arc-furnace/merging_hash.rb
|
149
151
|
- lib/arc-furnace/multi_csv_source.rb
|
152
|
+
- lib/arc-furnace/multi_excel_source.rb
|
150
153
|
- lib/arc-furnace/node.rb
|
151
154
|
- lib/arc-furnace/nodes.rb
|
152
155
|
- lib/arc-furnace/null_sink.rb
|