arc-furnace 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +6 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +127 -0
- data/Rakefile +9 -0
- data/arc-furnace.gemspec +30 -0
- data/lib/arc-furnace.rb +12 -0
- data/lib/arc-furnace/abstract_join.rb +53 -0
- data/lib/arc-furnace/all_fields_csv_sink.rb +68 -0
- data/lib/arc-furnace/binary_key_merging_hash.rb +38 -0
- data/lib/arc-furnace/block_transform.rb +18 -0
- data/lib/arc-furnace/block_unfold.rb +18 -0
- data/lib/arc-furnace/csv_sink.rb +21 -0
- data/lib/arc-furnace/csv_source.rb +33 -0
- data/lib/arc-furnace/csv_to_hash_with_duplicate_headers.rb +19 -0
- data/lib/arc-furnace/dsl.rb +6 -0
- data/lib/arc-furnace/enumerator_source.rb +36 -0
- data/lib/arc-furnace/error_handler.rb +27 -0
- data/lib/arc-furnace/excel_source.rb +38 -0
- data/lib/arc-furnace/fixed_column_csv_sink.rb +37 -0
- data/lib/arc-furnace/hash.rb +41 -0
- data/lib/arc-furnace/inner_join.rb +27 -0
- data/lib/arc-furnace/logging_error_handler.rb +0 -0
- data/lib/arc-furnace/merging_hash.rb +41 -0
- data/lib/arc-furnace/multi_csv_source.rb +50 -0
- data/lib/arc-furnace/node.rb +5 -0
- data/lib/arc-furnace/nodes.rb +19 -0
- data/lib/arc-furnace/outer_join.rb +14 -0
- data/lib/arc-furnace/pipeline.rb +158 -0
- data/lib/arc-furnace/private_attr.rb +28 -0
- data/lib/arc-furnace/sink.rb +21 -0
- data/lib/arc-furnace/source.rb +40 -0
- data/lib/arc-furnace/suffixed_fixed_column_csv_sink.rb +18 -0
- data/lib/arc-furnace/transform.rb +28 -0
- data/lib/arc-furnace/unfold.rb +45 -0
- data/lib/arc-furnace/version.rb +3 -0
- metadata +182 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
# Reopens Module so that every class and module gains macros for declaring
# private attribute methods and private aliases.
class Module

  private

  # Defines a reader for each given attribute name, then marks those
  # readers private.
  def private_attr_reader(*names)
    attr_reader(*names)
    private(*names)
  end

  # Defines a writer for each given attribute name, then marks the
  # generated "name=" methods private.
  def private_attr_writer(*names)
    attr_writer(*names)
    setter_names = names.map { |name| :"#{name}=" }
    private(*setter_names)
  end

  # Defines both a private reader and a private writer for each given
  # attribute name.
  def private_attr_accessor(*names)
    private_attr_reader(*names)
    private_attr_writer(*names)
  end

  # Aliases old_name as new_name and marks the new name private.
  def private_alias_method(new_name, old_name)
    alias_method(new_name, old_name)
    private(new_name)
  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module ArcFurnace
  # Base class for sinks. Subclasses must override #row; #prepare and
  # #finalize are optional hooks that do nothing here.
  class Sink

    # Called once for each output row; every sink must handle each row.
    # The base implementation always raises.
    def row(row)
      raise RuntimeError, "Unimplemented!"
    end

    # Hook for any pre-processing. No-op in the base class.
    def prepare
      nil
    end

    # Hook for clean-up work (closing file handles, etc). No-op in the
    # base class.
    def finalize
      nil
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'arc-furnace/node'
|
2
|
+
|
3
|
+
module ArcFurnace
  # Base class for sources. Every method except #row is an empty hook here
  # that returns nil; subclasses override what they need.
  class Source < Node
    extend Forwardable

    # Hook run before reading; no-op in the base class.
    def prepare
      nil
    end

    # Returns the current row (as a hash) and then advances this source by
    # one. The value is captured before #advance is called.
    def row
      value.tap { advance }
    end

    # Is this source empty? The base class gives no answer (returns nil).
    def empty?
      nil
    end

    # The current value this source points at. This is generally the only
    # method required to implement a source.
    def value
      nil
    end

    # Close the source; no-op in the base class.
    def close
      nil
    end

    # Advance this source by one. #advance specifies no return value
    # contract.
    def advance
      nil
    end

  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'arc-furnace/fixed_column_csv_sink'
|
2
|
+
|
3
|
+
module ArcFurnace
  # A FixedColumnCSVSink whose header row numbers the columns of any field
  # that occurs more than once ("name 1", "name 2", ...).
  class SuffixedFixedColumnCSVSink < FixedColumnCSVSink
    private_attr_reader :fields

    # Appends the header row to csv. fields is iterated as (key, count)
    # pairs: a count above one yields "key 1" .. "key count", otherwise the
    # key is emitted unchanged.
    # NOTE(review): csv is not defined in this class; presumably it is
    # provided by FixedColumnCSVSink -- confirm.
    def write_header
      header = fields.flat_map do |name, occurrences|
        if occurrences > 1
          occurrences.times.map { |position| "#{name} #{position + 1}" }
        else
          [name]
        end
      end
      csv << header
    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'arc-furnace/source'
|
2
|
+
|
3
|
+
module ArcFurnace
  # A source that wraps another source and passes each of its values
  # through #transform. The default #transform returns the row unchanged.
  class Transform < Source

    private_attr_reader :source

    # empty? and advance are answered directly by the wrapped source.
    def_delegators :source, :empty?, :advance

    # source: the upstream node this transform reads from.
    def initialize(source:)
      @source = source
    end

    # Prepares the wrapped source.
    def prepare
      source.prepare
    end

    # Returns the transformed copy of the wrapped source's current value,
    # or nil when that value is nil/false. The value is deep-duplicated
    # first so #transform works on a copy rather than the upstream object.
    def value
      copy = source.value.deep_dup
      return unless copy

      transform(copy)
    end

    # Override to change a row; the base implementation is the identity.
    def transform(row)
      row
    end

  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'arc-furnace/source'
|
2
|
+
|
3
|
+
# An unfold is the reverse of a fold--it takes a single value and blows it out
|
4
|
+
# into an enumeration of values. Useful for splitting up rows into multiple output
|
5
|
+
# rows and whatnot. Only #unfold is required for implementation.
|
6
|
+
module ArcFurnace
  # A source that expands each row of an upstream source into a list of
  # rows and hands them out one at a time. Subclasses implement #unfold.
  class Unfold < Source

    private_attr_reader :source, :unfolded
    attr_reader :value

    # source: the upstream node. Note that the constructor advances
    # immediately, so the upstream source is read during construction.
    def initialize(source:)
      @source = source
      advance
    end

    # Prepares the upstream source.
    def prepare
      source.prepare
    end

    # Given a row from the source, produce the unfolded rows as a result.
    # This method must return an array. The base implementation raises.
    def unfold(row)
      raise "Unimplemented!"
    end

    # Empty only once there is no current value and the upstream source
    # reports empty as well.
    def empty?
      return false unless value.nil?

      source.empty?
    end

    # Moves to the next unfolded row, pulling and unfolding further upstream
    # rows while the buffer has nothing left. Sets the current value to nil
    # when no rows remain.
    def advance
      until (!unfolded.nil? && !unfolded.empty?) || source.empty?
        @unfolded = unfold(source.row.deep_dup)
        # The buffer is kept reversed so that taking rows off the end with
        # #pop hands them out in their original order.
        unfolded.reverse!
      end

      @value = (unfolded && !unfolded.empty?) ? unfolded.pop : nil
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: arc-furnace
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Daniel Spangenberger
|
8
|
+
- Brian Tenggren
|
9
|
+
autorequire:
|
10
|
+
bindir: exe
|
11
|
+
cert_chain: []
|
12
|
+
date: 2015-10-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: msgpack
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0.6'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0.6'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: activesupport
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '3.2'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '3.2'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: eigenclass
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '2'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '2'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: roo
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '2.1'
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '2.1'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rake
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '10.0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '10.0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: rspec
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '3'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '3'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: ice_nine
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0.11'
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0.11'
|
112
|
+
description: 'An ETL library for Ruby that performs the basic actions of ETL: extract,
|
113
|
+
transform, and load. Easily extensible.'
|
114
|
+
email:
|
115
|
+
- dan@salsify.com
|
116
|
+
executables: []
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- ".gitignore"
|
121
|
+
- ".rspec"
|
122
|
+
- ".travis.yml"
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.md
|
126
|
+
- Rakefile
|
127
|
+
- arc-furnace.gemspec
|
128
|
+
- lib/arc-furnace.rb
|
129
|
+
- lib/arc-furnace/abstract_join.rb
|
130
|
+
- lib/arc-furnace/all_fields_csv_sink.rb
|
131
|
+
- lib/arc-furnace/binary_key_merging_hash.rb
|
132
|
+
- lib/arc-furnace/block_transform.rb
|
133
|
+
- lib/arc-furnace/block_unfold.rb
|
134
|
+
- lib/arc-furnace/csv_sink.rb
|
135
|
+
- lib/arc-furnace/csv_source.rb
|
136
|
+
- lib/arc-furnace/csv_to_hash_with_duplicate_headers.rb
|
137
|
+
- lib/arc-furnace/dsl.rb
|
138
|
+
- lib/arc-furnace/enumerator_source.rb
|
139
|
+
- lib/arc-furnace/error_handler.rb
|
140
|
+
- lib/arc-furnace/excel_source.rb
|
141
|
+
- lib/arc-furnace/fixed_column_csv_sink.rb
|
142
|
+
- lib/arc-furnace/hash.rb
|
143
|
+
- lib/arc-furnace/inner_join.rb
|
144
|
+
- lib/arc-furnace/logging_error_handler.rb
|
145
|
+
- lib/arc-furnace/merging_hash.rb
|
146
|
+
- lib/arc-furnace/multi_csv_source.rb
|
147
|
+
- lib/arc-furnace/node.rb
|
148
|
+
- lib/arc-furnace/nodes.rb
|
149
|
+
- lib/arc-furnace/outer_join.rb
|
150
|
+
- lib/arc-furnace/pipeline.rb
|
151
|
+
- lib/arc-furnace/private_attr.rb
|
152
|
+
- lib/arc-furnace/sink.rb
|
153
|
+
- lib/arc-furnace/source.rb
|
154
|
+
- lib/arc-furnace/suffixed_fixed_column_csv_sink.rb
|
155
|
+
- lib/arc-furnace/transform.rb
|
156
|
+
- lib/arc-furnace/unfold.rb
|
157
|
+
- lib/arc-furnace/version.rb
|
158
|
+
homepage: http://github.com/salsify/arc-furnace
|
159
|
+
licenses:
|
160
|
+
- MIT
|
161
|
+
metadata: {}
|
162
|
+
post_install_message:
|
163
|
+
rdoc_options: []
|
164
|
+
require_paths:
|
165
|
+
- lib
|
166
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '0'
|
171
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
172
|
+
requirements:
|
173
|
+
- - ">="
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
requirements: []
|
177
|
+
rubyforge_project:
|
178
|
+
rubygems_version: 2.4.7
|
179
|
+
signing_key:
|
180
|
+
specification_version: 4
|
181
|
+
summary: Melds and transforms data from multiple sources into a single stream
|
182
|
+
test_files: []
|