arc-furnace 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +127 -0
  8. data/Rakefile +9 -0
  9. data/arc-furnace.gemspec +30 -0
  10. data/lib/arc-furnace.rb +12 -0
  11. data/lib/arc-furnace/abstract_join.rb +53 -0
  12. data/lib/arc-furnace/all_fields_csv_sink.rb +68 -0
  13. data/lib/arc-furnace/binary_key_merging_hash.rb +38 -0
  14. data/lib/arc-furnace/block_transform.rb +18 -0
  15. data/lib/arc-furnace/block_unfold.rb +18 -0
  16. data/lib/arc-furnace/csv_sink.rb +21 -0
  17. data/lib/arc-furnace/csv_source.rb +33 -0
  18. data/lib/arc-furnace/csv_to_hash_with_duplicate_headers.rb +19 -0
  19. data/lib/arc-furnace/dsl.rb +6 -0
  20. data/lib/arc-furnace/enumerator_source.rb +36 -0
  21. data/lib/arc-furnace/error_handler.rb +27 -0
  22. data/lib/arc-furnace/excel_source.rb +38 -0
  23. data/lib/arc-furnace/fixed_column_csv_sink.rb +37 -0
  24. data/lib/arc-furnace/hash.rb +41 -0
  25. data/lib/arc-furnace/inner_join.rb +27 -0
  26. data/lib/arc-furnace/logging_error_handler.rb +0 -0
  27. data/lib/arc-furnace/merging_hash.rb +41 -0
  28. data/lib/arc-furnace/multi_csv_source.rb +50 -0
  29. data/lib/arc-furnace/node.rb +5 -0
  30. data/lib/arc-furnace/nodes.rb +19 -0
  31. data/lib/arc-furnace/outer_join.rb +14 -0
  32. data/lib/arc-furnace/pipeline.rb +158 -0
  33. data/lib/arc-furnace/private_attr.rb +28 -0
  34. data/lib/arc-furnace/sink.rb +21 -0
  35. data/lib/arc-furnace/source.rb +40 -0
  36. data/lib/arc-furnace/suffixed_fixed_column_csv_sink.rb +18 -0
  37. data/lib/arc-furnace/transform.rb +28 -0
  38. data/lib/arc-furnace/unfold.rb +45 -0
  39. data/lib/arc-furnace/version.rb +3 -0
  40. metadata +182 -0
@@ -0,0 +1,28 @@
1
+ class Module
2
+
3
+ private
4
+
5
+ # Meta-programming to easily create private attribute reader methods.
6
+ def private_attr_reader(*attrs)
7
+ attr_reader(*attrs)
8
+ private(*attrs)
9
+ end
10
+
11
+ # Meta-programming to easily create private attribute writer methods.
12
+ def private_attr_writer(*attrs)
13
+ attr_writer(*attrs)
14
+ private(*attrs.map { |attr| "#{attr}=".to_sym })
15
+ end
16
+
17
+ # Meta-programming to easily create private attribute accessor methods.
18
+ def private_attr_accessor(*attrs)
19
+ private_attr_reader(*attrs)
20
+ private_attr_writer(*attrs)
21
+ end
22
+
23
+ def private_alias_method(new_name, old_name)
24
+ alias_method(new_name, old_name)
25
+ private(new_name)
26
+ end
27
+
28
+ end
@@ -0,0 +1,21 @@
1
+ module ArcFurnace
2
+ class Sink
3
+
4
+ # The only required method to implement. #row is called for each output row and
5
+ # a sink must handle each.
6
+ def row(row)
7
+ raise "Unimplemented!"
8
+ end
9
+
10
+ # Handle any pre-processing here.
11
+ def prepare
12
+
13
+ end
14
+
15
+ # If the sink needs to perform any clean-up (closing file handles, etc),
16
+ # do it here.
17
+ def finalize
18
+
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,40 @@
1
+ require 'arc-furnace/node'
2
+
3
+ module ArcFurnace
4
+ class Source < Node
5
+ extend Forwardable
6
+
7
+ def prepare
8
+
9
+ end
10
+
11
+ # Advance this source by one, returning the row as a hash
12
+ def row
13
+ result = value
14
+ advance
15
+ result
16
+ end
17
+
18
+ # Is this source empty?
19
+ def empty?
20
+
21
+ end
22
+
23
+ # The current value this source points at
24
+ # This is generally the only method required to implement a source.
25
+ def value
26
+
27
+ end
28
+
29
+ # Close the source
30
+ def close
31
+
32
+ end
33
+
34
+ # Advance this source by one row. The return value of #advance is unspecified.
35
+ def advance
36
+
37
+ end
38
+
39
+ end
40
+ end
@@ -0,0 +1,18 @@
1
+ require 'arc-furnace/fixed_column_csv_sink'
2
+
3
+ module ArcFurnace
4
+ class SuffixedFixedColumnCSVSink < FixedColumnCSVSink
5
+ private_attr_reader :fields
6
+
7
+ def write_header
8
+ csv << fields.each_with_object([]) do |(key, count), result|
9
+ if count > 1
10
+ count.times { |index| result << "#{key} #{index + 1}" }
11
+ else
12
+ result << key
13
+ end
14
+ end
15
+ end
16
+
17
+ end
18
+ end
@@ -0,0 +1,28 @@
1
+ require 'arc-furnace/source'
2
+
3
+ module ArcFurnace
4
+ class Transform < Source
5
+
6
+ private_attr_reader :source
7
+
8
+ def initialize(source:)
9
+ @source = source
10
+ end
11
+
12
+ def prepare
13
+ source.prepare
14
+ end
15
+
16
+ def value
17
+ value = source.value.deep_dup
18
+ transform(value) if value
19
+ end
20
+
21
+ def transform(row)
22
+ row
23
+ end
24
+
25
+ delegate [:empty?, :advance] => :source
26
+
27
+ end
28
+ end
@@ -0,0 +1,45 @@
1
+ require 'arc-furnace/source'
2
+
3
+ # An unfold is the reverse of a fold--it takes a single value and blows it out
4
+ # into an enumeration of values. Useful for splitting up rows into multiple output
5
+ # rows and whatnot. Only #unfold is required for implementation.
6
+ module ArcFurnace
7
+ class Unfold < Source
8
+
9
+ private_attr_reader :source, :unfolded
10
+ attr_reader :value
11
+
12
+ def initialize(source:)
13
+ @source = source
14
+ advance
15
+ end
16
+
17
+ def prepare
18
+ source.prepare
19
+ end
20
+
21
+ # Given a row from the source, produce the unfolded rows as a result. This method must return
22
+ # an array.
23
+ def unfold(row)
24
+ raise "Unimplemented!"
25
+ end
26
+
27
+ def empty?
28
+ value.nil? && source.empty?
29
+ end
30
+
31
+ def advance
32
+ while (unfolded.nil? || unfolded.empty?) && !source.empty?
33
+ # Use reverse since we want to process in-order, but #pop is much faster than #shift
34
+ @unfolded = unfold(source.row.deep_dup)
35
+ unfolded.reverse!
36
+ end
37
+ if unfolded && !unfolded.empty?
38
+ @value = unfolded.pop
39
+ else
40
+ @value = nil
41
+ end
42
+ end
43
+
44
+ end
45
+ end
@@ -0,0 +1,3 @@
1
+ module ArcFurnace
2
+ VERSION = "0.1.0"
3
+ end
metadata ADDED
@@ -0,0 +1,182 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: arc-furnace
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Spangenberger
8
+ - Brian Tenggren
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2015-10-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: msgpack
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '0.6'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '0.6'
28
+ - !ruby/object:Gem::Dependency
29
+ name: activesupport
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '3.2'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '3.2'
42
+ - !ruby/object:Gem::Dependency
43
+ name: eigenclass
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '2'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '2'
56
+ - !ruby/object:Gem::Dependency
57
+ name: roo
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '2.1'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '2.1'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rake
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '10.0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '10.0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: rspec
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '3'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '3'
98
+ - !ruby/object:Gem::Dependency
99
+ name: ice_nine
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0.11'
105
+ type: :development
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: '0.11'
112
+ description: 'An ETL library for Ruby that performs the basic actions of ETL: extract,
113
+ transform, and load. Easily extensible.'
114
+ email:
115
+ - dan@salsify.com
116
+ executables: []
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - ".gitignore"
121
+ - ".rspec"
122
+ - ".travis.yml"
123
+ - Gemfile
124
+ - LICENSE.txt
125
+ - README.md
126
+ - Rakefile
127
+ - arc-furnace.gemspec
128
+ - lib/arc-furnace.rb
129
+ - lib/arc-furnace/abstract_join.rb
130
+ - lib/arc-furnace/all_fields_csv_sink.rb
131
+ - lib/arc-furnace/binary_key_merging_hash.rb
132
+ - lib/arc-furnace/block_transform.rb
133
+ - lib/arc-furnace/block_unfold.rb
134
+ - lib/arc-furnace/csv_sink.rb
135
+ - lib/arc-furnace/csv_source.rb
136
+ - lib/arc-furnace/csv_to_hash_with_duplicate_headers.rb
137
+ - lib/arc-furnace/dsl.rb
138
+ - lib/arc-furnace/enumerator_source.rb
139
+ - lib/arc-furnace/error_handler.rb
140
+ - lib/arc-furnace/excel_source.rb
141
+ - lib/arc-furnace/fixed_column_csv_sink.rb
142
+ - lib/arc-furnace/hash.rb
143
+ - lib/arc-furnace/inner_join.rb
144
+ - lib/arc-furnace/logging_error_handler.rb
145
+ - lib/arc-furnace/merging_hash.rb
146
+ - lib/arc-furnace/multi_csv_source.rb
147
+ - lib/arc-furnace/node.rb
148
+ - lib/arc-furnace/nodes.rb
149
+ - lib/arc-furnace/outer_join.rb
150
+ - lib/arc-furnace/pipeline.rb
151
+ - lib/arc-furnace/private_attr.rb
152
+ - lib/arc-furnace/sink.rb
153
+ - lib/arc-furnace/source.rb
154
+ - lib/arc-furnace/suffixed_fixed_column_csv_sink.rb
155
+ - lib/arc-furnace/transform.rb
156
+ - lib/arc-furnace/unfold.rb
157
+ - lib/arc-furnace/version.rb
158
+ homepage: http://github.com/salsify/arc-furnace
159
+ licenses:
160
+ - MIT
161
+ metadata: {}
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - ">="
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ required_rubygems_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ requirements: []
177
+ rubyforge_project:
178
+ rubygems_version: 2.4.7
179
+ signing_key:
180
+ specification_version: 4
181
+ summary: Melds and transforms data from multiple sources into a single stream
182
+ test_files: []