masamune 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,356 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::DataPlan::Engine do
26
+ let(:filesystem) { Masamune::MockFilesystem.new }
27
+ let(:environment) { Masamune::Environment.new }
28
+ let(:engine) { Masamune::DataPlan::Engine.new }
29
+
30
+ before do
31
+ environment.filesystem = filesystem
32
+ engine.environment = environment
33
+ end
34
+
35
+ let(:command) do
36
+ Proc.new do |engine, rule|
37
+ missing_targets = []
38
+ engine.targets(rule).missing.each do |target|
39
+ missing_targets << target.path if target.sources.existing.any?
40
+ end
41
+ engine.filesystem.touch!(*missing_targets.map { |target| File.join(target, 'DATA') }) if missing_targets.any?
42
+ end
43
+ end
44
+
45
+ before do
46
+ engine.add_target_rule('non_primary', path: '/table/y=%Y/m=%m/d=%d', primary: false)
47
+ engine.add_source_rule('non_primary', path: '/log/%Y%m%d.*.log', primary: false)
48
+ engine.add_command_rule('non_primary', ->(*_) { fail } )
49
+ engine.add_target_rule('primary', path: '/table/y=%Y/m=%m/d=%d')
50
+ engine.add_source_rule('primary', path: '/log/%Y%m%d.*.log')
51
+ engine.add_command_rule('primary', command)
52
+ engine.add_target_rule('derived_daily', path: '/daily/%Y-%m-%d')
53
+ engine.add_source_rule('derived_daily', path: '/table/y=%Y/m=%m/d=%d')
54
+ engine.add_command_rule('derived_daily', command)
55
+ engine.add_target_rule('derived_monthly', path: '/monthly/%Y-%m')
56
+ engine.add_source_rule('derived_monthly', path: '/table/y=%Y/m=%m/d=%d')
57
+ engine.add_command_rule('derived_monthly', command)
58
+ end
59
+
60
+ describe '#filesystem' do
61
+ it { expect(engine.filesystem).to be_a(Masamune::CachedFilesystem) }
62
+ it { expect(environment.filesystem).to be_a(Masamune::MockFilesystem) }
63
+ end
64
+
65
+ describe '#targets_for_date_range' do
66
+ let(:start) { Date.civil(2013,01,01) }
67
+ let(:stop) { Date.civil(2013,01,03) }
68
+
69
+ subject { engine.targets_for_date_range(rule, start, stop).map(&:path) }
70
+
71
+ context 'primary' do
72
+ let(:rule) { 'primary' }
73
+ it { is_expected.to include '/table/y=2013/m=01/d=01' }
74
+ it { is_expected.to include '/table/y=2013/m=01/d=02' }
75
+ it { is_expected.to include '/table/y=2013/m=01/d=03' }
76
+ it 'has 3 items' do
77
+ expect(subject.size).to eq(3)
78
+ end
79
+ end
80
+
81
+ context 'derived_daily' do
82
+ let(:rule) { 'derived_daily' }
83
+ it { is_expected.to include '/daily/2013-01-01' }
84
+ it { is_expected.to include '/daily/2013-01-02' }
85
+ it { is_expected.to include '/daily/2013-01-03' }
86
+ it 'has 3 items' do
87
+ expect(subject.size).to eq(3)
88
+ end
89
+ end
90
+
91
+ context 'derived_monthly' do
92
+ let(:rule) { 'derived_monthly' }
93
+ it { is_expected.to include '/monthly/2013-01' }
94
+ it 'has 1 item' do
95
+ expect(subject.size).to eq(1)
96
+ end
97
+ end
98
+ end
99
+
100
+ describe '#targets_for_source' do
101
+ subject(:targets) do
102
+ engine.targets_for_source(rule, source)
103
+ end
104
+
105
+ context 'primary' do
106
+ let(:rule) { 'primary' }
107
+ let(:source) { '/log/20130101.random.log' }
108
+
109
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
110
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,01,02)) }
111
+ it { expect(targets.first.path).to eq('/table/y=2013/m=01/d=01') }
112
+ end
113
+
114
+ context 'derived_daily' do
115
+ let(:rule) { 'derived_daily' }
116
+ let(:source) { '/table/y=2013/m=01/d=01' }
117
+
118
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
119
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,01,02)) }
120
+ it { expect(targets.first.path).to eq('/daily/2013-01-01') }
121
+ end
122
+
123
+ context 'derived_monthly' do
124
+ let(:rule) { 'derived_monthly' }
125
+ let(:source) { '/table/y=2013/m=01/d=01' }
126
+
127
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
128
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,02,01)) }
129
+ it { expect(targets.first.path).to eq('/monthly/2013-01') }
130
+ end
131
+ end
132
+
133
+ describe '#sources_for_target' do
134
+ subject(:sources) do
135
+ engine.sources_for_target(rule, target)
136
+ end
137
+
138
+ subject(:existing) do
139
+ sources.existing
140
+ end
141
+
142
+ before do
143
+ engine.filesystem.touch!('/log/20130101.app1.log')
144
+ engine.filesystem.touch!('/log/20130101.app2.log')
145
+ engine.filesystem.touch!('/log/20130104.app1.log')
146
+ engine.filesystem.touch!('/log/20130104.app2.log')
147
+ end
148
+
149
+ context 'valid target associated with wildcard source' do
150
+ let(:rule) { 'primary' }
151
+ let(:target) { '/table/y=2013/m=01/d=01' }
152
+
153
+ it { expect(sources.size).to eq(1) }
154
+ it { expect(sources).to include '/log/20130101.*.log' }
155
+ it { expect(existing.size).to eq(2) }
156
+ it { expect(existing).to include '/log/20130101.app1.log' }
157
+ it { expect(existing).to include '/log/20130101.app2.log' }
158
+ end
159
+
160
+ context 'valid target associated with a single source file' do
161
+ let(:rule) { 'derived_daily' }
162
+ let(:target) { '/daily/2013-01-03' }
163
+
164
+ it { expect(sources).to include '/table/y=2013/m=01/d=03' }
165
+ end
166
+
167
+ context 'valid target associated with a group of source files' do
168
+ let(:rule) { 'derived_monthly' }
169
+ let(:target) { '/monthly/2013-01' }
170
+
171
+ (1..31).each do |day|
172
+ it { expect(sources).to include '/table/y=2013/m=01/d=%02d' % day }
173
+ end
174
+ it { expect(sources.size).to eq(31) }
175
+ end
176
+
177
+ context 'invalid target' do
178
+ let(:rule) { 'derived_daily' }
179
+ let(:target) { '/table/y=2013/m=01/d=01' }
180
+ it { expect { subject }.to raise_error }
181
+ end
182
+ end
183
+
184
+ describe '#rule_for_target' do
185
+ subject { engine.rule_for_target(target) }
186
+
187
+ context 'primary source' do
188
+ let(:target) { '/log/20130101.random_1.log' }
189
+ it { is_expected.to eq(Masamune::DataPlan::Rule::TERMINAL) }
190
+ end
191
+
192
+ context 'primary target' do
193
+ let(:target) { '/table/y=2013/m=01/d=01' }
194
+ it { is_expected.to eq('primary') }
195
+ end
196
+
197
+ context 'derived_daily target' do
198
+ let(:target) { '/daily/2013-01-03' }
199
+ it { is_expected.to eq('derived_daily') }
200
+ end
201
+
202
+ context 'derived_monthly target' do
203
+ let(:target) { '/monthly/2013-01' }
204
+ it { is_expected.to eq('derived_monthly') }
205
+ end
206
+
207
+ context 'invalid target' do
208
+ let(:target) { '/daily' }
209
+ it { expect { subject }.to raise_error }
210
+ end
211
+ end
212
+
213
+ describe '#prepare' do
214
+ before do
215
+ engine.prepare(rule, options)
216
+ end
217
+
218
+ subject(:targets) do
219
+ engine.targets(rule)
220
+ end
221
+
222
+ subject(:sources) do
223
+ engine.sources(rule)
224
+ end
225
+
226
+ context 'with :targets' do
227
+ let(:rule) { 'primary' }
228
+
229
+ let(:options) { {targets: ['/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=02']} }
230
+
231
+ it { expect(targets).to include '/table/y=2013/m=01/d=01' }
232
+ it { expect(targets).to include '/table/y=2013/m=01/d=02' }
233
+ it { expect(sources).to include '/log/20130101.*.log' }
234
+ it { expect(sources).to include '/log/20130102.*.log' }
235
+ end
236
+
237
+ context 'with :sources' do
238
+ let(:rule) { 'derived_daily' }
239
+
240
+ let(:options) { {sources: ['/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=02']} }
241
+
242
+ it { expect(targets).to include "/daily/2013-01-01" }
243
+ it { expect(targets).to include "/daily/2013-01-02" }
244
+ it { expect(sources).to include '/table/y=2013/m=01/d=01' }
245
+ it { expect(sources).to include '/table/y=2013/m=01/d=02' }
246
+ end
247
+ end
248
+
249
+ describe '#execute' do
250
+ let(:options) { {} }
251
+
252
+ before do
253
+ engine.prepare(rule, targets: targets)
254
+ end
255
+
256
+ subject(:execute) do
257
+ engine.execute(rule, options)
258
+ end
259
+
260
+ context 'primary rule' do
261
+ let(:rule) { 'primary' }
262
+ let(:targets) { [
263
+ '/table/y=2013/m=01/d=01',
264
+ '/table/y=2013/m=01/d=02',
265
+ '/table/y=2013/m=01/d=03' ] }
266
+
267
+ context 'when target data exists' do
268
+ before do
269
+ engine.filesystem.touch!('/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=03')
270
+ expect(engine.filesystem).to receive(:touch!).never
271
+ execute
272
+ end
273
+
274
+ it 'should not call touch!' do; end
275
+ end
276
+
277
+ context 'when partial target data exists' do
278
+ before do
279
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
280
+ engine.filesystem.touch!('/table/y=2013/m=01/d=01/DATA', '/table/y=2013/m=01/d=03/DATA')
281
+ expect(engine.filesystem).to receive(:touch!).with('/table/y=2013/m=01/d=02/DATA').and_call_original
282
+ execute
283
+ end
284
+
285
+ it 'should call touch!' do; end
286
+ end
287
+
288
+ context 'when source data does not exist' do
289
+ before do
290
+ expect(engine.filesystem).to receive(:touch!).never
291
+ execute
292
+ end
293
+
294
+ it 'should not call touch!' do; end
295
+ end
296
+ end
297
+
298
+ shared_examples_for 'derived daily data' do
299
+ context 'when primary target data exists' do
300
+ let(:derived_targets) { ['/table/y=2013/m=01/d=01/DATA', '/table/y=2013/m=01/d=02/DATA', '/table/y=2013/m=01/d=03/DATA'] }
301
+
302
+ before do
303
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
304
+ expect(engine.filesystem).to receive(:touch!).with(*derived_targets).and_call_original
305
+ expect(engine.filesystem).to receive(:touch!).with(*targets).and_call_original
306
+ execute
307
+ end
308
+
309
+ it 'should call touch!' do; end
310
+ end
311
+
312
+ context 'when primary target data exists and :resolve is false' do
313
+ let(:options) { {resolve: false} }
314
+
315
+ before do
316
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
317
+ expect(engine.filesystem).not_to receive(:touch!)
318
+ execute
319
+ end
320
+
321
+ it 'should not call touch!' do; end
322
+ end
323
+ end
324
+
325
+ context 'derived_daily rule' do
326
+ let(:rule) { 'derived_daily' }
327
+ let(:targets) { ['/daily/2013-01-01/DATA', '/daily/2013-01-02/DATA', '/daily/2013-01-03/DATA'] }
328
+
329
+ it_behaves_like 'derived daily data' do
330
+ let(:derived_command) { derived_daily_command }
331
+ end
332
+ end
333
+
334
+ context 'derived_monthly rule' do
335
+ let(:rule) { 'derived_monthly' }
336
+ let(:targets) { ['/monthly/2013-01/DATA'] }
337
+
338
+ it_behaves_like 'derived daily data' do
339
+ let(:derived_command) { derived_monthly_command }
340
+ end
341
+ end
342
+ end
343
+
344
+ context 'recursive engines' do
345
+ before do
346
+ engine.add_target_rule('primary', path: '/table/y=%Y/m=%m/d=%d')
347
+ engine.add_source_rule('primary', path: '/log/%Y%m%d.*.log')
348
+ engine.add_source_rule('derived', path: '/table/y=%Y/m=%m/d=%d')
349
+ engine.add_target_rule('derived', path: '/log/%Y%m%d.*.log')
350
+ end
351
+
352
+ it 'should raise exception' do
353
+ expect { engine.prepare('derived', targets: ['/log/20140228.wtf.log']) }.to raise_error /Max depth .* exceeded for rule 'derived'/
354
+ end
355
+ end
356
+ end
@@ -0,0 +1,407 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::DataPlan::Rule do
26
+ let(:engine) { Masamune::DataPlan::Engine.new }
27
+ let(:name) { 'primary' }
28
+ let(:type) { :target }
29
+ let(:pattern) { 'report/%Y-%m-%d/%H' }
30
+ let(:options) { {path: pattern} }
31
+
32
+ let(:instance) { described_class.new(engine, name, type, options) }
33
+
34
+ describe '#pattern' do
35
+ subject do
36
+ instance.pattern
37
+ end
38
+
39
+ context 'with string' do
40
+ let(:pattern) { 'report/%Y-%m-%d/%H' }
41
+ it { is_expected.to eq('report/%Y-%m-%d/%H') }
42
+ end
43
+
44
+ context 'with lambda' do
45
+ let(:pattern) { lambda { |_| 'report/%Y-%m-%d/%H' } }
46
+ it { is_expected.to eq('report/%Y-%m-%d/%H') }
47
+ end
48
+ end
49
+
50
+ describe '#bind_date' do
51
+ subject(:elem) { instance.bind_date(input_date) }
52
+
53
+ context 'with default' do
54
+ let(:input_date) { DateTime.civil(2013,04,05,23,13) }
55
+
56
+ describe '#path' do
57
+ subject { elem.path }
58
+ it { is_expected.to eq('report/2013-04-05/23') }
59
+ end
60
+ let(:start_time) { DateTime.civil(2013,04,05,23) }
61
+ let(:stop_time) { DateTime.civil(2013,04,05,0) }
62
+ end
63
+
64
+ context 'with unix timestamp pattern' do
65
+ let(:pattern) { 'logs/%H-s.log' }
66
+ let(:input_date) { DateTime.civil(2013,04,05,23,13) }
67
+
68
+ describe '#path' do
69
+ subject { elem.path }
70
+ it { is_expected.to eq('logs/1365202800.log') }
71
+ end
72
+ let(:start_time) { DateTime.civil(2013,04,05,23) }
73
+ let(:stop_time) { DateTime.civil(2013,04,05,0) }
74
+ end
75
+ end
76
+
77
+ describe '#bind_input' do
78
+ subject(:elem) { instance.bind_input(input) }
79
+
80
+ context 'with default' do
81
+ let(:input) { 'report/2013-04-05/23' }
82
+ let(:output_date) { DateTime.civil(2013,04,05,23) }
83
+
84
+ describe '#path' do
85
+ subject { elem.path }
86
+ it { is_expected.to eq(input) }
87
+ end
88
+
89
+ describe '#start_time' do
90
+ subject { elem.start_time }
91
+ it { is_expected.to eq(output_date) }
92
+ end
93
+
94
+ describe '#stop_time' do
95
+ subject { elem.stop_time }
96
+ it { is_expected.to eq(output_date.to_time + 1.hour) }
97
+ end
98
+ end
99
+
100
+ context 'with unix timestamp pattern' do
101
+ let(:pattern) { 'logs/%H-s.log' }
102
+ let(:input) { 'logs/1365202800.log' }
103
+ let(:output_date) { DateTime.civil(2013,04,05,23) }
104
+
105
+ describe '#path' do
106
+ subject { elem.path }
107
+ it { is_expected.to eq(input) }
108
+ end
109
+
110
+ describe '#start_time' do
111
+ subject { elem.start_time }
112
+ it { is_expected.to eq(output_date) }
113
+ end
114
+
115
+ describe '#stop_time' do
116
+ subject { elem.stop_time }
117
+ it { is_expected.to eq(output_date.to_time + 1.hour) }
118
+ end
119
+ end
120
+
121
+ context 'with previously bound input' do
122
+ let(:prev_input) { 'report/2013-04-05/23' }
123
+ let(:input) { instance.bind_input(prev_input) }
124
+ it { is_expected.to eq(input) }
125
+ end
126
+ end
127
+
128
+ describe '#unify' do
129
+ let(:primary) { described_class.new(engine, name, type, {path: 'report/%Y-%m-%d/%H'}) }
130
+ let(:induced) { described_class.new(engine, name, type, {path: 'table/y=%Y/m=%m/d=%d/h=%H'}) }
131
+ let(:elem) { primary.bind_input(input) }
132
+
133
+ subject(:new_elem) { instance.unify(elem, induced) }
134
+
135
+ context 'when input fully matches basis pattern' do
136
+ let(:input) { 'report/2013-01-02/00' }
137
+
138
+ describe '#path' do
139
+ subject { new_elem.path }
140
+ it { is_expected.to eq('table/y=2013/m=01/d=02/h=00') }
141
+ end
142
+ end
143
+
144
+ context 'when input partially matches basis pattern' do
145
+ let(:induced) { described_class.new(engine, name, type, {path: 'table/%Y-%m'}) }
146
+
147
+ let(:input) { 'report/2013-01-02/00' }
148
+
149
+ describe '#path' do
150
+ subject { new_elem.path }
151
+ it { is_expected.to eq('table/2013-01') }
152
+ end
153
+ end
154
+ end
155
+
156
+ describe '#matches' do
157
+ subject do
158
+ instance.matches?(input)
159
+ end
160
+
161
+ context 'when input fully matches' do
162
+ let(:input) { 'report/2013-01-02/00' }
163
+ it { is_expected.to eq(true) }
164
+ end
165
+
166
+ context 'when input under matches' do
167
+ let(:input) { 'report/2013-01-02' }
168
+ it { is_expected.to eq(false) }
169
+ end
170
+
171
+ context 'when input over matches' do
172
+ let(:pattern) { 'report/%Y-%m-%d' }
173
+ let(:input) { 'report/2013-01-02/00' }
174
+ it { is_expected.to eq(false) }
175
+ end
176
+
177
+ context 'when input does not match' do
178
+ let(:input) { 'report' }
179
+ it { is_expected.to eq(false) }
180
+ end
181
+
182
+ context 'with alternative hour' do
183
+ let(:pattern) { 'requests/y=%Y/m=%-m/d=%-d/h=%-k' }
184
+ let(:input) { 'requests/y=2013/m=5/d=1/h=1' }
185
+ it { is_expected.to eq(true) }
186
+ end
187
+
188
+ context 'with alternative hour' do
189
+ let(:pattern) { 'requests/y=%Y/m=%-m/d=%-d/h=%-k' }
190
+ let(:input) { 'requests/y=2013/m=4/d=30/h=20' }
191
+ it { is_expected.to eq(true) }
192
+ end
193
+
194
+ context 'with wildcard pattern' do
195
+ let(:pattern) { 'request_logs/%Y%m%d*request.log' }
196
+ let(:input) { 'request_logs/20130524.random.request.log' }
197
+ it { is_expected.to eq(true) }
198
+ end
199
+
200
+ context 'with unix timestamp pattern' do
201
+ let(:pattern) { 'request_logs/%H-s.log' }
202
+ let(:input) { 'request_logs/1374192000.log' }
203
+ it { is_expected.to eq(true) }
204
+ end
205
+ end
206
+
207
+ describe '#generate' do
208
+ context 'with a block' do
209
+ let(:start_date) { DateTime.civil(2013,04,05,20) }
210
+ let(:stop_date) { DateTime.civil(2013,04,05,20) }
211
+ specify { expect { |b| instance.generate(start_date, stop_date, &b) }.to yield_control }
212
+ end
213
+
214
+ context 'without a block' do
215
+ let(:start_date) { DateTime.civil(2013,04,05,20) }
216
+ let(:stop_date) { DateTime.civil(2013,04,05,22) }
217
+
218
+ subject(:elems) do
219
+ instance.generate(start_date, stop_date)
220
+ end
221
+
222
+ it { expect(elems.map(&:path)).to eq(['report/2013-04-05/20', 'report/2013-04-05/21', 'report/2013-04-05/22']) }
223
+ end
224
+ end
225
+
226
+ describe '#time_step' do
227
+ subject { instance.time_step }
228
+
229
+ context '24 hour' do
230
+ let(:pattern) { '%Y-%m-%d/%k' }
231
+ it { is_expected.to eq(:hours) }
232
+ end
233
+ context '24 hour (condensed)' do
234
+ let(:pattern) { '%Y-%m-%d/%-k' }
235
+ it { is_expected.to eq(:hours) }
236
+ end
237
+ context '12 hour' do
238
+ let(:pattern) { '%Y-%m-%d/%H' }
239
+ it { is_expected.to eq(:hours) }
240
+ end
241
+ context '12 hour (condensed)' do
242
+ let(:pattern) { '%Y-%m-%d/%-H' }
243
+ it { is_expected.to eq(:hours) }
244
+ end
245
+ context 'daily' do
246
+ let(:pattern) { '%Y-%m-%d' }
247
+ it { is_expected.to eq(:days) }
248
+ end
249
+ context 'monthly' do
250
+ let(:pattern) { '%Y-%m' }
251
+ it { is_expected.to eq(:months) }
252
+ end
253
+ context 'yearly' do
254
+ let(:pattern) { '%Y' }
255
+ it { is_expected.to eq(:years) }
256
+ end
257
+ context 'hourly unix' do
258
+ let(:pattern) { '%H-s' }
259
+ it { is_expected.to eq(:hours) }
260
+ end
261
+ context 'daily unix' do
262
+ let(:pattern) { '%d-s' }
263
+ it { is_expected.to eq(:days) }
264
+ end
265
+ context 'monthly unix' do
266
+ let(:pattern) { '%m-s' }
267
+ it { is_expected.to eq(:months) }
268
+ end
269
+ context 'yearly unix' do
270
+ let(:pattern) { '%Y-s' }
271
+ it { is_expected.to eq(:years) }
272
+ end
273
+ end
274
+
275
+ describe '#time_round' do
276
+ let(:input_time) { DateTime.civil(2013,9,13,23,13) }
277
+ subject { instance.time_round(input_time) }
278
+
279
+ before do
280
+ allow(instance).to receive(:time_step) { time_step }
281
+ end
282
+
283
+ context 'hourly' do
284
+ let(:time_step) { :hours }
285
+ it { is_expected.to eq(DateTime.civil(2013,9,13,23)) }
286
+ end
287
+ context 'daily' do
288
+ let(:time_step) { :days }
289
+ it { is_expected.to eq(DateTime.civil(2013,9,13)) }
290
+ end
291
+ context 'monthly' do
292
+ let(:time_step) { :months }
293
+ it { is_expected.to eq(DateTime.civil(2013,9)) }
294
+ end
295
+ context 'yearly' do
296
+ let(:time_step) { :years }
297
+ it { is_expected.to eq(DateTime.civil(2013)) }
298
+ end
299
+ end
300
+
301
+ describe '#round' do
302
+ subject(:new_instance) { instance.round(grain) }
303
+
304
+ context 'with totally partitioned pattern' do
305
+ let(:pattern) { 'table/y=%Y/m=%m/d=%d/h=%H' }
306
+ context 'with :hour' do
307
+ let(:grain) { :hour }
308
+
309
+ describe '#pattern' do
310
+ subject { new_instance.pattern }
311
+ it { is_expected.to eq('table/y=%Y/m=%m/d=%d/h=%H') }
312
+ end
313
+ end
314
+
315
+ context 'with :day' do
316
+ let(:grain) { :day }
317
+
318
+ describe '#pattern' do
319
+ subject { new_instance.pattern }
320
+ it { is_expected.to eq('table/y=%Y/m=%m/d=%d') }
321
+ end
322
+ end
323
+
324
+ context 'with :month' do
325
+ let(:grain) { :month }
326
+
327
+ describe '#pattern' do
328
+ subject { new_instance.pattern }
329
+ it { is_expected.to eq('table/y=%Y/m=%m') }
330
+ end
331
+ end
332
+
333
+ context 'with :year' do
334
+ let(:grain) { :year }
335
+
336
+ describe '#pattern' do
337
+ subject { new_instance.pattern }
338
+ it { is_expected.to eq('table/y=%Y') }
339
+ end
340
+ end
341
+ end
342
+
343
+ context 'with partially partitioned pattern' do
344
+ let(:pattern) { 'table/%Y-%m-%d/%H' }
345
+
346
+ context 'with :hour' do
347
+ let(:grain) { :hour }
348
+
349
+ describe '#pattern' do
350
+ subject { new_instance.pattern }
351
+ it { is_expected.to eq('table/%Y-%m-%d/%H') }
352
+ end
353
+ end
354
+
355
+ context 'with :day' do
356
+ let(:grain) { :day }
357
+
358
+ describe '#pattern' do
359
+ subject { new_instance.pattern }
360
+ it { is_expected.to eq('table/%Y-%m-%d') }
361
+ end
362
+ end
363
+
364
+ context 'with :month' do
365
+ let(:grain) { :month }
366
+
367
+ describe '#pattern' do
368
+ subject { new_instance.pattern }
369
+ it { is_expected.to eq('table/%Y-%m-%d') }
370
+ end
371
+ end
372
+
373
+ context 'with :year' do
374
+ let(:grain) { :year }
375
+
376
+ describe '#pattern' do
377
+ subject { new_instance.pattern }
378
+ it { is_expected.to eq('table/%Y-%m-%d') }
379
+ end
380
+ end
381
+ end
382
+
383
+ context 'when cannot round due to granularity' do
384
+ shared_context 'cannot round' do
385
+ it { expect { subject }.to raise_error RuntimeError, /cannot round to :#{grain} for #{pattern}/ }
386
+ end
387
+
388
+ context 'with :hour' do
389
+ let(:grain) { :hour }
390
+ let(:pattern) { 'table/y=%Y/m=%m/d=%d' }
391
+ include_context 'cannot round'
392
+ end
393
+
394
+ context 'with :day' do
395
+ let(:grain) { :day }
396
+ let(:pattern) { 'table/y=%Y/m=%m' }
397
+ include_context 'cannot round'
398
+ end
399
+
400
+ context 'with :month' do
401
+ let(:grain) { :month }
402
+ let(:pattern) { 'table/y=%Y' }
403
+ include_context 'cannot round'
404
+ end
405
+ end
406
+ end
407
+ end