masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,356 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::DataPlan::Engine do
26
+ let(:filesystem) { Masamune::MockFilesystem.new }
27
+ let(:environment) { Masamune::Environment.new }
28
+ let(:engine) { Masamune::DataPlan::Engine.new }
29
+
30
+ before do
31
+ environment.filesystem = filesystem
32
+ engine.environment = environment
33
+ end
34
+
35
+ let(:command) do
36
+ Proc.new do |engine, rule|
37
+ missing_targets = []
38
+ engine.targets(rule).missing.each do |target|
39
+ missing_targets << target.path if target.sources.existing.any?
40
+ end
41
+ engine.filesystem.touch!(*missing_targets.map { |target| File.join(target, 'DATA') }) if missing_targets.any?
42
+ end
43
+ end
44
+
45
+ before do
46
+ engine.add_target_rule('non_primary', path: '/table/y=%Y/m=%m/d=%d', primary: false)
47
+ engine.add_source_rule('non_primary', path: '/log/%Y%m%d.*.log', primary: false)
48
+ engine.add_command_rule('non_primary', ->(*_) { fail } )
49
+ engine.add_target_rule('primary', path: '/table/y=%Y/m=%m/d=%d')
50
+ engine.add_source_rule('primary', path: '/log/%Y%m%d.*.log')
51
+ engine.add_command_rule('primary', command)
52
+ engine.add_target_rule('derived_daily', path: '/daily/%Y-%m-%d')
53
+ engine.add_source_rule('derived_daily', path: '/table/y=%Y/m=%m/d=%d')
54
+ engine.add_command_rule('derived_daily', command)
55
+ engine.add_target_rule('derived_monthly', path: '/monthly/%Y-%m')
56
+ engine.add_source_rule('derived_monthly', path: '/table/y=%Y/m=%m/d=%d')
57
+ engine.add_command_rule('derived_monthly', command)
58
+ end
59
+
60
+ describe '#filesystem' do
61
+ it { expect(engine.filesystem).to be_a(Masamune::CachedFilesystem) }
62
+ it { expect(environment.filesystem).to be_a(Masamune::MockFilesystem) }
63
+ end
64
+
65
+ describe '#targets_for_date_range' do
66
+ let(:start) { Date.civil(2013,01,01) }
67
+ let(:stop) { Date.civil(2013,01,03) }
68
+
69
+ subject { engine.targets_for_date_range(rule, start, stop).map(&:path) }
70
+
71
+ context 'primary' do
72
+ let(:rule) { 'primary' }
73
+ it { is_expected.to include '/table/y=2013/m=01/d=01' }
74
+ it { is_expected.to include '/table/y=2013/m=01/d=02' }
75
+ it { is_expected.to include '/table/y=2013/m=01/d=03' }
76
+ it 'has 3 items' do
77
+ expect(subject.size).to eq(3)
78
+ end
79
+ end
80
+
81
+ context 'derived_daily' do
82
+ let(:rule) { 'derived_daily' }
83
+ it { is_expected.to include '/daily/2013-01-01' }
84
+ it { is_expected.to include '/daily/2013-01-02' }
85
+ it { is_expected.to include '/daily/2013-01-03' }
86
+ it 'has 3 items' do
87
+ expect(subject.size).to eq(3)
88
+ end
89
+ end
90
+
91
+ context 'derived_monthly' do
92
+ let(:rule) { 'derived_monthly' }
93
+ it { is_expected.to include '/monthly/2013-01' }
94
+ it 'has 1 item' do
95
+ expect(subject.size).to eq(1)
96
+ end
97
+ end
98
+ end
99
+
100
+ describe '#targets_for_source' do
101
+ subject(:targets) do
102
+ engine.targets_for_source(rule, source)
103
+ end
104
+
105
+ context 'primary' do
106
+ let(:rule) { 'primary' }
107
+ let(:source) { '/log/20130101.random.log' }
108
+
109
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
110
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,01,02)) }
111
+ it { expect(targets.first.path).to eq('/table/y=2013/m=01/d=01') }
112
+ end
113
+
114
+ context 'derived_daily' do
115
+ let(:rule) { 'derived_daily' }
116
+ let(:source) { '/table/y=2013/m=01/d=01' }
117
+
118
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
119
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,01,02)) }
120
+ it { expect(targets.first.path).to eq('/daily/2013-01-01') }
121
+ end
122
+
123
+ context 'derived_monthly' do
124
+ let(:rule) { 'derived_monthly' }
125
+ let(:source) { '/table/y=2013/m=01/d=01' }
126
+
127
+ it { expect(targets.first.start_time).to eq(Date.civil(2013,01,01)) }
128
+ it { expect(targets.first.stop_time).to eq(Date.civil(2013,02,01)) }
129
+ it { expect(targets.first.path).to eq('/monthly/2013-01') }
130
+ end
131
+ end
132
+
133
+ describe '#sources_for_target' do
134
+ subject(:sources) do
135
+ engine.sources_for_target(rule, target)
136
+ end
137
+
138
+ subject(:existing) do
139
+ sources.existing
140
+ end
141
+
142
+ before do
143
+ engine.filesystem.touch!('/log/20130101.app1.log')
144
+ engine.filesystem.touch!('/log/20130101.app2.log')
145
+ engine.filesystem.touch!('/log/20130104.app1.log')
146
+ engine.filesystem.touch!('/log/20130104.app2.log')
147
+ end
148
+
149
+ context 'valid target associated with wildcard source' do
150
+ let(:rule) { 'primary' }
151
+ let(:target) { '/table/y=2013/m=01/d=01' }
152
+
153
+ it { expect(sources.size).to eq(1) }
154
+ it { expect(sources).to include '/log/20130101.*.log' }
155
+ it { expect(existing.size).to eq(2) }
156
+ it { expect(existing).to include '/log/20130101.app1.log' }
157
+ it { expect(existing).to include '/log/20130101.app2.log' }
158
+ end
159
+
160
+ context 'valid target associated with a single source file' do
161
+ let(:rule) { 'derived_daily' }
162
+ let(:target) { '/daily/2013-01-03' }
163
+
164
+ it { expect(sources).to include '/table/y=2013/m=01/d=03' }
165
+ end
166
+
167
+ context 'valid target associated with a group of source files' do
168
+ let(:rule) { 'derived_monthly' }
169
+ let(:target) { '/monthly/2013-01' }
170
+
171
+ (1..31).each do |day|
172
+ it { expect(sources).to include '/table/y=2013/m=01/d=%02d' % day }
173
+ end
174
+ it { expect(sources.size).to eq(31) }
175
+ end
176
+
177
+ context 'invalid target' do
178
+ let(:rule) { 'derived_daily' }
179
+ let(:target) { '/table/y=2013/m=01/d=01' }
180
+ it { expect { subject }.to raise_error }
181
+ end
182
+ end
183
+
184
+ describe '#rule_for_target' do
185
+ subject { engine.rule_for_target(target) }
186
+
187
+ context 'primary source' do
188
+ let(:target) { '/log/20130101.random_1.log' }
189
+ it { is_expected.to eq(Masamune::DataPlan::Rule::TERMINAL) }
190
+ end
191
+
192
+ context 'primary target' do
193
+ let(:target) { '/table/y=2013/m=01/d=01' }
194
+ it { is_expected.to eq('primary') }
195
+ end
196
+
197
+ context 'derived_daily target' do
198
+ let(:target) { '/daily/2013-01-03' }
199
+ it { is_expected.to eq('derived_daily') }
200
+ end
201
+
202
+ context 'derived_monthly target' do
203
+ let(:target) { '/monthly/2013-01' }
204
+ it { is_expected.to eq('derived_monthly') }
205
+ end
206
+
207
+ context 'invalid target' do
208
+ let(:target) { '/daily' }
209
+ it { expect { subject }.to raise_error }
210
+ end
211
+ end
212
+
213
+ describe '#prepare' do
214
+ before do
215
+ engine.prepare(rule, options)
216
+ end
217
+
218
+ subject(:targets) do
219
+ engine.targets(rule)
220
+ end
221
+
222
+ subject(:sources) do
223
+ engine.sources(rule)
224
+ end
225
+
226
+ context 'with :targets' do
227
+ let(:rule) { 'primary' }
228
+
229
+ let(:options) { {targets: ['/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=02']} }
230
+
231
+ it { expect(targets).to include '/table/y=2013/m=01/d=01' }
232
+ it { expect(targets).to include '/table/y=2013/m=01/d=02' }
233
+ it { expect(sources).to include '/log/20130101.*.log' }
234
+ it { expect(sources).to include '/log/20130102.*.log' }
235
+ end
236
+
237
+ context 'with :sources' do
238
+ let(:rule) { 'derived_daily' }
239
+
240
+ let(:options) { {sources: ['/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=02']} }
241
+
242
+ it { expect(targets).to include "/daily/2013-01-01" }
243
+ it { expect(targets).to include "/daily/2013-01-02" }
244
+ it { expect(sources).to include '/table/y=2013/m=01/d=01' }
245
+ it { expect(sources).to include '/table/y=2013/m=01/d=02' }
246
+ end
247
+ end
248
+
249
+ describe '#execute' do
250
+ let(:options) { {} }
251
+
252
+ before do
253
+ engine.prepare(rule, targets: targets)
254
+ end
255
+
256
+ subject(:execute) do
257
+ engine.execute(rule, options)
258
+ end
259
+
260
+ context 'primary rule' do
261
+ let(:rule) { 'primary' }
262
+ let(:targets) { [
263
+ '/table/y=2013/m=01/d=01',
264
+ '/table/y=2013/m=01/d=02',
265
+ '/table/y=2013/m=01/d=03' ] }
266
+
267
+ context 'when target data exists' do
268
+ before do
269
+ engine.filesystem.touch!('/table/y=2013/m=01/d=01', '/table/y=2013/m=01/d=02', '/table/y=2013/m=01/d=03')
270
+ expect(engine.filesystem).to receive(:touch!).never
271
+ execute
272
+ end
273
+
274
+ it 'should not call touch!' do; end
275
+ end
276
+
277
+ context 'when partial target data exists' do
278
+ before do
279
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
280
+ engine.filesystem.touch!('/table/y=2013/m=01/d=01/DATA', '/table/y=2013/m=01/d=03/DATA')
281
+ expect(engine.filesystem).to receive(:touch!).with('/table/y=2013/m=01/d=02/DATA').and_call_original
282
+ execute
283
+ end
284
+
285
+ it 'should call touch!' do; end
286
+ end
287
+
288
+ context 'when source data does not exist' do
289
+ before do
290
+ expect(engine.filesystem).to receive(:touch!).never
291
+ execute
292
+ end
293
+
294
+ it 'should not call touch!' do; end
295
+ end
296
+ end
297
+
298
+ shared_examples_for 'derived daily data' do
299
+ context 'when primary target data exists' do
300
+ let(:derived_targets) { ['/table/y=2013/m=01/d=01/DATA', '/table/y=2013/m=01/d=02/DATA', '/table/y=2013/m=01/d=03/DATA'] }
301
+
302
+ before do
303
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
304
+ expect(engine.filesystem).to receive(:touch!).with(*derived_targets).and_call_original
305
+ expect(engine.filesystem).to receive(:touch!).with(*targets).and_call_original
306
+ execute
307
+ end
308
+
309
+ it 'should call touch!' do; end
310
+ end
311
+
312
+ context 'when primary target data exists and :resolve is false' do
313
+ let(:options) { {resolve: false} }
314
+
315
+ before do
316
+ engine.filesystem.touch!('/log/20130101.app1.log', '/log/20130102.app1.log', '/log/20130103.app1.log')
317
+ expect(engine.filesystem).not_to receive(:touch!)
318
+ execute
319
+ end
320
+
321
+ it 'should not call touch!' do; end
322
+ end
323
+ end
324
+
325
+ context 'derived_daily rule' do
326
+ let(:rule) { 'derived_daily' }
327
+ let(:targets) { ['/daily/2013-01-01/DATA', '/daily/2013-01-02/DATA', '/daily/2013-01-03/DATA'] }
328
+
329
+ it_behaves_like 'derived daily data' do
330
+ let(:derived_command) { derived_daily_command }
331
+ end
332
+ end
333
+
334
+ context 'derived_monthly rule' do
335
+ let(:rule) { 'derived_monthly' }
336
+ let(:targets) { ['/monthly/2013-01/DATA'] }
337
+
338
+ it_behaves_like 'derived daily data' do
339
+ let(:derived_command) { derived_monthly_command }
340
+ end
341
+ end
342
+ end
343
+
344
+ context 'recursive engines' do
345
+ before do
346
+ engine.add_target_rule('primary', path: '/table/y=%Y/m=%m/d=%d')
347
+ engine.add_source_rule('primary', path: '/log/%Y%m%d.*.log')
348
+ engine.add_source_rule('derived', path: '/table/y=%Y/m=%m/d=%d')
349
+ engine.add_target_rule('derived', path: '/log/%Y%m%d.*.log')
350
+ end
351
+
352
+ it 'should raise exception' do
353
+ expect { engine.prepare('derived', targets: ['/log/20140228.wtf.log']) }.to raise_error /Max depth .* exceeded for rule 'derived'/
354
+ end
355
+ end
356
+ end
@@ -0,0 +1,407 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::DataPlan::Rule do
26
+ let(:engine) { Masamune::DataPlan::Engine.new }
27
+ let(:name) { 'primary' }
28
+ let(:type) { :target }
29
+ let(:pattern) { 'report/%Y-%m-%d/%H' }
30
+ let(:options) { {path: pattern} }
31
+
32
+ let(:instance) { described_class.new(engine, name, type, options) }
33
+
34
+ describe '#pattern' do
35
+ subject do
36
+ instance.pattern
37
+ end
38
+
39
+ context 'with string' do
40
+ let(:pattern) { 'report/%Y-%m-%d/%H' }
41
+ it { is_expected.to eq('report/%Y-%m-%d/%H') }
42
+ end
43
+
44
+ context 'with lambda' do
45
+ let(:pattern) { lambda { |_| 'report/%Y-%m-%d/%H' } }
46
+ it { is_expected.to eq('report/%Y-%m-%d/%H') }
47
+ end
48
+ end
49
+
50
+ describe '#bind_date' do
51
+ subject(:elem) { instance.bind_date(input_date) }
52
+
53
+ context 'with default' do # binds a DateTime to the default 'report/%Y-%m-%d/%H' pattern
54
+ let(:input_date) { DateTime.civil(2013,04,05,23,13) }
55
+
56
+ describe '#path' do
57
+ subject { elem.path }
58
+ it { is_expected.to eq('report/2013-04-05/23') }
59
+ end
60
+ let(:start_time) { DateTime.civil(2013,04,05,23) } # NOTE(review): not referenced by any example in this context — confirm whether a start_time assertion was intended
61
+ let(:stop_time) { DateTime.civil(2013,04,05,0) } # NOTE(review): unused, and earlier than start_time — verify the intended value before asserting on it
62
+ end
63
+
64
+ context 'with unix timestamp pattern' do # same input date, rendered through the '%H-s' epoch-style pattern
65
+ let(:pattern) { 'logs/%H-s.log' }
66
+ let(:input_date) { DateTime.civil(2013,04,05,23,13) }
67
+
68
+ describe '#path' do
69
+ subject { elem.path }
70
+ it { is_expected.to eq('logs/1365202800.log') } # 1365202800 is 2013-04-05 23:00:00 UTC, i.e. input_date truncated to the hour
71
+ end
72
+ let(:start_time) { DateTime.civil(2013,04,05,23) } # NOTE(review): not referenced by any example in this context — confirm whether a start_time assertion was intended
73
+ let(:stop_time) { DateTime.civil(2013,04,05,0) } # NOTE(review): unused, and earlier than start_time — verify the intended value before asserting on it
74
+ end
75
+ end
76
+
77
+ describe '#bind_input' do
78
+ subject(:elem) { instance.bind_input(input) }
79
+
80
+ context 'with default' do
81
+ let(:input) { 'report/2013-04-05/23' }
82
+ let(:output_date) { DateTime.civil(2013,04,05,23) }
83
+
84
+ describe '#path' do
85
+ subject { elem.path }
86
+ it { is_expected.to eq(input) }
87
+ end
88
+
89
+ describe '#start_time' do
90
+ subject { elem.start_time }
91
+ it { is_expected.to eq(output_date) }
92
+ end
93
+
94
+ describe '#stop_time' do
95
+ subject { elem.stop_time }
96
+ it { is_expected.to eq(output_date.to_time + 1.hour) }
97
+ end
98
+ end
99
+
100
+ context 'with unix timestamp pattern' do
101
+ let(:pattern) { 'logs/%H-s.log' }
102
+ let(:input) { 'logs/1365202800.log' }
103
+ let(:output_date) { DateTime.civil(2013,04,05,23) }
104
+
105
+ describe '#path' do
106
+ subject { elem.path }
107
+ it { is_expected.to eq(input) }
108
+ end
109
+
110
+ describe '#start_time' do
111
+ subject { elem.start_time }
112
+ it { is_expected.to eq(output_date) }
113
+ end
114
+
115
+ describe '#stop_time' do
116
+ subject { elem.stop_time }
117
+ it { is_expected.to eq(output_date.to_time + 1.hour) }
118
+ end
119
+ end
120
+
121
+ context 'with previously bound input' do
122
+ let(:prev_input) { 'report/2013-04-05/23' }
123
+ let(:input) { instance.bind_input(prev_input) }
124
+ it { is_expected.to eq(input) }
125
+ end
126
+ end
127
+
128
+ describe '#unify' do
129
+ let(:primary) { described_class.new(engine, name, type, {path: 'report/%Y-%m-%d/%H'}) }
130
+ let(:induced) { described_class.new(engine, name, type, {path: 'table/y=%Y/m=%m/d=%d/h=%H'}) }
131
+ let(:elem) { primary.bind_input(input) }
132
+
133
+ subject(:new_elem) { instance.unify(elem, induced) }
134
+
135
+ context 'when input fully matches basis pattern' do
136
+ let(:input) { 'report/2013-01-02/00' }
137
+
138
+ describe '#path' do
139
+ subject { new_elem.path }
140
+ it { is_expected.to eq('table/y=2013/m=01/d=02/h=00') }
141
+ end
142
+ end
143
+
144
+ context 'when input partially matches basis pattern' do
145
+ let(:induced) { described_class.new(engine, name, type, {path: 'table/%Y-%m'}) }
146
+
147
+ let(:input) { 'report/2013-01-02/00' }
148
+
149
+ describe '#path' do
150
+ subject { new_elem.path }
151
+ it { is_expected.to eq('table/2013-01') }
152
+ end
153
+ end
154
+ end
155
+
156
+ describe '#matches' do
157
+ subject do
158
+ instance.matches?(input)
159
+ end
160
+
161
+ context 'when input fully matches' do
162
+ let(:input) { 'report/2013-01-02/00' }
163
+ it { is_expected.to eq(true) }
164
+ end
165
+
166
+ context 'when input under matches' do
167
+ let(:input) { 'report/2013-01-02' }
168
+ it { is_expected.to eq(false) }
169
+ end
170
+
171
+ context 'when input over matches' do
172
+ let(:pattern) { 'report/%Y-%m-%d' }
173
+ let(:input) { 'report/2013-01-02/00' }
174
+ it { is_expected.to eq(false) }
175
+ end
176
+
177
+ context 'when input does not match' do
178
+ let(:input) { 'report' }
179
+ it { is_expected.to eq(false) }
180
+ end
181
+
182
+ context 'with alternative hour' do
183
+ let(:pattern) { 'requests/y=%Y/m=%-m/d=%-d/h=%-k' }
184
+ let(:input) { 'requests/y=2013/m=5/d=1/h=1' }
185
+ it { is_expected.to eq(true) }
186
+ end
187
+
188
+ context 'with alternative hour' do
189
+ let(:pattern) { 'requests/y=%Y/m=%-m/d=%-d/h=%-k' }
190
+ let(:input) { 'requests/y=2013/m=4/d=30/h=20' }
191
+ it { is_expected.to eq(true) }
192
+ end
193
+
194
+ context 'with wildcard pattern' do
195
+ let(:pattern) { 'request_logs/%Y%m%d*request.log' }
196
+ let(:input) { 'request_logs/20130524.random.request.log' }
197
+ it { is_expected.to eq(true) }
198
+ end
199
+
200
+ context 'with unix timestamp pattern' do
201
+ let(:pattern) { 'request_logs/%H-s.log' }
202
+ let(:input) { 'request_logs/1374192000.log' }
203
+ it { is_expected.to eq(true) }
204
+ end
205
+ end
206
+
207
+ describe '#generate' do
208
+ context 'with a block' do
209
+ let(:start_date) { DateTime.civil(2013,04,05,20) }
210
+ let(:stop_date) { DateTime.civil(2013,04,05,20) }
211
+ specify { expect { |b| instance.generate(start_date, stop_date, &b) }.to yield_control }
212
+ end
213
+
214
+ context 'without a block' do
215
+ let(:start_date) { DateTime.civil(2013,04,05,20) }
216
+ let(:stop_date) { DateTime.civil(2013,04,05,22) }
217
+
218
+ subject(:elems) do
219
+ instance.generate(start_date, stop_date)
220
+ end
221
+
222
+ it { expect(elems.map(&:path)).to eq(['report/2013-04-05/20', 'report/2013-04-05/21', 'report/2013-04-05/22']) }
223
+ end
224
+ end
225
+
226
+ describe '#time_step' do
227
+ subject { instance.time_step }
228
+
229
+ context '24 hour' do
230
+ let(:pattern) { '%Y-%m-%d/%k' }
231
+ it { is_expected.to eq(:hours) }
232
+ end
233
+ context '24 hour (condensed)' do
234
+ let(:pattern) { '%Y-%m-%d/%-k' }
235
+ it { is_expected.to eq(:hours) }
236
+ end
237
+ context '12 hour' do
238
+ let(:pattern) { '%Y-%m-%d/%H' }
239
+ it { is_expected.to eq(:hours) }
240
+ end
241
+ context '12 hour (condensed)' do
242
+ let(:pattern) { '%Y-%m-%d/%-H' }
243
+ it { is_expected.to eq(:hours) }
244
+ end
245
+ context 'daily' do
246
+ let(:pattern) { '%Y-%m-%d' }
247
+ it { is_expected.to eq(:days) }
248
+ end
249
+ context 'monthly' do
250
+ let(:pattern) { '%Y-%m' }
251
+ it { is_expected.to eq(:months) }
252
+ end
253
+ context 'yearly' do
254
+ let(:pattern) { '%Y' }
255
+ it { is_expected.to eq(:years) }
256
+ end
257
+ context 'hourly unix' do
258
+ let(:pattern) { '%H-s' }
259
+ it { is_expected.to eq(:hours) }
260
+ end
261
+ context 'daily unix' do
262
+ let(:pattern) { '%d-s' }
263
+ it { is_expected.to eq(:days) }
264
+ end
265
+ context 'monthly unix' do
266
+ let(:pattern) { '%m-s' }
267
+ it { is_expected.to eq(:months) }
268
+ end
269
+ context 'yearly unix' do
270
+ let(:pattern) { '%Y-s' }
271
+ it { is_expected.to eq(:years) }
272
+ end
273
+ end
274
+
275
+ describe '#time_round' do
276
+ let(:input_time) { DateTime.civil(2013,9,13,23,13) }
277
+ subject { instance.time_round(input_time) }
278
+
279
+ before do
280
+ allow(instance).to receive(:time_step) { time_step }
281
+ end
282
+
283
+ context 'hourly' do
284
+ let(:time_step) { :hours }
285
+ it { is_expected.to eq(DateTime.civil(2013,9,13,23)) }
286
+ end
287
+ context 'daily' do
288
+ let(:time_step) { :days }
289
+ it { is_expected.to eq(DateTime.civil(2013,9,13)) }
290
+ end
291
+ context 'monthly' do
292
+ let(:time_step) { :months }
293
+ it { is_expected.to eq(DateTime.civil(2013,9)) }
294
+ end
295
+ context 'yearly' do
296
+ let(:time_step) { :years }
297
+ it { is_expected.to eq(DateTime.civil(2013)) }
298
+ end
299
+ end
300
+
301
+ describe '#round' do
302
+ subject(:new_instance) { instance.round(grain) }
303
+
304
+ context 'with totally partitioned pattern' do
305
+ let(:pattern) { 'table/y=%Y/m=%m/d=%d/h=%H' }
306
+ context 'with :hour' do
307
+ let(:grain) { :hour }
308
+
309
+ describe '#pattern' do
310
+ subject { new_instance.pattern }
311
+ it { is_expected.to eq('table/y=%Y/m=%m/d=%d/h=%H') }
312
+ end
313
+ end
314
+
315
+ context 'with :day' do
316
+ let(:grain) { :day }
317
+
318
+ describe '#pattern' do
319
+ subject { new_instance.pattern }
320
+ it { is_expected.to eq('table/y=%Y/m=%m/d=%d') }
321
+ end
322
+ end
323
+
324
+ context 'with :month' do
325
+ let(:grain) { :month }
326
+
327
+ describe '#pattern' do
328
+ subject { new_instance.pattern }
329
+ it { is_expected.to eq('table/y=%Y/m=%m') }
330
+ end
331
+ end
332
+
333
+ context 'with :year' do
334
+ let(:grain) { :year }
335
+
336
+ describe '#pattern' do
337
+ subject { new_instance.pattern }
338
+ it { is_expected.to eq('table/y=%Y') }
339
+ end
340
+ end
341
+ end
342
+
343
+ context 'with partially partitioned pattern' do
344
+ let(:pattern) { 'table/%Y-%m-%d/%H' }
345
+
346
+ context 'with :hour' do
347
+ let(:grain) { :hour }
348
+
349
+ describe '#pattern' do
350
+ subject { new_instance.pattern }
351
+ it { is_expected.to eq('table/%Y-%m-%d/%H') }
352
+ end
353
+ end
354
+
355
+ context 'with :day' do
356
+ let(:grain) { :day }
357
+
358
+ describe '#pattern' do
359
+ subject { new_instance.pattern }
360
+ it { is_expected.to eq('table/%Y-%m-%d') }
361
+ end
362
+ end
363
+
364
+ context 'with :month' do
365
+ let(:grain) { :month }
366
+
367
+ describe '#pattern' do
368
+ subject { new_instance.pattern }
369
+ it { is_expected.to eq('table/%Y-%m-%d') }
370
+ end
371
+ end
372
+
373
+ context 'with :year' do
374
+ let(:grain) { :year }
375
+
376
+ describe '#pattern' do
377
+ subject { new_instance.pattern }
378
+ it { is_expected.to eq('table/%Y-%m-%d') }
379
+ end
380
+ end
381
+ end
382
+
383
+ context 'when cannot round due to granularity' do
384
+ shared_context 'cannot round' do
385
+ it { expect { subject }.to raise_error RuntimeError, /cannot round to :#{grain} for #{pattern}/ }
386
+ end
387
+
388
+ context 'with :hour' do
389
+ let(:grain) { :hour }
390
+ let(:pattern) { 'table/y=%Y/m=%m/d=%d' }
391
+ include_context 'cannot round'
392
+ end
393
+
394
+ context 'with :day' do
395
+ let(:grain) { :day }
396
+ let(:pattern) { 'table/y=%Y/m=%m' }
397
+ include_context 'cannot round'
398
+ end
399
+
400
+ context 'with :month' do
401
+ let(:grain) { :month }
402
+ let(:pattern) { 'table/y=%Y' }
403
+ include_context 'cannot round'
404
+ end
405
+ end
406
+ end
407
+ end