masamune 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,137 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Dimension do
26
+ let(:store) { double }
27
+
28
+ context 'for type :date' do
29
+ let(:dimension) do
30
+ described_class.new id: 'date', type: :date,
31
+ columns: [
32
+ Masamune::Schema::Column.new(id: 'date_id')
33
+ ]
34
+ end
35
+
36
+ it { expect(dimension.name).to eq('date_dimension') }
37
+ it { expect(dimension.type).to eq(:date) }
38
+ end
39
+
40
+ context 'for type :one' do
41
+ let(:dimension) do
42
+ described_class.new id: 'user', type: :one,
43
+ columns: [
44
+ Masamune::Schema::Column.new(id: 'tenant_id'),
45
+ Masamune::Schema::Column.new(id: 'user_id')
46
+ ]
47
+ end
48
+
49
+ it { expect(dimension.name).to eq('user_dimension') }
50
+ it { expect(dimension.type).to eq(:one) }
51
+ end
52
+
53
+ context 'for type :two' do
54
+ let(:dimension) do
55
+ described_class.new id: 'user', type: :two,
56
+ columns: [
57
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true, natural_key: true),
58
+ Masamune::Schema::Column.new(id: 'user_id', index: true, natural_key: true)
59
+ ]
60
+ end
61
+
62
+ it { expect(dimension.name).to eq('user_dimension') }
63
+ it { expect(dimension.type).to eq(:two) }
64
+ end
65
+
66
+ context 'with invalid values' do
67
+ let(:dimension) do
68
+ described_class.new id: 'user_account_state', type: :mini,
69
+ columns: [
70
+ Masamune::Schema::Column.new(id: 'name', type: :string, unique: true),
71
+ Masamune::Schema::Column.new(id: 'description', type: :string)
72
+ ],
73
+ rows: [
74
+ Masamune::Schema::Row.new(values: {
75
+ name: 'active',
76
+ description: 'Active',
77
+ missing_column: true
78
+ })
79
+ ]
80
+ end
81
+
82
+ it { expect { dimension }.to raise_error ArgumentError, /contains undefined columns/ }
83
+ end
84
+
85
+ context 'for type :four' do
86
+ let(:mini_dimension) do
87
+ described_class.new id: 'user_account_state', type: :mini,
88
+ columns: [
89
+ Masamune::Schema::Column.new(id: 'name', type: :string, unique: true),
90
+ Masamune::Schema::Column.new(id: 'description', type: :string)
91
+ ],
92
+ rows: [
93
+ Masamune::Schema::Row.new(values: {
94
+ name: 'active',
95
+ description: 'Active',
96
+ }, default: true)
97
+ ]
98
+ end
99
+
100
+ let(:dimension) do
101
+ described_class.new id: 'user', store: store, type: :four, references: [Masamune::Schema::TableReference.new(mini_dimension)],
102
+ columns: [
103
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true, natural_key: true),
104
+ Masamune::Schema::Column.new(id: 'user_id', index: true, natural_key: true),
105
+ Masamune::Schema::Column.new(id: 'preferences', type: :key_value, null: true)
106
+ ]
107
+ end
108
+
109
+ it { expect(dimension.name).to eq('user_dimension') }
110
+ it { expect(dimension.type).to eq(:four) }
111
+
112
+ describe '#stage_table' do
113
+ let!(:stage_table) { dimension.stage_table }
114
+
115
+ it 'should inherit id' do
116
+ expect(stage_table.id).to eq(:user)
117
+ expect(stage_table.name).to eq('user_dimension_stage')
118
+ end
119
+
120
+ it 'should inherit store' do
121
+ expect(stage_table.store).to eq(store)
122
+ end
123
+
124
+ it 'should duplicate columns' do
125
+ expect(dimension.parent).to be_nil
126
+ expect(dimension.columns[:tenant_id].parent).to eq(dimension)
127
+ expect(stage_table.parent).to eq(dimension)
128
+ expect(stage_table.columns[:tenant_id].parent).to eq(stage_table)
129
+ end
130
+
131
+ it 'should inherit reserved_columns' do
132
+ expect(dimension.reserved_columns.keys).to_not be_empty
133
+ expect(stage_table.reserved_columns.keys).to eq(dimension.reserved_columns.keys)
134
+ end
135
+ end
136
+ end
137
+ end
@@ -0,0 +1,75 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Event do
26
+ context 'without id' do
27
+ subject(:event) { described_class.new }
28
+ it { expect { event }.to raise_error ArgumentError }
29
+ end
30
+
31
+ context 'with attributes' do
32
+ let(:event) do
33
+ described_class.new id: 'user',
34
+ attributes: [
35
+ Masamune::Schema::Event::Attribute.new(id: 'tenant_id', type: :integer),
36
+ Masamune::Schema::Event::Attribute.new(id: 'user_id', type: :integer)
37
+ ]
38
+ end
39
+
40
+ it { expect(event.attributes).to include :tenant_id }
41
+ it { expect(event.attributes).to include :user_id }
42
+ it { expect(event.attributes[:tenant_id].type).to eq(:integer) }
43
+ it { expect(event.attributes[:user_id].type).to eq(:integer) }
44
+ end
45
+
46
+ context 'with array attributes' do
47
+ let(:event) do
48
+ described_class.new id: 'user',
49
+ attributes: [
50
+ Masamune::Schema::Event::Attribute.new(id: 'group_id', type: :integer, array: true),
51
+ ]
52
+ end
53
+
54
+ it { expect(event.attributes).to include :group_id }
55
+ it { expect(event.attributes[:group_id].type).to eq(:integer) }
56
+ it { expect(event.attributes[:group_id].array).to be(true) }
57
+ end
58
+
59
+
60
+ describe Masamune::Schema::Event::Attribute do
61
+ context 'without id' do
62
+ subject(:attribute) { described_class.new }
63
+ it { expect { attribute }.to raise_error ArgumentError }
64
+ end
65
+
66
+ subject(:attribute) { described_class.new id: 'id' }
67
+
68
+ it do
69
+ expect(attribute.id).to eq(:id)
70
+ expect(attribute.type).to eq(:integer)
71
+ expect(attribute.immutable).to eq(false)
72
+ expect(attribute.array).to eq(false)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,117 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Fact do
26
+ let(:store) { double(id: 'store', type: :postgres) }
27
+
28
+ let(:date_dimension) do
29
+ Masamune::Schema::Dimension.new id: 'date', type: :date,
30
+ columns: [
31
+ Masamune::Schema::Column.new(id: 'date_id')
32
+ ]
33
+ end
34
+
35
+ let(:user_dimension) do
36
+ Masamune::Schema::Dimension.new id: 'user', type: :two,
37
+ columns: [
38
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true),
39
+ Masamune::Schema::Column.new(id: 'user_id', index: true)
40
+ ]
41
+ end
42
+
43
+ let(:fact) do
44
+ described_class.new id: 'visits', store: store, partition: 'y%Ym%m',
45
+ references: [
46
+ Masamune::Schema::TableReference.new(date_dimension),
47
+ Masamune::Schema::TableReference.new(user_dimension)
48
+ ],
49
+ columns: [
50
+ Masamune::Schema::Column.new(id: 'total', type: :integer)
51
+ ]
52
+ end
53
+
54
+ it { expect(fact.name).to eq('visits_fact') }
55
+
56
+ describe '#partition_table' do
57
+ let(:date) { Chronic.parse('2015-01-01') }
58
+
59
+ subject(:partition_table) { fact.partition_table(date) }
60
+
61
+ it { expect(partition_table.store.id).to eq(store.id) }
62
+ it { expect(partition_table.name).to eq('visits_fact_y2015m01') }
63
+ it { expect(partition_table.range.start_date).to eq(date.utc.to_date) }
64
+
65
+ describe '#stage_table' do
66
+ subject(:stage_table) { partition_table.stage_table }
67
+
68
+ it { expect(stage_table.store.id).to eq(store.id) }
69
+ it { expect(stage_table.name).to eq('visits_fact_y2015m01_stage') }
70
+ it { expect(stage_table.range.start_date).to eq(date.utc.to_date) }
71
+ end
72
+ end
73
+
74
+ context 'fact with unknown grain' do
75
+ subject(:fact) do
76
+ described_class.new id: 'visits', grain: :quarterly
77
+ end
78
+
79
+ it { expect { fact }.to raise_error ArgumentError, "unknown grain 'quarterly'" }
80
+ end
81
+
82
+ context 'fact with :hourly grain' do
83
+ let(:fact) do
84
+ described_class.new id: 'visits', store: store, grain: :hourly, partition: 'y%Ym%m',
85
+ references: [
86
+ Masamune::Schema::TableReference.new(date_dimension),
87
+ Masamune::Schema::TableReference.new(user_dimension)
88
+ ],
89
+ columns: [
90
+ Masamune::Schema::Column.new(id: 'total', type: :integer)
91
+ ]
92
+ end
93
+
94
+ it { expect(fact.id).to eq(:visits_hourly) }
95
+ it { expect(fact.name).to eq('visits_hourly_fact') }
96
+
97
+ describe '#partition_table' do
98
+ let(:date) { Chronic.parse('2015-01-01') }
99
+
100
+ subject(:partition_table) { fact.partition_table(date) }
101
+
102
+ it { expect(partition_table.store.id).to eq(store.id) }
103
+ it { expect(partition_table.name).to eq('visits_hourly_fact_y2015m01') }
104
+ it { expect(partition_table.grain).to eq(fact.grain) }
105
+ it { expect(partition_table.range.start_date).to eq(date.utc.to_date) }
106
+
107
+ describe '#stage_table' do
108
+ subject(:stage_table) { partition_table.stage_table }
109
+
110
+ it { expect(stage_table.store.id).to eq(store.id) }
111
+ it { expect(stage_table.name).to eq('visits_hourly_fact_y2015m01_stage') }
112
+ it { expect(stage_table.grain).to eq(fact.grain) }
113
+ it { expect(stage_table.range.start_date).to eq(date.utc.to_date) }
114
+ end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,593 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Map do
26
+ let(:environment) { double(logger: double) }
27
+ let(:catalog) { Masamune::Schema::Catalog.new(environment) }
28
+
29
+ before do
30
+ catalog.schema :postgres do
31
+ dimension 'user_account_state', type: :mini do
32
+ column 'name', type: :string, unique: true
33
+ column 'description', type: :string, null: true
34
+ end
35
+
36
+ dimension 'user', type: :four do
37
+ references :user_account_state
38
+ references :user_account_state, label: :hr
39
+ column 'cluster_id', index: true, natural_key: true
40
+ column 'tenant_id', index: true, natural_key: true
41
+ column 'user_id', index: true, natural_key: true
42
+ column 'preferences', type: :key_value, null: true
43
+ column 'admin', type: :boolean
44
+ column 'source', type: :string
45
+ end
46
+
47
+ file 'user' do
48
+ column 'id', type: :integer
49
+ column 'tenant_id', type: :integer
50
+ column 'admin', type: :boolean
51
+ column 'preferences', type: :yaml
52
+ column 'deleted_at', type: :timestamp, null: true
53
+ end
54
+ end
55
+
56
+ catalog.schema :hive do
57
+ event 'user' do
58
+ attribute 'id', type: :integer, immutable: true
59
+ attribute 'tenant_id', type: :integer, immutable: true
60
+ attribute 'admin', type: :boolean
61
+ attribute 'preferences', type: :json
62
+ end
63
+
64
+ dimension 'tenant', type: :two, implicit: true do
65
+ column 'tenant_id'
66
+ end
67
+
68
+ fact 'user' do
69
+ references :tenant
70
+ measure 'delta'
71
+ end
72
+
73
+ file 'user' do
74
+ column 'id', type: :integer
75
+ column 'tenant_id', type: :integer
76
+ column 'admin', type: :boolean
77
+ column 'preferences', type: :json
78
+ column 'deleted_at', type: :timestamp, null: true
79
+ end
80
+ end
81
+ end
82
+
83
+ context 'without source' do
84
+ subject(:map) { described_class.new }
85
+ it { expect { map }.to raise_error ArgumentError }
86
+ end
87
+
88
+ context 'without target' do
89
+ subject(:map) { described_class.new(source: catalog.postgres.user_file) }
90
+ it { expect { map }.to raise_error ArgumentError }
91
+ end
92
+
93
+ let(:input) { Tempfile.new('masamune') }
94
+ let(:output) { Tempfile.new('masamune') }
95
+
96
+ describe '#apply' do
97
+ let(:map) do
98
+ source.map(to: target)
99
+ end
100
+
101
+ before do
102
+ output.truncate(0)
103
+ output.rewind
104
+ input.truncate(0)
105
+ input.write(source_data)
106
+ input.close
107
+ end
108
+
109
+ subject do
110
+ map.apply(input, output)
111
+ output.readlines.join
112
+ end
113
+
114
+ shared_examples_for 'apply input/output' do
115
+ context 'with IO' do
116
+ subject do
117
+ io = File.open(output, 'a+')
118
+ map.apply(File.open(input), io)
119
+ io.rewind
120
+ io.readlines.join
121
+ end
122
+ it 'should match target data' do
123
+ is_expected.to eq(target_data)
124
+ end
125
+ end
126
+
127
+ context 'with String' do
128
+ subject do
129
+ map.apply(input.path, output.path)
130
+ File.readlines(output.path).join
131
+ end
132
+ it 'should match target data' do
133
+ is_expected.to eq(target_data)
134
+ end
135
+ end
136
+ end
137
+
138
+ context 'with undefined function' do
139
+ let(:source) { catalog.hive.user_event }
140
+ let(:target) { catalog.hive.user_fact }
141
+ let(:source_data) { '' }
142
+ let(:target_data) { '' }
143
+
144
+ before do
145
+ catalog.schema :hive do
146
+ map from: hive.user_event, to: hive.user_fact do |row|
147
+ end
148
+ end
149
+ end
150
+
151
+ it { expect { subject }.to raise_error ArgumentError, /function for map between .* does not return output for default input/ }
152
+ end
153
+
154
+ context 'from csv file to dimension' do
155
+ before do
156
+ catalog.schema :files do
157
+ map from: postgres.user_file, to: postgres.user_dimension, distinct: true do |row|
158
+ {
159
+ 'tenant_id' => row[:tenant_id],
160
+ 'user_id' => row[:id],
161
+ 'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
162
+ 'hr_user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
163
+ 'admin' => row[:admin],
164
+ 'preferences_now' => row[:preferences],
165
+ 'source' => 'users_file',
166
+ 'cluster_id' => 100
167
+ }
168
+ end
169
+ end
170
+ end
171
+
172
+ let(:source) do
173
+ catalog.postgres.user_file
174
+ end
175
+
176
+ let(:target) do
177
+ catalog.postgres.user_dimension
178
+ end
179
+
180
+ let(:source_data) do
181
+ <<-EOS.strip_heredoc
182
+ id,tenant_id,junk_id,deleted_at,admin,preferences
183
+ 1,30,X,,0,,
184
+ # NOTE intentional duplicate record
185
+ 1,30,X,,0,,
186
+ 2,40,Y,2014-02-26 18:15:51 UTC,1,"---
187
+ :enabled: true
188
+ "
189
+ # NOTE record is intentionally invalid
190
+ ,50,X,,0,
191
+ EOS
192
+ end
193
+
194
+ let(:target_data) do
195
+ <<-EOS.strip_heredoc
196
+ tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences_now,source,cluster_id
197
+ 30,1,active,active,FALSE,{},users_file,100
198
+ 40,2,deleted,deleted,TRUE,"{""enabled"":true}",users_file,100
199
+ EOS
200
+ end
201
+
202
+ before do
203
+ expect(environment.logger).to receive(:warn).with(/row .* missing required columns 'user_id'/)
204
+ end
205
+
206
+ it 'should match target data' do
207
+ is_expected.to eq(target_data)
208
+ end
209
+
210
+ it_behaves_like 'apply input/output'
211
+ end
212
+
213
+ context 'from event to postgres dimension with quoted json' do
214
+ before do
215
+ catalog.schema :files do
216
+ map from: hive.user_event, to: postgres.user_dimension do |row|
217
+ raise if row[:tenant_id] == 42
218
+ {
219
+ 'tenant_id' => row[:tenant_id],
220
+ 'user_id' => row[:id],
221
+ 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
222
+ 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
223
+ 'preferences_now' => row[:preferences_now],
224
+ 'preferences_was' => row[:preferences_was],
225
+ 'source' => 'user_event',
226
+ 'cluster_id' => 100
227
+ }
228
+ end
229
+ end
230
+ end
231
+
232
+ let(:source) do
233
+ catalog.hive.user_event
234
+ end
235
+
236
+ let(:target) do
237
+ catalog.postgres.user_dimension
238
+ end
239
+
240
+ before do
241
+ expect(environment.logger).to receive(:warn).with(/failed to process '{.*}' for #{target.name}/).ordered
242
+ expect(environment.logger).to receive(:warn).with(/failed to parse '{.*}' for #{source.name}/).ordered
243
+ end
244
+
245
+ let(:source_data) do
246
+ <<-EOS.strip_heredoc
247
+ X user_create 1 30 0 \\N \\N \\N
248
+ # NOTE intentional duplicate record
249
+ X user_create 1 30 0 \\N \\N \\N
250
+ A user_create 1 42 0 \\N \\N \\N
251
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N
252
+ # NOTE record is intentionally invalid
253
+ Z user_create 3 50 0 1 INVALID_JSON \\N
254
+ EOS
255
+ end
256
+
257
+ let(:target_data) do
258
+ <<-EOS.strip_heredoc
259
+ tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
260
+ 30,1,active,FALSE,{},{},user_event,100
261
+ 30,1,active,FALSE,{},{},user_event,100
262
+ 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
263
+ EOS
264
+ end
265
+
266
+ it 'should match target data' do
267
+ is_expected.to eq(target_data)
268
+ end
269
+
270
+ it_behaves_like 'apply input/output'
271
+ end
272
+
273
# Scenario: map hive.user_event rows onto hive.user_file and compare the
# result with the target_data fixture.
# NOTE(review): field separators inside the heredoc fixtures appear collapsed
# in this rendering (presumably tabs for a tsv store) — confirm against the
# original file before editing the fixtures.
+ context 'from event to tsv file' do
274
# Registers the mapping under test in the :files schema.
+ before do
275
+ catalog.schema :files do
276
+ map from: hive.user_event, to: hive.user_file do |row|
277
+ {
278
+ 'id' => row[:id],
279
+ 'tenant_id' => row[:tenant_id],
280
# deleted_at is populated only for events whose type matches /delete/.
+ 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
281
+ 'admin' => row[:admin_now],
282
+ 'preferences' => row[:preferences_now]
283
+ }
284
+ end
285
+ end
286
+ end
287
+
288
+ let(:source) do
289
+ catalog.hive.user_event
290
+ end
291
+
292
+ let(:target) do
293
+ catalog.hive.user_file
294
+ end
295
+
296
# Input: one user_create event and one user_delete event.
+ let(:source_data) do
297
+ <<-EOS.strip_heredoc
298
+ X user_create 1 30 0 \\N \\N \\N 0 \\N
299
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
300
+ EOS
301
+ end
302
+
303
# Expected output: one row per input event; the delete row carries the
# timestamp with millisecond precision.
+ let(:target_data) do
304
+ <<-EOS.strip_heredoc
305
+ 1 30 {}
306
+ 2 40 2014-02-26T18:15:51.000Z "{""enabled"":true}"
307
+ EOS
308
+ end
309
+
310
+
311
# The subject is defined outside this excerpt.
+ it 'should match target data' do
312
+ is_expected.to eq(target_data)
313
+ end
314
+
315
# Shared example group declared elsewhere in the spec.
+ it_behaves_like 'apply input/output'
316
+ end
317
+
318
# Scenario: same field mapping as the tsv case, but the target is
# postgres.user_file and the expected output is CSV with a header row.
# NOTE(review): field separators inside the source heredoc appear collapsed
# in this rendering — confirm against the original file before editing it.
+ context 'from event to csv file' do
319
# Registers the mapping under test in the :files schema.
+ before do
320
+ catalog.schema :files do
321
+ map from: hive.user_event, to: postgres.user_file do |row|
322
+ {
323
+ 'id' => row[:id],
324
+ 'tenant_id' => row[:tenant_id],
325
# deleted_at is populated only for events whose type matches /delete/.
+ 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil,
326
+ 'admin' => row[:admin_now],
327
+ 'preferences' => row[:preferences_now]
328
+ }
329
+ end
330
+ end
331
+ end
332
+
333
+ let(:source) do
334
+ catalog.hive.user_event
335
+ end
336
+
337
+ let(:target) do
338
+ catalog.postgres.user_file
339
+ end
340
+
341
# Input: one user_create event and one user_delete event.
+ let(:source_data) do
342
+ <<-EOS.strip_heredoc
343
+ X user_create 1 30 0 \\N \\N \\N 0 \\N
344
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
345
+ EOS
346
+ end
347
+
348
# Expected output: CSV with a header; the preferences cells are quoted,
# multi-line values that look like YAML documents ("--- {}" / "enabled: true").
+ let(:target_data) do
349
+ <<-EOS.strip_heredoc
350
+ id,tenant_id,deleted_at,admin,preferences
351
+ 1,30,,FALSE,"--- {}
352
+ "
353
+ 2,40,2014-02-26T18:15:51.000Z,FALSE,"---
354
+ enabled: true
355
+ "
356
+ EOS
357
+ end
358
+
359
# The subject is defined outside this excerpt.
+ it 'should match target data' do
360
+ is_expected.to eq(target_data)
361
+ end
362
+
363
# Shared example group declared elsewhere in the spec.
+ it_behaves_like 'apply input/output'
364
+ end
365
+
366
# Scenario: map hive.user_event rows onto hive.user_fact, where one input row
# may yield several output rows (the map block returns either a hash or an
# array of hashes).
# NOTE(review): field separators inside the heredoc fixtures appear collapsed
# in this rendering — confirm against the original file before editing them.
+ context 'from event to fact' do
367
# Registers the mapping under test in the :files schema.
+ before do
368
+ catalog.schema :files do
369
+ map from: hive.user_event, to: hive.user_fact do |row|
370
# Update events produce two identical zero-delta rows.
+ if row[:type] =~ /update/
371
+ [
372
+ {
373
+ 'tenant.tenant_id' => row[:tenant_id],
374
+ 'delta' => 0,
375
+ 'time_key' => row[:created_at]
376
+ },
377
+ {
378
+ 'tenant.tenant_id' => row[:tenant_id],
379
+ 'delta' => 0,
380
+ 'time_key' => row[:created_at]
381
+ }
382
+ ]
383
+ else
384
+ {
385
+ 'tenant.tenant_id' => row[:tenant_id],
386
# +1 for events whose type matches /create/, -1 for every other non-update event.
+ 'delta' => row[:type] =~ /create/ ? 1 : -1,
387
+ 'time_key' => row[:created_at]
388
+ }
389
+ end
390
+ end
391
+ end
392
+ end
393
+
394
+ let(:source) do
395
+ catalog.hive.user_event
396
+ end
397
+
398
+ let(:target) do
399
+ catalog.hive.user_fact
400
+ end
401
+
402
# Input: a create, an update and a delete event, ten minutes apart.
+ let(:source_data) do
403
+ <<-EOS.strip_heredoc
404
+ X user_create 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:10:00Z
405
+ Y user_update 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:20:00Z
406
+ Z user_delete 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:30:00Z
407
+ EOS
408
+ end
409
+
410
# Expected output: four rows (1 create, 2 update, 1 delete); the last field
# is the event timestamp expressed as epoch seconds.
+ let(:target_data) do
411
+ <<-EOS.strip_heredoc
412
+ 10 1 1420071000
413
+ 10 0 1420071600
414
+ 10 0 1420071600
415
+ 10 -1 1420072200
416
+ EOS
417
+ end
418
+
419
# The subject is defined outside this excerpt.
+ it 'should match target data' do
420
+ is_expected.to eq(target_data)
421
+ end
422
+
423
# Shared example group declared elsewhere in the spec.
+ it_behaves_like 'apply input/output'
424
+ end
425
+
426
# Scenario: an event with an array-valued attribute (group_id) is expanded
# into one fact row per group added or removed between the "was" and "now"
# values.
# NOTE(review): field separators inside the heredoc fixtures appear collapsed
# in this rendering — confirm against the original file before editing them.
+ context 'from event with array attribute to fact' do
427
# Replaces the catalog contents with a schema local to this scenario.
+ before do
428
+ catalog.clear!
429
+ catalog.schema :hive do
430
+ event 'user' do
431
+ attribute 'id', type: :integer, immutable: true
432
+ attribute 'group_id', type: :integer, array: true
433
+ end
434
+
435
+ dimension 'group', type: :two, implicit: true do
436
+ column 'group_id'
437
+ end
438
+
439
+ fact 'user' do
440
+ references :group
441
# 'junk' is declared on the fact but is not listed in the map's columns: below.
+ column 'junk'
442
+ measure 'total'
443
+ end
444
+
445
# The explicit columns: list names the output fields for this mapping.
+ map from: hive.user_event, to: hive.user_fact, columns: %w(group.group_id total time_key) do |row|
446
+ result = []
447
# Groups present now but not before: emit total +1.
+ (row[:group_id_now] - row[:group_id_was]).each do |group_id|
448
+ result <<
449
+ {
450
+ 'group.group_id' => group_id,
451
+ 'total' => 1,
452
+ 'time_key' => row[:created_at]
453
+ }
454
+ end
455
# Groups present before but not now: emit total -1.
+ (row[:group_id_was] - row[:group_id_now]).each do |group_id|
456
+ result <<
457
+ {
458
+ 'group.group_id' => group_id,
459
+ 'total' => -1,
460
+ 'time_key' => row[:created_at]
461
+ }
462
+ end
463
+ result
464
+ end
465
+ end
466
+ end
467
+
468
+ let(:source) do
469
+ catalog.hive.user_event
470
+ end
471
+
472
+ let(:target) do
473
+ catalog.hive.user_fact
474
+ end
475
+
476
# Input: begins with a comment line and a blank line (part of the fixture
# data), followed by four events.
+ let(:source_data) do
477
+ <<-EOS.strip_heredoc
478
+ # new lines and comments should be skipped
479
+
480
+ X user_create 3 [1,2] [] 0 2015-01-01T00:10:00Z
481
+ Y user_update 3 [1,2,3] [1,2] 1 2015-01-01T00:20:00Z
482
+ Y user_update 3 [1,2] [1,2,3] 1 2015-01-01T00:30:00Z
483
+ Z user_delete 3 [] [1,2] 0 2015-01-01T00:40:00Z
484
+ EOS
485
+ end
486
+
487
# Expected output: one row per added/removed group id; the last field is the
# event timestamp expressed as epoch seconds.
+ let(:target_data) do
488
+ <<-EOS.strip_heredoc
489
+ 1 1 1420071000
490
+ 2 1 1420071000
491
+ 3 1 1420071600
492
+ 3 -1 1420072200
493
+ 1 -1 1420072800
494
+ 2 -1 1420072800
495
+ EOS
496
+ end
497
+
498
# The subject is defined outside this excerpt.
+ it 'should match target data' do
499
+ is_expected.to eq(target_data)
500
+ end
501
+
502
# Shared example group declared elsewhere in the spec.
+ it_behaves_like 'apply input/output'
503
+ end
504
+
505
# Scenario: the source carries an unquoted JSON value ({"enabled":true}) and
# the expected CSV output shows it quoted with doubled inner quotes.
# NOTE(review): field separators inside the source heredoc appear collapsed
# in this rendering — confirm against the original file before editing it.
+ context 'from event to postgres dimension with raw json' do
506
# Registers the mapping under test in the :files schema.
+ before do
507
+ catalog.schema :files do
508
+ map from: hive.user_event, to: postgres.user_dimension do |row|
509
+ {
510
+ 'tenant_id' => row[:tenant_id],
511
+ 'user_id' => row[:id],
512
+ 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
513
# Delete events take the previous admin value; all others take the current one.
+ 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
514
+ 'preferences_now' => row[:preferences_now],
515
+ 'preferences_was' => row[:preferences_was],
516
+ 'source' => 'user_event',
517
+ 'cluster_id' => 100
518
+ }
519
+ end
520
+ end
521
+ end
522
+
523
+ let(:source) do
524
+ catalog.hive.user_event
525
+ end
526
+
527
+ let(:target) do
528
+ catalog.postgres.user_dimension
529
+ end
530
+
531
# Input: one user_create event and one user_delete event with raw JSON.
+ let(:source_data) do
532
+ <<-EOS.strip_heredoc
533
+ X user_create 1 30 0 \\N \\N \\N
534
+ Y user_delete 2 40 0 1 {"enabled":true} \\N
535
+ EOS
536
+ end
537
+
538
# Expected output: CSV with a header; the 'user_account_state.name' key
# appears under the header user_account_state_type_name, and \\N preferences
# appear as {}.
+ let(:target_data) do
539
+ <<-EOS.strip_heredoc
540
+ tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
541
+ 30,1,active,FALSE,{},{},user_event,100
542
+ 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
543
+ EOS
544
+ end
545
+
546
# The subject is defined outside this excerpt.
+ it 'should match target data' do
547
+ is_expected.to eq(target_data)
548
+ end
549
+
550
# Shared example group declared elsewhere in the spec.
+ it_behaves_like 'apply input/output'
551
+ end
552
+ end
553
+
554
# Specs for JSONEncoder#gets: each context writes one line of input to a
# StringIO, rewinds it, and checks the string returned by encoder.gets.
+ describe Masamune::Schema::Map::JSONEncoder do
555
+ let(:io) { StringIO.new }
556
# Test double standing in for a store with raw JSON encoding and CSV format.
+ let(:store) { double(json_encoding: :raw, format: :csv) }
557
+ let(:encoder) { described_class.new(io, store) }
558
+
559
+ subject { encoder.gets }
560
+
561
# Unquoted empty objects are expected to come back quoted.
+ context 'with raw empty json' do
562
+ before do
563
+ io.write '{},{}'
564
+ io.rewind
565
+ end
566
+ it { is_expected.to eq(%Q{"{}","{}"}) }
567
+ end
568
+
569
# Already-quoted empty objects are expected to come back unchanged.
+ context 'with raw quoted json' do
570
+ before do
571
+ io.write '"{}","{}"'
572
+ io.rewind
573
+ end
574
+ it { is_expected.to eq(%Q{"{}","{}"}) }
575
+ end
576
+
577
# Unquoted JSON is expected to be wrapped in quotes with inner quotes doubled.
+ context 'with raw json' do
578
+ before do
579
+ io.write '{"enabled":true,"state":""}'
580
+ io.rewind
581
+ end
582
+ it { is_expected.to eq(%Q{"{""enabled"":true,""state"":""""}"}) }
583
+ end
584
+
585
# Input that is already quoted and escaped is expected to come back unchanged.
+ context 'with quoted json' do
586
+ before do
587
+ io.write '"{""enabled"":true,""state"":""""}"'
588
+ io.rewind
589
+ end
590
+ it { is_expected.to eq(%Q{"{""enabled"":true,""state"":""""}"}) }
591
+ end
592
+ end
593
+ end