masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,137 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Dimension do # Specs for Dimension naming, type reporting, row validation, and #stage_table derivation
26
+ let(:store) { double } # bare test double; passed to the :four dimension and compared in 'should inherit store'
27
+
28
+ context 'for type :date' do # name is expected to be the id with '_dimension' appended
29
+ let(:dimension) do
30
+ described_class.new id: 'date', type: :date,
31
+ columns: [
32
+ Masamune::Schema::Column.new(id: 'date_id')
33
+ ]
34
+ end
35
+
36
+ it { expect(dimension.name).to eq('date_dimension') }
37
+ it { expect(dimension.type).to eq(:date) }
38
+ end
39
+
40
+ context 'for type :one' do
41
+ let(:dimension) do
42
+ described_class.new id: 'user', type: :one,
43
+ columns: [
44
+ Masamune::Schema::Column.new(id: 'tenant_id'),
45
+ Masamune::Schema::Column.new(id: 'user_id')
46
+ ]
47
+ end
48
+
49
+ it { expect(dimension.name).to eq('user_dimension') }
50
+ it { expect(dimension.type).to eq(:one) }
51
+ end
52
+
53
+ context 'for type :two' do # same id as the :one case, but both columns are flagged index/natural_key
54
+ let(:dimension) do
55
+ described_class.new id: 'user', type: :two,
56
+ columns: [
57
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true, natural_key: true),
58
+ Masamune::Schema::Column.new(id: 'user_id', index: true, natural_key: true)
59
+ ]
60
+ end
61
+
62
+ it { expect(dimension.name).to eq('user_dimension') }
63
+ it { expect(dimension.type).to eq(:two) }
64
+ end
65
+
66
+ context 'with invalid values' do # a row value keyed by an undeclared column must be rejected
67
+ let(:dimension) do
68
+ described_class.new id: 'user_account_state', type: :mini,
69
+ columns: [
70
+ Masamune::Schema::Column.new(id: 'name', type: :string, unique: true),
71
+ Masamune::Schema::Column.new(id: 'description', type: :string)
72
+ ],
73
+ rows: [
74
+ Masamune::Schema::Row.new(values: {
75
+ name: 'active',
76
+ description: 'Active',
77
+ missing_column: true # not one of the declared columns (name, description)
78
+ })
79
+ ]
80
+ end
81
+
82
+ it { expect { dimension }.to raise_error ArgumentError, /contains undefined columns/ } # let is lazy, so construction happens inside the expect block
83
+ end
84
+
85
+ context 'for type :four' do # a :four dimension that references a :mini dimension
86
+ let(:mini_dimension) do
87
+ described_class.new id: 'user_account_state', type: :mini,
88
+ columns: [
89
+ Masamune::Schema::Column.new(id: 'name', type: :string, unique: true),
90
+ Masamune::Schema::Column.new(id: 'description', type: :string)
91
+ ],
92
+ rows: [
93
+ Masamune::Schema::Row.new(values: {
94
+ name: 'active',
95
+ description: 'Active',
96
+ }, default: true)
97
+ ]
98
+ end
99
+
100
+ let(:dimension) do
101
+ described_class.new id: 'user', store: store, type: :four, references: [Masamune::Schema::TableReference.new(mini_dimension)],
102
+ columns: [
103
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true, natural_key: true),
104
+ Masamune::Schema::Column.new(id: 'user_id', index: true, natural_key: true),
105
+ Masamune::Schema::Column.new(id: 'preferences', type: :key_value, null: true)
106
+ ]
107
+ end
108
+
109
+ it { expect(dimension.name).to eq('user_dimension') }
110
+ it { expect(dimension.type).to eq(:four) }
111
+
112
+ describe '#stage_table' do
113
+ let!(:stage_table) { dimension.stage_table } # let! is evaluated eagerly before each example
114
+
115
+ it 'should inherit id' do
116
+ expect(stage_table.id).to eq(:user) # expected as a Symbol although the id was given as the String 'user'
117
+ expect(stage_table.name).to eq('user_dimension_stage')
118
+ end
119
+
120
+ it 'should inherit store' do
121
+ expect(stage_table.store).to eq(store)
122
+ end
123
+
124
+ it 'should duplicate columns' do # stage table columns point at the stage table; the dimension's own columns keep the dimension as parent
125
+ expect(dimension.parent).to be_nil
126
+ expect(dimension.columns[:tenant_id].parent).to eq(dimension)
127
+ expect(stage_table.parent).to eq(dimension)
128
+ expect(stage_table.columns[:tenant_id].parent).to eq(stage_table)
129
+ end
130
+
131
+ it 'should inherit reserved_columns' do
132
+ expect(dimension.reserved_columns.keys).to_not be_empty # guards against the equality below passing trivially on two empty lists
133
+ expect(stage_table.reserved_columns.keys).to eq(dimension.reserved_columns.keys)
134
+ end
135
+ end
136
+ end
137
+ end
@@ -0,0 +1,75 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Event do # Specs for Event construction and the nested Event::Attribute class
26
+ context 'without id' do # constructing with no arguments must raise
27
+ subject(:event) { described_class.new }
28
+ it { expect { event }.to raise_error ArgumentError }
29
+ end
30
+
31
+ context 'with attributes' do
32
+ let(:event) do
33
+ described_class.new id: 'user',
34
+ attributes: [
35
+ Masamune::Schema::Event::Attribute.new(id: 'tenant_id', type: :integer),
36
+ Masamune::Schema::Event::Attribute.new(id: 'user_id', type: :integer)
37
+ ]
38
+ end
39
+
40
+ it { expect(event.attributes).to include :tenant_id } # looked up by Symbol although ids were given as Strings
41
+ it { expect(event.attributes).to include :user_id }
42
+ it { expect(event.attributes[:tenant_id].type).to eq(:integer) }
43
+ it { expect(event.attributes[:user_id].type).to eq(:integer) }
44
+ end
45
+
46
+ context 'with array attributes' do # array: true is expected to be readable back via #array
47
+ let(:event) do
48
+ described_class.new id: 'user',
49
+ attributes: [
50
+ Masamune::Schema::Event::Attribute.new(id: 'group_id', type: :integer, array: true),
51
+ ]
52
+ end
53
+
54
+ it { expect(event.attributes).to include :group_id }
55
+ it { expect(event.attributes[:group_id].type).to eq(:integer) }
56
+ it { expect(event.attributes[:group_id].array).to be(true) }
57
+ end
58
+
59
+
60
+ describe Masamune::Schema::Event::Attribute do # described_class is Event::Attribute inside this nested group
61
+ context 'without id' do
62
+ subject(:attribute) { described_class.new }
63
+ it { expect { attribute }.to raise_error ArgumentError }
64
+ end
65
+
66
+ subject(:attribute) { described_class.new id: 'id' } # only id is supplied, so the example below pins the remaining defaults
67
+
68
+ it do
69
+ expect(attribute.id).to eq(:id) # String id is expected back as a Symbol
70
+ expect(attribute.type).to eq(:integer)
71
+ expect(attribute.immutable).to eq(false)
72
+ expect(attribute.array).to eq(false)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,117 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Fact do # Specs for Fact naming, grain validation, and partition/stage table derivation
26
+ let(:store) { double(id: 'store', type: :postgres) } # test double stubbing only id and type
27
+
28
+ let(:date_dimension) do
29
+ Masamune::Schema::Dimension.new id: 'date', type: :date,
30
+ columns: [
31
+ Masamune::Schema::Column.new(id: 'date_id')
32
+ ]
33
+ end
34
+
35
+ let(:user_dimension) do
36
+ Masamune::Schema::Dimension.new id: 'user', type: :two,
37
+ columns: [
38
+ Masamune::Schema::Column.new(id: 'tenant_id', index: true),
39
+ Masamune::Schema::Column.new(id: 'user_id', index: true)
40
+ ]
41
+ end
42
+
43
+ let(:fact) do # default fact: no grain given, references both dimensions above
44
+ described_class.new id: 'visits', store: store, partition: 'y%Ym%m',
45
+ references: [
46
+ Masamune::Schema::TableReference.new(date_dimension),
47
+ Masamune::Schema::TableReference.new(user_dimension)
48
+ ],
49
+ columns: [
50
+ Masamune::Schema::Column.new(id: 'total', type: :integer)
51
+ ]
52
+ end
53
+
54
+ it { expect(fact.name).to eq('visits_fact') } # id 'visits' with '_fact' appended
55
+
56
+ describe '#partition_table' do
57
+ let(:date) { Chronic.parse('2015-01-01') }
58
+
59
+ subject(:partition_table) { fact.partition_table(date) }
60
+
61
+ it { expect(partition_table.store.id).to eq(store.id) }
62
+ it { expect(partition_table.name).to eq('visits_fact_y2015m01') } # partition 'y%Ym%m' applied to 2015-01-01 yields the 'y2015m01' suffix
63
+ it { expect(partition_table.range.start_date).to eq(date.utc.to_date) }
64
+
65
+ describe '#stage_table' do # stage table derived from the partition table, not from the fact directly
66
+ subject(:stage_table) { partition_table.stage_table }
67
+
68
+ it { expect(stage_table.store.id).to eq(store.id) }
69
+ it { expect(stage_table.name).to eq('visits_fact_y2015m01_stage') }
70
+ it { expect(stage_table.range.start_date).to eq(date.utc.to_date) }
71
+ end
72
+ end
73
+
74
+ context 'fact with unknown grain' do
75
+ subject(:fact) do
76
+ described_class.new id: 'visits', grain: :quarterly
77
+ end
78
+
79
+ it { expect { fact }.to raise_error ArgumentError, "unknown grain 'quarterly'" } # String (not Regexp) message, so the text must match exactly
80
+ end
81
+
82
+ context 'fact with :hourly grain' do # same fixture as the default fact, plus grain: :hourly
83
+ let(:fact) do
84
+ described_class.new id: 'visits', store: store, grain: :hourly, partition: 'y%Ym%m',
85
+ references: [
86
+ Masamune::Schema::TableReference.new(date_dimension),
87
+ Masamune::Schema::TableReference.new(user_dimension)
88
+ ],
89
+ columns: [
90
+ Masamune::Schema::Column.new(id: 'total', type: :integer)
91
+ ]
92
+ end
93
+
94
+ it { expect(fact.id).to eq(:visits_hourly) } # the grain is expected to be appended to the id given as 'visits'
95
+ it { expect(fact.name).to eq('visits_hourly_fact') }
96
+
97
+ describe '#partition_table' do
98
+ let(:date) { Chronic.parse('2015-01-01') }
99
+
100
+ subject(:partition_table) { fact.partition_table(date) }
101
+
102
+ it { expect(partition_table.store.id).to eq(store.id) }
103
+ it { expect(partition_table.name).to eq('visits_hourly_fact_y2015m01') }
104
+ it { expect(partition_table.grain).to eq(fact.grain) } # grain is expected to carry over to the partition table
105
+ it { expect(partition_table.range.start_date).to eq(date.utc.to_date) }
106
+
107
+ describe '#stage_table' do
108
+ subject(:stage_table) { partition_table.stage_table }
109
+
110
+ it { expect(stage_table.store.id).to eq(store.id) }
111
+ it { expect(stage_table.name).to eq('visits_hourly_fact_y2015m01_stage') }
112
+ it { expect(stage_table.grain).to eq(fact.grain) } # and onward to the stage table
113
+ it { expect(stage_table.range.start_date).to eq(date.utc.to_date) }
114
+ end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,593 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Schema::Map do
26
+ let(:environment) { double(logger: double) }
27
+ let(:catalog) { Masamune::Schema::Catalog.new(environment) }
28
+
29
+ before do
30
+ catalog.schema :postgres do
31
+ dimension 'user_account_state', type: :mini do
32
+ column 'name', type: :string, unique: true
33
+ column 'description', type: :string, null: true
34
+ end
35
+
36
+ dimension 'user', type: :four do
37
+ references :user_account_state
38
+ references :user_account_state, label: :hr
39
+ column 'cluster_id', index: true, natural_key: true
40
+ column 'tenant_id', index: true, natural_key: true
41
+ column 'user_id', index: true, natural_key: true
42
+ column 'preferences', type: :key_value, null: true
43
+ column 'admin', type: :boolean
44
+ column 'source', type: :string
45
+ end
46
+
47
+ file 'user' do
48
+ column 'id', type: :integer
49
+ column 'tenant_id', type: :integer
50
+ column 'admin', type: :boolean
51
+ column 'preferences', type: :yaml
52
+ column 'deleted_at', type: :timestamp, null: true
53
+ end
54
+ end
55
+
56
+ catalog.schema :hive do
57
+ event 'user' do
58
+ attribute 'id', type: :integer, immutable: true
59
+ attribute 'tenant_id', type: :integer, immutable: true
60
+ attribute 'admin', type: :boolean
61
+ attribute 'preferences', type: :json
62
+ end
63
+
64
+ dimension 'tenant', type: :two, implicit: true do
65
+ column 'tenant_id'
66
+ end
67
+
68
+ fact 'user' do
69
+ references :tenant
70
+ measure 'delta'
71
+ end
72
+
73
+ file 'user' do
74
+ column 'id', type: :integer
75
+ column 'tenant_id', type: :integer
76
+ column 'admin', type: :boolean
77
+ column 'preferences', type: :json
78
+ column 'deleted_at', type: :timestamp, null: true
79
+ end
80
+ end
81
+ end
82
+
83
+ context 'without source' do
84
+ subject(:map) { described_class.new }
85
+ it { expect { map }.to raise_error ArgumentError }
86
+ end
87
+
88
+ context 'without target' do
89
+ subject(:map) { described_class.new(source: catalog.postgres.user_file) }
90
+ it { expect { map }.to raise_error ArgumentError }
91
+ end
92
+
93
+ let(:input) { Tempfile.new('masamune') }
94
+ let(:output) { Tempfile.new('masamune') }
95
+
96
+ describe '#apply' do
97
+ let(:map) do
98
+ source.map(to: target)
99
+ end
100
+
101
+ before do
102
+ output.truncate(0)
103
+ output.rewind
104
+ input.truncate(0)
105
+ input.write(source_data)
106
+ input.close
107
+ end
108
+
109
+ subject do
110
+ map.apply(input, output)
111
+ output.readlines.join
112
+ end
113
+
114
+ shared_examples_for 'apply input/output' do
115
+ context 'with IO' do
116
+ subject do
117
+ io = File.open(output, 'a+')
118
+ map.apply(File.open(input), io)
119
+ io.rewind
120
+ io.readlines.join
121
+ end
122
+ it 'should match target data' do
123
+ is_expected.to eq(target_data)
124
+ end
125
+ end
126
+
127
+ context 'with String' do
128
+ subject do
129
+ map.apply(input.path, output.path)
130
+ File.readlines(output.path).join
131
+ end
132
+ it 'should match target data' do
133
+ is_expected.to eq(target_data)
134
+ end
135
+ end
136
+ end
137
+
138
+ context 'with undefined function' do
139
+ let(:source) { catalog.hive.user_event }
140
+ let(:target) { catalog.hive.user_fact }
141
+ let(:source_data) { '' }
142
+ let(:target_data) { '' }
143
+
144
+ before do
145
+ catalog.schema :hive do
146
+ map from: hive.user_event, to: hive.user_fact do |row|
147
+ end
148
+ end
149
+ end
150
+
151
+ it { expect { subject }.to raise_error ArgumentError, /function for map between .* does not return output for default input/ }
152
+ end
153
+
154
+ context 'from csv file to dimension' do
155
+ before do
156
+ catalog.schema :files do
157
+ map from: postgres.user_file, to: postgres.user_dimension, distinct: true do |row|
158
+ {
159
+ 'tenant_id' => row[:tenant_id],
160
+ 'user_id' => row[:id],
161
+ 'user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
162
+ 'hr_user_account_state.name' => row[:deleted_at] ? 'deleted' : 'active',
163
+ 'admin' => row[:admin],
164
+ 'preferences_now' => row[:preferences],
165
+ 'source' => 'users_file',
166
+ 'cluster_id' => 100
167
+ }
168
+ end
169
+ end
170
+ end
171
+
172
+ let(:source) do
173
+ catalog.postgres.user_file
174
+ end
175
+
176
+ let(:target) do
177
+ catalog.postgres.user_dimension
178
+ end
179
+
180
+ let(:source_data) do
181
+ <<-EOS.strip_heredoc
182
+ id,tenant_id,junk_id,deleted_at,admin,preferences
183
+ 1,30,X,,0,,
184
+ # NOTE intentional duplicate record
185
+ 1,30,X,,0,,
186
+ 2,40,Y,2014-02-26 18:15:51 UTC,1,"---
187
+ :enabled: true
188
+ "
189
+ # NOTE record is intentionally invalid
190
+ ,50,X,,0,
191
+ EOS
192
+ end
193
+
194
+ let(:target_data) do
195
+ <<-EOS.strip_heredoc
196
+ tenant_id,user_id,user_account_state_type_name,hr_user_account_state_type_name,admin,preferences_now,source,cluster_id
197
+ 30,1,active,active,FALSE,{},users_file,100
198
+ 40,2,deleted,deleted,TRUE,"{""enabled"":true}",users_file,100
199
+ EOS
200
+ end
201
+
202
+ before do
203
+ expect(environment.logger).to receive(:warn).with(/row .* missing required columns 'user_id'/)
204
+ end
205
+
206
+ it 'should match target data' do
207
+ is_expected.to eq(target_data)
208
+ end
209
+
210
+ it_behaves_like 'apply input/output'
211
+ end
212
+
213
+ context 'from event to postgres dimension with quoted json' do
214
+ before do
215
+ catalog.schema :files do
216
+ map from: hive.user_event, to: postgres.user_dimension do |row|
217
+ raise if row[:tenant_id] == 42
218
+ {
219
+ 'tenant_id' => row[:tenant_id],
220
+ 'user_id' => row[:id],
221
+ 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active',
222
+ 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now],
223
+ 'preferences_now' => row[:preferences_now],
224
+ 'preferences_was' => row[:preferences_was],
225
+ 'source' => 'user_event',
226
+ 'cluster_id' => 100
227
+ }
228
+ end
229
+ end
230
+ end
231
+
232
+ let(:source) do
233
+ catalog.hive.user_event
234
+ end
235
+
236
+ let(:target) do
237
+ catalog.postgres.user_dimension
238
+ end
239
+
240
+ before do
241
+ expect(environment.logger).to receive(:warn).with(/failed to process '{.*}' for #{target.name}/).ordered
242
+ expect(environment.logger).to receive(:warn).with(/failed to parse '{.*}' for #{source.name}/).ordered
243
+ end
244
+
245
+ let(:source_data) do
246
+ <<-EOS.strip_heredoc
247
+ X user_create 1 30 0 \\N \\N \\N
248
+ # NOTE intentional duplicate record
249
+ X user_create 1 30 0 \\N \\N \\N
250
+ A user_create 1 42 0 \\N \\N \\N
251
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N
252
+ # NOTE record is intentionally invalid
253
+ Z user_create 3 50 0 1 INVALID_JSON \\N
254
+ EOS
255
+ end
256
+
257
+ let(:target_data) do
258
+ <<-EOS.strip_heredoc
259
+ tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
260
+ 30,1,active,FALSE,{},{},user_event,100
261
+ 30,1,active,FALSE,{},{},user_event,100
262
+ 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
263
+ EOS
264
+ end
265
+
266
+ it 'should match target data' do
267
+ is_expected.to eq(target_data)
268
+ end
269
+
270
+ it_behaves_like 'apply input/output'
271
+ end
272
+
273
+ context 'from event to tsv file' do # Spec: maps hive.user_event rows onto hive.user_file (the tsv case, per the context name)
274
+ before do
275
+ catalog.schema :files do
276
+ map from: hive.user_event, to: hive.user_file do |row| # registers the row-level mapping under test
277
+ {
278
+ 'id' => row[:id],
279
+ 'tenant_id' => row[:tenant_id],
280
+ 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil, # timestamp copied only when the event type matches /delete/, otherwise nil
281
+ 'admin' => row[:admin_now],
282
+ 'preferences' => row[:preferences_now]
283
+ }
284
+ end
285
+ end
286
+ end
287
+
288
+ let(:source) do
289
+ catalog.hive.user_event
290
+ end
291
+
292
+ let(:target) do
293
+ catalog.hive.user_file
294
+ end
295
+
296
+ let(:source_data) do # input fixture: one user_create and one user_delete event line
297
+ <<-EOS.strip_heredoc
298
+ X user_create 1 30 0 \\N \\N \\N 0 \\N
299
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
300
+ EOS
301
+ end
302
+
303
+ let(:target_data) do # expected output: no header line, and the delete row carries the timestamp with millisecond precision
304
+ <<-EOS.strip_heredoc
305
+ 1 30 {}
306
+ 2 40 2014-02-26T18:15:51.000Z "{""enabled"":true}"
307
+ EOS
308
+ end
309
+
310
+
311
+ it 'should match target data' do # the subject is defined outside this excerpt
312
+ is_expected.to eq(target_data)
313
+ end
314
+
315
+ it_behaves_like 'apply input/output' # shared examples defined outside this excerpt
316
+ end
317
+
318
+ context 'from event to csv file' do # Spec: maps hive.user_event rows onto postgres.user_file (the csv case, per the context name)
319
+ before do
320
+ catalog.schema :files do
321
+ map from: hive.user_event, to: postgres.user_file do |row| # registers the row-level mapping under test
322
+ {
323
+ 'id' => row[:id],
324
+ 'tenant_id' => row[:tenant_id],
325
+ 'deleted_at' => row[:type] =~ /delete/ ? row[:created_at] : nil, # timestamp copied only when the event type matches /delete/, otherwise nil
326
+ 'admin' => row[:admin_now],
327
+ 'preferences' => row[:preferences_now]
328
+ }
329
+ end
330
+ end
331
+ end
332
+
333
+ let(:source) do
334
+ catalog.hive.user_event
335
+ end
336
+
337
+ let(:target) do
338
+ catalog.postgres.user_file
339
+ end
340
+
341
+ let(:source_data) do # input fixture: one user_create and one user_delete event line
342
+ <<-EOS.strip_heredoc
343
+ X user_create 1 30 0 \\N \\N \\N 0 \\N
344
+ Y user_delete 2 40 0 1 "{""enabled"":true}" \\N 0 2014-02-26T18:15:51Z
345
+ EOS
346
+ end
347
+
348
+ let(:target_data) do # expected output: header line first, then preferences as quoted multi-line --- documents (looks like YAML, TODO confirm against the postgres store encoding)
349
+ <<-EOS.strip_heredoc
350
+ id,tenant_id,deleted_at,admin,preferences
351
+ 1,30,,FALSE,"--- {}
352
+ "
353
+ 2,40,2014-02-26T18:15:51.000Z,FALSE,"---
354
+ enabled: true
355
+ "
356
+ EOS
357
+ end
358
+
359
+ it 'should match target data' do # the subject is defined outside this excerpt
360
+ is_expected.to eq(target_data)
361
+ end
362
+
363
+ it_behaves_like 'apply input/output' # shared examples defined outside this excerpt
364
+ end
365
+
366
+ context 'from event to fact' do # Spec: maps hive.user_event rows onto hive.user_fact, including a mapping that returns several rows per event
366
+ before do
367
+ catalog.schema :files do
368
+ map from: hive.user_event, to: hive.user_fact do |row| # the block returns either one hash or an array of hashes
369
+ if row[:type] =~ /update/ # update events fan out into two identical zero-delta rows
370
+ [
371
+ {
372
+ 'tenant.tenant_id' => row[:tenant_id],
373
+ 'delta' => 0,
374
+ 'time_key' => row[:created_at]
375
+ },
376
+ {
377
+ 'tenant.tenant_id' => row[:tenant_id],
378
+ 'delta' => 0,
379
+ 'time_key' => row[:created_at]
380
+ }
381
+ ]
382
+ else
383
+ {
384
+ 'tenant.tenant_id' => row[:tenant_id],
385
+ 'delta' => row[:type] =~ /create/ ? 1 : -1, # create yields 1, any other non-update type yields -1
386
+ 'time_key' => row[:created_at]
387
+ }
388
+ end
389
+ end
390
+ end
391
+ end
392
+
393
+ let(:source) do
394
+ catalog.hive.user_event
395
+ end
396
+
397
+ let(:target) do
398
+ catalog.hive.user_fact
399
+ end
400
+
401
+ let(:source_data) do # input fixture: a create, an update and a delete event, ten minutes apart
402
+ <<-EOS.strip_heredoc
403
+ X user_create 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:10:00Z
404
+ Y user_update 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:20:00Z
405
+ Z user_delete 3 10 0 1 "{""enabled"":true}" \\N \\N 2015-01-01T00:30:00Z
406
+ EOS
407
+ end
408
+
409
+ let(:target_data) do # expected output: one row for create, two for update, one for delete, with time_key rendered as Unix epoch seconds
410
+ <<-EOS.strip_heredoc
411
+ 10 1 1420071000
412
+ 10 0 1420071600
413
+ 10 0 1420071600
414
+ 10 -1 1420072200
415
+ EOS
416
+ end
417
+
418
+ it 'should match target data' do # the subject is defined outside this excerpt
419
+ is_expected.to eq(target_data)
420
+ end
421
+
422
+ it_behaves_like 'apply input/output' # shared examples defined outside this excerpt
423
+ end
425
+
426
+ context 'from event with array attribute to fact' do # Spec: an event attribute declared with array: true is expanded into one fact row per added or removed element
426
+ before do
427
+ catalog.clear! # presumably drops schemas registered by outer setup before this context redefines them (TODO confirm)
428
+ catalog.schema :hive do
429
+ event 'user' do
430
+ attribute 'id', type: :integer, immutable: true
431
+ attribute 'group_id', type: :integer, array: true # read below as row[:group_id_now] and row[:group_id_was]
432
+ end
433
+
434
+ dimension 'group', type: :two, implicit: true do
435
+ column 'group_id'
436
+ end
437
+
438
+ fact 'user' do
439
+ references :group
440
+ column 'junk' # not listed in the columns: option of the map below
441
+ measure 'total'
442
+ end
443
+
444
+ map from: hive.user_event, to: hive.user_fact, columns: %w(group.group_id total time_key) do |row| # presumably columns: selects and orders the emitted columns (TODO confirm)
445
+ result = []
446
+ (row[:group_id_now] - row[:group_id_was]).each do |group_id| # ids present now but not before get a row with total 1
447
+ result <<
448
+ {
449
+ 'group.group_id' => group_id,
450
+ 'total' => 1,
451
+ 'time_key' => row[:created_at]
452
+ }
453
+ end
454
+ (row[:group_id_was] - row[:group_id_now]).each do |group_id| # ids present before but not now get a row with total -1
455
+ result <<
456
+ {
457
+ 'group.group_id' => group_id,
458
+ 'total' => -1,
459
+ 'time_key' => row[:created_at]
460
+ }
461
+ end
462
+ result
463
+ end
464
+ end
465
+ end
466
+
467
+ let(:source) do
468
+ catalog.hive.user_event
469
+ end
470
+
471
+ let(:target) do
472
+ catalog.hive.user_fact
473
+ end
474
+
475
+ let(:source_data) do # input fixture: a comment line and a blank line ahead of four events
476
+ <<-EOS.strip_heredoc
477
+ # new lines and comments should be skipped
478
+
479
+ X user_create 3 [1,2] [] 0 2015-01-01T00:10:00Z
480
+ Y user_update 3 [1,2,3] [1,2] 1 2015-01-01T00:20:00Z
481
+ Y user_update 3 [1,2] [1,2,3] 1 2015-01-01T00:30:00Z
482
+ Z user_delete 3 [] [1,2] 0 2015-01-01T00:40:00Z
483
+ EOS
484
+ end
485
+
486
+ let(:target_data) do # expected output: six rows, none produced by the comment or blank input lines
487
+ <<-EOS.strip_heredoc
488
+ 1 1 1420071000
489
+ 2 1 1420071000
490
+ 3 1 1420071600
491
+ 3 -1 1420072200
492
+ 1 -1 1420072800
493
+ 2 -1 1420072800
494
+ EOS
495
+ end
496
+
497
+ it 'should match target data' do # the subject is defined outside this excerpt
498
+ is_expected.to eq(target_data)
499
+ end
500
+
501
+ it_behaves_like 'apply input/output' # shared examples defined outside this excerpt
502
+ end
504
+
505
+ context 'from event to postgres dimension with raw json' do # Spec: maps hive.user_event rows onto postgres.user_dimension when the input JSON field is not CSV-quoted
505
+ before do
506
+ catalog.schema :files do
507
+ map from: hive.user_event, to: postgres.user_dimension do |row| # registers the row-level mapping under test
508
+ {
509
+ 'tenant_id' => row[:tenant_id],
510
+ 'user_id' => row[:id],
511
+ 'user_account_state.name' => row[:type] =~ /delete/ ? 'deleted' : 'active', # delete events are labelled deleted, everything else active
512
+ 'admin' => row[:type] =~ /delete/ ? row[:admin_was] : row[:admin_now], # delete events take the previous admin value, others the current one
513
+ 'preferences_now' => row[:preferences_now],
514
+ 'preferences_was' => row[:preferences_was],
515
+ 'source' => 'user_event',
516
+ 'cluster_id' => 100
517
+ }
518
+ end
519
+ end
520
+ end
521
+
522
+ let(:source) do
523
+ catalog.hive.user_event
524
+ end
525
+
526
+ let(:target) do
527
+ catalog.postgres.user_dimension
528
+ end
529
+
530
+ let(:source_data) do # input fixture: the delete event carries bare JSON with no surrounding or doubled quotes
531
+ <<-EOS.strip_heredoc
532
+ X user_create 1 30 0 \\N \\N \\N
533
+ Y user_delete 2 40 0 1 {"enabled":true} \\N
534
+ EOS
535
+ end
536
+
537
+ let(:target_data) do # expected output: header line, and the JSON value emitted with CSV quoting (inner quotes doubled)
538
+ <<-EOS.strip_heredoc
539
+ tenant_id,user_id,user_account_state_type_name,admin,preferences_now,preferences_was,source,cluster_id
540
+ 30,1,active,FALSE,{},{},user_event,100
541
+ 40,2,deleted,TRUE,"{""enabled"":true}",{},user_event,100
542
+ EOS
543
+ end
544
+
545
+ it 'should match target data' do # the subject is defined outside this excerpt
546
+ is_expected.to eq(target_data)
547
+ end
548
+
549
+ it_behaves_like 'apply input/output' # shared examples defined outside this excerpt
550
+ end
552
+ end
553
+
554
+ describe Masamune::Schema::Map::JSONEncoder do # Specs for what JSONEncoder#gets returns for raw versus already-quoted JSON fields
554
+ let(:io) { StringIO.new } # in-memory input that each context fills and rewinds
555
+ let(:store) { double(json_encoding: :raw, format: :csv) } # test double answering json_encoding with :raw and format with :csv
556
+ let(:encoder) { described_class.new(io, store) }
557
+
558
+ subject { encoder.gets } # a single read through the encoder
559
+
560
+ context 'with raw empty json' do
561
+ before do
562
+ io.write '{},{}'
563
+ io.rewind # reposition at the start so gets sees the data just written
564
+ end
565
+ it { is_expected.to eq(%Q{"{}","{}"}) } # each bare empty object comes back wrapped in double quotes
566
+ end
567
+
568
+ context 'with raw quoted json' do
569
+ before do
570
+ io.write '"{}","{}"'
571
+ io.rewind
572
+ end
573
+ it { is_expected.to eq(%Q{"{}","{}"}) } # already-quoted input comes back unchanged
574
+ end
575
+
576
+ context 'with raw json' do
577
+ before do
578
+ io.write '{"enabled":true,"state":""}'
579
+ io.rewind
580
+ end
581
+ it { is_expected.to eq(%Q{"{""enabled"":true,""state"":""""}"}) } # inner quotes are doubled and the whole object is wrapped in quotes
582
+ end
583
+
584
+ context 'with quoted json' do
585
+ before do
586
+ io.write '"{""enabled"":true,""state"":""""}"'
587
+ io.rewind
588
+ end
589
+ it { is_expected.to eq(%Q{"{""enabled"":true,""state"":""""}"}) } # already CSV-quoted input comes back unchanged
590
+ end
591
+ end
593
+ end