masamune 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +54 -0
- data/Rakefile +15 -0
- data/bin/masamune-elastic-mapreduce +4 -0
- data/bin/masamune-hive +4 -0
- data/bin/masamune-psql +4 -0
- data/bin/masamune-shell +4 -0
- data/lib/masamune.rb +56 -0
- data/lib/masamune/accumulate.rb +60 -0
- data/lib/masamune/actions.rb +38 -0
- data/lib/masamune/actions/data_flow.rb +131 -0
- data/lib/masamune/actions/date_parse.rb +75 -0
- data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
- data/lib/masamune/actions/execute.rb +52 -0
- data/lib/masamune/actions/filesystem.rb +37 -0
- data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
- data/lib/masamune/actions/hadoop_streaming.rb +41 -0
- data/lib/masamune/actions/hive.rb +74 -0
- data/lib/masamune/actions/postgres.rb +76 -0
- data/lib/masamune/actions/postgres_admin.rb +34 -0
- data/lib/masamune/actions/s3cmd.rb +44 -0
- data/lib/masamune/actions/transform.rb +89 -0
- data/lib/masamune/after_initialize_callbacks.rb +55 -0
- data/lib/masamune/cached_filesystem.rb +110 -0
- data/lib/masamune/commands.rb +37 -0
- data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
- data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
- data/lib/masamune/commands/hadoop_streaming.rb +116 -0
- data/lib/masamune/commands/hive.rb +178 -0
- data/lib/masamune/commands/interactive.rb +37 -0
- data/lib/masamune/commands/postgres.rb +128 -0
- data/lib/masamune/commands/postgres_admin.rb +72 -0
- data/lib/masamune/commands/postgres_common.rb +33 -0
- data/lib/masamune/commands/retry_with_backoff.rb +60 -0
- data/lib/masamune/commands/s3cmd.rb +70 -0
- data/lib/masamune/commands/shell.rb +202 -0
- data/lib/masamune/configuration.rb +195 -0
- data/lib/masamune/data_plan.rb +31 -0
- data/lib/masamune/data_plan/builder.rb +66 -0
- data/lib/masamune/data_plan/elem.rb +190 -0
- data/lib/masamune/data_plan/engine.rb +162 -0
- data/lib/masamune/data_plan/rule.rb +292 -0
- data/lib/masamune/data_plan/set.rb +176 -0
- data/lib/masamune/environment.rb +164 -0
- data/lib/masamune/filesystem.rb +567 -0
- data/lib/masamune/has_environment.rb +40 -0
- data/lib/masamune/helpers.rb +27 -0
- data/lib/masamune/helpers/postgres.rb +84 -0
- data/lib/masamune/io.rb +33 -0
- data/lib/masamune/last_element.rb +53 -0
- data/lib/masamune/method_logger.rb +41 -0
- data/lib/masamune/multi_io.rb +39 -0
- data/lib/masamune/schema.rb +36 -0
- data/lib/masamune/schema/catalog.rb +233 -0
- data/lib/masamune/schema/column.rb +527 -0
- data/lib/masamune/schema/dimension.rb +133 -0
- data/lib/masamune/schema/event.rb +121 -0
- data/lib/masamune/schema/fact.rb +133 -0
- data/lib/masamune/schema/map.rb +265 -0
- data/lib/masamune/schema/row.rb +133 -0
- data/lib/masamune/schema/store.rb +115 -0
- data/lib/masamune/schema/table.rb +308 -0
- data/lib/masamune/schema/table_reference.rb +76 -0
- data/lib/masamune/spec_helper.rb +23 -0
- data/lib/masamune/string_format.rb +34 -0
- data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
- data/lib/masamune/tasks/hive_thor.rb +55 -0
- data/lib/masamune/tasks/postgres_thor.rb +47 -0
- data/lib/masamune/tasks/shell_thor.rb +63 -0
- data/lib/masamune/template.rb +77 -0
- data/lib/masamune/thor.rb +186 -0
- data/lib/masamune/thor_loader.rb +38 -0
- data/lib/masamune/topological_hash.rb +34 -0
- data/lib/masamune/transform.rb +47 -0
- data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
- data/lib/masamune/transform/bulk_upsert.rb +52 -0
- data/lib/masamune/transform/consolidate_dimension.rb +54 -0
- data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
- data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
- data/lib/masamune/transform/define_event_view.hql.erb +51 -0
- data/lib/masamune/transform/define_event_view.rb +60 -0
- data/lib/masamune/transform/define_index.psql.erb +34 -0
- data/lib/masamune/transform/define_schema.hql.erb +23 -0
- data/lib/masamune/transform/define_schema.psql.erb +79 -0
- data/lib/masamune/transform/define_schema.rb +56 -0
- data/lib/masamune/transform/define_table.hql.erb +34 -0
- data/lib/masamune/transform/define_table.psql.erb +95 -0
- data/lib/masamune/transform/define_table.rb +40 -0
- data/lib/masamune/transform/define_unique.psql.erb +30 -0
- data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
- data/lib/masamune/transform/insert_reference_values.rb +64 -0
- data/lib/masamune/transform/load_dimension.rb +47 -0
- data/lib/masamune/transform/load_fact.rb +45 -0
- data/lib/masamune/transform/operator.rb +96 -0
- data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
- data/lib/masamune/transform/relabel_dimension.rb +39 -0
- data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
- data/lib/masamune/transform/rollup_fact.rb +149 -0
- data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
- data/lib/masamune/transform/snapshot_dimension.rb +74 -0
- data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
- data/lib/masamune/transform/stage_dimension.rb +83 -0
- data/lib/masamune/transform/stage_fact.psql.erb +80 -0
- data/lib/masamune/transform/stage_fact.rb +111 -0
- data/lib/masamune/version.rb +25 -0
- data/spec/fixtures/aggregate.sql.erb +25 -0
- data/spec/fixtures/comment.sql.erb +27 -0
- data/spec/fixtures/invalid.sql.erb +23 -0
- data/spec/fixtures/relative.sql.erb +23 -0
- data/spec/fixtures/simple.sql.erb +28 -0
- data/spec/fixtures/whitespace.sql.erb +30 -0
- data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
- data/spec/masamune/actions/execute_spec.rb +50 -0
- data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
- data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
- data/spec/masamune/actions/hive_spec.rb +117 -0
- data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
- data/spec/masamune/actions/postgres_spec.rb +134 -0
- data/spec/masamune/actions/s3cmd_spec.rb +44 -0
- data/spec/masamune/actions/transform_spec.rb +144 -0
- data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
- data/spec/masamune/cached_filesystem_spec.rb +167 -0
- data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
- data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
- data/spec/masamune/commands/hive_spec.rb +117 -0
- data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
- data/spec/masamune/commands/postgres_spec.rb +100 -0
- data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
- data/spec/masamune/commands/s3cmd_spec.rb +50 -0
- data/spec/masamune/commands/shell_spec.rb +101 -0
- data/spec/masamune/configuration_spec.rb +102 -0
- data/spec/masamune/data_plan/builder_spec.rb +91 -0
- data/spec/masamune/data_plan/elem_spec.rb +102 -0
- data/spec/masamune/data_plan/engine_spec.rb +356 -0
- data/spec/masamune/data_plan/rule_spec.rb +407 -0
- data/spec/masamune/data_plan/set_spec.rb +517 -0
- data/spec/masamune/environment_spec.rb +65 -0
- data/spec/masamune/filesystem_spec.rb +1421 -0
- data/spec/masamune/helpers/postgres_spec.rb +95 -0
- data/spec/masamune/schema/catalog_spec.rb +613 -0
- data/spec/masamune/schema/column_spec.rb +696 -0
- data/spec/masamune/schema/dimension_spec.rb +137 -0
- data/spec/masamune/schema/event_spec.rb +75 -0
- data/spec/masamune/schema/fact_spec.rb +117 -0
- data/spec/masamune/schema/map_spec.rb +593 -0
- data/spec/masamune/schema/row_spec.rb +28 -0
- data/spec/masamune/schema/store_spec.rb +49 -0
- data/spec/masamune/schema/table_spec.rb +395 -0
- data/spec/masamune/string_format_spec.rb +60 -0
- data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
- data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
- data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
- data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
- data/spec/masamune/template_spec.rb +77 -0
- data/spec/masamune/thor_spec.rb +238 -0
- data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
- data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
- data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
- data/spec/masamune/transform/define_event_view_spec.rb +84 -0
- data/spec/masamune/transform/define_schema_spec.rb +83 -0
- data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
- data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
- data/spec/masamune/transform/define_table.table_spec.rb +525 -0
- data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
- data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
- data/spec/masamune/transform/load_dimension_spec.rb +76 -0
- data/spec/masamune/transform/load_fact_spec.rb +89 -0
- data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
- data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
- data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
- data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
- data/spec/masamune/transform/stage_fact_spec.rb +204 -0
- data/spec/masamune_spec.rb +32 -0
- data/spec/spec_helper.rb +41 -0
- data/spec/support/masamune/example_group.rb +36 -0
- data/spec/support/masamune/mock_command.rb +99 -0
- data/spec/support/masamune/mock_delegate.rb +51 -0
- data/spec/support/masamune/mock_filesystem.rb +96 -0
- data/spec/support/masamune/thor_mute.rb +35 -0
- data/spec/support/rspec/example/action_example_group.rb +34 -0
- data/spec/support/rspec/example/task_example_group.rb +80 -0
- data/spec/support/rspec/example/transform_example_group.rb +36 -0
- data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
- metadata +462 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Table subclass describing a dimension. After Table#initialize has run,
# extra bookkeeping columns are appended according to +type+
# (:one, :date, :two, :four, :ledger or :stage).
class Dimension < Table
  # Delegates construction to Table, then adds the type-specific columns.
  def initialize(opts = {})
    super
    initialize_dimension_columns!
  end

  # Suffix for this table: 'type' for :mini, 'dimension' for
  # :one/:two/:four/:date, otherwise the type rendered as a string.
  # When a parent is present its suffix is prepended, joined with '_'.
  def suffix
    own = case type
          when :mini then 'type'
          when :one, :two, :four, :date then 'dimension'
          else type.to_s
          end
    return own unless parent
    [parent.suffix, own].compact.join('_')
  end

  # Column whose id is :start_at, or nil when there is none.
  def start_key
    columns.values.find { |candidate| candidate.id == :start_at }
  end

  # Column whose id is :end_at, or nil when there is none.
  def end_key
    columns.values.find { |candidate| candidate.id == :end_at }
  end

  # Column whose id is :version, or nil when there is none.
  def version_key
    columns.values.find { |candidate| candidate.id == :version }
  end

  # Memoized companion table of type :ledger, built from this table's
  # id, store, references and a derived column list, with self as parent.
  def ledger_table
    @ledger_table ||= self.class.new(
      id: id,
      type: :ledger,
      store: store,
      columns: ledger_table_columns,
      references: references.values,
      parent: self
    )
  end

  # Ids of the columns this class adds itself for the current type;
  # any other type defers to the superclass.
  def reserved_column_ids
    case type
    when :one, :date then [:last_modified_at]
    when :two        then [:start_at, :end_at, :version, :last_modified_at]
    when :four       then [:parent_id, :record_id, :start_at, :end_at, :version, :last_modified_at]
    when :ledger     then [:source_kind, :source_uuid, :start_at, :last_modified_at, :delta]
    else super
    end
  end

  private

  # Derives the ledger table's columns from this table's columns.
  # Surrogate-key and reserved columns map to nil (nil entries are kept in
  # the result), :key_value columns become a non-strict "<id>_now" /
  # "<id>_was" pair, and every other column is copied.
  def ledger_table_columns
    expanded = columns.values.map do |source|
      if source.surrogate_key || reserved_column_ids.include?(source.id)
        nil
      elsif source.type == :key_value
        current = source.dup
        previous = source.dup
        current_id = "#{source.id}_now"
        previous_id = "#{source.id}_was"
        current.id = current_id
        previous.id = previous_id
        current.strict = false
        previous.strict = false
        [current, previous]
      else
        copy = source.dup
        # Copies stay strict only for key columns and for references whose
        # surrogate key is marked auto.
        keep_strict = source.surrogate_key ||
                      source.natural_key ||
                      (source.reference && source.reference.surrogate_key.auto)
        copy.strict = false unless keep_strict
        copy
      end
    end
    expanded.flatten
  end

  # Adds the integer 'id' surrogate key for the listed dimension types;
  # does nothing (returns nil) for any other type.
  def initialize_surrogate_key_column!
    return unless [:mini, :one, :two, :four, :ledger, :date].include?(type)
    initialize_column! id: 'id', type: :integer, surrogate_key: true
  end

  # Appends the bookkeeping columns that belong to the current type.
  def initialize_dimension_columns!
    case type
    when :one, :date
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
    when :two
      initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
      initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
      initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
    when :four
      ledger = ledger_table
      children << ledger
      # FIXME: derive type from parent
      initialize_column! id: 'parent_id', type: :integer, null: true, reference: ledger
      initialize_column! id: 'record_id', type: :integer, null: true, reference: ledger
      initialize_column! id: 'start_at', type: :timestamp, default: 'TO_TIMESTAMP(0)', index: true, unique: 'natural'
      initialize_column! id: 'end_at', type: :timestamp, null: true, index: true
      initialize_column! id: 'version', type: :integer, default: 1, null: true, index: true
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
    when :ledger
      initialize_column! id: 'source_kind', type: :string, unique: 'natural'
      initialize_column! id: 'source_uuid', type: :string, unique: 'natural'
      initialize_column! id: 'start_at', type: :timestamp, index: true, unique: 'natural'
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()'
      initialize_column! id: 'delta', type: :integer
    when :stage
      # An inheriting stage table re-creates each of the parent's reserved columns.
      parent.reserved_columns.each { |_, reserved| initialize_column! reserved.as_hash } if inherit
    end
  end
end
|
133
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Schema description of an event: an id, a store and a set of attributes
# that are expanded into columns on demand.
class Event
  # One attribute of an event. Immutable attributes map to a single column;
  # all others map to a "<id>_now" / "<id>_was" column pair.
  class Attribute
    attr_accessor :id, :type, :array, :immutable

    # opts keys: :id (required), :type (default :integer),
    # :array (default false), :immutable (default false).
    # Raises ArgumentError when :id is absent. Symbolizes opts in place.
    def initialize(opts = {})
      opts.symbolize_keys!
      opts.key?(:id) || raise(ArgumentError, 'required parameter id: missing')
      self.id        = opts[:id].to_sym
      self.type      = opts.fetch(:type, :integer).to_sym
      self.array     = opts.fetch(:array, false)
      self.immutable = opts.fetch(:immutable, false)
    end

    # Yields one [column_id, Column] pair per column derived from this
    # attribute; each Column has +event+ as its parent.
    def as_columns(event, &block)
      derived_ids = immutable ? [id] : [:"#{id}_now", :"#{id}_was"]
      derived_ids.each do |column_id|
        yield [column_id, Column.new(id: column_id, type: type, array: array, parent: event)]
      end
    end
  end

  DEFAULT_ATTRIBUTES = {
    id:         nil,
    store:      nil,
    attributes: [],
    debug:      false
  }

  attr_accessor(*DEFAULT_ATTRIBUTES.keys)

  # Assigns every DEFAULT_ATTRIBUTES key (overridden by opts) through its
  # writer. Raises ArgumentError when :id is absent. Symbolizes opts in place.
  def initialize(opts = {})
    opts.symbolize_keys!
    opts.key?(:id) || raise(ArgumentError, 'required parameter id: missing')
    DEFAULT_ATTRIBUTES.merge(opts).each { |key, value| public_send("#{key}=", value) }
  end

  # @name when set, otherwise id and suffix joined with '_'.
  def name
    @name || [id, suffix].compact.join('_')
  end

  def suffix
    'event'
  end

  # Stores the given attributes in a hash keyed by each attribute's id.
  def attributes=(attributes)
    @attributes = {}
    attributes.each { |item| @attributes[item.id] = item }
  end

  # Memoized column hash: :uuid and :type first, then the columns derived
  # from each attribute, then :delta and :created_at.
  def columns
    @columns ||= begin
      built = {}
      built[:uuid] = Column.new id: :uuid, type: :uuid, parent: self
      built[:type] = Column.new id: :type, type: :string, parent: self
      attributes.each do |_, attribute|
        attribute.as_columns(self) { |column_id, column| built[column_id] = column }
      end
      built[:delta] = Column.new id: :delta, type: :integer, parent: self
      built[:created_at] = Column.new id: :created_at, type: :timestamp, parent: self
      built
    end
  end

  # Ids of the columns every event carries regardless of its attributes.
  def reserved_column_ids
    @reserved_column_ids ||= [:uuid, :type, :delta, :created_at]
  end

  # Columns whose id is not one of the reserved ids.
  def unreserved_columns
    columns.reject { |_, candidate| reserved_column_ids.include?(candidate.id) }
  end

  def create_type
    @create_type ||= "#{id}_create"
  end

  def update_type
    @update_type ||= "#{id}_update"
  end

  def delete_type
    @delete_type ||= "#{id}_delete"
  end

  # Looks a column up by name (converted to a symbol); nil when unknown.
  def dereference_column_name(name)
    columns[name.to_sym]
  end
end
|
121
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
module Masamune::Schema
|
24
|
+
# Table subclass describing a fact table, optionally qualified by a grain
# (one of SUPPORTED_GRAINS), a partition specification and a range.
class Fact < Table
  SUPPORTED_GRAINS = [:transaction, :hourly, :daily, :monthly]

  attr_accessor :grain, :partition, :range

  # Consumes :grain and :partition from opts (opts is symbolized and
  # modified in place), builds the table with type defaulting to :fact,
  # adds the fact columns, then appends each foreign-key column's name,
  # and the time key's name, to that column's own index.
  def initialize(opts = {})
    opts.symbolize_keys!
    self.grain = opts.delete(:grain)
    @partition = opts.delete(:partition)
    super(opts.reverse_merge(type: :fact))
    initialize_fact_columns!
    foreign_key_columns.each { |foreign_key| foreign_key.index << foreign_key.name }
    key = time_key
    key.index << key.name
  end

  # The raw id and the grain (when set) joined with '_', as a symbol.
  def id
    [@id, grain].compact.join('_').to_sym
  end

  # Sets the grain as a symbol. A nil/false value is ignored and leaves any
  # existing grain untouched. Raises ArgumentError for an unsupported grain.
  def grain=(grain = nil)
    return unless grain
    normalized = grain.to_sym
    raise ArgumentError, "unknown grain '#{grain}'" unless SUPPORTED_GRAINS.include?(normalized)
    @grain = normalized
  end

  # The inherited suffix's '_'-separated parts plus the range's suffix
  # (when a range is set), with nils and duplicates removed.
  def suffix
    inherited = super
    parts = inherited.split('_')
    parts << range.try(:suffix)
    parts.compact.uniq.join('_')
  end

  # First column that references a table of type :date, or nil.
  def date_column
    columns.values.find do |candidate|
      candidate && candidate.reference && candidate.reference.type == :date
    end
  end

  # Column whose id is :time_key, or nil.
  def time_key
    columns.values.find { |candidate| candidate.id == :time_key }
  end

  # The superclass's stage table, adjusted to carry this fact's raw id,
  # store, range and grain, with uniqueness switched off on every column.
  def stage_table(*a)
    stage = super
    stage.id = @id
    stage.store = store
    stage.range = range
    stage.grain = grain
    stage.columns.each { |_, staged| staged.unique = false }
    stage
  end

  # Child table for the partition range that +date+ falls into; one table
  # is cached per bound range.
  def partition_table(date)
    bound_range = partition_rule.bind_date(date)
    cache = (@partition_tables ||= {})
    cache[bound_range] ||= self.class.new(
      id: @id,
      store: store,
      columns: partition_table_columns,
      parent: self,
      range: bound_range,
      grain: grain,
      inherit: true
    )
  end

  # Columns flagged as partition columns.
  def partitions
    columns.select { |_, candidate| candidate.partition }
  end

  # Columns flagged as measures.
  def measures
    columns.select { |_, candidate| candidate.measure }
  end

  # CHECK constraint limiting time_key to the range; nil without a range.
  def constraints
    return unless range
    "CHECK (time_key >= #{range.start_time.to_i} AND time_key < #{range.stop_time.to_i})"
  end

  # Ids of the columns this class adds itself for type :fact; any other
  # type defers to the superclass.
  def reserved_column_ids
    return super unless type == :fact
    [:time_key, :last_modified_at]
  end

  private

  # Intentionally empty: this override adds no surrogate key column.
  def initialize_surrogate_key_column!
  end

  # Adds 'time_key' (and 'last_modified_at' unless the store type is :hive)
  # for type :fact; an inheriting :stage table re-creates each of the
  # parent's reserved columns instead.
  def initialize_fact_columns!
    case type
    when :fact
      initialize_column! id: 'time_key', type: :integer, index: true
      initialize_column! id: 'last_modified_at', type: :timestamp, default: 'NOW()' unless store.type == :hive
    when :stage
      parent.reserved_columns.each { |_, reserved| initialize_column! reserved.as_hash } if inherit
    end
  end

  # Memoized data-plan rule used to bind a date to a partition range.
  def partition_rule
    @partition_rule ||= Masamune::DataPlan::Rule.new(nil, :tmp, :target, table: name, partition: @partition)
  end

  # Fresh copies of the unreserved columns for a partition table.
  def partition_table_columns
    unreserved_columns.map { |_, original| original.dup }
  end
end
|
133
|
+
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# The MIT License (MIT)
|
2
|
+
#
|
3
|
+
# Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'csv'
|
24
|
+
|
25
|
+
module Masamune::Schema
|
26
|
+
class Map
|
27
|
+
# IO wrapper whose #gets rewrites each delimited line so that every field
# is double-quoted and separators inside a {...} group stay part of the
# field. Lines pass through untouched when the store's json_encoding is
# :quoted.
class JSONEncoder < SimpleDelegator
  # io    - object responding to #gets; all other calls are delegated to it.
  # store - object responding to #json_encoding and #format.
  def initialize(io, store)
    super io
    @store = store
  end

  # Reads the next line from the wrapped IO. Returns nil at end of input,
  # the raw line when encoding is skipped, otherwise the re-encoded line
  # (surrounding whitespace, including the line terminator, is stripped).
  def gets(*a)
    line = __getobj__.gets(*a)
    return unless line
    return line if skip?
    encode(line, separator).join(separator)
  end

  private

  def skip?
    @store.json_encoding == :quoted
  end

  # Splits +line+ on +separator+, ignoring separators that occur inside
  # braces, and returns the quoted fields.
  #
  # Brace nesting is tracked with a depth counter rather than a boolean so
  # that a nested object such as {"a":{"b":1},"c":2} is kept as one field;
  # with a boolean the first '}' ended the group and the following
  # separator split the object in two.
  def encode(line, separator)
    fields = []
    buffer = ''.dup
    depth = 0
    line.strip.each_char do |char|
      case char
      when '{'
        depth += 1
        buffer << char
      when '}'
        # An unmatched '}' is kept as data and never drives depth negative.
        depth -= 1 if depth > 0
        buffer << char
      when separator
        if depth > 0
          buffer << char
        else
          fields << quote(buffer)
          buffer = ''.dup
        end
      else
        buffer << char
      end
    end
    fields << quote(buffer)
    fields.compact
  end

  # Wraps +buffer+ in double quotes, doubling embedded quotes; a value that
  # already starts and ends with a double quote is returned unchanged.
  def quote(buffer)
    return buffer if buffer =~ /\A".*"\z/
    %Q{"#{buffer.gsub('"', '""')}"}
  end

  # Tab for :tsv stores, comma otherwise.
  def separator
    @separator ||= (@store.format == :tsv ? "\t" : ',')
  end
end
|
82
|
+
|
83
|
+
# Reads rows from, or appends rows to, an IO holding delimited data for a
# given table. An IO must be attached with #bind before #each or #append.
class Buffer
  extend Forwardable

  def_delegators :@io, :flush, :path

  # table   - object responding to #store (and #columns / #name when used).
  # options - :distinct => true drops rows whose serialized form has
  #           already been appended through this buffer.
  def initialize(table, options = {})
    @table = table
    @store = table.store
    @lines = 0
    @options = options
  end

  # Attaches +io+ (after setting its encoding) and discards any CSV writer
  # created for a previously bound IO.
  def bind(io)
    @io = io.set_encoding('binary', 'UTF-8', undef: :replace)
    @csv = nil
  end

  # Parses the bound IO and yields one hash per row. Rows whose string form
  # starts with '#' are skipped; a row that fails to convert is logged and
  # yielded as nil.
  def each(&block)
    raise 'must call Buffer#bind first' unless @io
    headers = @store.headers || @table.columns.keys
    # CSV takes its options as keywords; a positional hash raises
    # ArgumentError on Ruby 3, hence the double splat.
    CSV.parse(JSONEncoder.new(@io, @store), **options.merge(headers: headers)) do |data|
      next if data.to_s =~ /\A#/
      yield safe_row(data)
    end
  end

  # Serializes +data+ as a row of the table and writes it to the bound IO.
  # Rows missing required columns are logged and skipped. The CSV writer is
  # created on first use; headers are written only when the store asks for
  # them and nothing has been appended yet.
  def append(data)
    raise 'must call Buffer#bind first' unless @io
    row = Masamune::Schema::Row.new(parent: @table, values: data.to_hash)
    write_headers = @store.headers && @lines < 1
    @csv ||= CSV.new(@io, **options.merge(headers: row.headers, write_headers: write_headers))
    if row.missing_required_columns.any?
      missing_required_column_names = row.missing_required_columns.map(&:name)
      @store.logger.warn("row '#{row.to_hash}' is missing required columns '#{missing_required_column_names.join(',')}', skipping")
    else
      serialized = row.serialize
      @csv << serialized if append?(serialized)
    end
    @lines += 1
  end

  private

  # CSV options derived from the store's format.
  def options
    opts = { skip_blanks: true }
    opts[:col_sep] = "\t" if @store.format == :tsv
    opts
  end

  # Converts parsed +data+ into a row hash. On failure logs a warning and
  # returns nil explicitly; without that the method returned whatever the
  # logger returned, which is typically truthy and so slipped past nil
  # checks in consumers.
  def safe_row(data)
    row = Masamune::Schema::Row.new(parent: @table, values: data.to_hash, strict: false)
    row.to_hash
  rescue
    @store.logger.warn("failed to parse '#{data.to_hash}' for #{@table.name}, skipping")
    nil
  end

  # True when +elem+ should be written: always, unless :distinct is set, in
  # which case only the first occurrence of each element qualifies.
  def append?(elem)
    return true unless @options[:distinct]
    @seen ||= Set.new
    @seen.add?(elem)
  end
end
|
143
|
+
|
144
|
+
# Attribute names accepted by #initialize together with their defaults.
# The default function returns its input row unchanged.
DEFAULT_ATTRIBUTES = {
  source:   nil,
  target:   nil,
  columns:  nil,
  store:    nil,
  function: lambda { |row| row },
  distinct: false,
  debug:    false
}

# One reader/writer pair per attribute name.
attr_accessor(*DEFAULT_ATTRIBUTES.keys)
|
158
|
+
|
159
|
+
# Assigns every DEFAULT_ATTRIBUTES key (overridden by opts) through its
# writer. opts is symbolized in place. Raises ArgumentError when :source or
# :target is absent.
def initialize(opts = {})
  opts.symbolize_keys!
  opts.key?(:source) || raise(ArgumentError, 'required parameter source: missing')
  opts.key?(:target) || raise(ArgumentError, 'required parameter target: missing')
  DEFAULT_ATTRIBUTES.merge(opts).each { |key, value| public_send("#{key}=", value) }
end
|
167
|
+
|
168
|
+
# Stores the table rows are read from, as given.
def source=(source)
  @source = source
end
|
171
|
+
|
172
|
+
# FIXME: avoid implicit conversions
|
173
|
+
# Stores the table rows are written for. A target of type :four is
# replaced by its ledger table; any other target is stored as given.
def target=(target)
  @target = if target.type == :four
              target.ledger_table
            else
              target
            end
end
|
176
|
+
|
177
|
+
# Infers the intermediate column names by running the map function on a
# row of the source's default values and taking the keys of the first
# output. Raises ArgumentError when the function yields no output.
def intermediate_columns
  sample = Array.wrap(function.call(default_row(source.columns))).first
  return sample.keys if sample
  raise ArgumentError, "function for map between '#{source.name}' and '#{target.name}' does not return output for default input"
end
|
183
|
+
|
184
|
+
# Non-inheriting stage table of the target, using the explicitly
# configured columns when present and the inferred ones otherwise.
def intermediate
  stage_columns = columns || intermediate_columns
  target.stage_table(columns: stage_columns, inherit: false)
end
|
187
|
+
|
188
|
+
# Runs the map function over every row of every input file and appends
# the results to +output_file+ (opened in 'a+' mode once per input file).
# Returns the intermediate table.
def apply(input_files, output_file)
  reader = Buffer.new(source)
  writer = Buffer.new(intermediate, distinct: distinct)
  self.class.convert_files(input_files).each do |input_file|
    open_stream(input_file, 'r') do |input_stream|
      reader.bind(input_stream)
      open_stream(output_file, 'a+') do |output_stream|
        writer.bind(output_stream)
        apply_buffer(reader, writer)
      end
    end
  end
  intermediate
end
|
202
|
+
|
203
|
+
# Yields a stream for +file+. An IO or StringIO is flushed and yielded
# as-is (+mode+ is not used); a String or Tempfile is opened with
# File.open in +mode+ for the duration of the block. Any other kind of
# argument yields nothing and returns nil.
def open_stream(file, mode, &block)
  case file
  when IO, StringIO
    file.flush
    yield file
  when String, Tempfile
    File.open(file, mode) { |handle| yield handle }
  end
end
|
214
|
+
|
215
|
+
class << self
  # Reduces a file-like object to its path. Objects that respond to #path
  # are flushed first when they are still open, so data written through
  # them is visible to whoever opens the path next; anything else is
  # returned unchanged.
  def convert_file(file)
    return file unless file.respond_to?(:path)
    file.flush if flushable?(file)
    file.path
  end

  # Normalizes one file, an Array of files or a Set of files into an Array
  # of converted entries.
  def convert_files(files)
    case files
    when Set, Array
      files.map { |file| convert_file(file) }
    else
      [convert_file(files)]
    end
  end

  private

  # True when +file+ can be flushed right now. #open? is honoured when the
  # object provides it; IO, File and Tempfile only provide #closed?, so
  # that is consulted as a fallback (relying on #open? alone meant such
  # objects were never flushed).
  def flushable?(file)
    return false unless file.respond_to?(:flush)
    return file.open? if file.respond_to?(:open?)
    return !file.closed? if file.respond_to?(:closed?)
    false
  end
end
|
236
|
+
|
237
|
+
private
|
238
|
+
|
239
|
+
# Builds a row hash mapping each column's name to its default Ruby value.
def default_row(columns)
  columns.each_with_object({}) do |(_, column), row|
    row[column.name] = column.default_ruby_value
  end
end
|
246
|
+
|
247
|
+
# Feeds every input row through the map function, appends each result to
# the output buffer, then flushes the output buffer.
def apply_buffer(input_buffer, output_buffer)
  input_buffer.each do |record|
    safe_apply_function(record) { |result| output_buffer.append(result) }
  end
  output_buffer.flush
end
|
255
|
+
|
256
|
+
# Calls the map function on +input+ and yields each output (a single
# output is wrapped into a one-element list). A nil/false input is ignored.
# Any StandardError raised by the function or by the block is logged as a
# warning and the input is skipped.
def safe_apply_function(input, &block)
  begin
    return unless input
    Array.wrap(function.call(input)).each { |output| yield output }
  rescue
    @store.logger.warn("failed to process '#{input}' for #{target.name}, skipping")
  end
end
|
264
|
+
end
|
265
|
+
end
|