masamune 0.18.4 → 0.18.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/masamune/actions/aws_emr.rb +9 -7
- data/lib/masamune/actions/data_flow.rb +9 -7
- data/lib/masamune/actions/execute.rb +12 -8
- data/lib/masamune/actions/hive.rb +13 -9
- data/lib/masamune/actions/invoke_parallel.rb +1 -0
- data/lib/masamune/actions/postgres.rb +13 -10
- data/lib/masamune/cached_filesystem.rb +5 -4
- data/lib/masamune/commands/postgres.rb +3 -4
- data/lib/masamune/commands/shell.rb +2 -5
- data/lib/masamune/data_plan/elem.rb +2 -3
- data/lib/masamune/data_plan/engine.rb +12 -8
- data/lib/masamune/data_plan/set.rb +2 -0
- data/lib/masamune/filesystem.rb +1 -1
- data/lib/masamune/io.rb +12 -8
- data/lib/masamune/schema/column.rb +1 -1
- data/lib/masamune/schema/table.rb +9 -7
- data/lib/masamune/transform/postgres/stage_fact.rb +6 -4
- data/lib/masamune/version.rb +1 -1
- data/spec/masamune/filesystem_spec.rb +1 -1
- data/spec/support/masamune/job_example_group.rb +1 -0
- data/spec/support/masamune/step_example_group.rb +2 -0
- data/spec/support/masamune/task_example_group.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22e1c9eb4862cc2a74652f543e04efef5540467e
|
4
|
+
data.tar.gz: 1f8e4cdbf34061ebbe4bb379186b0479cb237d11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e2898f8c2bb0984088592ae9bb6acc292db8357a92bb5401351429349284c2b2f0041e6fff45bff5892a0067f5e70bc60fe445300af201950762290ca88fcd4
|
7
|
+
data.tar.gz: 75ea31bb20edc4de25757e29bc8c8ef9a5b6d78eb38a3beb83d7b9419a8ee4455958f28e4ccd943874a7da547c16628e0870ac2cd441620e105e1bf1a14a05ff
|
@@ -42,13 +42,15 @@ module Masamune::Actions
|
|
42
42
|
|
43
43
|
included do |base|
|
44
44
|
base.class_option :cluster_id, desc: 'AWS EMR cluster_id ID (Hint: `masamune-emr-aws list-clusters`)' if defined?(base.class_option)
|
45
|
-
base.after_initialize
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
45
|
+
if defined?(base.after_initialize)
|
46
|
+
base.after_initialize(:early) do |thor, options|
|
47
|
+
next unless thor.configuration.commands.aws_emr.any?
|
48
|
+
next unless thor.configuration.commands.aws_emr.fetch(:enabled, true)
|
49
|
+
thor.configuration.commands.aws_emr[:cluster_id] = options[:cluster_id] if options[:cluster_id]
|
50
|
+
next unless options[:initialize]
|
51
|
+
thor.validate_cluster_id!
|
52
|
+
end
|
53
|
+
end
|
52
54
|
end
|
53
55
|
end
|
54
56
|
end
|
@@ -88,14 +88,16 @@ module Masamune::Actions
|
|
88
88
|
class_option :resolve, type: :boolean, desc: 'Recursively resolve data dependencies', default: true
|
89
89
|
end
|
90
90
|
|
91
|
-
base.after_initialize
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
thor.
|
91
|
+
if defined?(base.after_initialize)
|
92
|
+
base.after_initialize(:final) do |thor, options|
|
93
|
+
thor.engine.environment = thor.environment
|
94
|
+
thor.engine.filesystem.environment = thor.environment
|
95
|
+
thor.environment.with_process_lock(:data_flow_after_initialize) do
|
96
|
+
thor.send(:prepare_and_execute, options)
|
97
|
+
end
|
98
|
+
exit 0 if thor.top_level?
|
96
99
|
end
|
97
|
-
|
98
|
-
end if defined?(base.after_initialize)
|
100
|
+
end
|
99
101
|
end
|
100
102
|
|
101
103
|
# rubocop:disable Style/ClassVars
|
@@ -44,17 +44,21 @@ module Masamune::Actions
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
if opts[:input]
|
48
|
+
klass.class_eval do
|
49
|
+
define_method(:stdin) do
|
50
|
+
@stdin ||= StringIO.new(opts[:input])
|
51
|
+
end
|
50
52
|
end
|
51
|
-
end
|
53
|
+
end
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
55
|
+
if block_given?
|
56
|
+
klass.class_eval do
|
57
|
+
define_method(:handle_stdout) do |line, line_no|
|
58
|
+
yield(line, line_no)
|
59
|
+
end
|
56
60
|
end
|
57
|
-
end
|
61
|
+
end
|
58
62
|
|
59
63
|
command = klass.new(self)
|
60
64
|
command = Masamune::Commands::RetryWithBackoff.new(command, opts)
|
@@ -60,16 +60,20 @@ module Masamune::Actions
|
|
60
60
|
end
|
61
61
|
|
62
62
|
included do |base|
|
63
|
-
base.after_initialize
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
63
|
+
if defined?(base.after_initialize)
|
64
|
+
base.after_initialize do |thor, options|
|
65
|
+
next unless options[:initialize]
|
66
|
+
thor.create_hive_database_if_not_exists
|
67
|
+
thor.load_hive_schema
|
68
|
+
end
|
69
|
+
end
|
68
70
|
|
69
|
-
base.after_initialize
|
70
|
-
|
71
|
-
|
72
|
-
|
71
|
+
if defined?(base.after_initialize)
|
72
|
+
base.after_initialize(:later) do |thor, options|
|
73
|
+
next unless options[:dry_run]
|
74
|
+
raise ::Thor::InvocationError, 'Dry run of hive failed' unless thor.hive(exec: 'SHOW TABLES;', safe: true, fail_fast: false).success?
|
75
|
+
end
|
76
|
+
end
|
73
77
|
end
|
74
78
|
end
|
75
79
|
end
|
@@ -42,17 +42,18 @@ module Masamune::Actions
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def create_postgres_database_if_not_exists
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
return unless configuration.commands.postgres.key?(:database)
|
46
|
+
return if postgres_helper.database_exists?
|
47
|
+
postgres_admin(action: :create, database: configuration.commands.postgres[:database], safe: true)
|
48
48
|
end
|
49
49
|
|
50
50
|
def load_postgres_setup_files
|
51
|
+
return unless configuration.commands.postgres.key?(:setup_files)
|
51
52
|
configuration.commands.postgres[:setup_files].each do |file|
|
52
53
|
configuration.with_quiet do
|
53
54
|
postgres(file: file, retries: 0)
|
54
55
|
end
|
55
|
-
end
|
56
|
+
end
|
56
57
|
end
|
57
58
|
|
58
59
|
def load_postgres_schema
|
@@ -66,12 +67,14 @@ module Masamune::Actions
|
|
66
67
|
end
|
67
68
|
|
68
69
|
included do |base|
|
69
|
-
base.after_initialize
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
70
|
+
if defined?(base.after_initialize)
|
71
|
+
base.after_initialize do |thor, options|
|
72
|
+
next unless options[:initialize]
|
73
|
+
thor.create_postgres_database_if_not_exists
|
74
|
+
thor.load_postgres_setup_files
|
75
|
+
thor.load_postgres_schema
|
76
|
+
end
|
77
|
+
end
|
75
78
|
end
|
76
79
|
end
|
77
80
|
end
|
@@ -74,11 +74,12 @@ module Masamune
|
|
74
74
|
end
|
75
75
|
|
76
76
|
file_regexp = glob_to_regexp(file_or_glob, options)
|
77
|
+
return unless depth.zero?
|
77
78
|
@cache.get(dirname).each do |entry|
|
78
79
|
next if entry.name == dirname
|
79
80
|
next unless entry.name =~ file_regexp
|
80
81
|
yield entry
|
81
|
-
end
|
82
|
+
end
|
82
83
|
end
|
83
84
|
|
84
85
|
# FIXME: cache eviction policy can be more precise
|
@@ -100,15 +101,15 @@ module Masamune
|
|
100
101
|
return if @filesystem.root_path?(path)
|
101
102
|
put(File.join(@filesystem.dirname(path), '.'), OpenStruct.new(name: @filesystem.dirname(path)))
|
102
103
|
paths = path_split(path)
|
103
|
-
elems = paths.reverse.inject(entry) { |
|
104
|
+
elems = paths.reverse.inject(entry) { |acc, elem| { elem => acc } }
|
104
105
|
@cache.deep_merge!(elems)
|
105
106
|
end
|
106
107
|
|
107
108
|
def get(path)
|
108
109
|
return unless path
|
109
110
|
paths = path_split(path)
|
110
|
-
|
111
|
-
emit(
|
111
|
+
value = paths.inject(@cache) { |acc, elem| acc.is_a?(Hash) ? acc.fetch(elem, {}) : acc }
|
112
|
+
emit(value)
|
112
113
|
rescue KeyError
|
113
114
|
EMPTY_SET
|
114
115
|
end
|
@@ -93,10 +93,9 @@ module Masamune::Commands
|
|
93
93
|
logger.debug("#{output}:\n" + File.read(output))
|
94
94
|
end
|
95
95
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
96
|
+
return unless @exec
|
97
|
+
console("postgres exec '#{strip_sql(@exec)}' #{'into ' + @output if @output}")
|
98
|
+
@file = exec_file
|
100
99
|
end
|
101
100
|
|
102
101
|
def handle_stdout(line, _line_no)
|
@@ -81,11 +81,8 @@ module Masamune::Commands
|
|
81
81
|
end
|
82
82
|
|
83
83
|
def command_args
|
84
|
-
|
85
|
-
|
86
|
-
else
|
87
|
-
raise 'no command_args'
|
88
|
-
end
|
84
|
+
raise 'no command_args' unless @delegate.respond_to?(:command_args) && @delegate.command_args
|
85
|
+
Array.wrap(@delegate.command_args).flatten.compact.map(&:to_s)
|
89
86
|
end
|
90
87
|
|
91
88
|
def command_bin
|
@@ -53,9 +53,8 @@ class Masamune::DataPlan::Elem
|
|
53
53
|
end
|
54
54
|
|
55
55
|
def complete?
|
56
|
-
|
57
|
-
|
58
|
-
end
|
56
|
+
return false unless rule.for_targets?
|
57
|
+
sources.existing.map(&:start_date).uniq.length == sources.map(&:start_date).uniq.length
|
59
58
|
end
|
60
59
|
|
61
60
|
def last_modified_at
|
@@ -121,22 +121,26 @@ class Masamune::DataPlan::Engine
|
|
121
121
|
@target_rules[rule].try(:prepare)
|
122
122
|
@source_rules[rule].try(:prepare)
|
123
123
|
|
124
|
-
|
125
|
-
|
126
|
-
|
124
|
+
if options.fetch(:resolve, true)
|
125
|
+
constrain_max_depth(rule) do
|
126
|
+
sources(rule).group_by { |source| rule_for_target(source.input) }.each do |derived_rule, sources|
|
127
|
+
prepare(derived_rule, targets: sources.map(&:input)) if derived_rule != Masamune::DataPlan::Rule::TERMINAL
|
128
|
+
end
|
127
129
|
end
|
128
|
-
end
|
130
|
+
end
|
129
131
|
clear!
|
130
132
|
end
|
131
133
|
|
132
134
|
def execute(rule, options = {})
|
133
135
|
return if targets(rule).actionable.empty?
|
134
136
|
|
135
|
-
|
136
|
-
|
137
|
-
|
137
|
+
if options.fetch(:resolve, true)
|
138
|
+
constrain_max_depth(rule) do
|
139
|
+
sources(rule).group_by { |source| rule_for_target(source.input) }.each do |derived_rule, _sources|
|
140
|
+
execute(derived_rule, options) if derived_rule != Masamune::DataPlan::Rule::TERMINAL
|
141
|
+
end
|
138
142
|
end
|
139
|
-
end
|
143
|
+
end
|
140
144
|
|
141
145
|
@command_rules[rule].call(self, rule, options)
|
142
146
|
clear!
|
@@ -145,6 +145,7 @@ class Masamune::DataPlan::Set < Set
|
|
145
145
|
def convert_elem(elem)
|
146
146
|
case elem
|
147
147
|
when nil
|
148
|
+
nil
|
148
149
|
when Masamune::DataPlan::Elem
|
149
150
|
elem
|
150
151
|
when String
|
@@ -157,6 +158,7 @@ class Masamune::DataPlan::Set < Set
|
|
157
158
|
def convert_enum(enum)
|
158
159
|
case enum
|
159
160
|
when nil
|
161
|
+
nil
|
160
162
|
when Array
|
161
163
|
enum.flatten.uniq
|
162
164
|
when Set, self.class, Enumerator
|
data/lib/masamune/filesystem.rb
CHANGED
@@ -410,7 +410,7 @@ module Masamune
|
|
410
410
|
when :hdfs
|
411
411
|
hadoop_fs('-chown', '-R', [user, group].compact.join(':'), *file_set)
|
412
412
|
when :s3
|
413
|
-
# NOTE intentionally skip
|
413
|
+
nil # NOTE intentionally skip
|
414
414
|
when :local
|
415
415
|
FileUtils.chown_R(user, group, file_set, file_util_args)
|
416
416
|
end
|
data/lib/masamune/io.rb
CHANGED
@@ -21,13 +21,17 @@
|
|
21
21
|
# THE SOFTWARE.
|
22
22
|
|
23
23
|
class IO
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
unless IO.method_defined?(:wait_readable)
|
25
|
+
def wait_readable(timeout = 0)
|
26
|
+
rs, _ws = IO.select([self], [], [], timeout)
|
27
|
+
rs && rs[0]
|
28
|
+
end
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
unless IO.method_defined?(:wait_writable)
|
32
|
+
def wait_writable(timeout = 0)
|
33
|
+
_rs, ws = IO.select([], [self], [], timeout)
|
34
|
+
ws && ws[0]
|
35
|
+
end
|
36
|
+
end
|
33
37
|
end
|
@@ -476,7 +476,7 @@ module Masamune::Schema
|
|
476
476
|
when Array
|
477
477
|
value.map { |elem| ruby_value(elem, false) }
|
478
478
|
when String
|
479
|
-
Array.wrap(JSON.load(value)).map { |elem| ruby_value(elem, false) }
|
479
|
+
Array.wrap(JSON.load(value)).map { |elem| ruby_value(elem, false) } # rubocop:disable Security/JSONLoad
|
480
480
|
when nil
|
481
481
|
[]
|
482
482
|
end
|
@@ -352,14 +352,16 @@ module Masamune::Schema
|
|
352
352
|
def unique_constraints_map
|
353
353
|
@unique_constraints_map ||= begin
|
354
354
|
map = Hash.new { |h, k| h[k] = [] }
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
355
|
+
unless temporary?
|
356
|
+
columns.each do |_, column|
|
357
|
+
next if column.auto_reference
|
358
|
+
column.unique.each do |unique|
|
359
|
+
map[unique] += auto_surrogate_keys.map(&:name)
|
360
|
+
map[unique] << column.name
|
361
|
+
map[unique].uniq!
|
362
|
+
end
|
361
363
|
end
|
362
|
-
end
|
364
|
+
end
|
363
365
|
Hash[map.sort_by { |k, v| [v.length, k.to_s] }]
|
364
366
|
end
|
365
367
|
end
|
@@ -88,10 +88,12 @@ module Masamune::Transform::Postgres
|
|
88
88
|
coalesce_values << cross_references.map { |cross_reference, cross_column| cross_column.qualified_name(cross_reference.label) }
|
89
89
|
end
|
90
90
|
|
91
|
-
column.reference
|
92
|
-
|
93
|
-
|
94
|
-
|
91
|
+
if column.reference
|
92
|
+
column.reference.auto_surrogate_keys.each do |auto_surrogate_key|
|
93
|
+
next unless auto_surrogate_key.default
|
94
|
+
conditions[reference_name].conditions << "#{auto_surrogate_key.qualified_name(reference.label)} = #{auto_surrogate_key.default}"
|
95
|
+
end
|
96
|
+
end
|
95
97
|
|
96
98
|
if column.reference && !column.reference.default.nil? && column.adjacent.natural_key
|
97
99
|
coalesce_values << column.reference.default(column.adjacent)
|
data/lib/masamune/version.rb
CHANGED
@@ -747,7 +747,7 @@ shared_examples_for 'Filesystem' do
|
|
747
747
|
end
|
748
748
|
|
749
749
|
it { is_expected.to eq(%w(/tmp/c/00.txt /tmp/b/01.txt /tmp/a/02.txt)) }
|
750
|
-
it { expect { |b| instance.glob_sort('/tmp/*', order: :basename, &b) }.to yield_successive_args(
|
750
|
+
it { expect { |b| instance.glob_sort('/tmp/*', order: :basename, &b) }.to yield_successive_args('/tmp/c/00.txt', '/tmp/b/01.txt', '/tmp/a/02.txt') }
|
751
751
|
end
|
752
752
|
|
753
753
|
describe '#copy_file_to_file' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: masamune
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.4
|
4
|
+
version: 0.18.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Andrews
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|