bigquery_migration 0.1.0.pre1
Files added in this gem release:
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +19 -0
- data/README.md +107 -0
- data/Rakefile +10 -0
- data/bigquery_migration.gemspec +31 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/example.yml +22 -0
- data/example/schema.json +22 -0
- data/exe/bq_migrate +4 -0
- data/lib/bigquery_migration.rb +29 -0
- data/lib/bigquery_migration/action.rb +85 -0
- data/lib/bigquery_migration/action_runner.rb +60 -0
- data/lib/bigquery_migration/bigquery_wrapper.rb +675 -0
- data/lib/bigquery_migration/cli.rb +105 -0
- data/lib/bigquery_migration/config_loader.rb +51 -0
- data/lib/bigquery_migration/error.rb +6 -0
- data/lib/bigquery_migration/hash_util.rb +35 -0
- data/lib/bigquery_migration/logger.rb +45 -0
- data/lib/bigquery_migration/schema.rb +388 -0
- data/lib/bigquery_migration/time_with_zone.rb +38 -0
- data/lib/bigquery_migration/version.rb +3 -0
- metadata +183 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'thor'
require 'json'
require 'bigquery_migration'
require_relative 'action_runner'
require_relative 'hash_util'

class BigqueryMigration
  # Thor-based command line interface for bigquery_migration.
  class CLI < Thor
    # Make thor exit with a non-zero status when a command fails.
    # cf. http://qiita.com/KitaitiMakoto/items/c6b9d6311c20a3cc21f9
    def self.exit_on_failure?
      true
    end

    # `run` is reserved by thor, we have to use def _run
    map "run" => "_run"

    option :config_path, aliases: ['-c'], type: :string,
      default: 'config.yml'
    option :log_level, aliases: ["-l"], type: :string,
      desc: 'Log level such as fatal, error, warn, info, or debug',
      default: 'info'
    option :log, type: :string,
      desc: 'Output log to a file',
      default: 'STDOUT'
    option :stdout, type: :string,
      desc: 'Redirect STDOUT to a file',
      default: 'STDOUT'
    option :stderr, type: :string,
      desc: 'Redirect STDERR to a file',
      default: 'STDERR'
    option :exec, type: :boolean,
      desc: 'Execute or dry-run (Default: dry-run)',
      default: false
    option :vars, type: :hash,
      desc: 'Variables used in ERB, thor hash format'
    option :output, aliases: ["-o"], type: :string,
      desc: 'Output result yaml to a file',
      default: 'STDOUT'

    desc 'run <config.yml>', 'run bigquery_migration'
    # Run the actions described in the given config file and print the
    # (secret-masked) result as YAML. Exits 1 when the run fails.
    def _run(config_path)
      opts = options.merge(
        dry_run: !options[:exec]
      )

      init_logger
      reopen_stdout
      reopen_stderr

      result = ActionRunner.new(config_path, opts).run
      open_output do |out|
        out.puts mask_secret(HashUtil.deep_stringify_keys(result).to_yaml)
        logger.info { "DRY-RUN has finished. Use --exec option to run." } if opts[:dry_run]
      end
      exit(1) unless result[:success]
    end

    private

    # Shared global logger instance.
    def logger
      BigqueryMigration.logger
    end

    # Build the global logger from the --log and --log_level options.
    def init_logger
      new_logger = BigqueryMigration::Logger.new(options[:log])
      new_logger.level = options[:log_level]
      BigqueryMigration.logger = new_logger
    end

    # Redirect $stdout into the file given by --stdout (if not 'STDOUT').
    def reopen_stdout
      $stdout.reopen(options[:stdout]) unless options[:stdout] == 'STDOUT'
      $stdout.sync = true
    end

    # Redirect $stderr into the file given by --stderr (if not 'STDERR').
    def reopen_stderr
      $stderr.reopen(options[:stderr]) unless options[:stderr] == 'STDERR'
      $stderr.sync = true
    end

    # Yield the IO designated by --output: $stdout, $stderr, or a file
    # opened for writing (closed automatically).
    def open_output
      case (output = options[:output])
      when 'STDOUT'
        yield($stdout)
      when 'STDERR'
        yield($stderr)
      else
        File.open(output, 'w') { |io| yield(io) }
      end
    end

    # Mask values of password/key-like entries in the result YAML so
    # secrets are not written to logs or output files.
    def mask_secret(yaml_string)
      %w(password key).each do |secret|
        yaml_string.gsub!(/([^ ]*#{secret}): .*$/, '\1: xxxxx')
      end
      yaml_string.gsub!(/(-----BEGIN\s+PRIVATE\s+KEY-----)[0-9A-Za-z+\/=\s\\]+(-----END\s+PRIVATE\s+KEY-----)/m, '\1 xxxxx \2')
      yaml_string
    end
  end
end
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'set'
require 'yaml'
require 'erb'
require 'ostruct'

class BigqueryMigration
  # Loads a YAML (optionally ERB-templated) config file.
  # ERB templates may call `include_file('relative/path')` to embed another
  # file; rendering the same ERB file twice raises AlreayIncluded.
  class ConfigLoader
    attr_reader :config_path, :namespace

    # NOTE: keeps the original (misspelled) class name for backward
    # compatibility with existing rescue clauses.
    class AlreayIncluded < ::StandardError; end

    # @param config_path [String] path to config.yml or config.yml.erb
    # @param vars [Hash] variables exposed to the ERB template
    def initialize(config_path, vars = {})
      @config_path = File.expand_path(config_path)
      @included_files = Set.new
      @namespace = OpenStruct.new(vars)

      unless @namespace.respond_to?(:include_file)
        itself = self
        # ToDo: better way?
        # `include_file` resolves the given path relative to the file that
        # called it (recovered from `caller`), and renders .erb includes.
        @namespace.define_singleton_method(:include_file) do |path|
          caller_path = caller[0][/^([^:]+):\d+:in `[^']*'$/, 1]
          abs_path = File.expand_path(path, File.dirname(caller_path))
          if File.extname(path) == '.erb'
            itself.load_erb(abs_path)
          else
            File.read(abs_path)
          end
        end
      end
    end

    # Parse the config file, rendering ERB first when the extension is .erb.
    # @return [Object] the parsed YAML document
    def load
      if File.extname(config_path) == '.erb'
        YAML.load(load_erb(config_path))
      else
        YAML.load(File.read(config_path))
      end
    end

    # Render an ERB template, guarding against double inclusion.
    # @param path [String] absolute path of the template
    # @raise [AlreayIncluded] when the same file is rendered twice
    def load_erb(path = config_path)
      unless @included_files.add?(path)
        raise AlreayIncluded, "#{path} was included twice"
      end

      raw = File.read(path)
      erb = new_erb(raw)
      erb.filename = path
      erb.result(namespace.instance_eval { binding })
    end

    private

    # Build an ERB instance with "-" trim mode.
    # FIX: `ERB.new(raw, nil, "-")` relies on the positional
    # safe_level/trim_mode arguments, which were deprecated in Ruby 2.6 and
    # removed in newer erb versions (raising ArgumentError). Prefer the
    # `trim_mode:` keyword when the installed ERB supports it.
    def new_erb(raw)
      if ERB.instance_method(:initialize).parameters.any? { |_type, name| name == :trim_mode }
        ERB.new(raw, trim_mode: "-")
      else
        ERB.new(raw, nil, "-")
      end
    end
  end
end
@@ -0,0 +1,35 @@
|
|
1
|
+
class BigqueryMigration
  # Recursive key-conversion helpers for nested Hash/Array structures.
  class HashUtil
    # Recursively convert every hash key in the structure to a Symbol.
    # Arrays are walked element-wise; other values are returned untouched.
    def self.deep_symbolize_keys(obj)
      deep_transform_keys(obj, &:to_sym)
    end

    # Recursively convert every hash key in the structure to a String.
    # Arrays are walked element-wise; other values are returned untouched.
    def self.deep_stringify_keys(obj)
      deep_transform_keys(obj, &:to_s)
    end

    # Walk the structure, applying `block` to each hash key.
    # Builds new hashes/arrays; the input is never mutated.
    def self.deep_transform_keys(obj, &block)
      case obj
      when Hash
        obj.each_with_object({}) do |(key, val), acc|
          acc[block.call(key)] = deep_transform_keys(val, &block)
        end
      when Array
        obj.map { |val| deep_transform_keys(val, &block) }
      else
        obj
      end
    end
    private_class_method :deep_transform_keys
  end
end
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'logger'
# FIX: Time#iso8601 (used by LogFormatter#format_datetime) is defined by the
# 'time' stdlib extension; without this require the formatter raises
# NoMethodError unless 'time' happens to be loaded by another library.
require 'time'

class BigqueryMigration
  # Formats log lines as "<iso8601 time> [SEVERITY] message\n".
  class LogFormatter
    FORMAT = "%s [%s] %s\n"

    def initialize(opts={})
    end

    # ::Logger formatter interface (severity, time, progname, msg).
    def call(severity, time, progname, msg)
      FORMAT % [format_datetime(time), severity, format_message(msg)]
    end

    private
    def format_datetime(time)
      time.iso8601
    end

    def format_severity(severity)
      severity
    end

    # Exceptions are expanded to class, message, and indented backtrace;
    # anything else is stringified.
    def format_message(message)
      case message
      when ::Exception
        e = message
        "#{e.class} (#{e.message})\n #{e.backtrace.join("\n ")}"
      else
        message.to_s
      end
    end
  end

  # ::Logger subclass that understands logdev == 'STDOUT' and installs
  # LogFormatter by default.
  class Logger < ::Logger
    def initialize(logdev, shift_age = 0, shift_size = 1048576)
      logdev = STDOUT if logdev == 'STDOUT'
      super(logdev, shift_age, shift_size)
      @formatter = LogFormatter.new
    end

    # Write a raw message to the underlying device, bypassing formatting.
    def write(msg)
      @logdev.write msg
    end
  end
end
@@ -0,0 +1,388 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json'
|
3
|
+
require_relative 'error'
|
4
|
+
|
5
|
+
require 'set' # FIX: ALLOWED_FIELD_* below use Set at class-eval time; do not rely on load order

class BigqueryMigration
  # Represents a BigQuery table schema as an Array of column hashes like
  #   { name: 'col', type: 'STRING', mode: 'NULLABLE', fields: [...] }
  # Instance methods delegate to the class-level implementations below.
  class Schema < ::Array
    ALLOWED_FIELD_TYPES = Set.new(['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'RECORD', 'TIMESTAMP'])
    ALLOWED_FIELD_MODES = Set.new(['NULLABLE', 'REQUIRED', 'REPEATED'])

    def initialize(columns = [])
      normalized = self.class.normalize_columns(columns)
      super(normalized)
      validate_columns!
    end

    def find_column_by_name(name)
      self.class.find_column_by_name(self, name)
    end

    def validate_columns!
      self.class.validate_columns!(self)
    end

    def validate_permitted_operations!(source_columns)
      target_columns = self
      self.class.validate_permitted_operations!(source_columns, target_columns)
    end

    def normalize_columns
      self.class.normalize_columns(self)
    end

    def shallow_normalize_columns
      self.class.shallow_normalize_columns(self)
    end

    # In-place variant of #shallow_normalize_columns.
    # FIX: previously called the singular class method
    # `shallow_normalize_column!(self)` with the whole columns array,
    # which raised NoMethodError at runtime.
    def shallow_normalize_columns!
      self.class.shallow_normalize_columns!(self)
    end

    def flattened_columns
      self.class.flattened_columns(self)
    end

    def equals?(source_columns)
      self.class.equals?(source_columns, self)
    end

    # self - source_columns
    def diff_columns(source_columns)
      self.class.diff_columns(source_columns, self)
    end

    # diff with only column names
    # self - source_columns
    def diff_columns_by_name(source_columns)
      self.class.diff_columns_by_name(source_columns, self)
    end

    # A.merge!(B) => B overwrites A
    # A.reverse_merge!(B) => A overwrites B, but A is modified
    def reverse_merge!(source_columns)
      self.class.reverse_merge!(source_columns, self)
    end

    def reject_columns!(drop_columns)
      self.class.reject_columns!(drop_columns, self)
    end

    def build_query_fields(source_columns)
      self.class.build_query_fields(source_columns, self)
    end

    class << self
      # The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
      # and must start with a letter or underscore. The maximum length is 128 characters.
      # @raise [ConfigError] when the name violates the rules above
      def validate_name!(name)
        unless name =~ /\A[a-zA-Z_]+\w*\Z/
          raise ConfigError, "Column name `#{name}` is invalid format"
        end
        # FIX: `< 128` rejected names of exactly 128 characters, contradicting
        # the documented limit above; 128 characters are allowed.
        unless name.length <= 128
          raise ConfigError, "Column name `#{name}` must be less than or equal to 128"
        end
      end

      # @raise [ConfigError] when type is not in ALLOWED_FIELD_TYPES
      def validate_type!(type)
        unless ALLOWED_FIELD_TYPES.include?(type)
          raise ConfigError, "Column type `#{type}` is not allowed type"
        end
      end

      # @raise [ConfigError] when mode is not in ALLOWED_FIELD_MODES
      def validate_mode!(mode)
        unless ALLOWED_FIELD_MODES.include?(mode)
          raise ConfigError, "Column mode `#{mode}` is not allowed mode"
        end
      end

      # Recursively validate name/type/mode of every column (and RECORD fields).
      def validate_columns!(columns)
        columns.each do |column|
          validate_name!(column[:name])
          validate_type!(column[:type])
          validate_mode!(column[:mode]) if column[:mode]

          if column[:type] == 'RECORD'
            validate_columns!(column[:fields])
          end
        end
      end

      def find_column_by_name(columns, name)
        (columns || []).find { |c| c[:name] == name }
      end

      # validates permitted changes from old schema to new schema
      def validate_permitted_operations!(source_columns, target_columns)
        flattened_source_columns = flattened_columns(normalize_columns(source_columns))
        flattened_target_columns = flattened_columns(normalize_columns(target_columns))

        flattened_target_columns.keys.each do |flattened_name|
          next unless flattened_source_columns.key?(flattened_name)
          validate_permitted_operations_for_type!(
            flattened_source_columns[flattened_name],
            flattened_target_columns[flattened_name]
          )
          validate_permitted_operations_for_mode!(
            flattened_source_columns[flattened_name],
            flattened_target_columns[flattened_name]
          )
        end
      end

      # @param [Hash] source_column
      # @param [Hash] target_column
      #
      # Disallowed conversion rule is as follows:
      #
      # type: RECORD => type: others
      # mode: REPEATED => change type
      #
      def validate_permitted_operations_for_type!(source_column, target_column)
        source_column = shallow_normalize_column(source_column)
        target_column = shallow_normalize_column(target_column)

        msg = "(#{source_column.to_h} => #{target_column.to_h})"
        if source_column[:type] == 'RECORD'
          if target_column[:type] != 'RECORD'
            raise ConfigError, "`RECORD` can not be changed #{msg}"
          end
        end
        if source_column[:mode] and source_column[:mode] == 'REPEATED'
          if source_column[:type] != target_column[:type]
            raise ConfigError, "`REPEATED` mode column's type can not be changed #{msg}"
          end
        end
      end

      # @param [Hash] source_column
      # @param [Hash] target_column
      #
      # Allowed conversion rule is as follows:
      #
      # (new) => NULLABLE, REPEATED
      # NULLABLE => NULLABLE
      # REQUIRED => REQUIRED, NULLABLE
      # REPEATED => REPEATED
      def validate_permitted_operations_for_mode!(source_column, target_column)
        source_column = shallow_normalize_column(source_column)
        target_column = shallow_normalize_column(target_column)
        source_mode = source_column[:mode]
        target_mode = target_column[:mode]

        return if source_mode == target_mode
        msg = "(#{source_column.to_h} => #{target_column.to_h})"

        case source_mode
        when nil
          if target_mode == 'REQUIRED'
            raise ConfigError, "Newly adding a `REQUIRED` column is not allowed #{msg}"
          end
        when 'NULLABLE'
          raise ConfigError, "`NULLABLE` column can not be changed #{msg}"
        when 'REQUIRED'
          if target_mode == 'REPEATED'
            raise ConfigError, "`REQUIRED` column can not be changed to `REPEATED` #{msg}"
          end
        when 'REPEATED'
          raise ConfigError, "`REPEATED` column can not be changed #{msg}"
        end
      end

      # Deep normalization: symbolize keys, upcase type, default mode to
      # NULLABLE, recursing into RECORD fields.
      def normalize_columns(columns)
        columns = shallow_normalize_columns(columns)
        columns.map do |column|
          if column[:type] == 'RECORD' and column[:fields]
            column[:fields] = normalize_columns(column[:fields])
          end
          column
        end
      end

      # Non-destructive one-level normalization of an array of columns.
      def shallow_normalize_columns(columns)
        columns.map {|column| shallow_normalize_column(column) }
      end

      # Destructive one-level normalization of an array of columns.
      def shallow_normalize_columns!(columns)
        columns.each {|column| shallow_normalize_column!(column) }
        columns
      end

      # Non-destructive one-level normalization of a single column hash.
      def shallow_normalize_column(column)
        shallow_normalize_column!(column.dup)
      end

      # Destructive one-level normalization of a single column hash:
      # symbolize keys, upcase :type, default :mode to 'NULLABLE'.
      def shallow_normalize_column!(column)
        symbolize_keys!(column)
        column[:type] = column[:type].upcase if column[:type]
        column[:mode] ||= 'NULLABLE'
        column[:mode] = column[:mode].upcase
        column
      end

      # Replace a column hash's keys with symbols, in place.
      def symbolize_keys!(column)
        new_column = column.map do |key, val|
          [key.to_sym, val]
        end.to_h
        column.replace(new_column)
      end

      # @param [Array] columns
      # [{
      #   name: 'citiesLived',
      #   type: 'RECORD',
      #   fields: [
      #     {
      #       name: 'place', type: 'RECORD',
      #       fields: [
      #         { name: 'city', type: 'STRING' }, { name: 'postcode', type: 'STRING' }
      #       ]
      #     },
      #     { name: 'yearsLived', type: 'INTEGER' }
      #   ]
      # }]
      # @return Hash
      # {
      #   'citiesLived.place.city' => {
      #     type: 'STRING'
      #   },
      #   'citiesLived.place.postcode' => {
      #     type: 'STRING'
      #   },
      #   'citiesLived.yearsLived' => {
      #     type: 'INTEGER'
      #   }
      # }
      def flattened_columns(columns, parent_name: nil)
        result = {}
        columns.each do |column|
          column_name = parent_name.nil? ? column[:name] : "#{parent_name}.#{column[:name]}"
          if column[:type].upcase != 'RECORD'
            result[column_name] = {}.tap do |value|
              value[:type] = column[:type]
              value[:mode] = column[:mode] if column[:mode]
            end
          else
            result.merge!(flattened_columns(column[:fields], parent_name: column_name))
          end
        end
        result
      end

      # True when both schemas contain exactly the same columns.
      def equals?(source_columns, target_columns)
        diff_columns(source_columns, target_columns).empty? and \
          diff_columns(target_columns, source_columns).empty?
      end

      # target_columns - source_columns
      def diff_columns(source_columns, target_columns)
        _target_columns = shallow_normalize_columns(target_columns)
        _source_columns = shallow_normalize_columns(source_columns)
        diff_columns = _target_columns - _source_columns # shallow diff

        diff_columns.map do |target_column|
          t = target_column
          source_column = find_column_by_name(_source_columns, target_column[:name])
          next t unless source_column
          next t unless target_column[:type] == 'RECORD' and source_column[:type] == 'RECORD'
          next t unless target_column[:fields] and source_column[:fields]
          # recusive diff for RECORD columns
          diff_fields = diff_columns(source_column[:fields], target_column[:fields])
          next nil if diff_fields.empty? # remove
          target_column[:fields] = diff_fields
          target_column
        end.compact
      end

      # diff with only column_names
      # target_columns - source_columns
      def diff_columns_by_name(source_columns, target_columns)
        _target_columns = shallow_normalize_columns(target_columns)
        _source_columns = shallow_normalize_columns(source_columns)
        diff_columns = _target_columns - _source_columns # shallow diff

        diff_columns.map do |target_column|
          t = target_column
          source_column = find_column_by_name(_source_columns, target_column[:name])
          next t unless source_column
          next nil unless target_column[:type] == 'RECORD' and source_column[:type] == 'RECORD'
          next nil unless target_column[:fields] and source_column[:fields]
          # recusive diff for RECORD columns
          diff_fields = diff_columns_by_name(source_column[:fields], target_column[:fields])
          next nil if diff_fields.empty? # remove
          target_column[:fields] = diff_fields
          target_column
        end.compact
      end

      # 1. target_column[:mode] ||= source_column[:mode] || 'NULLABLE' (not overwrite, but set if does not exist)
      # 2. Add into target_columns if a source column does not exist in target_columns
      #
      # @param [Array] source_columns
      # @param [Array] target_columns
      def reverse_merge!(source_columns, target_columns)
        shallow_normalize_columns!(source_columns)
        shallow_normalize_columns!(target_columns)

        source_columns.map do |source_column|
          if target_column = find_column_by_name(target_columns, source_column[:name])
            target_column[:mode] ||= source_column[:mode] || 'NULLABLE'
            target_column[:type] ||= source_column[:type] # should never be happened
            # Recursive merge fields of `RECORD` type
            if target_column[:type] == 'RECORD' and target_column[:fields] and source_column[:fields]
              reverse_merge!(source_column[:fields], target_column[:fields])
            end
          else
            target_column = source_column.dup
            target_column[:mode] ||= 'NULLABLE'
            target_columns << target_column
          end
        end
        target_columns
      end

      # Remove every column listed in drop_columns (by flattened dotted
      # name) from target_columns, in place.
      def reject_columns!(drop_columns, target_columns)
        flattened_drop_columns = flattened_columns(drop_columns)

        flattened_drop_columns.keys.each do |flattened_name|
          # paths like a %w(citiesLived place city child1)
          paths = flattened_name.split('.')
          # object_id of fields and target_columns are different.
          # But the internal elements refer to the same ones
          fields = target_columns
          paths.each do |path|
            # The last element of the path does not have the fields
            next if path == paths.last
            # find recursively
            column = fields.find { |f| f[:name] == path }
            next if column.nil?
            fields = column[:fields]
          end

          unless fields.empty?
            fields.delete_if { |f| f[:name] == paths.last }
          end
        end
        target_columns
      end

      # Build SELECT field expressions migrating source schema to target:
      # columns present in both are cast to the target type, new columns
      # are selected bare.
      def build_query_fields(source_columns, target_columns)
        flattened_source_columns = flattened_columns(source_columns)
        flattened_target_columns = flattened_columns(target_columns)

        query_fields = flattened_target_columns.map do |flattened_name, flattened_target_column|
          flattened_source_column = flattened_source_columns[flattened_name]
          target_type = flattened_target_column[:type].upcase

          if flattened_source_column
            "#{target_type}(#{flattened_name}) AS #{flattened_name}"
          else
            flattened_name
            # MEMO: NULL cast like "#{target_type}(NULL) AS #{flattened_name}" breaks RECORD columns as
            # INTEGER(NULL) AS add_record.add_record.add_column1 => add_record_add_record_add_column1
            # We have to add columns with patch_table beforehand
          end
        end
      end
    end
  end
end