samidare 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +15 -15
- data/README.md +179 -117
- data/Rakefile +7 -7
- data/lib/samidare.rb +42 -42
- data/lib/samidare/bigquery_utility.rb +85 -85
- data/lib/samidare/embulk.rb +60 -60
- data/lib/samidare/embulk_utility.rb +42 -42
- data/lib/samidare/mysql.rb +117 -117
- data/lib/samidare/version.rb +3 -3
- data/samidare.gemspec +3 -3
- data/spec/samidare/bigquery_utility_spec.rb +85 -85
- data/spec/samidare/embulk_spec.rb +23 -23
- data/spec/samidare/embulk_utility_spec.rb +119 -119
- data/spec/samidare/mysql_spec.rb +135 -135
- data/spec/samidare_spec.rb +7 -7
- data/spec/support/databe.yml +13 -13
- data/spec/support/table.yml +11 -11
- metadata +9 -9
data/lib/samidare/embulk.rb
CHANGED
@@ -1,61 +1,61 @@
|
|
1
|
-
module Samidare
|
2
|
-
class Embulk
|
3
|
-
def run(database_configs, all_table_configs, bq_config, target_table_names = [])
|
4
|
-
error_tables = []
|
5
|
-
database_configs.keys.each do |db_name|
|
6
|
-
table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
|
7
|
-
error_tables = error_tables + run_by_database(
|
8
|
-
db_name,
|
9
|
-
table_configs,
|
10
|
-
database_configs[db_name]['bq_dataset'],
|
11
|
-
bq_config)
|
12
|
-
end
|
13
|
-
error_tables
|
14
|
-
end
|
15
|
-
|
16
|
-
def target_table_configs(table_configs, target_table_names)
|
17
|
-
return table_configs if target_table_names.empty?
|
18
|
-
table_configs.select { |table_config| target_table_names.include?(table_config.name) }
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
def run_by_database(db_name, table_configs, bq_dataset, bq_config)
|
23
|
-
process_times = []
|
24
|
-
error_tables = []
|
25
|
-
big_query = Samidare::BigQueryUtility.new(bq_config)
|
26
|
-
table_configs.each do |table_config|
|
27
|
-
start_time = Time.now
|
28
|
-
log "table: #{table_config.name} - start"
|
29
|
-
|
30
|
-
begin
|
31
|
-
big_query.delete_table(bq_dataset, table_config.name)
|
32
|
-
log "table: #{table_config.name} - deleted"
|
33
|
-
rescue
|
34
|
-
log "table: #{table_config.name} - does not exist"
|
35
|
-
end
|
36
|
-
|
37
|
-
cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
|
38
|
-
log "cmd: #{cmd}"
|
39
|
-
if system(cmd)
|
40
|
-
result = 'success'
|
41
|
-
else
|
42
|
-
result = 'error'
|
43
|
-
error_tables << table_config.name
|
44
|
-
end
|
45
|
-
|
46
|
-
process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
|
47
|
-
log process_time
|
48
|
-
process_times << process_time
|
49
|
-
end
|
50
|
-
log '------------------------------------'
|
51
|
-
log "db_name: #{db_name}"
|
52
|
-
process_times.each { |process_time| log process_time }
|
53
|
-
|
54
|
-
error_tables
|
55
|
-
end
|
56
|
-
|
57
|
-
def log(message)
|
58
|
-
puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
59
|
-
end
|
60
|
-
end
|
1
|
+
module Samidare
|
2
|
+
class Embulk
|
3
|
+
def run(database_configs, all_table_configs, bq_config, target_table_names = [])
|
4
|
+
error_tables = []
|
5
|
+
database_configs.keys.each do |db_name|
|
6
|
+
table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
|
7
|
+
error_tables = error_tables + run_by_database(
|
8
|
+
db_name,
|
9
|
+
table_configs,
|
10
|
+
database_configs[db_name]['bq_dataset'],
|
11
|
+
bq_config)
|
12
|
+
end
|
13
|
+
error_tables
|
14
|
+
end
|
15
|
+
|
16
|
+
def target_table_configs(table_configs, target_table_names)
|
17
|
+
return table_configs if target_table_names.empty?
|
18
|
+
table_configs.select { |table_config| target_table_names.include?(table_config.name) }
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def run_by_database(db_name, table_configs, bq_dataset, bq_config)
|
23
|
+
process_times = []
|
24
|
+
error_tables = []
|
25
|
+
big_query = Samidare::BigQueryUtility.new(bq_config)
|
26
|
+
table_configs.each do |table_config|
|
27
|
+
start_time = Time.now
|
28
|
+
log "table: #{table_config.name} - start"
|
29
|
+
|
30
|
+
begin
|
31
|
+
big_query.delete_table(bq_dataset, table_config.name)
|
32
|
+
log "table: #{table_config.name} - deleted"
|
33
|
+
rescue
|
34
|
+
log "table: #{table_config.name} - does not exist"
|
35
|
+
end
|
36
|
+
|
37
|
+
cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
|
38
|
+
log "cmd: #{cmd}"
|
39
|
+
if system(cmd)
|
40
|
+
result = 'success'
|
41
|
+
else
|
42
|
+
result = 'error'
|
43
|
+
error_tables << table_config.name
|
44
|
+
end
|
45
|
+
|
46
|
+
process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
|
47
|
+
log process_time
|
48
|
+
process_times << process_time
|
49
|
+
end
|
50
|
+
log '------------------------------------'
|
51
|
+
log "db_name: #{db_name}"
|
52
|
+
process_times.each { |process_time| log process_time }
|
53
|
+
|
54
|
+
error_tables
|
55
|
+
end
|
56
|
+
|
57
|
+
def log(message)
|
58
|
+
puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
59
|
+
end
|
60
|
+
end
|
61
61
|
end
|
data/lib/samidare/embulk_utility.rb
CHANGED
@@ -1,42 +1,42 @@
|
|
1
|
-
module Samidare
|
2
|
-
module EmbulkUtility
|
3
|
-
class ConfigGenerator
|
4
|
-
def generate_config(database_configs, bq_config)
|
5
|
-
bq_utility = BigQueryUtility.new(bq_config)
|
6
|
-
|
7
|
-
database_configs.keys.each do |db_name|
|
8
|
-
database_config = database_configs[db_name]
|
9
|
-
table_configs = all_table_configs[db_name]
|
10
|
-
mysql_client = MySQL::MySQLClient.new(database_config)
|
11
|
-
|
12
|
-
table_configs.each do |table_config|
|
13
|
-
write(
|
14
|
-
"#{bq_config['schema_dir']}/#{db_name}",
|
15
|
-
"#{table_config.name}.json",
|
16
|
-
mysql_client.generate_bq_schema(table_config.name)
|
17
|
-
)
|
18
|
-
write(
|
19
|
-
"#{bq_config['config_dir']}/#{db_name}",
|
20
|
-
"#{table_config.name}.yml",
|
21
|
-
bq_utility.generate_embulk_config(
|
22
|
-
db_name,
|
23
|
-
database_config,
|
24
|
-
table_config,
|
25
|
-
mysql_client.columns(table_config.name))
|
26
|
-
)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
def write(directory, file_name, content)
|
33
|
-
FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
|
34
|
-
File.write("#{directory}/#{file_name}", content)
|
35
|
-
end
|
36
|
-
|
37
|
-
def all_table_configs
|
38
|
-
@all_table_configs ||= MySQL::TableConfig.generate_table_configs
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
1
|
+
module Samidare
|
2
|
+
module EmbulkUtility
|
3
|
+
class ConfigGenerator
|
4
|
+
def generate_config(database_configs, bq_config)
|
5
|
+
bq_utility = BigQueryUtility.new(bq_config)
|
6
|
+
|
7
|
+
database_configs.keys.each do |db_name|
|
8
|
+
database_config = database_configs[db_name]
|
9
|
+
table_configs = all_table_configs[db_name]
|
10
|
+
mysql_client = MySQL::MySQLClient.new(database_config)
|
11
|
+
|
12
|
+
table_configs.each do |table_config|
|
13
|
+
write(
|
14
|
+
"#{bq_config['schema_dir']}/#{db_name}",
|
15
|
+
"#{table_config.name}.json",
|
16
|
+
mysql_client.generate_bq_schema(table_config.name)
|
17
|
+
)
|
18
|
+
write(
|
19
|
+
"#{bq_config['config_dir']}/#{db_name}",
|
20
|
+
"#{table_config.name}.yml",
|
21
|
+
bq_utility.generate_embulk_config(
|
22
|
+
db_name,
|
23
|
+
database_config,
|
24
|
+
table_config,
|
25
|
+
mysql_client.columns(table_config.name))
|
26
|
+
)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def write(directory, file_name, content)
|
33
|
+
FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
|
34
|
+
File.write("#{directory}/#{file_name}", content)
|
35
|
+
end
|
36
|
+
|
37
|
+
def all_table_configs
|
38
|
+
@all_table_configs ||= MySQL::TableConfig.generate_table_configs
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/samidare/mysql.rb
CHANGED
@@ -1,117 +1,117 @@
|
|
1
|
-
require 'mysql2-cs-bind'
|
2
|
-
require 'json'
|
3
|
-
require 'yaml'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'samidare/bigquery_utility'
|
6
|
-
|
7
|
-
module Samidare
|
8
|
-
module MySQL
|
9
|
-
class MySQLClient
|
10
|
-
COLUMN_SQL = <<-SQL
|
11
|
-
SELECT column_name, data_type
|
12
|
-
FROM INFORMATION_SCHEMA.COLUMNS
|
13
|
-
WHERE table_schema = ?
|
14
|
-
AND table_name = ?
|
15
|
-
ORDER BY ordinal_position
|
16
|
-
SQL
|
17
|
-
|
18
|
-
def initialize(database_config)
|
19
|
-
@database_config = database_config
|
20
|
-
end
|
21
|
-
|
22
|
-
def client
|
23
|
-
@client ||= Mysql2::Client.new(
|
24
|
-
:host => @database_config['host'],
|
25
|
-
:username => @database_config['username'],
|
26
|
-
:password => @database_config['password'],
|
27
|
-
:database => @database_config['database'])
|
28
|
-
end
|
29
|
-
|
30
|
-
def generate_bq_schema(table_name)
|
31
|
-
infos = columns(table_name)
|
32
|
-
BigQueryUtility.generate_schema(infos)
|
33
|
-
end
|
34
|
-
|
35
|
-
def columns(table_name)
|
36
|
-
rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
|
37
|
-
rows.map { |row| Column.new(row['column_name'], row['data_type']) }
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
class TableConfig
|
42
|
-
attr_reader :name, :daily_snapshot, :condition
|
43
|
-
|
44
|
-
def initialize(config)
|
45
|
-
@name = config['name']
|
46
|
-
@daily_snapshot = config['daily_snapshot'] || false
|
47
|
-
@condition = config['condition']
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.generate_table_configs(file_path = 'table.yml')
|
51
|
-
configs = YAML.load_file(file_path)
|
52
|
-
configs.each_with_object({}) do |(db, database_config), table_configs|
|
53
|
-
table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
|
54
|
-
table_configs
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def ==(another)
|
59
|
-
self.instance_variables.all? do |v|
|
60
|
-
self.instance_variable_get(v) == another.instance_variable_get(v)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
class Column
|
66
|
-
attr_reader :column_name, :data_type
|
67
|
-
|
68
|
-
TYPE_MAPPINGS = {
|
69
|
-
'int' => 'integer',
|
70
|
-
'tinyint' => 'integer',
|
71
|
-
'smallint' => 'integer',
|
72
|
-
'mediumint' => 'integer',
|
73
|
-
'bigint' => 'integer',
|
74
|
-
'float' => 'float',
|
75
|
-
'double' => 'float',
|
76
|
-
'decimal' => 'float',
|
77
|
-
'char' => 'string',
|
78
|
-
'varchar' => 'string',
|
79
|
-
'tinytext' => 'string',
|
80
|
-
'text' => 'string',
|
81
|
-
'date' => 'timestamp',
|
82
|
-
'datetime' => 'timestamp',
|
83
|
-
'timestamp' => 'timestamp'
|
84
|
-
}
|
85
|
-
|
86
|
-
def initialize(column_name, data_type)
|
87
|
-
@column_name = column_name
|
88
|
-
@data_type = data_type
|
89
|
-
end
|
90
|
-
|
91
|
-
def bigquery_data_type
|
92
|
-
TYPE_MAPPINGS[@data_type]
|
93
|
-
end
|
94
|
-
|
95
|
-
def converted_value
|
96
|
-
if bigquery_data_type == 'timestamp'
|
97
|
-
# time zone translate to UTC
|
98
|
-
"UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
|
99
|
-
elsif data_type == 'tinyint'
|
100
|
-
# for MySQL tinyint(1) problem
|
101
|
-
"CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
|
102
|
-
else
|
103
|
-
escaped_column_name
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def to_json(*a)
|
108
|
-
{ "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
|
109
|
-
end
|
110
|
-
|
111
|
-
private
|
112
|
-
def escaped_column_name
|
113
|
-
"`#{@column_name}`"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
1
|
+
require 'mysql2-cs-bind'
|
2
|
+
require 'json'
|
3
|
+
require 'yaml'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'samidare/bigquery_utility'
|
6
|
+
|
7
|
+
module Samidare
|
8
|
+
module MySQL
|
9
|
+
class MySQLClient
|
10
|
+
COLUMN_SQL = <<-SQL
|
11
|
+
SELECT column_name, data_type
|
12
|
+
FROM INFORMATION_SCHEMA.COLUMNS
|
13
|
+
WHERE table_schema = ?
|
14
|
+
AND table_name = ?
|
15
|
+
ORDER BY ordinal_position
|
16
|
+
SQL
|
17
|
+
|
18
|
+
def initialize(database_config)
|
19
|
+
@database_config = database_config
|
20
|
+
end
|
21
|
+
|
22
|
+
def client
|
23
|
+
@client ||= Mysql2::Client.new(
|
24
|
+
:host => @database_config['host'],
|
25
|
+
:username => @database_config['username'],
|
26
|
+
:password => @database_config['password'],
|
27
|
+
:database => @database_config['database'])
|
28
|
+
end
|
29
|
+
|
30
|
+
def generate_bq_schema(table_name)
|
31
|
+
infos = columns(table_name)
|
32
|
+
BigQueryUtility.generate_schema(infos)
|
33
|
+
end
|
34
|
+
|
35
|
+
def columns(table_name)
|
36
|
+
rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
|
37
|
+
rows.map { |row| Column.new(row['column_name'], row['data_type']) }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
class TableConfig
|
42
|
+
attr_reader :name, :daily_snapshot, :condition
|
43
|
+
|
44
|
+
def initialize(config)
|
45
|
+
@name = config['name']
|
46
|
+
@daily_snapshot = config['daily_snapshot'] || false
|
47
|
+
@condition = config['condition']
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.generate_table_configs(file_path = 'table.yml')
|
51
|
+
configs = YAML.load_file(file_path)
|
52
|
+
configs.each_with_object({}) do |(db, database_config), table_configs|
|
53
|
+
table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
|
54
|
+
table_configs
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def ==(another)
|
59
|
+
self.instance_variables.all? do |v|
|
60
|
+
self.instance_variable_get(v) == another.instance_variable_get(v)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Column
|
66
|
+
attr_reader :column_name, :data_type
|
67
|
+
|
68
|
+
TYPE_MAPPINGS = {
|
69
|
+
'int' => 'integer',
|
70
|
+
'tinyint' => 'integer',
|
71
|
+
'smallint' => 'integer',
|
72
|
+
'mediumint' => 'integer',
|
73
|
+
'bigint' => 'integer',
|
74
|
+
'float' => 'float',
|
75
|
+
'double' => 'float',
|
76
|
+
'decimal' => 'float',
|
77
|
+
'char' => 'string',
|
78
|
+
'varchar' => 'string',
|
79
|
+
'tinytext' => 'string',
|
80
|
+
'text' => 'string',
|
81
|
+
'date' => 'timestamp',
|
82
|
+
'datetime' => 'timestamp',
|
83
|
+
'timestamp' => 'timestamp'
|
84
|
+
}
|
85
|
+
|
86
|
+
def initialize(column_name, data_type)
|
87
|
+
@column_name = column_name
|
88
|
+
@data_type = data_type
|
89
|
+
end
|
90
|
+
|
91
|
+
def bigquery_data_type
|
92
|
+
TYPE_MAPPINGS[@data_type]
|
93
|
+
end
|
94
|
+
|
95
|
+
def converted_value
|
96
|
+
if bigquery_data_type == 'timestamp'
|
97
|
+
# time zone translate to UTC
|
98
|
+
"UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
|
99
|
+
elsif data_type == 'tinyint'
|
100
|
+
# for MySQL tinyint(1) problem
|
101
|
+
"CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
|
102
|
+
else
|
103
|
+
escaped_column_name
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def to_json(*a)
|
108
|
+
{ "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
|
109
|
+
end
|
110
|
+
|
111
|
+
private
|
112
|
+
def escaped_column_name
|
113
|
+
"`#{@column_name}`"
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
data/lib/samidare/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module Samidare
|
2
|
-
VERSION = "0.1.2"
|
3
|
-
end
|
1
|
+
module Samidare
|
2
|
+
VERSION = "0.2.0"
|
3
|
+
end
|
data/samidare.gemspec
CHANGED
@@ -25,9 +25,9 @@ Gem::Specification.new do |spec|
|
|
25
25
|
|
26
26
|
spec.add_dependency 'unindent', '1.0'
|
27
27
|
spec.add_dependency 'mysql2-cs-bind', '0.0.6'
|
28
|
-
spec.add_dependency 'embulk-output-bigquery', '0.
|
29
|
-
spec.add_dependency 'embulk-input-mysql', '0.
|
30
|
-
spec.add_dependency 'embulk-parser-jsonl', '0.0
|
28
|
+
spec.add_dependency 'embulk-output-bigquery', '0.4.3'
|
29
|
+
spec.add_dependency 'embulk-input-mysql', '0.8.2'
|
30
|
+
spec.add_dependency 'embulk-parser-jsonl', '0.2.0'
|
31
31
|
spec.add_dependency 'embulk-formatter-jsonl', '0.1.4'
|
32
32
|
spec.add_dependency 'bigquery', '0.8.3'
|
33
33
|
end
|