samidare 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +15 -15
- data/README.md +179 -117
- data/Rakefile +7 -7
- data/lib/samidare.rb +42 -42
- data/lib/samidare/bigquery_utility.rb +85 -85
- data/lib/samidare/embulk.rb +60 -60
- data/lib/samidare/embulk_utility.rb +42 -42
- data/lib/samidare/mysql.rb +117 -117
- data/lib/samidare/version.rb +3 -3
- data/samidare.gemspec +3 -3
- data/spec/samidare/bigquery_utility_spec.rb +85 -85
- data/spec/samidare/embulk_spec.rb +23 -23
- data/spec/samidare/embulk_utility_spec.rb +119 -119
- data/spec/samidare/mysql_spec.rb +135 -135
- data/spec/samidare_spec.rb +7 -7
- data/spec/support/databe.yml +13 -13
- data/spec/support/table.yml +11 -11
- metadata +9 -9
data/lib/samidare/embulk.rb
CHANGED
@@ -1,61 +1,61 @@
|
|
1
|
-
module Samidare
|
2
|
-
class Embulk
|
3
|
-
def run(database_configs, all_table_configs, bq_config, target_table_names = [])
|
4
|
-
error_tables = []
|
5
|
-
database_configs.keys.each do |db_name|
|
6
|
-
table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
|
7
|
-
error_tables = error_tables + run_by_database(
|
8
|
-
db_name,
|
9
|
-
table_configs,
|
10
|
-
database_configs[db_name]['bq_dataset'],
|
11
|
-
bq_config)
|
12
|
-
end
|
13
|
-
error_tables
|
14
|
-
end
|
15
|
-
|
16
|
-
def target_table_configs(table_configs, target_table_names)
|
17
|
-
return table_configs if target_table_names.empty?
|
18
|
-
table_configs.select { |table_config| target_table_names.include?(table_config.name) }
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
def run_by_database(db_name, table_configs, bq_dataset, bq_config)
|
23
|
-
process_times = []
|
24
|
-
error_tables = []
|
25
|
-
big_query = Samidare::BigQueryUtility.new(bq_config)
|
26
|
-
table_configs.each do |table_config|
|
27
|
-
start_time = Time.now
|
28
|
-
log "table: #{table_config.name} - start"
|
29
|
-
|
30
|
-
begin
|
31
|
-
big_query.delete_table(bq_dataset, table_config.name)
|
32
|
-
log "table: #{table_config.name} - deleted"
|
33
|
-
rescue
|
34
|
-
log "table: #{table_config.name} - does not exist"
|
35
|
-
end
|
36
|
-
|
37
|
-
cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
|
38
|
-
log "cmd: #{cmd}"
|
39
|
-
if system(cmd)
|
40
|
-
result = 'success'
|
41
|
-
else
|
42
|
-
result = 'error'
|
43
|
-
error_tables << table_config.name
|
44
|
-
end
|
45
|
-
|
46
|
-
process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
|
47
|
-
log process_time
|
48
|
-
process_times << process_time
|
49
|
-
end
|
50
|
-
log '------------------------------------'
|
51
|
-
log "db_name: #{db_name}"
|
52
|
-
process_times.each { |process_time| log process_time }
|
53
|
-
|
54
|
-
error_tables
|
55
|
-
end
|
56
|
-
|
57
|
-
def log(message)
|
58
|
-
puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
59
|
-
end
|
60
|
-
end
|
1
|
+
module Samidare
|
2
|
+
class Embulk
|
3
|
+
def run(database_configs, all_table_configs, bq_config, target_table_names = [])
|
4
|
+
error_tables = []
|
5
|
+
database_configs.keys.each do |db_name|
|
6
|
+
table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
|
7
|
+
error_tables = error_tables + run_by_database(
|
8
|
+
db_name,
|
9
|
+
table_configs,
|
10
|
+
database_configs[db_name]['bq_dataset'],
|
11
|
+
bq_config)
|
12
|
+
end
|
13
|
+
error_tables
|
14
|
+
end
|
15
|
+
|
16
|
+
def target_table_configs(table_configs, target_table_names)
|
17
|
+
return table_configs if target_table_names.empty?
|
18
|
+
table_configs.select { |table_config| target_table_names.include?(table_config.name) }
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def run_by_database(db_name, table_configs, bq_dataset, bq_config)
|
23
|
+
process_times = []
|
24
|
+
error_tables = []
|
25
|
+
big_query = Samidare::BigQueryUtility.new(bq_config)
|
26
|
+
table_configs.each do |table_config|
|
27
|
+
start_time = Time.now
|
28
|
+
log "table: #{table_config.name} - start"
|
29
|
+
|
30
|
+
begin
|
31
|
+
big_query.delete_table(bq_dataset, table_config.name)
|
32
|
+
log "table: #{table_config.name} - deleted"
|
33
|
+
rescue
|
34
|
+
log "table: #{table_config.name} - does not exist"
|
35
|
+
end
|
36
|
+
|
37
|
+
cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
|
38
|
+
log "cmd: #{cmd}"
|
39
|
+
if system(cmd)
|
40
|
+
result = 'success'
|
41
|
+
else
|
42
|
+
result = 'error'
|
43
|
+
error_tables << table_config.name
|
44
|
+
end
|
45
|
+
|
46
|
+
process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
|
47
|
+
log process_time
|
48
|
+
process_times << process_time
|
49
|
+
end
|
50
|
+
log '------------------------------------'
|
51
|
+
log "db_name: #{db_name}"
|
52
|
+
process_times.each { |process_time| log process_time }
|
53
|
+
|
54
|
+
error_tables
|
55
|
+
end
|
56
|
+
|
57
|
+
def log(message)
|
58
|
+
puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
59
|
+
end
|
60
|
+
end
|
61
61
|
end
|
data/lib/samidare/embulk_utility.rb
CHANGED
@@ -1,42 +1,42 @@
|
|
1
|
-
module Samidare
|
2
|
-
module EmbulkUtility
|
3
|
-
class ConfigGenerator
|
4
|
-
def generate_config(database_configs, bq_config)
|
5
|
-
bq_utility = BigQueryUtility.new(bq_config)
|
6
|
-
|
7
|
-
database_configs.keys.each do |db_name|
|
8
|
-
database_config = database_configs[db_name]
|
9
|
-
table_configs = all_table_configs[db_name]
|
10
|
-
mysql_client = MySQL::MySQLClient.new(database_config)
|
11
|
-
|
12
|
-
table_configs.each do |table_config|
|
13
|
-
write(
|
14
|
-
"#{bq_config['schema_dir']}/#{db_name}",
|
15
|
-
"#{table_config.name}.json",
|
16
|
-
mysql_client.generate_bq_schema(table_config.name)
|
17
|
-
)
|
18
|
-
write(
|
19
|
-
"#{bq_config['config_dir']}/#{db_name}",
|
20
|
-
"#{table_config.name}.yml",
|
21
|
-
bq_utility.generate_embulk_config(
|
22
|
-
db_name,
|
23
|
-
database_config,
|
24
|
-
table_config,
|
25
|
-
mysql_client.columns(table_config.name))
|
26
|
-
)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
def write(directory, file_name, content)
|
33
|
-
FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
|
34
|
-
File.write("#{directory}/#{file_name}", content)
|
35
|
-
end
|
36
|
-
|
37
|
-
def all_table_configs
|
38
|
-
@all_table_configs ||= MySQL::TableConfig.generate_table_configs
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
1
|
+
module Samidare
|
2
|
+
module EmbulkUtility
|
3
|
+
class ConfigGenerator
|
4
|
+
def generate_config(database_configs, bq_config)
|
5
|
+
bq_utility = BigQueryUtility.new(bq_config)
|
6
|
+
|
7
|
+
database_configs.keys.each do |db_name|
|
8
|
+
database_config = database_configs[db_name]
|
9
|
+
table_configs = all_table_configs[db_name]
|
10
|
+
mysql_client = MySQL::MySQLClient.new(database_config)
|
11
|
+
|
12
|
+
table_configs.each do |table_config|
|
13
|
+
write(
|
14
|
+
"#{bq_config['schema_dir']}/#{db_name}",
|
15
|
+
"#{table_config.name}.json",
|
16
|
+
mysql_client.generate_bq_schema(table_config.name)
|
17
|
+
)
|
18
|
+
write(
|
19
|
+
"#{bq_config['config_dir']}/#{db_name}",
|
20
|
+
"#{table_config.name}.yml",
|
21
|
+
bq_utility.generate_embulk_config(
|
22
|
+
db_name,
|
23
|
+
database_config,
|
24
|
+
table_config,
|
25
|
+
mysql_client.columns(table_config.name))
|
26
|
+
)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
def write(directory, file_name, content)
|
33
|
+
FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
|
34
|
+
File.write("#{directory}/#{file_name}", content)
|
35
|
+
end
|
36
|
+
|
37
|
+
def all_table_configs
|
38
|
+
@all_table_configs ||= MySQL::TableConfig.generate_table_configs
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/samidare/mysql.rb
CHANGED
@@ -1,117 +1,117 @@
|
|
1
|
-
require 'mysql2-cs-bind'
|
2
|
-
require 'json'
|
3
|
-
require 'yaml'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'samidare/bigquery_utility'
|
6
|
-
|
7
|
-
module Samidare
|
8
|
-
module MySQL
|
9
|
-
class MySQLClient
|
10
|
-
COLUMN_SQL = <<-SQL
|
11
|
-
SELECT column_name, data_type
|
12
|
-
FROM INFORMATION_SCHEMA.COLUMNS
|
13
|
-
WHERE table_schema = ?
|
14
|
-
AND table_name = ?
|
15
|
-
ORDER BY ordinal_position
|
16
|
-
SQL
|
17
|
-
|
18
|
-
def initialize(database_config)
|
19
|
-
@database_config = database_config
|
20
|
-
end
|
21
|
-
|
22
|
-
def client
|
23
|
-
@client ||= Mysql2::Client.new(
|
24
|
-
:host => @database_config['host'],
|
25
|
-
:username => @database_config['username'],
|
26
|
-
:password => @database_config['password'],
|
27
|
-
:database => @database_config['database'])
|
28
|
-
end
|
29
|
-
|
30
|
-
def generate_bq_schema(table_name)
|
31
|
-
infos = columns(table_name)
|
32
|
-
BigQueryUtility.generate_schema(infos)
|
33
|
-
end
|
34
|
-
|
35
|
-
def columns(table_name)
|
36
|
-
rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
|
37
|
-
rows.map { |row| Column.new(row['column_name'], row['data_type']) }
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
class TableConfig
|
42
|
-
attr_reader :name, :daily_snapshot, :condition
|
43
|
-
|
44
|
-
def initialize(config)
|
45
|
-
@name = config['name']
|
46
|
-
@daily_snapshot = config['daily_snapshot'] || false
|
47
|
-
@condition = config['condition']
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.generate_table_configs(file_path = 'table.yml')
|
51
|
-
configs = YAML.load_file(file_path)
|
52
|
-
configs.each_with_object({}) do |(db, database_config), table_configs|
|
53
|
-
table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
|
54
|
-
table_configs
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def ==(another)
|
59
|
-
self.instance_variables.all? do |v|
|
60
|
-
self.instance_variable_get(v) == another.instance_variable_get(v)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
class Column
|
66
|
-
attr_reader :column_name, :data_type
|
67
|
-
|
68
|
-
TYPE_MAPPINGS = {
|
69
|
-
'int' => 'integer',
|
70
|
-
'tinyint' => 'integer',
|
71
|
-
'smallint' => 'integer',
|
72
|
-
'mediumint' => 'integer',
|
73
|
-
'bigint' => 'integer',
|
74
|
-
'float' => 'float',
|
75
|
-
'double' => 'float',
|
76
|
-
'decimal' => 'float',
|
77
|
-
'char' => 'string',
|
78
|
-
'varchar' => 'string',
|
79
|
-
'tinytext' => 'string',
|
80
|
-
'text' => 'string',
|
81
|
-
'date' => 'timestamp',
|
82
|
-
'datetime' => 'timestamp',
|
83
|
-
'timestamp' => 'timestamp'
|
84
|
-
}
|
85
|
-
|
86
|
-
def initialize(column_name, data_type)
|
87
|
-
@column_name = column_name
|
88
|
-
@data_type = data_type
|
89
|
-
end
|
90
|
-
|
91
|
-
def bigquery_data_type
|
92
|
-
TYPE_MAPPINGS[@data_type]
|
93
|
-
end
|
94
|
-
|
95
|
-
def converted_value
|
96
|
-
if bigquery_data_type == 'timestamp'
|
97
|
-
# time zone translate to UTC
|
98
|
-
"UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
|
99
|
-
elsif data_type == 'tinyint'
|
100
|
-
# for MySQL tinyint(1) problem
|
101
|
-
"CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
|
102
|
-
else
|
103
|
-
escaped_column_name
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def to_json(*a)
|
108
|
-
{ "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
|
109
|
-
end
|
110
|
-
|
111
|
-
private
|
112
|
-
def escaped_column_name
|
113
|
-
"`#{@column_name}`"
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
1
|
+
require 'mysql2-cs-bind'
|
2
|
+
require 'json'
|
3
|
+
require 'yaml'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'samidare/bigquery_utility'
|
6
|
+
|
7
|
+
module Samidare
|
8
|
+
module MySQL
|
9
|
+
class MySQLClient
|
10
|
+
COLUMN_SQL = <<-SQL
|
11
|
+
SELECT column_name, data_type
|
12
|
+
FROM INFORMATION_SCHEMA.COLUMNS
|
13
|
+
WHERE table_schema = ?
|
14
|
+
AND table_name = ?
|
15
|
+
ORDER BY ordinal_position
|
16
|
+
SQL
|
17
|
+
|
18
|
+
def initialize(database_config)
|
19
|
+
@database_config = database_config
|
20
|
+
end
|
21
|
+
|
22
|
+
def client
|
23
|
+
@client ||= Mysql2::Client.new(
|
24
|
+
:host => @database_config['host'],
|
25
|
+
:username => @database_config['username'],
|
26
|
+
:password => @database_config['password'],
|
27
|
+
:database => @database_config['database'])
|
28
|
+
end
|
29
|
+
|
30
|
+
def generate_bq_schema(table_name)
|
31
|
+
infos = columns(table_name)
|
32
|
+
BigQueryUtility.generate_schema(infos)
|
33
|
+
end
|
34
|
+
|
35
|
+
def columns(table_name)
|
36
|
+
rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
|
37
|
+
rows.map { |row| Column.new(row['column_name'], row['data_type']) }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
class TableConfig
|
42
|
+
attr_reader :name, :daily_snapshot, :condition
|
43
|
+
|
44
|
+
def initialize(config)
|
45
|
+
@name = config['name']
|
46
|
+
@daily_snapshot = config['daily_snapshot'] || false
|
47
|
+
@condition = config['condition']
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.generate_table_configs(file_path = 'table.yml')
|
51
|
+
configs = YAML.load_file(file_path)
|
52
|
+
configs.each_with_object({}) do |(db, database_config), table_configs|
|
53
|
+
table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
|
54
|
+
table_configs
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def ==(another)
|
59
|
+
self.instance_variables.all? do |v|
|
60
|
+
self.instance_variable_get(v) == another.instance_variable_get(v)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Column
|
66
|
+
attr_reader :column_name, :data_type
|
67
|
+
|
68
|
+
TYPE_MAPPINGS = {
|
69
|
+
'int' => 'integer',
|
70
|
+
'tinyint' => 'integer',
|
71
|
+
'smallint' => 'integer',
|
72
|
+
'mediumint' => 'integer',
|
73
|
+
'bigint' => 'integer',
|
74
|
+
'float' => 'float',
|
75
|
+
'double' => 'float',
|
76
|
+
'decimal' => 'float',
|
77
|
+
'char' => 'string',
|
78
|
+
'varchar' => 'string',
|
79
|
+
'tinytext' => 'string',
|
80
|
+
'text' => 'string',
|
81
|
+
'date' => 'timestamp',
|
82
|
+
'datetime' => 'timestamp',
|
83
|
+
'timestamp' => 'timestamp'
|
84
|
+
}
|
85
|
+
|
86
|
+
def initialize(column_name, data_type)
|
87
|
+
@column_name = column_name
|
88
|
+
@data_type = data_type
|
89
|
+
end
|
90
|
+
|
91
|
+
def bigquery_data_type
|
92
|
+
TYPE_MAPPINGS[@data_type]
|
93
|
+
end
|
94
|
+
|
95
|
+
def converted_value
|
96
|
+
if bigquery_data_type == 'timestamp'
|
97
|
+
# time zone translate to UTC
|
98
|
+
"UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
|
99
|
+
elsif data_type == 'tinyint'
|
100
|
+
# for MySQL tinyint(1) problem
|
101
|
+
"CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
|
102
|
+
else
|
103
|
+
escaped_column_name
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def to_json(*a)
|
108
|
+
{ "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
|
109
|
+
end
|
110
|
+
|
111
|
+
private
|
112
|
+
def escaped_column_name
|
113
|
+
"`#{@column_name}`"
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
data/lib/samidare/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module Samidare
|
2
|
-
VERSION = "0.1.2"
|
3
|
-
end
|
1
|
+
module Samidare
|
2
|
+
VERSION = "0.2.0"
|
3
|
+
end
|
data/samidare.gemspec
CHANGED
@@ -25,9 +25,9 @@ Gem::Specification.new do |spec|
|
|
25
25
|
|
26
26
|
spec.add_dependency 'unindent', '1.0'
|
27
27
|
spec.add_dependency 'mysql2-cs-bind', '0.0.6'
|
28
|
-
spec.add_dependency 'embulk-output-bigquery', '0.
|
29
|
-
spec.add_dependency 'embulk-input-mysql', '0.
|
30
|
-
spec.add_dependency 'embulk-parser-jsonl', '0.0
|
28
|
+
spec.add_dependency 'embulk-output-bigquery', '0.4.3'
|
29
|
+
spec.add_dependency 'embulk-input-mysql', '0.8.2'
|
30
|
+
spec.add_dependency 'embulk-parser-jsonl', '0.2.0'
|
31
31
|
spec.add_dependency 'embulk-formatter-jsonl', '0.1.4'
|
32
32
|
spec.add_dependency 'bigquery', '0.8.3'
|
33
33
|
end
|