samidare 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,61 +1,61 @@
1
- module Samidare
2
- class Embulk
3
- def run(database_configs, all_table_configs, bq_config, target_table_names = [])
4
- error_tables = []
5
- database_configs.keys.each do |db_name|
6
- table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
7
- error_tables = error_tables + run_by_database(
8
- db_name,
9
- table_configs,
10
- database_configs[db_name]['bq_dataset'],
11
- bq_config)
12
- end
13
- error_tables
14
- end
15
-
16
- def target_table_configs(table_configs, target_table_names)
17
- return table_configs if target_table_names.empty?
18
- table_configs.select { |table_config| target_table_names.include?(table_config.name) }
19
- end
20
-
21
- private
22
- def run_by_database(db_name, table_configs, bq_dataset, bq_config)
23
- process_times = []
24
- error_tables = []
25
- big_query = Samidare::BigQueryUtility.new(bq_config)
26
- table_configs.each do |table_config|
27
- start_time = Time.now
28
- log "table: #{table_config.name} - start"
29
-
30
- begin
31
- big_query.delete_table(bq_dataset, table_config.name)
32
- log "table: #{table_config.name} - deleted"
33
- rescue
34
- log "table: #{table_config.name} - does not exist"
35
- end
36
-
37
- cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
38
- log "cmd: #{cmd}"
39
- if system(cmd)
40
- result = 'success'
41
- else
42
- result = 'error'
43
- error_tables << table_config.name
44
- end
45
-
46
- process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
47
- log process_time
48
- process_times << process_time
49
- end
50
- log '------------------------------------'
51
- log "db_name: #{db_name}"
52
- process_times.each { |process_time| log process_time }
53
-
54
- error_tables
55
- end
56
-
57
- def log(message)
58
- puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
59
- end
60
- end
1
+ module Samidare
2
+ class Embulk
3
+ def run(database_configs, all_table_configs, bq_config, target_table_names = [])
4
+ error_tables = []
5
+ database_configs.keys.each do |db_name|
6
+ table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
7
+ error_tables = error_tables + run_by_database(
8
+ db_name,
9
+ table_configs,
10
+ database_configs[db_name]['bq_dataset'],
11
+ bq_config)
12
+ end
13
+ error_tables
14
+ end
15
+
16
+ def target_table_configs(table_configs, target_table_names)
17
+ return table_configs if target_table_names.empty?
18
+ table_configs.select { |table_config| target_table_names.include?(table_config.name) }
19
+ end
20
+
21
+ private
22
+ def run_by_database(db_name, table_configs, bq_dataset, bq_config)
23
+ process_times = []
24
+ error_tables = []
25
+ big_query = Samidare::BigQueryUtility.new(bq_config)
26
+ table_configs.each do |table_config|
27
+ start_time = Time.now
28
+ log "table: #{table_config.name} - start"
29
+
30
+ begin
31
+ big_query.delete_table(bq_dataset, table_config.name)
32
+ log "table: #{table_config.name} - deleted"
33
+ rescue
34
+ log "table: #{table_config.name} - does not exist"
35
+ end
36
+
37
+ cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
38
+ log "cmd: #{cmd}"
39
+ if system(cmd)
40
+ result = 'success'
41
+ else
42
+ result = 'error'
43
+ error_tables << table_config.name
44
+ end
45
+
46
+ process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
47
+ log process_time
48
+ process_times << process_time
49
+ end
50
+ log '------------------------------------'
51
+ log "db_name: #{db_name}"
52
+ process_times.each { |process_time| log process_time }
53
+
54
+ error_tables
55
+ end
56
+
57
+ def log(message)
58
+ puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
59
+ end
60
+ end
61
61
  end
@@ -1,42 +1,42 @@
1
- module Samidare
2
- module EmbulkUtility
3
- class ConfigGenerator
4
- def generate_config(database_configs, bq_config)
5
- bq_utility = BigQueryUtility.new(bq_config)
6
-
7
- database_configs.keys.each do |db_name|
8
- database_config = database_configs[db_name]
9
- table_configs = all_table_configs[db_name]
10
- mysql_client = MySQL::MySQLClient.new(database_config)
11
-
12
- table_configs.each do |table_config|
13
- write(
14
- "#{bq_config['schema_dir']}/#{db_name}",
15
- "#{table_config.name}.json",
16
- mysql_client.generate_bq_schema(table_config.name)
17
- )
18
- write(
19
- "#{bq_config['config_dir']}/#{db_name}",
20
- "#{table_config.name}.yml",
21
- bq_utility.generate_embulk_config(
22
- db_name,
23
- database_config,
24
- table_config,
25
- mysql_client.columns(table_config.name))
26
- )
27
- end
28
- end
29
- end
30
-
31
- private
32
- def write(directory, file_name, content)
33
- FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
34
- File.write("#{directory}/#{file_name}", content)
35
- end
36
-
37
- def all_table_configs
38
- @all_table_configs ||= MySQL::TableConfig.generate_table_configs
39
- end
40
- end
41
- end
42
- end
1
+ module Samidare
2
+ module EmbulkUtility
3
+ class ConfigGenerator
4
+ def generate_config(database_configs, bq_config)
5
+ bq_utility = BigQueryUtility.new(bq_config)
6
+
7
+ database_configs.keys.each do |db_name|
8
+ database_config = database_configs[db_name]
9
+ table_configs = all_table_configs[db_name]
10
+ mysql_client = MySQL::MySQLClient.new(database_config)
11
+
12
+ table_configs.each do |table_config|
13
+ write(
14
+ "#{bq_config['schema_dir']}/#{db_name}",
15
+ "#{table_config.name}.json",
16
+ mysql_client.generate_bq_schema(table_config.name)
17
+ )
18
+ write(
19
+ "#{bq_config['config_dir']}/#{db_name}",
20
+ "#{table_config.name}.yml",
21
+ bq_utility.generate_embulk_config(
22
+ db_name,
23
+ database_config,
24
+ table_config,
25
+ mysql_client.columns(table_config.name))
26
+ )
27
+ end
28
+ end
29
+ end
30
+
31
+ private
32
+ def write(directory, file_name, content)
33
+ FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
34
+ File.write("#{directory}/#{file_name}", content)
35
+ end
36
+
37
+ def all_table_configs
38
+ @all_table_configs ||= MySQL::TableConfig.generate_table_configs
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,117 +1,117 @@
1
- require 'mysql2-cs-bind'
2
- require 'json'
3
- require 'yaml'
4
- require 'fileutils'
5
- require 'samidare/bigquery_utility'
6
-
7
- module Samidare
8
- module MySQL
9
- class MySQLClient
10
- COLUMN_SQL = <<-SQL
11
- SELECT column_name, data_type
12
- FROM INFORMATION_SCHEMA.COLUMNS
13
- WHERE table_schema = ?
14
- AND table_name = ?
15
- ORDER BY ordinal_position
16
- SQL
17
-
18
- def initialize(database_config)
19
- @database_config = database_config
20
- end
21
-
22
- def client
23
- @client ||= Mysql2::Client.new(
24
- :host => @database_config['host'],
25
- :username => @database_config['username'],
26
- :password => @database_config['password'],
27
- :database => @database_config['database'])
28
- end
29
-
30
- def generate_bq_schema(table_name)
31
- infos = columns(table_name)
32
- BigQueryUtility.generate_schema(infos)
33
- end
34
-
35
- def columns(table_name)
36
- rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
37
- rows.map { |row| Column.new(row['column_name'], row['data_type']) }
38
- end
39
- end
40
-
41
- class TableConfig
42
- attr_reader :name, :daily_snapshot, :condition
43
-
44
- def initialize(config)
45
- @name = config['name']
46
- @daily_snapshot = config['daily_snapshot'] || false
47
- @condition = config['condition']
48
- end
49
-
50
- def self.generate_table_configs(file_path = 'table.yml')
51
- configs = YAML.load_file(file_path)
52
- configs.each_with_object({}) do |(db, database_config), table_configs|
53
- table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
54
- table_configs
55
- end
56
- end
57
-
58
- def ==(another)
59
- self.instance_variables.all? do |v|
60
- self.instance_variable_get(v) == another.instance_variable_get(v)
61
- end
62
- end
63
- end
64
-
65
- class Column
66
- attr_reader :column_name, :data_type
67
-
68
- TYPE_MAPPINGS = {
69
- 'int' => 'integer',
70
- 'tinyint' => 'integer',
71
- 'smallint' => 'integer',
72
- 'mediumint' => 'integer',
73
- 'bigint' => 'integer',
74
- 'float' => 'float',
75
- 'double' => 'float',
76
- 'decimal' => 'float',
77
- 'char' => 'string',
78
- 'varchar' => 'string',
79
- 'tinytext' => 'string',
80
- 'text' => 'string',
81
- 'date' => 'timestamp',
82
- 'datetime' => 'timestamp',
83
- 'timestamp' => 'timestamp'
84
- }
85
-
86
- def initialize(column_name, data_type)
87
- @column_name = column_name
88
- @data_type = data_type
89
- end
90
-
91
- def bigquery_data_type
92
- TYPE_MAPPINGS[@data_type]
93
- end
94
-
95
- def converted_value
96
- if bigquery_data_type == 'timestamp'
97
- # time zone translate to UTC
98
- "UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
99
- elsif data_type == 'tinyint'
100
- # for MySQL tinyint(1) problem
101
- "CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
102
- else
103
- escaped_column_name
104
- end
105
- end
106
-
107
- def to_json(*a)
108
- { "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
109
- end
110
-
111
- private
112
- def escaped_column_name
113
- "`#{@column_name}`"
114
- end
115
- end
116
- end
117
- end
1
+ require 'mysql2-cs-bind'
2
+ require 'json'
3
+ require 'yaml'
4
+ require 'fileutils'
5
+ require 'samidare/bigquery_utility'
6
+
7
+ module Samidare
8
+ module MySQL
9
+ class MySQLClient
10
+ COLUMN_SQL = <<-SQL
11
+ SELECT column_name, data_type
12
+ FROM INFORMATION_SCHEMA.COLUMNS
13
+ WHERE table_schema = ?
14
+ AND table_name = ?
15
+ ORDER BY ordinal_position
16
+ SQL
17
+
18
+ def initialize(database_config)
19
+ @database_config = database_config
20
+ end
21
+
22
+ def client
23
+ @client ||= Mysql2::Client.new(
24
+ :host => @database_config['host'],
25
+ :username => @database_config['username'],
26
+ :password => @database_config['password'],
27
+ :database => @database_config['database'])
28
+ end
29
+
30
+ def generate_bq_schema(table_name)
31
+ infos = columns(table_name)
32
+ BigQueryUtility.generate_schema(infos)
33
+ end
34
+
35
+ def columns(table_name)
36
+ rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
37
+ rows.map { |row| Column.new(row['column_name'], row['data_type']) }
38
+ end
39
+ end
40
+
41
+ class TableConfig
42
+ attr_reader :name, :daily_snapshot, :condition
43
+
44
+ def initialize(config)
45
+ @name = config['name']
46
+ @daily_snapshot = config['daily_snapshot'] || false
47
+ @condition = config['condition']
48
+ end
49
+
50
+ def self.generate_table_configs(file_path = 'table.yml')
51
+ configs = YAML.load_file(file_path)
52
+ configs.each_with_object({}) do |(db, database_config), table_configs|
53
+ table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
54
+ table_configs
55
+ end
56
+ end
57
+
58
+ def ==(another)
59
+ self.instance_variables.all? do |v|
60
+ self.instance_variable_get(v) == another.instance_variable_get(v)
61
+ end
62
+ end
63
+ end
64
+
65
+ class Column
66
+ attr_reader :column_name, :data_type
67
+
68
+ TYPE_MAPPINGS = {
69
+ 'int' => 'integer',
70
+ 'tinyint' => 'integer',
71
+ 'smallint' => 'integer',
72
+ 'mediumint' => 'integer',
73
+ 'bigint' => 'integer',
74
+ 'float' => 'float',
75
+ 'double' => 'float',
76
+ 'decimal' => 'float',
77
+ 'char' => 'string',
78
+ 'varchar' => 'string',
79
+ 'tinytext' => 'string',
80
+ 'text' => 'string',
81
+ 'date' => 'timestamp',
82
+ 'datetime' => 'timestamp',
83
+ 'timestamp' => 'timestamp'
84
+ }
85
+
86
+ def initialize(column_name, data_type)
87
+ @column_name = column_name
88
+ @data_type = data_type
89
+ end
90
+
91
+ def bigquery_data_type
92
+ TYPE_MAPPINGS[@data_type]
93
+ end
94
+
95
+ def converted_value
96
+ if bigquery_data_type == 'timestamp'
97
+ # time zone translate to UTC
98
+ "UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
99
+ elsif data_type == 'tinyint'
100
+ # for MySQL tinyint(1) problem
101
+ "CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
102
+ else
103
+ escaped_column_name
104
+ end
105
+ end
106
+
107
+ def to_json(*a)
108
+ { "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
109
+ end
110
+
111
+ private
112
+ def escaped_column_name
113
+ "`#{@column_name}`"
114
+ end
115
+ end
116
+ end
117
+ end
@@ -1,3 +1,3 @@
1
- module Samidare
2
- VERSION = "0.1.2"
3
- end
1
+ module Samidare
2
+ VERSION = "0.2.0"
3
+ end
@@ -25,9 +25,9 @@ Gem::Specification.new do |spec|
25
25
 
26
26
  spec.add_dependency 'unindent', '1.0'
27
27
  spec.add_dependency 'mysql2-cs-bind', '0.0.6'
28
- spec.add_dependency 'embulk-output-bigquery', '0.1.7'
29
- spec.add_dependency 'embulk-input-mysql', '0.6.0'
30
- spec.add_dependency 'embulk-parser-jsonl', '0.0.1'
28
+ spec.add_dependency 'embulk-output-bigquery', '0.4.3'
29
+ spec.add_dependency 'embulk-input-mysql', '0.8.2'
30
+ spec.add_dependency 'embulk-parser-jsonl', '0.2.0'
31
31
  spec.add_dependency 'embulk-formatter-jsonl', '0.1.4'
32
32
  spec.add_dependency 'bigquery', '0.8.3'
33
33
  end