samidare 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,61 +1,61 @@
1
- module Samidare
2
- class Embulk
3
- def run(database_configs, all_table_configs, bq_config, target_table_names = [])
4
- error_tables = []
5
- database_configs.keys.each do |db_name|
6
- table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
7
- error_tables = error_tables + run_by_database(
8
- db_name,
9
- table_configs,
10
- database_configs[db_name]['bq_dataset'],
11
- bq_config)
12
- end
13
- error_tables
14
- end
15
-
16
- def target_table_configs(table_configs, target_table_names)
17
- return table_configs if target_table_names.empty?
18
- table_configs.select { |table_config| target_table_names.include?(table_config.name) }
19
- end
20
-
21
- private
22
- def run_by_database(db_name, table_configs, bq_dataset, bq_config)
23
- process_times = []
24
- error_tables = []
25
- big_query = Samidare::BigQueryUtility.new(bq_config)
26
- table_configs.each do |table_config|
27
- start_time = Time.now
28
- log "table: #{table_config.name} - start"
29
-
30
- begin
31
- big_query.delete_table(bq_dataset, table_config.name)
32
- log "table: #{table_config.name} - deleted"
33
- rescue
34
- log "table: #{table_config.name} - does not exist"
35
- end
36
-
37
- cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
38
- log "cmd: #{cmd}"
39
- if system(cmd)
40
- result = 'success'
41
- else
42
- result = 'error'
43
- error_tables << table_config.name
44
- end
45
-
46
- process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
47
- log process_time
48
- process_times << process_time
49
- end
50
- log '------------------------------------'
51
- log "db_name: #{db_name}"
52
- process_times.each { |process_time| log process_time }
53
-
54
- error_tables
55
- end
56
-
57
- def log(message)
58
- puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
59
- end
60
- end
1
+ module Samidare
2
+ class Embulk
3
+ def run(database_configs, all_table_configs, bq_config, target_table_names = [])
4
+ error_tables = []
5
+ database_configs.keys.each do |db_name|
6
+ table_configs = target_table_configs(all_table_configs[db_name], target_table_names)
7
+ error_tables = error_tables + run_by_database(
8
+ db_name,
9
+ table_configs,
10
+ database_configs[db_name]['bq_dataset'],
11
+ bq_config)
12
+ end
13
+ error_tables
14
+ end
15
+
16
+ def target_table_configs(table_configs, target_table_names)
17
+ return table_configs if target_table_names.empty?
18
+ table_configs.select { |table_config| target_table_names.include?(table_config.name) }
19
+ end
20
+
21
+ private
22
+ def run_by_database(db_name, table_configs, bq_dataset, bq_config)
23
+ process_times = []
24
+ error_tables = []
25
+ big_query = Samidare::BigQueryUtility.new(bq_config)
26
+ table_configs.each do |table_config|
27
+ start_time = Time.now
28
+ log "table: #{table_config.name} - start"
29
+
30
+ begin
31
+ big_query.delete_table(bq_dataset, table_config.name)
32
+ log "table: #{table_config.name} - deleted"
33
+ rescue
34
+ log "table: #{table_config.name} - does not exist"
35
+ end
36
+
37
+ cmd = "embulk run #{bq_config['config_dir']}/#{db_name}/#{table_config.name}.yml"
38
+ log "cmd: #{cmd}"
39
+ if system(cmd)
40
+ result = 'success'
41
+ else
42
+ result = 'error'
43
+ error_tables << table_config.name
44
+ end
45
+
46
+ process_time = "table: #{table_config.name} - result: #{result} #{sprintf('%10.1f', Time.now - start_time)}sec"
47
+ log process_time
48
+ process_times << process_time
49
+ end
50
+ log '------------------------------------'
51
+ log "db_name: #{db_name}"
52
+ process_times.each { |process_time| log process_time }
53
+
54
+ error_tables
55
+ end
56
+
57
+ def log(message)
58
+ puts "[#{Time.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
59
+ end
60
+ end
61
61
  end
@@ -1,42 +1,42 @@
1
- module Samidare
2
- module EmbulkUtility
3
- class ConfigGenerator
4
- def generate_config(database_configs, bq_config)
5
- bq_utility = BigQueryUtility.new(bq_config)
6
-
7
- database_configs.keys.each do |db_name|
8
- database_config = database_configs[db_name]
9
- table_configs = all_table_configs[db_name]
10
- mysql_client = MySQL::MySQLClient.new(database_config)
11
-
12
- table_configs.each do |table_config|
13
- write(
14
- "#{bq_config['schema_dir']}/#{db_name}",
15
- "#{table_config.name}.json",
16
- mysql_client.generate_bq_schema(table_config.name)
17
- )
18
- write(
19
- "#{bq_config['config_dir']}/#{db_name}",
20
- "#{table_config.name}.yml",
21
- bq_utility.generate_embulk_config(
22
- db_name,
23
- database_config,
24
- table_config,
25
- mysql_client.columns(table_config.name))
26
- )
27
- end
28
- end
29
- end
30
-
31
- private
32
- def write(directory, file_name, content)
33
- FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
34
- File.write("#{directory}/#{file_name}", content)
35
- end
36
-
37
- def all_table_configs
38
- @all_table_configs ||= MySQL::TableConfig.generate_table_configs
39
- end
40
- end
41
- end
42
- end
1
+ module Samidare
2
+ module EmbulkUtility
3
+ class ConfigGenerator
4
+ def generate_config(database_configs, bq_config)
5
+ bq_utility = BigQueryUtility.new(bq_config)
6
+
7
+ database_configs.keys.each do |db_name|
8
+ database_config = database_configs[db_name]
9
+ table_configs = all_table_configs[db_name]
10
+ mysql_client = MySQL::MySQLClient.new(database_config)
11
+
12
+ table_configs.each do |table_config|
13
+ write(
14
+ "#{bq_config['schema_dir']}/#{db_name}",
15
+ "#{table_config.name}.json",
16
+ mysql_client.generate_bq_schema(table_config.name)
17
+ )
18
+ write(
19
+ "#{bq_config['config_dir']}/#{db_name}",
20
+ "#{table_config.name}.yml",
21
+ bq_utility.generate_embulk_config(
22
+ db_name,
23
+ database_config,
24
+ table_config,
25
+ mysql_client.columns(table_config.name))
26
+ )
27
+ end
28
+ end
29
+ end
30
+
31
+ private
32
+ def write(directory, file_name, content)
33
+ FileUtils.mkdir_p(directory) unless FileTest.exist?(directory)
34
+ File.write("#{directory}/#{file_name}", content)
35
+ end
36
+
37
+ def all_table_configs
38
+ @all_table_configs ||= MySQL::TableConfig.generate_table_configs
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,117 +1,117 @@
1
- require 'mysql2-cs-bind'
2
- require 'json'
3
- require 'yaml'
4
- require 'fileutils'
5
- require 'samidare/bigquery_utility'
6
-
7
- module Samidare
8
- module MySQL
9
- class MySQLClient
10
- COLUMN_SQL = <<-SQL
11
- SELECT column_name, data_type
12
- FROM INFORMATION_SCHEMA.COLUMNS
13
- WHERE table_schema = ?
14
- AND table_name = ?
15
- ORDER BY ordinal_position
16
- SQL
17
-
18
- def initialize(database_config)
19
- @database_config = database_config
20
- end
21
-
22
- def client
23
- @client ||= Mysql2::Client.new(
24
- :host => @database_config['host'],
25
- :username => @database_config['username'],
26
- :password => @database_config['password'],
27
- :database => @database_config['database'])
28
- end
29
-
30
- def generate_bq_schema(table_name)
31
- infos = columns(table_name)
32
- BigQueryUtility.generate_schema(infos)
33
- end
34
-
35
- def columns(table_name)
36
- rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
37
- rows.map { |row| Column.new(row['column_name'], row['data_type']) }
38
- end
39
- end
40
-
41
- class TableConfig
42
- attr_reader :name, :daily_snapshot, :condition
43
-
44
- def initialize(config)
45
- @name = config['name']
46
- @daily_snapshot = config['daily_snapshot'] || false
47
- @condition = config['condition']
48
- end
49
-
50
- def self.generate_table_configs(file_path = 'table.yml')
51
- configs = YAML.load_file(file_path)
52
- configs.each_with_object({}) do |(db, database_config), table_configs|
53
- table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
54
- table_configs
55
- end
56
- end
57
-
58
- def ==(another)
59
- self.instance_variables.all? do |v|
60
- self.instance_variable_get(v) == another.instance_variable_get(v)
61
- end
62
- end
63
- end
64
-
65
- class Column
66
- attr_reader :column_name, :data_type
67
-
68
- TYPE_MAPPINGS = {
69
- 'int' => 'integer',
70
- 'tinyint' => 'integer',
71
- 'smallint' => 'integer',
72
- 'mediumint' => 'integer',
73
- 'bigint' => 'integer',
74
- 'float' => 'float',
75
- 'double' => 'float',
76
- 'decimal' => 'float',
77
- 'char' => 'string',
78
- 'varchar' => 'string',
79
- 'tinytext' => 'string',
80
- 'text' => 'string',
81
- 'date' => 'timestamp',
82
- 'datetime' => 'timestamp',
83
- 'timestamp' => 'timestamp'
84
- }
85
-
86
- def initialize(column_name, data_type)
87
- @column_name = column_name
88
- @data_type = data_type
89
- end
90
-
91
- def bigquery_data_type
92
- TYPE_MAPPINGS[@data_type]
93
- end
94
-
95
- def converted_value
96
- if bigquery_data_type == 'timestamp'
97
- # time zone translate to UTC
98
- "UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
99
- elsif data_type == 'tinyint'
100
- # for MySQL tinyint(1) problem
101
- "CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
102
- else
103
- escaped_column_name
104
- end
105
- end
106
-
107
- def to_json(*a)
108
- { "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
109
- end
110
-
111
- private
112
- def escaped_column_name
113
- "`#{@column_name}`"
114
- end
115
- end
116
- end
117
- end
1
+ require 'mysql2-cs-bind'
2
+ require 'json'
3
+ require 'yaml'
4
+ require 'fileutils'
5
+ require 'samidare/bigquery_utility'
6
+
7
+ module Samidare
8
+ module MySQL
9
+ class MySQLClient
10
+ COLUMN_SQL = <<-SQL
11
+ SELECT column_name, data_type
12
+ FROM INFORMATION_SCHEMA.COLUMNS
13
+ WHERE table_schema = ?
14
+ AND table_name = ?
15
+ ORDER BY ordinal_position
16
+ SQL
17
+
18
+ def initialize(database_config)
19
+ @database_config = database_config
20
+ end
21
+
22
+ def client
23
+ @client ||= Mysql2::Client.new(
24
+ :host => @database_config['host'],
25
+ :username => @database_config['username'],
26
+ :password => @database_config['password'],
27
+ :database => @database_config['database'])
28
+ end
29
+
30
+ def generate_bq_schema(table_name)
31
+ infos = columns(table_name)
32
+ BigQueryUtility.generate_schema(infos)
33
+ end
34
+
35
+ def columns(table_name)
36
+ rows = client.xquery(COLUMN_SQL, @database_config['database'], table_name)
37
+ rows.map { |row| Column.new(row['column_name'], row['data_type']) }
38
+ end
39
+ end
40
+
41
+ class TableConfig
42
+ attr_reader :name, :daily_snapshot, :condition
43
+
44
+ def initialize(config)
45
+ @name = config['name']
46
+ @daily_snapshot = config['daily_snapshot'] || false
47
+ @condition = config['condition']
48
+ end
49
+
50
+ def self.generate_table_configs(file_path = 'table.yml')
51
+ configs = YAML.load_file(file_path)
52
+ configs.each_with_object({}) do |(db, database_config), table_configs|
53
+ table_configs[db] = database_config['tables'].map { |config| TableConfig.new(config) }
54
+ table_configs
55
+ end
56
+ end
57
+
58
+ def ==(another)
59
+ self.instance_variables.all? do |v|
60
+ self.instance_variable_get(v) == another.instance_variable_get(v)
61
+ end
62
+ end
63
+ end
64
+
65
+ class Column
66
+ attr_reader :column_name, :data_type
67
+
68
+ TYPE_MAPPINGS = {
69
+ 'int' => 'integer',
70
+ 'tinyint' => 'integer',
71
+ 'smallint' => 'integer',
72
+ 'mediumint' => 'integer',
73
+ 'bigint' => 'integer',
74
+ 'float' => 'float',
75
+ 'double' => 'float',
76
+ 'decimal' => 'float',
77
+ 'char' => 'string',
78
+ 'varchar' => 'string',
79
+ 'tinytext' => 'string',
80
+ 'text' => 'string',
81
+ 'date' => 'timestamp',
82
+ 'datetime' => 'timestamp',
83
+ 'timestamp' => 'timestamp'
84
+ }
85
+
86
+ def initialize(column_name, data_type)
87
+ @column_name = column_name
88
+ @data_type = data_type
89
+ end
90
+
91
+ def bigquery_data_type
92
+ TYPE_MAPPINGS[@data_type]
93
+ end
94
+
95
+ def converted_value
96
+ if bigquery_data_type == 'timestamp'
97
+ # time zone translate to UTC
98
+ "UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
99
+ elsif data_type == 'tinyint'
100
+ # for MySQL tinyint(1) problem
101
+ "CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
102
+ else
103
+ escaped_column_name
104
+ end
105
+ end
106
+
107
+ def to_json(*a)
108
+ { "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
109
+ end
110
+
111
+ private
112
+ def escaped_column_name
113
+ "`#{@column_name}`"
114
+ end
115
+ end
116
+ end
117
+ end
@@ -1,3 +1,3 @@
1
- module Samidare
2
- VERSION = "0.1.2"
3
- end
1
+ module Samidare
2
+ VERSION = "0.2.0"
3
+ end
@@ -25,9 +25,9 @@ Gem::Specification.new do |spec|
25
25
 
26
26
  spec.add_dependency 'unindent', '1.0'
27
27
  spec.add_dependency 'mysql2-cs-bind', '0.0.6'
28
- spec.add_dependency 'embulk-output-bigquery', '0.1.7'
29
- spec.add_dependency 'embulk-input-mysql', '0.6.0'
30
- spec.add_dependency 'embulk-parser-jsonl', '0.0.1'
28
+ spec.add_dependency 'embulk-output-bigquery', '0.4.3'
29
+ spec.add_dependency 'embulk-input-mysql', '0.8.2'
30
+ spec.add_dependency 'embulk-parser-jsonl', '0.2.0'
31
31
  spec.add_dependency 'embulk-formatter-jsonl', '0.1.4'
32
32
  spec.add_dependency 'bigquery', '0.8.3'
33
33
  end