samidare 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec15b952226e1fe811ab80393aa6e23d078cbf0d
4
- data.tar.gz: ea9f9a470b9c13b05891bc7c5672b36c1d1abf68
3
+ metadata.gz: a515fd2312967aab172fba7b511223f40bc85f93
4
+ data.tar.gz: 344ef5f724464bb45cd6a858e99958978d615922
5
5
  SHA512:
6
- metadata.gz: e1cb9d8ffacf52b9673ca9f6f192650c7202e8593aca20b2474687867326f9dcc6e8e417ad6733a43cd0c34a155dd26874eba9da0e48ba251fcb5aa3bee0807c
7
- data.tar.gz: e4c0e30a4e533f51d0a54cb8fc41f332b16a0dbf4868470d897967fb64a39e0cf93a698fc93ad64669880cc3953e05ccff5abff6ef6131b73ba58ac01f13d9f3
6
+ metadata.gz: b90e3598957f6d64a1a8e24ff3bc8a207267dd116a062c0d84b6b555934b7ca77357d4854c6fe40abb6a2ce60405aef2e34db482bc2a007d8585047994db29ed
7
+ data.tar.gz: 29aba7ab5b5c45d81807e6fc1d56f0ee69bb7a394684162bbc846d84d9970a5448a1cd19698eb2d1fffa9c4efe712e13271c89c30943562c70022db3b0bac600
data/README.md CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
19
19
  $ gem install samidare
20
20
 
21
21
  ## Usage
22
- Require `database.yml` and `table.yml`.
22
+ Require `database.yml` and `table.yml`.
23
23
  Below is a sample config file.
24
24
 
25
25
  #### database.yml
@@ -27,19 +27,21 @@ Below is a sample config file.
27
27
  db01:
28
28
  host: localhost
29
29
  username: root
30
- password:
30
+ password: pswd
31
31
  database: production
32
32
  bq_dataset: mysql_db01
33
33
 
34
34
  db02:
35
35
  host: localhost
36
36
  username: root
37
- password:
37
+ password: pswd
38
38
  database: production
39
39
  bq_dataset: mysql_db02
40
40
 
41
41
  ```
42
42
 
43
+ **Caution: Embulk does not allow an empty password for MySQL.**
44
+
43
45
  #### table.yml
44
46
  ```yml
45
47
  db01:
@@ -54,11 +56,11 @@ db02:
54
56
  - name: configs
55
57
  ```
56
58
 
57
- Samidare requires BigQuery parameters.
59
+ Samidare requires BigQuery parameters like the ones below.
58
60
  ```ruby
61
+ [sample.rb]
59
62
  config = {
60
63
  'project_id' => 'BIGQUERY_PROJECT_ID',
61
- 'project_name' => 'BIGQUERY_PROJECT_NAME',
62
64
  'service_email' => 'SERVICE_ACCOUNT_EMAIL',
63
65
  'key' => '/etc/embulk/bigquery.p12',
64
66
  'schema_dir' => '/var/tmp/embulk/schema',
@@ -71,6 +73,41 @@ client.generate_config(config)
71
73
  client.run(config)
72
74
  ```
73
75
 
76
+ ```bash
77
+ bundle exec ruby sample.rb
78
+ ```
79
+
80
+ ## Features
81
+ #### daily snapshot
82
+ BigQuery supports table wildcard expressions over a set of daily tables, for example `sales20150701`.
83
+ If you need a daily snapshot of a table in BigQuery, add the `daily_snapshot` option to `database.yml` or `table.yml` as shown below.
84
+ When set in `database.yml`, the `daily_snapshot` option affects all tables.
85
+ On the other hand, when set in `table.yml`, the `daily_snapshot` option affects only the table it is set on.
86
+ **The date suffix is determined by the execution date.**
87
+
88
+ ```yml
89
+ [database.yml]
90
+ production:
91
+ host: localhost
92
+ username: root
93
+ password: pswd
94
+ database: production
95
+ bq_dataset: mysql
96
+ daily_snapshot: true
97
+ ```
98
+
99
+ ```yml
100
+ [table.yml]
101
+ production:
102
+ tables:
103
+ - name: users
104
+ - name: events
105
+ daily_snapshot: true
106
+ - name: hobbies
107
+
108
+ Only `events` is renamed to `eventsYYYYMMDD` for BigQuery.
109
+ ```
110
+
74
111
  ## Contributing
75
112
 
76
113
  1. Fork it ( https://github.com/[my-github-username]/samidare/fork )
@@ -44,10 +44,12 @@ module Samidare
44
44
  "[\n" + json_body + "\n]\n"
45
45
  end
46
46
 
47
- def self.generate_sql(table_name, column_infos)
47
+ def self.generate_sql(table_info, column_infos)
48
48
  columns = column_infos.map { |column_info| column_info.converted_value }
49
49
  sql = "SELECT " + columns.join(",")
50
- sql << " FROM #{table_name}\n"
50
+ sql << " FROM #{table_info.name}\n"
51
+ sql << "WHERE #{table_info.condition}\n" if table_info.condition
52
+ sql
51
53
  end
52
54
 
53
55
  def generate_embulk_config(db_name, db_info, table_info, column_infos)
@@ -55,7 +57,7 @@ module Samidare
55
57
  user = db_info['username']
56
58
  password = db_info['password']
57
59
  database = db_info['database']
58
- query = Samidare::BigQueryUtility.generate_sql(table_info.name, column_infos)
60
+ query = Samidare::BigQueryUtility.generate_sql(table_info, column_infos)
59
61
  project = @config['project_id']
60
62
  p12_keyfile_path = @config['key']
61
63
  service_account_email = @config['service_email']
@@ -102,6 +102,10 @@ module Samidare
102
102
  def daily_snapshot
103
103
  @config['daily_snapshot'] || false
104
104
  end
105
+
106
+ def condition
107
+ @config['condition']
108
+ end
105
109
  end
106
110
 
107
111
  class ColumnInfo
@@ -137,18 +141,23 @@ module Samidare
137
141
  def converted_value
138
142
  if bigquery_data_type == 'timestamp'
139
143
  # time zone translate to UTC
140
- "UNIX_TIMESTAMP(#{@column_name}) AS #{@column_name}"
144
+ "UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
141
145
  elsif data_type == 'tinyint'
142
146
  # for MySQL tinyint(1) problem
143
- "CAST(#{@column_name} AS signed) AS #{@column_name}"
147
+ "CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
144
148
  else
145
- @column_name
149
+ escaped_column_name
146
150
  end
147
151
  end
148
152
 
149
153
  def to_json(*a)
150
154
  { "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
151
155
  end
156
+
157
+ private
158
+ def escaped_column_name
159
+ "`#{@column_name}`"
160
+ end
152
161
  end
153
162
  end
154
163
  end
@@ -1,3 +1,3 @@
1
1
  module Samidare
2
- VERSION = "0.0.9"
2
+ VERSION = "0.0.10"
3
3
  end
@@ -24,16 +24,44 @@ describe Samidare::BigQueryUtility do
24
24
  end
25
25
 
26
26
  describe '.generate_sql' do
27
- subject { Samidare::BigQueryUtility.generate_sql(table_name, column_infos) }
27
+ subject { Samidare::BigQueryUtility.generate_sql(table_info, column_infos) }
28
28
 
29
- let(:table_name) { 'simple' }
30
29
  let(:column_infos) { [
31
30
  Samidare::EmbulkUtility::ColumnInfo.new('id', 'int'),
32
31
  Samidare::EmbulkUtility::ColumnInfo.new('name', 'varchar'),
33
32
  Samidare::EmbulkUtility::ColumnInfo.new('created_at', 'datetime')
34
33
  ] }
35
- let(:sql) { "SELECT id,name,UNIX_TIMESTAMP(created_at) AS created_at FROM simple\n" }
36
- it { expect(subject).to eq sql }
34
+
35
+ context 'no condition' do
36
+ let(:table_info) { Samidare::EmbulkUtility::TableInfo.new({ 'name' => 'simple' }) }
37
+ let(:sql) { "SELECT `id`,`name`,UNIX_TIMESTAMP(`created_at`) AS `created_at` FROM simple\n" }
38
+ it { expect(subject).to eq sql }
39
+ end
40
+
41
+ context 'has condition' do
42
+ let(:table_info) { Samidare::EmbulkUtility::TableInfo.new({ 'name' => 'simple', 'condition' => 'created_at >= CURRENT_DATE() - INTERVAL 3 MONTH' }) }
43
+ let(:sql) { "SELECT `id`,`name`,UNIX_TIMESTAMP(`created_at`) AS `created_at` FROM simple\nWHERE created_at >= CURRENT_DATE() - INTERVAL 3 MONTH\n" }
44
+ it { expect(subject).to eq sql }
45
+ end
46
+ end
47
+
48
+ describe '#actual_table_name' do
49
+ before { Timecop.freeze(Time.now) }
50
+
51
+ after { Timecop.return }
52
+
53
+ subject { Samidare::BigQueryUtility.new({}).actual_table_name(table_name, daily_snapshot) }
54
+ let(:table_name) { 'users' }
55
+ let(:daily_snapshot) { false }
56
+
57
+ context 'do not use daily snapshot' do
58
+ it { expect(subject).to eq table_name }
59
+ end
60
+
61
+ context 'use daily snapshot' do
62
+ let(:daily_snapshot) { true }
63
+ it { expect(subject).to eq table_name + Time.now.strftime('%Y%m%d') }
64
+ end
37
65
  end
38
66
 
39
67
  describe '#actual_table_name' do
@@ -93,19 +93,19 @@ describe Samidare::EmbulkUtility::ColumnInfo do
93
93
  context 'datetime' do
94
94
  let(:column_name) { 'create_at' }
95
95
  let(:data_type) { 'datetime' }
96
- it { expect(subject).to eq 'UNIX_TIMESTAMP(create_at) AS create_at' }
96
+ it { expect(subject).to eq 'UNIX_TIMESTAMP(`create_at`) AS `create_at`' }
97
97
  end
98
98
 
99
99
  context 'int' do
100
100
  let(:column_name) { 'id' }
101
101
  let(:data_type) { 'int' }
102
- it { expect(subject).to eq 'id' }
102
+ it { expect(subject).to eq '`id`' }
103
103
  end
104
104
 
105
105
  context 'varchar' do
106
106
  let(:column_name) { 'explanation' }
107
107
  let(:data_type) { 'varchar' }
108
- it { expect(subject).to eq 'explanation' }
108
+ it { expect(subject).to eq '`explanation`' }
109
109
  end
110
110
  end
111
111
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: samidare
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryoji Kobori
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-30 00:00:00.000000000 Z
11
+ date: 2015-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -215,4 +215,3 @@ test_files:
215
215
  - spec/samidare/embulk_utility_spec.rb
216
216
  - spec/samidare_spec.rb
217
217
  - spec/spec_helper.rb
218
- has_rdoc: