samidare 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -5
- data/lib/samidare/bigquery_utility.rb +5 -3
- data/lib/samidare/embulk_utility.rb +12 -3
- data/lib/samidare/version.rb +1 -1
- data/spec/samidare/bigquery_utility_spec.rb +32 -4
- data/spec/samidare/embulk_utility_spec.rb +3 -3
- metadata +2 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a515fd2312967aab172fba7b511223f40bc85f93
|
4
|
+
data.tar.gz: 344ef5f724464bb45cd6a858e99958978d615922
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b90e3598957f6d64a1a8e24ff3bc8a207267dd116a062c0d84b6b555934b7ca77357d4854c6fe40abb6a2ce60405aef2e34db482bc2a007d8585047994db29ed
|
7
|
+
data.tar.gz: 29aba7ab5b5c45d81807e6fc1d56f0ee69bb7a394684162bbc846d84d9970a5448a1cd19698eb2d1fffa9c4efe712e13271c89c30943562c70022db3b0bac600
|
data/README.md
CHANGED
@@ -19,7 +19,7 @@ Or install it yourself as:
|
|
19
19
|
$ gem install samidare
|
20
20
|
|
21
21
|
## Usage
|
22
|
-
Require `database.yml` and `table.yml`.
|
22
|
+
Require `database.yml` and `table.yml`.
|
23
23
|
Below is a sample config file.
|
24
24
|
|
25
25
|
#### database.yml
|
@@ -27,19 +27,21 @@ Below is a sample config file.
|
|
27
27
|
db01:
|
28
28
|
host: localhost
|
29
29
|
username: root
|
30
|
-
password:
|
30
|
+
password: pswd
|
31
31
|
database: production
|
32
32
|
bq_dataset: mysql_db01
|
33
33
|
|
34
34
|
db02:
|
35
35
|
host: localhost
|
36
36
|
username: root
|
37
|
-
password:
|
37
|
+
password: pswd
|
38
38
|
database: production
|
39
39
|
bq_dataset: mysql_db02
|
40
40
|
|
41
41
|
```
|
42
42
|
|
43
|
+
**Caution: Embulk does not allow an empty password for MySQL.**
|
44
|
+
|
43
45
|
#### table.yml
|
44
46
|
```yml
|
45
47
|
db01:
|
@@ -54,11 +56,11 @@ db02:
|
|
54
56
|
- name: configs
|
55
57
|
```
|
56
58
|
|
57
|
-
Samidare requires BigQuery parameters.
|
59
|
+
Samidare requires BigQuery parameters like the ones below.
|
58
60
|
```ruby
|
61
|
+
[sample.rb]
|
59
62
|
config = {
|
60
63
|
'project_id' => 'BIGQUERY_PROJECT_ID',
|
61
|
-
'project_name' => 'BIGQUERY_PROJECT_NAME',
|
62
64
|
'service_email' => 'SERVICE_ACCOUNT_EMAIL',
|
63
65
|
'key' => '/etc/embulk/bigquery.p12',
|
64
66
|
'schema_dir' => '/var/tmp/embulk/schema',
|
@@ -71,6 +73,41 @@ client.generate_config(config)
|
|
71
73
|
client.run(config)
|
72
74
|
```
|
73
75
|
|
76
|
+
```bash
|
77
|
+
bundle exec ruby sample.rb
|
78
|
+
```
|
79
|
+
|
80
|
+
## Features
|
81
|
+
#### daily snapshot
|
82
|
+
BigQuery supports table wildcard expressions over a specific set of daily tables, for example, `sales20150701`.
|
83
|
+
If you need a daily snapshot of a table in BigQuery, add the `daily_snapshot` option to `database.yml` or `table.yml` as shown below.
|
84
|
+
In `database.yml`, the `daily_snapshot` option affects all tables of that database.
|
85
|
+
In `table.yml`, on the other hand, the `daily_snapshot` option affects only the table it is set on.
|
86
|
+
**The date suffix (`YYYYMMDD`) is determined by the date on which Samidare is executed.**
|
87
|
+
|
88
|
+
```yml
|
89
|
+
[database.yml]
|
90
|
+
production:
|
91
|
+
host: localhost
|
92
|
+
username: root
|
93
|
+
password: pswd
|
94
|
+
database: production
|
95
|
+
bq_dataset: mysql
|
96
|
+
daily_snapshot: true
|
97
|
+
```
|
98
|
+
|
99
|
+
```yml
|
100
|
+
[table.yml]
|
101
|
+
production:
|
102
|
+
tables:
|
103
|
+
- name: users
|
104
|
+
- name: events
|
105
|
+
daily_snapshot: true
|
106
|
+
- name: hobbies
|
107
|
+
|
108
|
+
Only `events` is renamed to `eventsYYYYMMDD` for BigQuery.
|
109
|
+
```
|
110
|
+
|
74
111
|
## Contributing
|
75
112
|
|
76
113
|
1. Fork it ( https://github.com/[my-github-username]/samidare/fork )
|
@@ -44,10 +44,12 @@ module Samidare
|
|
44
44
|
"[\n" + json_body + "\n]\n"
|
45
45
|
end
|
46
46
|
|
47
|
-
def self.generate_sql(
|
47
|
+
def self.generate_sql(table_info, column_infos)
|
48
48
|
columns = column_infos.map { |column_info| column_info.converted_value }
|
49
49
|
sql = "SELECT " + columns.join(",")
|
50
|
-
sql << " FROM #{
|
50
|
+
sql << " FROM #{table_info.name}\n"
|
51
|
+
sql << "WHERE #{table_info.condition}\n" if table_info.condition
|
52
|
+
sql
|
51
53
|
end
|
52
54
|
|
53
55
|
def generate_embulk_config(db_name, db_info, table_info, column_infos)
|
@@ -55,7 +57,7 @@ module Samidare
|
|
55
57
|
user = db_info['username']
|
56
58
|
password = db_info['password']
|
57
59
|
database = db_info['database']
|
58
|
-
query = Samidare::BigQueryUtility.generate_sql(table_info
|
60
|
+
query = Samidare::BigQueryUtility.generate_sql(table_info, column_infos)
|
59
61
|
project = @config['project_id']
|
60
62
|
p12_keyfile_path = @config['key']
|
61
63
|
service_account_email = @config['service_email']
|
@@ -102,6 +102,10 @@ module Samidare
|
|
102
102
|
def daily_snapshot
|
103
103
|
@config['daily_snapshot'] || false
|
104
104
|
end
|
105
|
+
|
106
|
+
def condition
|
107
|
+
@config['condition']
|
108
|
+
end
|
105
109
|
end
|
106
110
|
|
107
111
|
class ColumnInfo
|
@@ -137,18 +141,23 @@ module Samidare
|
|
137
141
|
def converted_value
|
138
142
|
if bigquery_data_type == 'timestamp'
|
139
143
|
# time zone translate to UTC
|
140
|
-
"UNIX_TIMESTAMP(#{
|
144
|
+
"UNIX_TIMESTAMP(#{escaped_column_name}) AS #{escaped_column_name}"
|
141
145
|
elsif data_type == 'tinyint'
|
142
146
|
# for MySQL tinyint(1) problem
|
143
|
-
"CAST(#{
|
147
|
+
"CAST(#{escaped_column_name} AS signed) AS #{escaped_column_name}"
|
144
148
|
else
|
145
|
-
|
149
|
+
escaped_column_name
|
146
150
|
end
|
147
151
|
end
|
148
152
|
|
149
153
|
def to_json(*a)
|
150
154
|
{ "name" => @column_name, "type" => bigquery_data_type }.to_json(*a)
|
151
155
|
end
|
156
|
+
|
157
|
+
private
|
158
|
+
def escaped_column_name
|
159
|
+
"`#{@column_name}`"
|
160
|
+
end
|
152
161
|
end
|
153
162
|
end
|
154
163
|
end
|
data/lib/samidare/version.rb
CHANGED
@@ -24,16 +24,44 @@ describe Samidare::BigQueryUtility do
|
|
24
24
|
end
|
25
25
|
|
26
26
|
describe '.generate_sql' do
|
27
|
-
subject { Samidare::BigQueryUtility.generate_sql(
|
27
|
+
subject { Samidare::BigQueryUtility.generate_sql(table_info, column_infos) }
|
28
28
|
|
29
|
-
let(:table_name) { 'simple' }
|
30
29
|
let(:column_infos) { [
|
31
30
|
Samidare::EmbulkUtility::ColumnInfo.new('id', 'int'),
|
32
31
|
Samidare::EmbulkUtility::ColumnInfo.new('name', 'varchar'),
|
33
32
|
Samidare::EmbulkUtility::ColumnInfo.new('created_at', 'datetime')
|
34
33
|
] }
|
35
|
-
|
36
|
-
|
34
|
+
|
35
|
+
context 'no condition' do
|
36
|
+
let(:table_info) { Samidare::EmbulkUtility::TableInfo.new({ 'name' => 'simple' }) }
|
37
|
+
let(:sql) { "SELECT `id`,`name`,UNIX_TIMESTAMP(`created_at`) AS `created_at` FROM simple\n" }
|
38
|
+
it { expect(subject).to eq sql }
|
39
|
+
end
|
40
|
+
|
41
|
+
context 'has condition' do
|
42
|
+
let(:table_info) { Samidare::EmbulkUtility::TableInfo.new({ 'name' => 'simple', 'condition' => 'created_at >= CURRENT_DATE() - INTERVAL 3 MONTH' }) }
|
43
|
+
let(:sql) { "SELECT `id`,`name`,UNIX_TIMESTAMP(`created_at`) AS `created_at` FROM simple\nWHERE created_at >= CURRENT_DATE() - INTERVAL 3 MONTH\n" }
|
44
|
+
it { expect(subject).to eq sql }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe '#actual_table_name' do
|
49
|
+
before { Timecop.freeze(Time.now) }
|
50
|
+
|
51
|
+
after { Timecop.return }
|
52
|
+
|
53
|
+
subject { Samidare::BigQueryUtility.new({}).actual_table_name(table_name, daily_snapshot) }
|
54
|
+
let(:table_name) { 'users' }
|
55
|
+
let(:daily_snapshot) { false }
|
56
|
+
|
57
|
+
context 'do not use daily snapshot' do
|
58
|
+
it { expect(subject).to eq table_name }
|
59
|
+
end
|
60
|
+
|
61
|
+
context 'use daily snapshot' do
|
62
|
+
let(:daily_snapshot) { true }
|
63
|
+
it { expect(subject).to eq table_name + Time.now.strftime('%Y%m%d') }
|
64
|
+
end
|
37
65
|
end
|
38
66
|
|
39
67
|
describe '#actual_table_name' do
|
@@ -93,19 +93,19 @@ describe Samidare::EmbulkUtility::ColumnInfo do
|
|
93
93
|
context 'datetime' do
|
94
94
|
let(:column_name) { 'create_at' }
|
95
95
|
let(:data_type) { 'datetime' }
|
96
|
-
it { expect(subject).to eq 'UNIX_TIMESTAMP(create_at) AS create_at' }
|
96
|
+
it { expect(subject).to eq 'UNIX_TIMESTAMP(`create_at`) AS `create_at`' }
|
97
97
|
end
|
98
98
|
|
99
99
|
context 'int' do
|
100
100
|
let(:column_name) { 'id' }
|
101
101
|
let(:data_type) { 'int' }
|
102
|
-
it { expect(subject).to eq 'id' }
|
102
|
+
it { expect(subject).to eq '`id`' }
|
103
103
|
end
|
104
104
|
|
105
105
|
context 'varchar' do
|
106
106
|
let(:column_name) { 'explanation' }
|
107
107
|
let(:data_type) { 'varchar' }
|
108
|
-
it { expect(subject).to eq 'explanation' }
|
108
|
+
it { expect(subject).to eq '`explanation`' }
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: samidare
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryoji Kobori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -215,4 +215,3 @@ test_files:
|
|
215
215
|
- spec/samidare/embulk_utility_spec.rb
|
216
216
|
- spec/samidare_spec.rb
|
217
217
|
- spec/spec_helper.rb
|
218
|
-
has_rdoc:
|