redata 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +235 -0
- data/Rakefile +2 -0
- data/bin/adjust +17 -0
- data/bin/console +14 -0
- data/bin/notice +20 -0
- data/bin/redata +20 -0
- data/bin/setup +8 -0
- data/lib/redata.rb +5 -0
- data/lib/redata/bucket.rb +27 -0
- data/lib/redata/config.rb +108 -0
- data/lib/redata/database.rb +92 -0
- data/lib/redata/log.rb +20 -0
- data/lib/redata/notice.rb +21 -0
- data/lib/redata/parser.rb +147 -0
- data/lib/redata/relation.rb +17 -0
- data/lib/redata/relation/export.rb +30 -0
- data/lib/redata/relation/table.rb +24 -0
- data/lib/redata/relation/view.rb +24 -0
- data/lib/redata/requires.rb +24 -0
- data/lib/redata/schema.rb +64 -0
- data/lib/redata/ssh.rb +28 -0
- data/lib/redata/tasks.rb +84 -0
- data/lib/redata/version.rb +3 -0
- data/redata.gemspec +29 -0
- metadata +172 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 58c04577fb55ac865dda405347811acf421dac3e
|
4
|
+
data.tar.gz: a1a33fb66a93f198aca56731417c2a2d90b65192
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 199b5da361e782c96ba4dbb1c5e465ca3fd7e9a8f4471776efb0915408ec9f43428aadfda25f5037e883c4798bbec4633c2e0aa7ddb76d59daafc3e8c0ed56dc
|
7
|
+
data.tar.gz: 551db6a70bbcdfd29a39aed84d69258d883afcbede934bc5f7fc08ed165b8c3978671f2cba2e8d26ee3bc5a6a8349cc4cde86028824241b01dfce1fa6e6dd092
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 goshan_tvi
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,235 @@
|
|
1
|
+
# Redata
|
2
|
+
|
3
|
+
Helps you control data processing in Redshift with simple query files and a command-line interface
|
4
|
+
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'redata'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle install
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install redata
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
### Config
|
25
|
+
|
26
|
+
+ config `config/redata.yml` for general setting
|
27
|
+
|
28
|
+
```YAML
|
29
|
+
start_date: "2016-04-04" # default data start date
|
30
|
+
timezone: "Asia/Tokyo"
|
31
|
+
s3:
|
32
|
+
aws_access_key_id: {key_id}
|
33
|
+
aws_secret_access_key: {key_secret}
|
34
|
+
region: {s3_region}
|
35
|
+
bucket:
|
36
|
+
production: {bucket_name}
|
37
|
+
development: {bucket_name}
|
38
|
+
ssh: # this setting will be used in ssh mode when you access private database
|
39
|
+
HostName: {gateway_host}
|
40
|
+
IdentityFile: {~/.ssh/key.pem}
|
41
|
+
User: {username}
|
42
|
+
slack_bot: # this setting will be used for slack notice push
|
43
|
+
token: {bot_token}
|
44
|
+
channel: {slack_channel}
|
45
|
+
```
|
46
|
+
|
47
|
+
+ config `config/database.yml` for development and production environment in redshift database
|
48
|
+
eg.
|
49
|
+
|
50
|
+
```YAML
|
51
|
+
development:
|
52
|
+
host: localhost
|
53
|
+
username: user
|
54
|
+
password: ''
|
55
|
+
database: dev
|
56
|
+
export: # target platform db(mysql) which export data to
|
57
|
+
app: # platform name
|
58
|
+
username: root
|
59
|
+
password: ''
|
60
|
+
host: localhost
|
61
|
+
database: app
|
62
|
+
```
|
63
|
+
|
64
|
+
+ config `config/relations.rb` for data object in redshift and exporting process to mysql
|
65
|
+
eg.
|
66
|
+
|
67
|
+
```RUBY
|
68
|
+
Redata::Task.schema.config do
|
69
|
+
# Example of declaring a global table
|
70
|
+
table 'table_name'
|
71
|
+
# This declaration means
|
72
|
+
# query file: database/sources/table_name.sql
|
73
|
+
# redshift table: table_name
|
74
|
+
# update type: renewal, delete and re-create when update
|
75
|
+
# key used in command line: table_name
|
76
|
+
|
77
|
+
# Example of declaring a global table with customizing options
|
78
|
+
table 'new_table_name', :dir => 'dir', :file => 'query_file', :update => :append, :as => :alias
|
79
|
+
# This declaration means
|
80
|
+
# query file: database/sources/dir/query_file.sql
|
81
|
+
# redshift table: new_table_name
|
82
|
+
# update type: append, only appending to existing table
|
83
|
+
# key used in command line: alias
|
84
|
+
|
85
|
+
# view is same to table but the update type only has renewal mode
|
86
|
+
view 'view_name'
|
87
|
+
view 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
|
88
|
+
|
89
|
+
# Example of declaring with category
|
90
|
+
category :test_category do
|
91
|
+
table 'test_table'
|
92
|
+
# This declaration means
|
93
|
+
# query file: database/sources/test_category/test_table.sql
|
94
|
+
# redshift table: test_category_test_table
|
95
|
+
# update type: renewal
|
96
|
+
# key used in command line: test_category_test_table
|
97
|
+
|
98
|
+
table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :update => :append, :as => :alias_oth
|
99
|
+
# This declaration means
|
100
|
+
# query file: database/sources/dir/query_file_oth.sql
|
101
|
+
# redshift table: test_category_test_table_oth
|
102
|
+
# update type: append
|
103
|
+
# key used in command line: test_category_alias_oth
|
104
|
+
|
105
|
+
# view is same to table without appending update type
|
106
|
+
view 'test_view'
|
107
|
+
view 'test_view_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_view_oth
|
108
|
+
|
109
|
+
#Example of convertor declaration
|
110
|
+
export 'test_export'
|
111
|
+
# This declaration means
|
112
|
+
# convertor file: database/convertors/test_category/test_export.conv
|
113
|
+
# target mysql database name: test_category (Also see: export config in config/database.yml{:export})
|
114
|
+
# target mysql table: test_export
|
115
|
+
# update type: renewal, delete all records and insert new records
|
116
|
+
# key used in command line: test_category_test_export
|
117
|
+
|
118
|
+
#Example of convertor declaration
|
119
|
+
export 'test_export', :dir => 'dir', :file => 'conv_file', :update => :append, :as => 'alias_export'
|
120
|
+
# This declaration means
|
121
|
+
# convertor file: database/convertors/dir/conv_file.conv
|
122
|
+
# target mysql database name: test_category
|
123
|
+
# target mysql table: test_export
|
124
|
+
# update type: append, append insert new records without deleting
|
125
|
+
# key used in command line: test_category_alias_export
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
```
|
130
|
+
|
131
|
+
### Query file
|
132
|
+
|
133
|
+
A query file is used to create a table or view in Redshift. It is almost a plain PostgreSQL file, with a few extra features. You do not need to write the create table/view statement yourself: the result of running the query file is used to create the new table/view. For a table in append mode, the result is only append-inserted into the existing table.
|
134
|
+
eg.
|
135
|
+
|
136
|
+
```SQL
|
137
|
+
-- query file in database/sources/...
|
138
|
+
|
139
|
+
#include 'sub_query_a' --> :a -- include a sub query as object a from _sub_query_a.sql in same folder or database/shared/
|
140
|
+
#include 'sub_query_b' --> :b
|
141
|
+
|
142
|
+
|
143
|
+
select a.col1, a.col2, b.col1, b.col2, b.col3
|
144
|
+
from {a} -- use object a included from sub query file '_sub_query_a.sql'
|
145
|
+
join {b} on b.col1 = a.col1
|
146
|
+
-- In append mode, if this table is declared with the append update type, the start_time given on the command line (e.g. `-start_time 2016-11-08`) is used here. When this param is missing, the default [2 days ago] is used.
|
147
|
+
-- Outside append mode, the `start_date` configured in config/redata.yml is used as the default.
|
148
|
+
where a.col1 >= [start_time]
|
149
|
+
-- the current time in the configured timezone will be used (for the timezone setting, see config/redata.yml)
|
150
|
+
and a.col1 <= [current_time]
|
151
|
+
-- some params getting from command input such as `-param_from_command param_value`
|
152
|
+
and a.col2 = [param_from_command]
|
153
|
+
-- x days before today; x must be an integer
|
154
|
+
and b.col3 >= [x days ago]
|
155
|
+
```
|
156
|
+
|
157
|
+
### Convertor config file
|
158
|
+
|
159
|
+
A convertor file is used to generate a select query that reads data from Redshift and unloads it to S3. You do not need to write the unload query yourself. In append mode, only data from the start time (default: 2 days ago) onward is selected.
|
160
|
+
eg.
|
161
|
+
|
162
|
+
```
|
163
|
+
source: redshift_source_table_or_view
|
164
|
+
columns:
|
165
|
+
cm_id
|
166
|
+
segment_type{'C' => 0, 'T' => 1, 'M1' => 2, 'M2' => 3, 'M3' => 4, 'F1' => 5, 'F2' => 6, 'F3' => 7}
|
167
|
+
v
|
168
|
+
e
|
169
|
+
base_ai
|
170
|
+
sample_num
|
171
|
+
grp
|
172
|
+
```
|
173
|
+
|
174
|
+
> convertor config file in `database/convertors/...`
|
175
|
+
> `source` means the source table in redshift
|
176
|
+
> `columns` means the source columns in source table
|
177
|
+
|
178
|
+
### Command
|
179
|
+
|
180
|
+
There are 3 executable files in bin/
|
181
|
+
- redata --> manage redshift table/view and export data to mysql
|
182
|
+
- adjust --> run some single sql query file in redshift or mysql
|
183
|
+
- notice --> push notice to slack etc.
|
184
|
+
|
185
|
+
#### redata
|
186
|
+
|
187
|
+
Usage: `redata [-options] [action] [object key] {platform}`
|
188
|
+
+ action
|
189
|
+
- create --> create a table/view or append data to table in redshift
|
190
|
+
- delete --> delete a table/view in redshift
|
191
|
+
- checkout --> export data in table/view of redshift into S3
|
192
|
+
- inject --> import data into mysql table from S3
|
193
|
+
+ object key --> the object to create/delete/checkout/inject, as declared in `config/relations.rb`
|
194
|
+
+ platform --> when injecting data into mysql, there may be several platform declared in `config/database.yml{:export}` for same database, here is setting which platform to use. *If the platform here could not be found in `database.yml` or have not set platform, the default export will be used.*
|
195
|
+
+ options
|
196
|
+
- -dir --> project directory; both absolute and relative paths are accepted. Default is the current directory.
|
197
|
+
- -e --> environment: `production`, `development`, etc.
|
198
|
+
- -f --> force mode, use `CASCADE` when removing view or table in redshift
|
199
|
+
- -ssh --> use ssh accessing to private database with ssh config in `config/redata.yml`
|
200
|
+
- -append_mode --> use `append_mode`, the objects in relations.rb with appending update type will go to appending operation.
|
201
|
+
+ delete will only delete objects with renewal update type
|
202
|
+
+ create will append-insert data after `-start_time`(set in command) or default `2 days ago` for appending update type, still create table/view for renewal type
|
203
|
+
+ checkout will only fetch data after `-start_time` or default `2 days ago` to upload to S3, renewal type will still be uploaded all data
|
204
|
+
+ inject will insert data to mysql without `--delete` option, renewal still delete all firstly
|
205
|
+
- other options --> params that are substituted into the query file where declared, such as `start_time`
|
206
|
+
|
207
|
+
#### adjust
|
208
|
+
|
209
|
+
Use adjust when you just want to run a query file without declaring in `config/relations.rb`
|
210
|
+
Usage: `adjust [-options] [database] [query file] {platform}`
|
211
|
+
+ database --> `redshift` or database declared in `config/database.yml{export}`
|
212
|
+
+ query file --> name of the query file in `database/adjust/` to run, **without the `.sql` extension**
|
213
|
+
+ platform --> same to `redata`
|
214
|
+
+ options
|
215
|
+
- -dir --> project directory; both absolute and relative paths are accepted. Default is the current directory.
|
216
|
+
- -e --> environment: `production`, `development`, etc.
|
217
|
+
- -ssh --> use ssh accessing to private database with ssh config in `config/redata.yml`
|
218
|
+
- other options --> params that are substituted into the query file where declared, such as `start_time`
|
219
|
+
|
220
|
+
#### notice
|
221
|
+
|
222
|
+
Usage: `notice [-options] [action]`
|
223
|
+
+ action: currently, there is only `update` action which means send 'finish updating' message to slack
|
224
|
+
+ options
|
225
|
+
- -e --> environment: `production`, `development`, etc. **Only production could send notice**
|
226
|
+
|
227
|
+
## Contributing
|
228
|
+
|
229
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/goshan/redata.
|
230
|
+
|
231
|
+
|
232
|
+
## License
|
233
|
+
|
234
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
235
|
+
|
data/Rakefile
ADDED
data/bin/adjust
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Runs one ad-hoc query file from database/adjust/ against redshift or a
# mysql export database.
# Positional arguments: [database] [query file name without .sql] {platform}

require File.expand_path '../../lib/redata', __FILE__

red = Redata::RED
database, query_name, platform = red.params

query_file = red.root.join 'database', 'adjust', "#{query_name}.sql"
tmp_script_file = red.root.join 'tmp', 'queries', "adj_#{database}_#{query_name}.sql"

Redata::Log.action "QUERY: Run query file [#{query_file.relative_path_from red.root}] in #{database}"

# Expand the query file's placeholders into the temporary script that is run.
Redata::Parser.gen_adjust_file query_file, tmp_script_file

if database == "redshift"
  Redata::DATABASE.connect_with_file tmp_script_file
else
  # Any other database name is looked up as a mysql export target.
  Redata::DATABASE.connect_mysql_with_file tmp_script_file, database, platform
end
|
17
|
+
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Opens an IRB session with the gem loaded, for interactive experiments.

require "bundler/setup"
require "redata"
require "irb"

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console may be used instead, e.g.
# (remember to add pry to the Gemfile first):
#   require "pry"
#   Pry.start

IRB.start
|
data/bin/notice
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Pushes a notice to slack. The only supported action is `update`.

require 'slack'
require File.expand_path '../../lib/redata', __FILE__
require File.expand_path '../../lib/redata/notice', __FILE__

red = Redata::RED

if red.params[0] == 'update'
  # The client is built before the environment check, so token and channel
  # validation also runs outside production.
  notice = Redata::Notice.new

  if red.production?
    notice.send "今日の自動更新を完了しました!\n今のデータ期間は `2015-07-29 ~ #{red.date_days_ago 3}` になってます"
  else
    Redata::Log.warning "WARNING: Could send notice only in production env"
  end
end
|
20
|
+
|
data/bin/redata
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Manages redshift tables/views and the export of their data to mysql.
# Positional arguments: [action] [object key] {platform}

require File.expand_path '../../lib/redata', __FILE__

# The project's relation declarations must be loaded before any task runs.
require Redata::RED.root.join('config', 'relations.rb').to_s

action, key, platform = Redata::RED.params

# Unknown or missing actions fall through and do nothing.
case action
when 'create'   then Redata::Task.create_datasource key
when 'delete'   then Redata::Task.delete_datasource key
when 'checkout' then Redata::Task.checkout_datasource key
when 'inject'   then Redata::Task.inject key, platform
end
|
20
|
+
|
data/bin/setup
ADDED
data/lib/redata.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Redata
|
2
|
+
# Thin wrapper around the S3 bucket named by RED.s3['bucket'].
class S3Bucket
  def initialize
    @bucket = Aws::S3::Resource.new.bucket(RED.s3['bucket'])
  end

  # Moves the object at key +source+ to key +target+.
  # Does nothing when +source+ does not exist.
  def move(source, target)
    origin = @bucket.object(source)
    destination = @bucket.object(target)
    return unless origin.exists?

    origin.move_to destination
  end

  # Whether an object exists at key +file+.
  def exist?(file)
    @bucket.object(file).exists?
  end

  # Deletes the object at key +file+ when it exists.
  def delete(file)
    return unless exist?(file)

    @bucket.object(file).delete
  end

  # Sets the ACL of an existing object to 'public-read' (+is_public+ truthy)
  # or 'private'; missing objects are left alone.
  def make_public(file, is_public)
    acl = 'private'
    acl = 'public-read' if is_public
    return unless exist?(file)

    @bucket.object(file).acl.put({:acl => acl})
  end
end
|
27
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
module Redata
|
2
|
+
# Runtime configuration built from the command line and config/redata.yml.
class Config
  attr_accessor :root, :env, :is_forced, :is_ssh, :is_append, :params, :locals

  # argv - Array of command line arguments (or nil for none).
  #
  # Reads {root}/config/redata.yml (evaluated through ERB), configures the
  # AWS SDK with the s3 credentials and loads the configured timezone.
  def initialize(argv=nil)
    # params
    @argv = parse_params argv
    dir = @argv[:dir]
    @root =
      if !dir
        Pathname.new(Dir.pwd)
      elsif Pathname.new(dir).absolute?
        Pathname.new(dir)
      else
        Pathname.new(Dir.pwd).join dir
      end
    @env = @argv[:env] || 'development'
    @is_forced = @argv[:force]
    @is_ssh = @argv[:ssh]
    @is_append = @argv[:append_mode]
    @locals = @argv[:locals]
    @params = @argv[:params]

    # config file
    @config = YAML.load(ERB.new(File.read(@root.join 'config', 'redata.yml')).result(binding))
    @s3_config = @config['s3']
    # Collapse the per-environment bucket map to the bucket of this env.
    @s3_config['bucket'] = @s3_config['bucket'][@env]
    @s3_config['host'] = "https://s3-#{@s3_config['region']}.amazonaws.com/#{@s3_config['bucket']}"
    Aws.config.update({
      region: @s3_config['region'],
      credentials: Aws::Credentials.new(@s3_config['aws_access_key_id'], @s3_config['aws_secret_access_key'])
    })
    @tz_local = Timezone[@config['timezone']]
    # Holds the whole slack_bot section (token and channel), not only the token.
    @slack_token = @config['slack_bot']
  end

  def development?
    @env == 'development'
  end

  def production?
    @env == 'production'
  end

  # The start_date value from redata.yml.
  def default_start_date
    @config['start_date']
  end

  # The ssh section from redata.yml.
  def ssh
    @config['ssh']
  end

  # The s3 section from redata.yml, with 'bucket' resolved for the current
  # env and a derived 'host' URL added.
  def s3
    @s3_config
  end

  # The slack_bot section from redata.yml.
  def slack
    @slack_token
  end

  # Current time in the configured timezone, as 'YYYY-MM-DD HH:MM:SS'.
  def current_time
    @tz_local.utc_to_local(Time.now.utc).strftime('%Y-%m-%d %H:%M:%S')
  end

  # Default lower bound for append operations.
  def default_append_date
    # 2 days ago because there is only data 2 days ago in redshift
    date_days_ago 2
  end

  # Date +days+ days before now in the configured timezone, as 'YYYY-MM-DD'.
  def date_days_ago(days)
    @tz_local.utc_to_local(Time.now.utc-days*24*3600).strftime('%Y-%m-%d')
  end

  private

  # Splits argv into known options, free-form locals and positional params.
  #
  # Returns a Hash with :params (Array of positional arguments), :locals
  # (Hash of Symbol => value for every other `-name value` pair) and, when
  # given, :dir, :env, :force, :ssh and :append_mode.
  def parse_params(argv)
    parsed = {:params => [], :locals => {}}
    return parsed unless argv

    args = argv.dup
    until args.empty?
      arg = args.shift
      case arg
      when '-dir'
        parsed[:dir] = args.shift
      when '-e'
        parsed[:env] = args.shift
      when '-f'
        parsed[:force] = true
      when '-ssh'
        parsed[:ssh] = true
      when '-append_mode'
        parsed[:append_mode] = true
      when /\A-(.+)/
        # Anchored at the start: only arguments that begin with '-' are
        # options. A positional value that merely contains a hyphen
        # (e.g. `my-table`) must not swallow the following argument.
        parsed[:locals][Regexp.last_match(1).to_sym] = args.shift
      else
        parsed[:params].push arg
      end
    end
    parsed
  end
end
|
105
|
+
|
106
|
+
RED = Config.new ARGV
|
107
|
+
end
|
108
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module Redata
|
2
|
+
# Runs queries and data imports against redshift (psql) and the mysql export
# databases (mysql / mysqlimport), locally or through the Ssh helper.
class DataBase
  REDSHIFT_PORT = 5439

  # Loads the section of {root}/config/database.yml (evaluated through ERB)
  # for the current environment; aborts via Log.error! when it is missing.
  def initialize
    # An empty database.yml loads as nil/false; treat it as "nothing declared"
    # so the error below is reported instead of a NoMethodError.
    databases = YAML.load(ERB.new(File.read(RED.root.join 'config', 'database.yml')).result(binding)) || {}
    @config = databases[RED.env]
    Log.error! "ERROR: Database of #{RED.env} was not declared in config/database.yml" unless @config

    @ssh = Ssh.new
  end

  # Runs the sql script +file+ (a Pathname) in redshift.
  def connect_with_file(file)
    cmd = make_redshift_cmd
    if @ssh.run_with_ssh?
      @ssh.upload_file file
      @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{file.basename}"
    else
      system "#{cmd} -f #{file}"
    end
  end

  # Runs the single statement +query+ in redshift.
  # NOTE(review): the query is wrapped in single quotes without escaping, so
  # it must not contain single quotes itself.
  def connect_with_query(query)
    cmd = make_redshift_cmd
    if @ssh.run_with_ssh?
      @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -c '#{query}'"
    else
      system "#{cmd} -c '#{query}'"
    end
  end

  # Imports config.tmp_data_file into the mysql database of config.category
  # with mysqlimport. Existing rows are deleted first unless both append mode
  # is on and the relation's update type is :append.
  def inject_to_mysql(config, platform)
    if @ssh.run_with_ssh?
      @ssh.upload_file config.tmp_data_file, config.name
      data_file = "~/tmp/#{config.name}"
    else
      data_file = config.tmp_data_file
    end

    is_append = RED.is_append && config.update_type == :append
    cmd = "mysqlimport #{make_mysql_cmd_config(config.category.to_s, platform)} #{data_file} --local #{is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"

    run_mysql_cmd cmd
  end

  # Pipes the sql script +query_file+ (a Pathname) into the mysql export
  # database declared under +category+ (optionally narrowed by +platform+).
  def connect_mysql_with_file(query_file, category, platform)
    if @ssh.run_with_ssh?
      @ssh.upload_file query_file, query_file.basename
      data_file = "~/tmp/#{query_file.basename}"
    else
      data_file = query_file
    end

    run_mysql_cmd "mysql #{make_mysql_cmd_config(category, platform)} < #{data_file}"
  end

  private

  # Executes +cmd+ through ssh when ssh mode is on, locally otherwise.
  def run_mysql_cmd(cmd)
    if @ssh.run_with_ssh?
      @ssh.run_command cmd
    else
      system cmd
    end
  end

  # Builds the psql command line. The password is passed through the
  # PGPASSWORD environment variable, which this method sets as a side effect.
  def make_redshift_cmd
    ENV['PGPASSWORD'] = @config['password']
    "psql -h #{@config['host']} -p #{REDSHIFT_PORT} -U #{@config['username']} -d #{@config['database']}"
  end

  # Builds the connection arguments shared by mysql and mysqlimport.
  #
  # category - key under `export` in config/database.yml.
  # platform - optional sub-key of that export entry; when it is not
  #            declared a warning is logged and the entry itself is used.
  def make_mysql_cmd_config(category, platform)
    # A missing `export` section must reach the error below, not crash on nil.
    export_db_config = (@config['export'] || {})[category]
    Log.error! "ERROR: Export config of #{category} was not found in config/database.yml" unless export_db_config
    if platform
      if export_db_config[platform]
        export_db_config = export_db_config[platform]
      else
        Log.warning "WARNING: Platform #{platform} was not declared in config/database.yml, ignore platform setting"
      end
    end

    # to_s: a blank `password:` loads as nil and a numeric one as Integer.
    password = export_db_config['password'].to_s
    password_arg = password.empty? ? '' : "-p#{password}"
    "-h#{export_db_config['host']} -u#{export_db_config['username']} #{password_arg} #{export_db_config['database']}"
  end
end
|
88
|
+
|
89
|
+
DATABASE = DataBase.new
|
90
|
+
end
|
91
|
+
|
92
|
+
|
data/lib/redata/log.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Redata
|
2
|
+
# Console logger: every message is printed to stdout prefixed with the
# current time from RED.current_time.
class Log
  class << self
    # Prints +msg+ in red and terminates the process with status 1.
    def error!(msg)
      emit msg, :red
      exit 1
    end

    # Prints +msg+ in yellow.
    def warning(msg)
      emit msg, :yellow
    end

    # Prints +msg+ in cyan.
    def action(msg)
      emit msg, :cyan
    end

    # Prints +msg+ without colouring.
    def log(msg)
      emit msg
    end

    private

    # Writes one timestamped line; +color+ names the String colouring
    # method to apply to the message, if any.
    def emit(msg, color = nil)
      puts "[#{RED.current_time}] #{color ? msg.public_send(color) : msg}"
    end
  end
end
|
20
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Redata
|
2
|
+
# Posts messages to the slack channel configured in RED.slack.
class Notice
  # Builds the slack client and aborts via Log.error! when the bot token is
  # missing or the configured channel is not in the channel list.
  def initialize
    token = RED.slack['token']
    Log.error! "ERROR: slack bot token missing" if !token || token.empty?

    @slack = Slack::Client.new :token => token

    wanted = RED.slack['channel']
    channel_exist = @slack.channels_list['channels'].any? { |channel| channel['name'] == wanted }
    Log.error! "ERROR: slack channel #{RED.slack['channel']} not exists" unless channel_exist
  end

  # Posts +msg+ to the configured channel as the bot user, prefixed with an
  # @here mention.
  def send(msg)
    payload = {
      :channel => RED.slack['channel'],
      :text => "<!here> #{msg}",
      :as_user => true
    }
    @slack.chat_postMessage(payload)
  end
end
|
21
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module Redata
|
2
|
+
# Expands redata query files and convertor files into plain SQL scripts.
class Parser
  INCLUDE_REGEX = /#include (.*)-->(.*)/
  REF_REGEX = /{([^{}]+)}/
  REF_SPLIT_REGEX = /\s*{[^{}]+}\s*/
  START_TIME_REGEX = /\[start_time\]/
  TIME_OFFSET_REGEX = /\[(\d+) days ago\]/
  CURRENT_TIME_REGEX = /\[current_time\]/
  LOCALS_REGEX = /\[([^\[\]]+)\]/

  CONV_TABLE_REGEX = /source:(.*)/
  CONV_COLUMN_REGEX = /columns:\s*/
  CONV_SWITCHDEF_REGEX = /(.+){(.*)}/
  CONV_SWITCH_REGEX = /([^,]+)=>([^,]+)/
  CONV_TIMESTAMP_REGEX = /\[time_stamp\]/

  # Writes config.tmp_script_file for a table/view relation.
  #
  # With a start_time and a :table relation the expanded query is wrapped in
  # INSERT INTO; otherwise it is wrapped in CREATE <type> ... AS and
  # RED.default_start_date replaces any start_time given.
  def self.gen_redshift_query(config, start_time=nil)
    Log.error! "ERROR: Query file '#{config.query_file.relative_path_from RED.root}' not exists" unless config.query_file.exist?

    File.open config.tmp_script_file, 'w' do |f|
      if start_time && config.type == :table
        f.puts "INSERT INTO #{config.source_name} ("
      else
        start_time = RED.default_start_date
        f.puts "CREATE #{config.type} #{config.source_name} AS ("
      end
      self.parse_redshift_file config.query_file, f, start_time
      f.puts ");"
    end
  end

  # Writes config.tmp_script_file with an UNLOAD statement that exports the
  # select generated from config.conv_file to config.bucket_file on S3,
  # optionally restricted to rows with date >= start_time.
  def self.gen_export_query(config, start_time=nil)
    Log.error! "ERROR: Convertor config '#{config.conv_file.relative_path_from RED.root}' not exists" unless config.conv_file.exist?

    File.open config.tmp_script_file, 'w' do |f|
      f.puts "UNLOAD ('"
      f.puts self.parse_convertor_file config.conv_file
      f.puts "where date >= \\'#{start_time}\\'" if start_time
      f.puts "') to 's3://#{RED.s3['bucket']}/#{config.bucket_file}'"
      f.puts "CREDENTIALS 'aws_access_key_id=#{RED.s3['aws_access_key_id']};aws_secret_access_key=#{RED.s3['aws_secret_access_key']}'"
      f.puts "ESCAPE ALLOWOVERWRITE PARALLEL OFF DELIMITER AS '\\t';"
    end
  end

  # Expands query_file into tmp_script_file, using RED.default_start_date
  # for [start_time].
  def self.gen_adjust_file(query_file, tmp_script_file)
    Log.error! "ERROR: Query file '#{query_file.relative_path_from RED.root}' not exists" unless query_file.exist?

    File.open tmp_script_file, 'w' do |f|
      self.parse_redshift_file query_file, f, RED.default_start_date
    end
  end

  # NOTE: `private` does not apply to `def self.` methods, so the two
  # parse_* methods below remain publicly callable.
  private

  # Expands one query file line by line into +out+.
  #
  # in_file    - Pathname of the query file.
  # out        - IO-like object receiving the expanded SQL.
  # start_time - value substituted (single-quoted) for [start_time].
  #
  # Each line is handled by the first matching rule only: #include
  # declaration, {ref} expansion, [start_time], [N days ago],
  # [current_time], [local param], then plain copy.
  # NOTE(review): a line mixing several placeholder kinds gets only the
  # first kind substituted — confirm whether that is intended.
  def self.parse_redshift_file(in_file, out, start_time)
    links = {}
    # File.foreach closes the file when iteration ends; the previous
    # File.open(...).each left one handle open per (recursively) parsed file.
    File.foreach(in_file).with_index do |line, index|
      if line =~ INCLUDE_REGEX
        # parse include syntax
        res = line.scan(INCLUDE_REGEX).first
        sub = res[0].gsub(/[\s|\'|\"]+/, '')
        link = res[1].gsub(/[\s|:]+/, '')
        Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: include query is missing file or alias" if sub.empty? || link.empty?

        # Look next to the including file first, then in database/shared/.
        sub_file = in_file.parent.join "_#{sub}.sql"
        sub_file = RED.root.join 'database', 'shared', "_#{sub}.sql" unless sub_file.exist?
        Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: included file _#{sub}.sql could not be found in ./ or {root}/database/shared/" unless sub_file.exist?

        Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: alias #{link} was declared multiple times" if links[link]

        links[link] = sub_file
      elsif line =~ REF_REGEX
        # parse {ref} syntax
        refs = line.scan(REF_REGEX).map { |r| r.first.gsub(/\s+/, '') }
        origins = line.split REF_SPLIT_REGEX

        out.puts origins[0].gsub(';', '')
        refs.each_with_index do |ref, i|
          Log.error! "QUERY ERROR: #{in_file}:#{index+1}:\nsub query #{ref} not found." unless links[ref]
          # Inline the included file as a parenthesised, aliased sub query.
          out.puts "("
          self.parse_redshift_file links[ref], out, start_time
          out.puts ") as #{ref}"
          out.puts origins[i+1].gsub(';', '') if origins[i+1]
        end
      elsif line =~ START_TIME_REGEX
        # parse [start_time] syntax
        out.puts line.gsub(START_TIME_REGEX, "'#{start_time}'").gsub(';', '')
      elsif line =~ TIME_OFFSET_REGEX
        # parse [3 days ago]
        # NOTE(review): unlike [start_time], the date is inserted without
        # quotes and ';' is kept — confirm against existing query files.
        line.scan(TIME_OFFSET_REGEX).each do |offset|
          line = line.gsub "[#{offset[0]} days ago]", "#{RED.date_days_ago(offset[0].to_i)}"
        end
        out.puts line
      elsif line =~ CURRENT_TIME_REGEX
        # parse [current_time]; also inserted without quotes, ';' kept
        line = line.gsub "[current_time]", "#{RED.current_time}"
        out.puts line
      elsif line =~ LOCALS_REGEX
        # parse [locals] syntax
        line.scan(LOCALS_REGEX).each do |local|
          key = local.first
          Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
          line = line.gsub "[#{key}]", "'#{RED.locals[key.to_sym]}'"
        end
        out.puts line.gsub ';', ''
      else
        # other, print as is (minus ';')
        out.puts line.gsub ';', ''
      end
    end
  end

  # Builds "select <columns> from <source>" from a convertor file.
  #
  # Single quotes in the result are backslash-escaped because the select is
  # embedded in the quoted UNLOAD ('...') string.
  def self.parse_convertor_file(in_file)
    is_parsing_column = false
    columns = []
    source = ""
    # File.foreach closes the file when iteration ends.
    File.foreach(in_file) do |line|
      if line =~ CONV_TABLE_REGEX
        # parse table declare
        res = line.scan(CONV_TABLE_REGEX).first
        source = res[0].gsub(/\s+/, '')
        is_parsing_column = false
      elsif line =~ CONV_COLUMN_REGEX
        is_parsing_column = true
      elsif is_parsing_column
        line.gsub!(/\s+/, '')
        if line =~ CONV_SWITCHDEF_REGEX
          # column{'A' => 0, ...} becomes a case/when mapping on the column
          res = line.scan(CONV_SWITCHDEF_REGEX).first
          res[1].gsub!("'", "\\\\'")
          switches = res[1].scan(CONV_SWITCH_REGEX).map do |m|
            "when #{m[0]} then #{m[1]}"
          end
          columns.push "case #{res[0]} #{switches.join ' '} end as #{res[0]}"
        elsif line =~ CONV_TIMESTAMP_REGEX
          # [time_stamp] emits the timestamp literal twice.
          # NOTE(review): presumably two timestamp columns in the target
          # table; the +9h offset is hard-coded rather than taken from the
          # configured timezone — confirm.
          columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
          columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
        else
          columns.push line.gsub("'", "\\\\'").gsub('NULL', "\\\\'NULL\\\\'") unless line.empty?
        end
      end
    end
    "select #{columns.join ','} from #{source}"
  end
end
|
147
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Redata
|
2
|
+
# Base class for the objects declared in config/relations.rb.
class Relation
  attr_accessor :category, :name, :key, :file, :dir, :type, :update_type

  # category - Symbol grouping the relation; :main marks the global group.
  # name     - name of the relation.
  # setting  - Hash of options:
  #            :as   - key of the relation (default: name)
  #            :file - file base name (default: name)
  #            :dir  - sub directory (default: the category name, or none
  #                    for :main)
  def initialize(category, name, setting)
    @category = category
    @name = name
    # Always a Symbol: a String alias (`:as => 'alias'`) used to leave a
    # String key for :main relations while every other key was a Symbol.
    @key = (setting[:as] || @name).to_sym
    @file = setting[:file] || @name
    @dir = setting[:dir] || (@category == :main ? nil : @category.to_s)
  end

  # Key unique across categories: the plain key for :main relations,
  # "<category>_<key>" otherwise. Always a Symbol.
  def global_key
    @category == :main ? @key : "#{@category}_#{@key}".to_sym
  end
end
|
17
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Redata
  # Relation of type :export. It is described by a ".conv" convertor file and
  # is associated with a temporary query script, a temporary TSV file and a
  # bucket path.
  class Export < Relation
    # Reads the optional :update entry of setting; defaults to :renewal.
    def initialize(category, name, setting)
      super(category, name, setting)
      @type = :export
      @update_type = setting[:update] || :renewal
    end

    # <root>/database/convertors[/<dir>]/<file>.conv
    def conv_file
      folder = RED.root.join('database', 'convertors')
      folder = folder.join(@dir) if @dir
      folder.join("#{@file}.conv")
    end

    # <root>/tmp/queries/red<type>_<category>_<name>.sql
    def tmp_script_file
      RED.root.join('tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql")
    end

    # <root>/tmp/data/<name>.tsv (the category is not part of this path).
    def tmp_data_file
      RED.root.join('tmp', "data", "#{@name}.tsv")
    end

    # Bucket path "<prefix>/<category>/<name>.tsv". The prefix is the default
    # append date unless append mode is on, this export is an :append one and
    # a :start_time local is set, in which case that start time is used.
    def bucket_file
      prefix = RED.default_append_date
      appending = RED.is_append && @update_type == :append
      prefix = RED.locals[:start_time] if appending && RED.locals[:start_time]
      "#{prefix}/#{@category}/#{@name}.tsv"
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Redata
  # Relation of type :table, defined by a SQL file below
  # <root>/database/sources.
  class Table < Relation
    # Reads the optional :update entry of setting; defaults to :renewal.
    def initialize(category, name, setting)
      super(category, name, setting)
      @type = :table
      @update_type = setting[:update] || :renewal
    end

    # Name used for the relation in queries: the bare name in :main,
    # otherwise "<category>_<name>".
    def source_name
      return @name if @category == :main

      "#{@category}_#{@name}"
    end

    # <root>/database/sources[/<dir>]/<file>.sql
    def query_file
      folder = RED.root.join('database', 'sources')
      folder = folder.join(@dir) if @dir
      folder.join("#{@file}.sql")
    end

    # <root>/tmp/queries/red<type>_<category>_<name>.sql
    def tmp_script_file
      RED.root.join('tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql")
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Redata
  # Relation of type :view, defined by a SQL file below
  # <root>/database/sources. Its update type is always :renewal; the
  # :update entry of the setting hash is not consulted.
  class View < Relation
    def initialize(category, name, setting)
      super(category, name, setting)
      @type = :view
      @update_type = :renewal
    end

    # Name used for the relation in queries: the bare name in :main,
    # otherwise "<category>_<name>".
    def source_name
      return @name if @category == :main

      "#{@category}_#{@name}"
    end

    # <root>/database/sources[/<dir>]/<file>.sql
    def query_file
      folder = RED.root.join('database', 'sources')
      folder = folder.join(@dir) if @dir
      folder.join("#{@file}.sql")
    end

    # <root>/tmp/queries/red<type>_<category>_<name>.sql
    def tmp_script_file
      RED.root.join('tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql")
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# ruby default lib
|
2
|
+
require 'pathname'
|
3
|
+
require 'yaml'
|
4
|
+
require 'erb'
|
5
|
+
|
6
|
+
# gem lib
|
7
|
+
require 'json'
|
8
|
+
require 'colorize'
|
9
|
+
require 'aws-sdk'
|
10
|
+
require 'timezone'
|
11
|
+
|
12
|
+
# local lib
|
13
|
+
require 'redata/config'
|
14
|
+
require 'redata/log'
|
15
|
+
require 'redata/ssh'
|
16
|
+
require 'redata/database'
|
17
|
+
require 'redata/bucket'
|
18
|
+
require 'redata/relation'
|
19
|
+
require 'redata/relation/table'
|
20
|
+
require 'redata/relation/view'
|
21
|
+
require 'redata/relation/export'
|
22
|
+
require 'redata/schema'
|
23
|
+
require 'redata/parser'
|
24
|
+
require 'redata/tasks'
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Redata
  # Registry behind the relation-declaration DSL.
  #
  # Declarations (view / table / export / insert) are evaluated with
  # instance_eval against a Schema instance. Every declaration is turned into
  # a relation object, indexed by its global key and remembered in
  # declaration order.
  class Schema
    def initialize
      @category = :main
      @order = []
      @index = {}
    end

    # Evaluates the given block as DSL code against this schema.
    def config(&block)
      instance_eval(&block)
    end

    # Evaluates the block with +prefix+ as the current category.
    # The previously active category is restored afterwards, also when the
    # block raises, so a failing block cannot leak its category into later
    # declarations.
    def category(prefix, &block)
      previous = @category
      @category = prefix
      instance_eval(&block)
    ensure
      @category = previous
    end

    def view(view, setting={})
      register view, :view, setting
    end

    def table(table, setting={})
      register table, :table, setting
    end

    def export(target, setting={})
      register target, :export, setting
    end

    # NOTE(review): this resolves Redata::Insert, which is not among the
    # relation classes visible next to this file (Table, View, Export) -
    # confirm the class exists before using this declaration.
    def insert(target, setting={})
      register target, :insert, setting
    end

    # Returns the relation registered under the global key +key+, or nil.
    def config_with(key)
      @index[key]
    end

    # Returns the registered relations in declaration order, optionally
    # restricted to one category (nil/false means all categories) and to a
    # list of types (an empty list means all types).
    def category_configs(category, types=[])
      @order.map { |global_key| @index[global_key] }.select do |relation|
        (!category || relation.category == category) &&
          (types.empty? || types.include?(relation.type))
      end
    end

    private

    # Instantiates the relation class matching +type+ (e.g. :view ->
    # Redata::View) for the current category and stores it. A relation whose
    # global key is already taken is reported through Log.error!.
    def register(name, type, setting={})
      relation = Redata.const_get(type.capitalize).new(@category, name, setting)
      global_key = relation.global_key

      if @index[global_key]
        Log.log "in #{caller.first}"
        # The key has to be read from the relation: Schema itself has no
        # global_key method, so a bare reference raised NameError here.
        Log.error! "ERROR: Duplicated view alias '#{global_key}'"
      end

      @index[global_key] = relation
      @order.push global_key
    end
  end
end
|
data/lib/redata/ssh.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Redata
  # Thin wrapper around the scp/ssh command line tools, driven by the ssh
  # settings exposed by RED.ssh (reads the 'HostName', 'User' and
  # 'IdentityFile' entries).
  class Ssh
    def initialize
      @ssh = RED.ssh
    end

    # True only when ssh mode was requested, ssh settings are available and
    # the environment is not production. When ssh mode was requested but
    # cannot be used, the reason is logged as a warning.
    def run_with_ssh?
      return false unless RED.is_ssh

      in_production = RED.production?
      if @ssh && !in_production
        Log.warning "WARNING: Using gateway server #{@ssh['HostName']}"
        return true
      end

      Log.warning "WARNING: SSH config file was not found. Ignore this config." unless @ssh
      Log.warning "WARNING: Could not use ssh mode in production. Ignore this config." if in_production
      false
    end

    # Copies +file+ into ~/tmp/ on the gateway host via scp, optionally under
    # the name +target_file+. Returns whatever Kernel#system returns.
    def upload_file(file, target_file=nil)
      system("scp -i #{@ssh['IdentityFile']} #{file} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/#{target_file}")
    end

    # Runs +cmd+ on the gateway host via ssh; the command is passed inside
    # double quotes. Returns whatever Kernel#system returns.
    def run_command(cmd)
      system("ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"#{cmd}\"")
    end
  end
end
|
data/lib/redata/tasks.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
module Redata
  # Entry points of the command line tasks. Each task resolves a key to a
  # list of relations from the shared schema and processes them in order.
  class Task
    @@schema = Schema.new

    # The schema shared by all tasks.
    def self.schema
      @@schema
    end

    # Generates and runs the query script of every table/view selected by
    # +key+. In append mode, relations with update type :append only get the
    # data after the start time (the :start_time local or the default append
    # date).
    def self.create_datasource(key)
      self.parse_key(key, [:table, :view]).each do |config|
        if RED.is_append && config.update_type == :append
          start_time = RED.locals[:start_time] || RED.default_append_date
          Parser.gen_redshift_query config, start_time
          Log.action "QUERY: Append data after #{start_time} into [#{config.source_name}]"
        else
          Parser.gen_redshift_query config
          Log.action "QUERY: Create #{config.type} [#{config.source_name}]"
        end
        DATABASE.connect_with_file config.tmp_script_file
      end
    end

    # Drops every table/view selected by +key+, in reverse declaration order.
    # Relations that are appended to in the current run are left untouched.
    def self.delete_datasource(key)
      self.parse_key(key, [:table, :view]).reverse_each do |config|
        next if RED.is_append && config.update_type == :append

        Log.action "QUERY: Drop #{config.type} [#{config.source_name}]"
        Log.warning "WARNING: CASCADE mode will also drop other sources that depend on this #{config.type}" if RED.is_forced
        DATABASE.connect_with_query "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'}"
      end
    end

    # Generates and runs the export script of every export selected by +key+,
    # then moves the resulting "<bucket_file>000" object to its final name.
    def self.checkout_datasource(key)
      self.parse_key(key, [:export]).each do |config|
        if RED.is_append && config.update_type == :append
          start_time = RED.locals[:start_time] || RED.default_append_date
          Parser.gen_export_query config, start_time
          Log.action "QUERY: Checkout data after #{start_time} to bucket [#{config.bucket_file}]"
        else
          Parser.gen_export_query config
          Log.action "QUERY: Checkout data to bucket [#{config.bucket_file}]"
        end
        DATABASE.connect_with_file config.tmp_script_file
        bucket = S3Bucket.new
        bucket.move "#{config.bucket_file}000", config.bucket_file
      end
    end

    # Downloads every export selected by +key+ from the bucket and hands it to
    # DATABASE.inject_to_mysql. The object is made public only for the
    # duration of the download.
    def self.inject(key, platform=nil)
      self.parse_key(key, [:export]).each do |config|
        Log.action "BUCKET: Make [#{config.bucket_file}] public"
        bucket = S3Bucket.new
        bucket.make_public config.bucket_file, true

        Log.action "DOWNLOAD: Download [#{config.bucket_file}] from bucket"
        downloaded = system "wget #{RED.s3['host']}/#{config.bucket_file} -O #{config.tmp_data_file} --quiet"

        Log.action "BUCKET: Make [#{config.bucket_file}] private"
        bucket.make_public config.bucket_file, false

        # Checked only after the object is private again, so a failed download
        # never leaves it public and never feeds a broken file to MySQL.
        Log.error! "ERROR: Failed to download [#{config.bucket_file}] from bucket" unless downloaded

        Log.action "QUERY: Inject data to [#{config.name}] of #{config.category}"
        DATABASE.inject_to_mysql config, platform
      end
    end

    # Resolves +key+ to relations of the given +types+.
    #
    # +key+ is first treated as a category name (nil selects every category);
    # when that yields nothing it is looked up as the global key of a single
    # relation. A single relation is only accepted when its type is one of
    # +types+, so e.g. an export key can no longer reach the table/view tasks.
    #
    # NOTE: a bare `private` does not apply to `def self.` methods, so this
    # helper has always been publicly callable; that is kept as is.
    def self.parse_key(key, types)
      key = key.to_sym if key

      configs = @@schema.category_configs(key, types)
      return configs unless configs.empty?

      config = key ? @@schema.config_with(key) : nil
      if config && !types.include?(config.type)
        Log.error! "ERROR: Data source relation #{key} is a #{config.type}, expected one of: #{types.join(', ')}"
        config = nil
      elsif !config
        Log.error! "ERROR: Data source relation #{key} was not defined in config/relations.rb"
      end

      # Never hand a nil entry to the callers.
      [config].compact
    end
  end
end
|
84
|
+
|
data/redata.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for redata. The version comes from lib/redata/version.rb,
# so lib/ is put on the load path before requiring it.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'redata/version'

Gem::Specification.new do |gem|
  gem.name          = "redata"
  gem.version       = Redata::VERSION
  gem.authors       = ["goshan"]
  gem.email         = ["goshan.hanqiu@gmail.com"]

  gem.summary       = %q{a AWS Redshift data process controller}
  gem.description   = %q{Controll data process by sub query and easy command line}
  gem.homepage      = "https://github.com/goshan/redata"
  gem.license       = "MIT"

  # Every file tracked by git except the test directories.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |f| f.match(%r{^(test|spec|features)/}) }
  gem.executables   = ["redata", "adjust", "notice"]
  gem.require_paths = ["lib"]

  # Development-only dependencies.
  {
    "bundler" => "~> 1.12",
    "rake"    => "~> 10.0"
  }.each do |dep_name, requirement|
    gem.add_development_dependency dep_name, requirement
  end

  # Runtime dependencies, in the original declaration order.
  {
    "json"      => "~> 2.0",
    "colorize"  => "~> 0.8",
    "aws-sdk"   => "~> 2.6",
    "timezone"  => "~> 1.2",
    "slack-api" => "~> 1.2"
  }.each do |dep_name, requirement|
    gem.add_runtime_dependency dep_name, requirement
  end
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redata
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- goshan
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-12-09 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: json
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: colorize
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.8'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.8'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: aws-sdk
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.6'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.6'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: timezone
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: slack-api
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.2'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.2'
|
111
|
+
description: Controll data process by sub query and easy command line
|
112
|
+
email:
|
113
|
+
- goshan.hanqiu@gmail.com
|
114
|
+
executables:
|
115
|
+
- redata
|
116
|
+
- adjust
|
117
|
+
- notice
|
118
|
+
extensions: []
|
119
|
+
extra_rdoc_files: []
|
120
|
+
files:
|
121
|
+
- ".gitignore"
|
122
|
+
- Gemfile
|
123
|
+
- LICENSE.txt
|
124
|
+
- README.md
|
125
|
+
- Rakefile
|
126
|
+
- bin/adjust
|
127
|
+
- bin/console
|
128
|
+
- bin/notice
|
129
|
+
- bin/redata
|
130
|
+
- bin/setup
|
131
|
+
- lib/redata.rb
|
132
|
+
- lib/redata/bucket.rb
|
133
|
+
- lib/redata/config.rb
|
134
|
+
- lib/redata/database.rb
|
135
|
+
- lib/redata/log.rb
|
136
|
+
- lib/redata/notice.rb
|
137
|
+
- lib/redata/parser.rb
|
138
|
+
- lib/redata/relation.rb
|
139
|
+
- lib/redata/relation/export.rb
|
140
|
+
- lib/redata/relation/table.rb
|
141
|
+
- lib/redata/relation/view.rb
|
142
|
+
- lib/redata/requires.rb
|
143
|
+
- lib/redata/schema.rb
|
144
|
+
- lib/redata/ssh.rb
|
145
|
+
- lib/redata/tasks.rb
|
146
|
+
- lib/redata/version.rb
|
147
|
+
- redata.gemspec
|
148
|
+
homepage: https://github.com/goshan/redata
|
149
|
+
licenses:
|
150
|
+
- MIT
|
151
|
+
metadata: {}
|
152
|
+
post_install_message:
|
153
|
+
rdoc_options: []
|
154
|
+
require_paths:
|
155
|
+
- lib
|
156
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - ">="
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: '0'
|
161
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">="
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
requirements: []
|
167
|
+
rubyforge_project:
|
168
|
+
rubygems_version: 2.4.5
|
169
|
+
signing_key:
|
170
|
+
specification_version: 4
|
171
|
+
summary: a AWS Redshift data process controller
|
172
|
+
test_files: []
|