redata 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +88 -98
- data/bin/adjust +11 -6
- data/bin/notice +5 -7
- data/bin/redata +3 -3
- data/lib/redata/config.rb +26 -14
- data/lib/redata/database.rb +26 -51
- data/lib/redata/notice.rb +15 -2
- data/lib/redata/parser.rb +138 -107
- data/lib/redata/relation.rb +27 -1
- data/lib/redata/relation/table.rb +0 -16
- data/lib/redata/relation/view.rb +4 -12
- data/lib/redata/requires.rb +0 -1
- data/lib/redata/ssh.rb +6 -2
- data/lib/redata/tasks.rb +28 -31
- data/lib/redata/version.rb +1 -1
- metadata +2 -3
- data/lib/redata/relation/export.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba347eb745baeb8cf5ffee03842482bfcc2b98d7
|
4
|
+
data.tar.gz: cb86f147ae139d3e87af8afa6e74a823abe02c02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47cabaf3d9fcab6058a587c823eeee1a6bc39a33051b7675e176dcd7b812e33678ecd89e2f4b4463747813ade4f287947119623b7ec19b45cbf9dbf316a87dc4
|
7
|
+
data.tar.gz: 13991f7d3daa93f15f31bd1e6654147098c98f36f7d1eb19f2003f58792ede7c9b806be33571593d138db5f2425ef9e63444103b8ff9063d98778e14bfdd9dc7
|
data/README.md
CHANGED
@@ -26,22 +26,25 @@ Or install it yourself as:
|
|
26
26
|
+ config `config/redata.yml` for general setting
|
27
27
|
|
28
28
|
```YAML
|
29
|
-
|
29
|
+
create_interval: # default date for create mode
|
30
|
+
start_time: "2016-04-04"
|
31
|
+
end_time: 2 # days ago
|
32
|
+
append_interval: # date fetching interval for append mode
|
33
|
+
start_time: 3 # days ago
|
34
|
+
end_time: 2 # days ago
|
30
35
|
timezone: "Asia/Tokyo"
|
36
|
+
keep_tmp: true # or false. whether to keep the temp query files in ./tmp after the query has finished
|
31
37
|
s3:
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
bucket:
|
36
|
-
production: {bucket_name}
|
37
|
-
development: {bucket_name}
|
38
|
+
bucket: bucket_name
|
39
|
+
aws_access_key_id: key_id
|
40
|
+
aws_secret_access_key: key_secret
|
38
41
|
ssh: # this setting will be used in ssh mode when you access private database
|
39
|
-
HostName:
|
40
|
-
IdentityFile:
|
41
|
-
User:
|
42
|
+
HostName: gateway_host
|
43
|
+
IdentityFile: ~/.ssh/key.pem
|
44
|
+
User: username
|
42
45
|
slack_bot: # this setting will be used for slack notice push
|
43
|
-
token:
|
44
|
-
channel:
|
46
|
+
token: bot_token
|
47
|
+
channel: slack_channel
|
45
48
|
```
|
46
49
|
|
47
50
|
+ config `config/database.yml` for development and production environment in redshift database
|
@@ -53,12 +56,15 @@ development:
|
|
53
56
|
username: user
|
54
57
|
password: ''
|
55
58
|
database: dev
|
56
|
-
|
57
|
-
app: #
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
deploy: # target platform (mysql db or local dir) that data is exported to
|
60
|
+
app: # category name, using database
|
61
|
+
pro: # stage name (optional: the settings can also be declared directly under the category)
|
62
|
+
username: root
|
63
|
+
password: ''
|
64
|
+
host: localhost
|
65
|
+
database: app
|
66
|
+
file: # another category, using local file
|
67
|
+
local_dir: '~/data'
|
62
68
|
```
|
63
69
|
|
64
70
|
+ config `config/relations.rb` for data object in redshift and exporting process to mysql
|
@@ -69,60 +75,38 @@ Redata::Task.schema.config do
|
|
69
75
|
# Example of declaring a global table
|
70
76
|
table 'table_name'
|
71
77
|
# This declaration means
|
72
|
-
# query file: database/sources/table_name.sql
|
78
|
+
# query file: database/sources/table_name.red.sql
|
73
79
|
# redshift table: table_name
|
74
|
-
# update type: renewal, delete and re-create when update
|
75
80
|
# key used in command line: table_name
|
76
|
-
|
81
|
+
|
77
82
|
# Example of declaring a global table with customizing options
|
78
|
-
table 'new_table_name', :dir => 'dir', :file => 'query_file', :
|
83
|
+
table 'new_table_name', :dir => 'dir', :file => 'query_file', :as => :alias
|
79
84
|
# This declaration means
|
80
|
-
# query file: database/sources/dir/query_file.sql
|
85
|
+
# query file: database/sources/dir/query_file.red.sql
|
81
86
|
# redshift table: new_table_name
|
82
|
-
# update type: append, only appending to existing table
|
83
87
|
# key used in command line: alias
|
84
|
-
|
85
|
-
# view is same to table but
|
86
|
-
|
87
|
-
|
88
|
-
|
88
|
+
|
89
|
+
# view is the same as table but will still be created in append_mode
|
90
|
+
view 'view_name'
|
91
|
+
view 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
|
92
|
+
|
89
93
|
# Example of declaring with category
|
90
94
|
category :test_category do
|
91
95
|
table 'test_table'
|
92
96
|
# This declaration means
|
93
|
-
# query file: database/sources/test_category/test_table.sql
|
97
|
+
# query file: database/sources/test_category/test_table.red.sql
|
94
98
|
# redshift table: test_category_test_table
|
95
|
-
# update type: renewal
|
96
99
|
# key used in command line: test_category_test_table
|
97
|
-
|
98
|
-
table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :
|
100
|
+
|
101
|
+
table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
|
99
102
|
# This declaration means
|
100
|
-
# query file: database/sources/dir/query_file_oth.sql
|
103
|
+
# query file: database/sources/dir/query_file_oth.red.sql
|
101
104
|
# redshift table: test_category_test_table
|
102
|
-
# update type: append
|
103
105
|
# key used in command line: test_category_alias_oth
|
104
|
-
|
106
|
+
|
105
107
|
# view is same to table without appending update type
|
106
108
|
view 'test_view'
|
107
109
|
view 'test_view_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_view_oth
|
108
|
-
|
109
|
-
#Example of convertor declaration
|
110
|
-
export 'test_export'
|
111
|
-
# This declaration means
|
112
|
-
# convertor file: database/convertors/test_category/test_export.conv
|
113
|
-
# target mysql database name: test_category (Also see: export config in config/database.yml{:export})
|
114
|
-
# target mysql table: test_export
|
115
|
-
# update type: renewal, delete all records and insert new records
|
116
|
-
# key used in command line: test_category_test_export
|
117
|
-
|
118
|
-
#Example of convertor declaration
|
119
|
-
export 'test_export', :dir => 'dir', :file => 'conv_file', :update => 'append', :as => 'alias_export'
|
120
|
-
# This declaration means
|
121
|
-
# convertor file: database/convertors/dir/conv_file.conv
|
122
|
-
# target mysql database name: test_category
|
123
|
-
# target mysql table: test_export
|
124
|
-
# update type: append, append insert new records without deleting
|
125
|
-
# key used in command line: test_category_alias_export
|
126
110
|
end
|
127
111
|
|
128
112
|
end
|
@@ -130,50 +114,40 @@ end
|
|
130
114
|
|
131
115
|
### Query file
|
132
116
|
|
133
|
-
Query file was used for create table
|
117
|
+
A query file is used to create a table or view in redshift. It is almost like a PostgreSQL file but with some new features. You do not need to write a create table/view query; the result of running the query file will be used to create a new table/view. For a table, if you use append mode, the result will only be append-inserted into the table.
|
134
118
|
eg.
|
135
119
|
|
136
120
|
```SQL
|
137
|
-
-- query file in data/sources
|
121
|
+
-- query file in data/sources/*.red.sql
|
138
122
|
|
139
|
-
#
|
140
|
-
#
|
123
|
+
#load 'sub_query_a' -> :a -- include a sub query as object a from _sub_query_a.red.sql in the same folder
|
124
|
+
#load 'sub_query_b' -> :b
|
141
125
|
|
142
126
|
|
127
|
+
-- you can use if logic to control whether part of a query is run
|
128
|
+
-- 'endif' closes one or more consecutive if blocks above. (a second or later consecutive if acts just like 'else if')
|
129
|
+
-- TIPS: 'else if', 'else' syntax and nested if logic are not supported yet
|
130
|
+
[if var is 'value1']
|
143
131
|
select a.col1, a.col2, b.col1, b.col2, b.col3
|
132
|
+
[if var is 'value2']
|
133
|
+
select a.col3, b.col4
|
134
|
+
[endif]
|
144
135
|
from {a} -- use object a included from sub query file '_sub_query_a.red.sql'
|
145
136
|
join {b} on b.col1 = a.col1
|
146
|
-
--
|
147
|
-
--
|
137
|
+
-- For [start_time] and [end_time], there are 3 options.
|
138
|
+
-- use command params when set
|
139
|
+
-- in append mode, use [append_interval][start_time] or [append_interval][end_time] (See config/redata.yml).
|
140
|
+
-- in create mode, use [create_interval][start_time] or [create_interval][end_time] (See config/redata.yml).
|
148
141
|
where a.col1 >= [start_time]
|
149
|
-
|
150
|
-
and a.col1 <= [current_time]
|
142
|
+
and a.col1 < [end_time]
|
151
143
|
-- some params getting from command input such as `-param_from_command param_value`
|
152
144
|
and a.col2 = [param_from_command]
|
145
|
+
-- current time in the configured timezone will be used (about timezone, also see config/redata.yml)
|
146
|
+
and b.col2 <= [current_time]
|
153
147
|
-- x days before today, x will be a integer
|
154
148
|
and b.col3 >= [x days ago]
|
155
149
|
```
|
156
150
|
|
157
|
-
### Convertor config file
|
158
|
-
|
159
|
-
Convertor file was used to generate a select query to get data from redshift and unload to S3. But you have no need to wirte a unload query. If you are using append mode, only data 2 days ago will be select.
|
160
|
-
eg.
|
161
|
-
|
162
|
-
```
|
163
|
-
source: redshift_source_table_or_view
|
164
|
-
columns:
|
165
|
-
cm_id
|
166
|
-
segment_type{'C' => 0, 'T' => 1, 'M1' => 2, 'M2' => 3, 'M3' => 4, 'F1' => 5, 'F2' => 6, 'F3' => 7}
|
167
|
-
v
|
168
|
-
e
|
169
|
-
base_ai
|
170
|
-
sample_num
|
171
|
-
grp
|
172
|
-
```
|
173
|
-
|
174
|
-
> convertor config file in `data/convertors/...`
|
175
|
-
> `source` means the source table in redshift
|
176
|
-
> `columns` means the source columns in source table
|
177
151
|
|
178
152
|
### Command
|
179
153
|
|
@@ -184,32 +158,28 @@ There are 3 executable file in bin/
|
|
184
158
|
|
185
159
|
#### redata
|
186
160
|
|
187
|
-
Usage: `redata [-options] [action] [object key] {
|
161
|
+
Usage: `redata [-options] [action] [object key] {stage}`
|
188
162
|
+ action
|
189
163
|
- create --> create a table/view or append data to table in redshift
|
190
164
|
- delete --> delete a table/view in redshift
|
191
165
|
- checkout --> export data in table/view of redshift into S3
|
192
|
-
-
|
193
|
-
+ object key --> object
|
194
|
-
+
|
166
|
+
- deploy --> deploy data from S3 to local db or file
|
167
|
+
+ object key --> object declared in `config/relations.rb` which will be created/deleted/checked out/deployed
|
168
|
+
+ stage --> when injecting data into mysql, there may be several stages declared in `config/database.yml{:deploy}` for the same database; this chooses which stage to use.
|
195
169
|
+ options
|
196
170
|
- -dir --> project directory; both absolute and relative paths are okay. Default is the current directory.
|
197
171
|
- -e --> environment: `production`, `development`, etc.
|
198
172
|
- -f --> force mode, use `CASCADE` when removing view or table in redshift
|
199
173
|
- -ssh --> use ssh accessing to private database with ssh config in `config/redata.yml`
|
200
|
-
- -
|
201
|
-
+ delete will only delete objects with renewal update type
|
202
|
-
+ create will append-insert data after `-start_time`(set in command) or default `2 days ago` for appending update type, still create table/view for renewal type
|
203
|
-
+ checkout will only fetch data after `-start_time` or default `2 days ago` to upload to S3, renewal type will still be uploaded all data
|
204
|
-
+ inject will insert data to mysql without `--delete` option, renewal still delete all firstly
|
174
|
+
- -append --> use `append_mode`, append new data into existing table for redshift or inject into local db without deleting. view has no append mode.
|
205
175
|
- other options --> some params will be used in the query file when declared, such as `start_time`
|
206
176
|
|
207
177
|
#### adjust
|
208
178
|
|
209
|
-
Use adjust when you just want to run a query file without declaring in `config/relations.rb`
|
179
|
+
Use adjust when you just want to run a query file without declaring in `config/relations.rb`
|
210
180
|
Usage: `adjust [-options] [database] [query file] {platform}`
|
211
|
-
+ database --> `redshift` or database declared in `config/database.yml{
|
212
|
-
+ query file --> query file which will be run in `database/adjust/`, **without extends `.sql`**
|
181
|
+
+ database --> `redshift` or database declared in `config/database.yml{:deploy}`
|
182
|
+
+ query file --> query file which will be run in `database/adjust/`, **without extends `.red.sql`**
|
213
183
|
+ platform --> same to `redata`
|
214
184
|
+ options
|
215
185
|
- -dir --> project directory; both absolute and relative paths are okay. Default is the current directory.
|
@@ -220,9 +190,9 @@ Usage: `adjust [-options] [database] [query file] {platform}`
|
|
220
190
|
#### notice
|
221
191
|
|
222
192
|
Usage: `notice [-options] [action]`
|
223
|
-
+ action
|
224
|
-
|
225
|
-
-
|
193
|
+
+ action
|
194
|
+
- log --> send a message to slack with a log file
|
195
|
+
- mention --> send a message to slack mentioning someone
|
226
196
|
|
227
197
|
## Contributing
|
228
198
|
|
@@ -231,5 +201,25 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/goshan
|
|
231
201
|
|
232
202
|
## License
|
233
203
|
|
234
|
-
|
204
|
+
Copyright 2013, Han Qiu(goshan), All rights reserved.
|
205
|
+
|
206
|
+
MIT License
|
207
|
+
|
208
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
209
|
+
of this software and associated documentation files (the "Software"), to deal
|
210
|
+
in the Software without restriction, including without limitation the rights
|
211
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
212
|
+
copies of the Software, and to permit persons to whom the Software is
|
213
|
+
furnished to do so, subject to the following conditions:
|
214
|
+
|
215
|
+
The above copyright notice and this permission notice shall be included in
|
216
|
+
all copies or substantial portions of the Software.
|
217
|
+
|
218
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
219
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
220
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
221
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
222
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
223
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
224
|
+
THE SOFTWARE.
|
235
225
|
|
data/bin/adjust
CHANGED
@@ -3,15 +3,20 @@
|
|
3
3
|
require File.expand_path '../../lib/redata', __FILE__
|
4
4
|
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
temp_config = OpenStruct.new
|
7
|
+
temp_config.query_file = Redata::RED.root.join 'database', 'adjust', "#{Redata::RED.params[1]}.sql"
|
8
|
+
temp_config.tmp_file_dir = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}"
|
9
|
+
temp_config.tmp_exec_file = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}", "exec.sql"
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
+
Dir.mkdir temp_config.tmp_file_dir unless temp_config.tmp_file_dir.exist?
|
12
|
+
|
13
|
+
Redata::Log.action "ADJUST<#{Redata::RED.params[0]}>: use [#{temp_config.query_file.relative_path_from Redata::RED.root}]"
|
14
|
+
Redata::Parser.gen_adjust_query temp_config
|
11
15
|
|
12
16
|
if Redata::RED.params[0] == "redshift"
|
13
|
-
Redata::DATABASE.
|
17
|
+
Redata::DATABASE.connect_redshift temp_config
|
14
18
|
else
|
15
|
-
Redata::DATABASE.
|
19
|
+
Redata::DATABASE.connect_mysql temp_config.tmp_exec_file, Redata::RED.params[0], Redata::RED.params[2]
|
16
20
|
end
|
17
21
|
|
22
|
+
FileUtils.rm_r temp_config.tmp_file_dir if temp_config.tmp_file_dir.exist?
|
data/bin/notice
CHANGED
@@ -6,15 +6,13 @@ require File.expand_path '../../lib/redata/notice', __FILE__
|
|
6
6
|
|
7
7
|
|
8
8
|
|
9
|
-
if ['
|
9
|
+
if ['log', 'mention'].include? Redata::RED.params[0]
|
10
10
|
notice = Redata::Notice.new
|
11
11
|
case Redata::RED.params[0]
|
12
|
-
when '
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
Redata::Log.warning "WARNING: Could send notice only in production env"
|
17
|
-
end
|
12
|
+
when 'log'
|
13
|
+
notice.log Redata::RED.params[1], Redata::RED.params[2]
|
14
|
+
when 'mention'
|
15
|
+
notice.mention Redata::RED.params[1], Redata::RED.params[2]
|
18
16
|
end
|
19
17
|
end
|
20
18
|
|
data/bin/redata
CHANGED
@@ -5,7 +5,7 @@ require File.expand_path '../../lib/redata', __FILE__
|
|
5
5
|
require Redata::RED.root.join('config', 'relations.rb').to_s
|
6
6
|
|
7
7
|
|
8
|
-
if ['create', 'delete', 'checkout', '
|
8
|
+
if ['create', 'delete', 'checkout', 'deploy'].include? Redata::RED.params[0]
|
9
9
|
case Redata::RED.params[0]
|
10
10
|
when 'create'
|
11
11
|
Redata::Task.create_datasource Redata::RED.params[1]
|
@@ -13,8 +13,8 @@ if ['create', 'delete', 'checkout', 'inject'].include? Redata::RED.params[0]
|
|
13
13
|
Redata::Task.delete_datasource Redata::RED.params[1]
|
14
14
|
when 'checkout'
|
15
15
|
Redata::Task.checkout_datasource Redata::RED.params[1]
|
16
|
-
when '
|
17
|
-
Redata::Task.
|
16
|
+
when 'deploy'
|
17
|
+
Redata::Task.deploy_datasource Redata::RED.params[1], Redata::RED.params[2]
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
data/lib/redata/config.rb
CHANGED
@@ -22,7 +22,8 @@ module Redata
|
|
22
22
|
# config file
|
23
23
|
@config = YAML.load(ERB.new(File.read(@root.join 'config', 'redata.yml')).result(binding))
|
24
24
|
@s3_config = @config['s3']
|
25
|
-
@s3_config['bucket']
|
25
|
+
@s3_config['bucket'] += "-dev" unless @env == 'production'
|
26
|
+
@s3_config['region'] = 'ap-northeast-1'
|
26
27
|
@s3_config['host'] = "https://s3-#{@s3_config['region']}.amazonaws.com/#{@s3_config['bucket']}"
|
27
28
|
Aws.config.update({
|
28
29
|
region: @s3_config['region'],
|
@@ -30,6 +31,7 @@ module Redata
|
|
30
31
|
})
|
31
32
|
@tz_local = Timezone[@config['timezone']]
|
32
33
|
@slack_token = @config['slack_bot']
|
34
|
+
@keep_tmp = @config['keep_tmp']
|
33
35
|
end
|
34
36
|
|
35
37
|
def development?
|
@@ -40,8 +42,26 @@ module Redata
|
|
40
42
|
@env == 'production'
|
41
43
|
end
|
42
44
|
|
43
|
-
def
|
44
|
-
@
|
45
|
+
def keep_tmp?
|
46
|
+
@keep_tmp
|
47
|
+
end
|
48
|
+
|
49
|
+
def start_time
|
50
|
+
return @locals[:start_time] if @locals[:start_time]
|
51
|
+
if @is_append
|
52
|
+
@tz_local.utc_to_local(Time.now.utc-@config['append_interval']['start_time']*24*3600).strftime('%Y-%m-%d')
|
53
|
+
else
|
54
|
+
@config['create_interval']['start_time']
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def end_time
|
59
|
+
return @locals[:end_time] if @locals[:end_time]
|
60
|
+
if @is_append
|
61
|
+
@tz_local.utc_to_local(Time.now.utc-@config['append_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
|
62
|
+
else
|
63
|
+
@tz_local.utc_to_local(Time.now.utc-@config['create_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
|
64
|
+
end
|
45
65
|
end
|
46
66
|
|
47
67
|
def ssh
|
@@ -60,11 +80,6 @@ module Redata
|
|
60
80
|
@tz_local.utc_to_local(Time.now.utc).strftime('%Y-%m-%d %H:%M:%S')
|
61
81
|
end
|
62
82
|
|
63
|
-
def default_append_date
|
64
|
-
# 2 days ago bacause there is only data 2 days ago in redshift
|
65
|
-
@tz_local.utc_to_local(Time.now.utc-2*24*3600).strftime('%Y-%m-%d')
|
66
|
-
end
|
67
|
-
|
68
83
|
def date_days_ago(days)
|
69
84
|
@tz_local.utc_to_local(Time.now.utc-days*24*3600).strftime('%Y-%m-%d')
|
70
85
|
end
|
@@ -76,9 +91,6 @@ module Redata
|
|
76
91
|
i = 0
|
77
92
|
while i < argv.count
|
78
93
|
case argv[i]
|
79
|
-
when '-dir'
|
80
|
-
i += 1
|
81
|
-
new_argv[:dir] = argv[i]
|
82
94
|
when '-e'
|
83
95
|
i += 1
|
84
96
|
new_argv[:env] = argv[i]
|
@@ -86,11 +98,11 @@ module Redata
|
|
86
98
|
new_argv[:force] = true
|
87
99
|
when '-ssh'
|
88
100
|
new_argv[:ssh] = true
|
89
|
-
when '-
|
101
|
+
when '-append'
|
90
102
|
new_argv[:append_mode] = true
|
91
103
|
else
|
92
|
-
if argv[i] =~
|
93
|
-
key = argv[i].match(
|
104
|
+
if argv[i] =~ /\A-(.+)/
|
105
|
+
key = argv[i].match(/\A-(.+)/)[1]
|
94
106
|
i += 1
|
95
107
|
new_argv[:locals][key.to_sym] = argv[i]
|
96
108
|
else
|
data/lib/redata/database.rb
CHANGED
@@ -9,59 +9,44 @@ module Redata
|
|
9
9
|
@ssh = Ssh.new
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
12
|
+
def connect_redshift(config)
|
13
13
|
cmd = make_redshift_cmd
|
14
14
|
if @ssh.run_with_ssh?
|
15
|
-
@ssh.
|
16
|
-
@ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{
|
15
|
+
@ssh.upload_dir config.tmp_file_dir
|
16
|
+
@ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{config.tmp_file_dir.basename}/exec.sql"
|
17
|
+
@ssh.remove_dir "~/tmp/#{config.tmp_file_dir.basename}"
|
17
18
|
else
|
18
|
-
system "#{cmd} -f #{
|
19
|
+
system "#{cmd} -f #{config.tmp_exec_file}"
|
19
20
|
end
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
@ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -c '#{query}'"
|
26
|
-
else
|
27
|
-
system "#{cmd} -c '#{query}'"
|
28
|
-
end
|
29
|
-
end
|
23
|
+
def inject_data(config, stage)
|
24
|
+
target_config = @config['deploy'][config.category.to_s]
|
25
|
+
Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
|
30
26
|
|
31
|
-
|
32
|
-
|
33
|
-
@ssh.upload_file config.tmp_data_file, config.name
|
34
|
-
data_file = "~/tmp/#{config.name}"
|
35
|
-
else
|
36
|
-
data_file = config.tmp_data_file
|
37
|
-
end
|
27
|
+
target_config = target_config[stage] if stage
|
28
|
+
Log.error! "ERROR: Export config of #{config.category} for stage #{stage} was not found" unless target_config
|
38
29
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
30
|
+
if target_config['local_dir']
|
31
|
+
cmd = "mv #{config.tmp_data_file} #{target_config['local_dir']}/#{config.source_name}.tsv"
|
32
|
+
elsif target_config['database']
|
33
|
+
import_params = "--local #{RED.is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"
|
34
|
+
cmd = "mysqlimport #{make_mysql_cmd_params(target_config)} #{config.tmp_data_file} #{import_params}"
|
44
35
|
else
|
45
|
-
|
36
|
+
Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
|
46
37
|
end
|
47
|
-
|
38
|
+
system cmd
|
48
39
|
end
|
49
40
|
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
data_file = "~/tmp/#{query_file.basename}"
|
54
|
-
else
|
55
|
-
data_file = query_file
|
56
|
-
end
|
41
|
+
def connect_mysql(query_file, category, stage)
|
42
|
+
target_config = @config['deploy'][category.to_s]
|
43
|
+
Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
|
57
44
|
|
58
|
-
|
45
|
+
target_config = target_config[stage] if stage
|
46
|
+
Log.error! "ERROR: Export config of #{config.category} for stage #{stage} was not found" unless target_config
|
59
47
|
|
60
|
-
|
61
|
-
|
62
|
-
else
|
63
|
-
system cmd
|
64
|
-
end
|
48
|
+
cmd = "mysql #{make_mysql_cmd_params(target_config)} < #{query_file}"
|
49
|
+
system cmd
|
65
50
|
end
|
66
51
|
|
67
52
|
private
|
@@ -70,18 +55,8 @@ module Redata
|
|
70
55
|
return "psql -h #{@config['host']} -p #{REDSHIFT_PORT} -U #{@config['username']} -d #{@config['database']}"
|
71
56
|
end
|
72
57
|
|
73
|
-
def
|
74
|
-
|
75
|
-
Log.error! "ERROR: Export config of #{category} was not found in config/database.yml" unless export_db_config
|
76
|
-
if platform
|
77
|
-
if export_db_config[platform]
|
78
|
-
export_db_config = export_db_config[platform]
|
79
|
-
else
|
80
|
-
Log.warning "WARNING: Platform #{platform} was not declared in config/database.yml, ignore platform setting"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
return "-h#{export_db_config['host']} -u#{export_db_config['username']} #{export_db_config['password'].empty? ? '' : '-p'+export_db_config['password']} #{export_db_config['database']}"
|
58
|
+
def make_mysql_cmd_params(db_config)
|
59
|
+
return "-h#{db_config['host']} -u#{db_config['username']} #{db_config['password'].empty? ? '' : '-p'+db_config['password']} #{db_config['database']}"
|
85
60
|
end
|
86
61
|
|
87
62
|
end
|
data/lib/redata/notice.rb
CHANGED
@@ -10,12 +10,25 @@ module Redata
|
|
10
10
|
Log.error! "ERROR: slack channel #{RED.slack['channel']} not exists" unless channel_exist
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
13
|
+
def log(msg, log=nil)
|
14
|
+
log_content = "```\n#{File.read(log).split("\n").map{|line| line.gsub(/\[0;\d{2};\d{2}m/, '').gsub(/\[0m/, '')}.join("\n")}\n```" if log
|
14
15
|
@slack.chat_postMessage({
|
15
16
|
:channel => RED.slack['channel'],
|
16
|
-
:text => "
|
17
|
+
:text => "#{msg}\n#{log_content}",
|
17
18
|
:as_user => true
|
18
19
|
})
|
19
20
|
end
|
21
|
+
|
22
|
+
def mention(user_name, msg)
|
23
|
+
@slack.users_list['members'].each do |user|
|
24
|
+
if user['name'] == user_name
|
25
|
+
@slack.chat_postMessage({
|
26
|
+
:channel => RED.slack['channel'],
|
27
|
+
:text => "<@#{user['id']}> #{msg}",
|
28
|
+
:as_user => true
|
29
|
+
})
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
20
33
|
end
|
21
34
|
end
|
data/lib/redata/parser.rb
CHANGED
@@ -1,146 +1,177 @@
|
|
1
1
|
module Redata
|
2
2
|
class Parser
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
COMMENT_REGEX = /-{2}.*/
|
4
|
+
LOAD_REGEX = /#load (.*)->(.*)/
|
5
|
+
IF_REGEX = /\[\s*if ([^\s]*) is ([^\]]*)\]/
|
6
|
+
IFNUL_REGEX = /\[\s*if ([^\s]*) is null\s*\]/
|
7
|
+
ENDIF_REGEX = /\[\s*endif\s*\]/
|
6
8
|
START_TIME_REGEX = /\[start_time\]/
|
9
|
+
END_TIME_REGEX = /\[end_time\]/
|
7
10
|
TIME_OFFSET_REGEX = /\[(\d+) days ago\]/
|
8
11
|
CURRENT_TIME_REGEX = /\[current_time\]/
|
9
|
-
LOCALS_REGEX = /\[([^\[\]]+)\]/
|
12
|
+
LOCALS_REGEX = /\[([^\[\]<>\s]+)\]/
|
13
|
+
LOCALS_LIST_REGEX = /\[<([^\[\]<>\s]+)>\]/
|
10
14
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
Log.error! "ERROR: Query file '#{config.query_file.relative_path_from RED.root}' not exists" unless config.query_file.exist?
|
15
|
+
def self.gen_create_query(config)
|
16
|
+
if config.type == :table
|
17
|
+
self.gen_table_query config
|
18
|
+
elsif config.type == :view
|
19
|
+
self.gen_view_query config
|
20
|
+
end
|
21
|
+
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
else
|
24
|
-
start_time = RED.default_start_date
|
25
|
-
f.puts "CREATE #{config.type} #{config.source_name} AS ("
|
26
|
-
end
|
27
|
-
self.parse_redshift_file config.query_file, f, start_time
|
28
|
-
f.puts ");"
|
23
|
+
def self.gen_delete_query(config)
|
24
|
+
File.open config.tmp_exec_file, 'w' do |f|
|
25
|
+
f.puts "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'};"
|
29
26
|
end
|
30
27
|
end
|
31
28
|
|
32
|
-
def self.
|
33
|
-
Log.error! "ERROR:
|
29
|
+
def self.gen_checkout_query(config)
|
30
|
+
Log.error! "ERROR: Only could checkout data from view" unless config.type == :view
|
34
31
|
|
35
|
-
File.open config.
|
32
|
+
File.open config.tmp_exec_file, 'w' do |f|
|
36
33
|
f.puts "UNLOAD ('"
|
37
|
-
f.puts
|
38
|
-
f.puts "where date >= \\'#{start_time}\\'" if start_time
|
34
|
+
f.puts "SELECT * FROM #{config.source_name}"
|
39
35
|
f.puts "') to 's3://#{RED.s3['bucket']}/#{config.bucket_file}'"
|
40
36
|
f.puts "CREDENTIALS 'aws_access_key_id=#{RED.s3['aws_access_key_id']};aws_secret_access_key=#{RED.s3['aws_secret_access_key']}'"
|
41
37
|
f.puts "ESCAPE ALLOWOVERWRITE PARALLEL OFF DELIMITER AS '\\t';"
|
42
38
|
end
|
43
39
|
end
|
44
40
|
|
45
|
-
def self.
|
46
|
-
|
41
|
+
def self.gen_adjust_query(config)
|
42
|
+
self.parse config.query_file, config.tmp_exec_file, ''
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
private
|
47
|
+
def self.gen_table_query(config)
|
48
|
+
Log.error! "ERROR: Relation error" unless config.type == :table
|
47
49
|
|
48
|
-
|
49
|
-
|
50
|
+
tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
|
51
|
+
temp_tables = self.parse config.query_file, tmp_file
|
52
|
+
|
53
|
+
File.open config.tmp_exec_file, 'w' do |f|
|
54
|
+
# print temp tables
|
55
|
+
temp_tables.each do |name|
|
56
|
+
f.puts "CREATE TEMP TABLE #{name} AS ("
|
57
|
+
f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
|
58
|
+
f.puts ");"
|
59
|
+
end
|
60
|
+
|
61
|
+
# print create or insert query
|
62
|
+
if RED.is_append
|
63
|
+
f.puts "INSERT INTO #{config.source_name} ("
|
64
|
+
elsif
|
65
|
+
f.puts "CREATE #{config.type} #{config.source_name} AS ("
|
66
|
+
end
|
67
|
+
f.puts File.read tmp_file
|
68
|
+
f.puts ");"
|
50
69
|
end
|
51
70
|
end
|
52
71
|
|
72
|
+
def self.gen_view_query(config)
|
73
|
+
Log.error! "ERROR: Relation error" unless config.type == :view
|
53
74
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
File.open
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
75
|
+
tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
|
76
|
+
temp_tables = self.parse config.query_file, tmp_file
|
77
|
+
|
78
|
+
File.open config.tmp_exec_file, 'w' do |f|
|
79
|
+
f.puts "CREATE #{config.type} #{config.source_name} AS ("
|
80
|
+
temp_tables.each_with_index do |name, index|
|
81
|
+
f.puts "#{index == 0 ? 'WITH' : ','} #{name} AS ("
|
82
|
+
f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
|
83
|
+
f.puts ")"
|
84
|
+
end
|
85
|
+
|
86
|
+
# print create query
|
87
|
+
main = File.read tmp_file
|
88
|
+
unless temp_tables.empty?
|
89
|
+
main.gsub! 'WITH', ','
|
90
|
+
main.gsub! 'with', ','
|
91
|
+
end
|
92
|
+
f.puts main
|
93
|
+
f.puts ");"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def self.parse(in_file, out_file, skip_char=';')
|
98
|
+
Log.error! "ERROR: Query file '#{in_file.relative_path_from RED.root}' not exists" unless in_file.exist?
|
99
|
+
|
100
|
+
temp_tables = []
|
101
|
+
parse_enable = true
|
102
|
+
File.open out_file, 'w' do |out|
|
103
|
+
File.open(in_file).each do |line|
|
104
|
+
# remove comments
|
105
|
+
line.gsub!(COMMENT_REGEX, '')
|
106
|
+
# remove skip_char
|
107
|
+
line.gsub!(skip_char, '')
|
108
|
+
# remove empty line
|
109
|
+
next if !line || line.empty? || line =~ /^\s*$/
|
110
|
+
|
111
|
+
# check if else condition
|
112
|
+
if line =~ IFNUL_REGEX
|
113
|
+
res = line.scan(IFNUL_REGEX).first
|
114
|
+
var = res[0]
|
115
|
+
parse_enable = RED.locals[var.to_sym].nil?
|
116
|
+
next
|
117
|
+
elsif line =~ IF_REGEX
|
118
|
+
res = line.scan(IF_REGEX).first
|
119
|
+
var = res[0]
|
120
|
+
val = res[1].gsub /[\s|\'|\"]+/, ''
|
121
|
+
parse_enable = (RED.locals[var.to_sym] == val)
|
122
|
+
next
|
123
|
+
elsif line =~ ENDIF_REGEX
|
124
|
+
parse_enable = true
|
125
|
+
next
|
126
|
+
end
|
127
|
+
next unless parse_enable
|
128
|
+
|
129
|
+
# compile sub file
|
130
|
+
if line =~ LOAD_REGEX
|
131
|
+
# parse load syntax
|
132
|
+
res = line.scan(LOAD_REGEX).first
|
133
|
+
sub = res[0].gsub /[\s|\'|\"]+/, ''
|
134
|
+
name = res[1].gsub /[\s|:]+/, ''
|
135
|
+
Log.error! "QUERY ERROR: syntax error for load query: #{line}" if sub.empty? || name.empty?
|
136
|
+
|
137
|
+
sub_file = in_file.parent.join "_#{sub}.red.sql"
|
138
|
+
sub_file = RED.root.join 'database', 'shared', "_#{sub}.rea.sql" unless sub_file.exist?
|
139
|
+
sub_temp_tables = self.parse sub_file, out_file.dirname.join("#{name}.resql")
|
140
|
+
sub_temp_tables.each do |n|
|
141
|
+
temp_tables.push n unless temp_tables.include? n
|
142
|
+
end
|
143
|
+
temp_tables.push name unless temp_tables.include? name
|
144
|
+
next # load query line can not contain other content
|
85
145
|
end
|
86
|
-
|
146
|
+
|
87
147
|
# parse [start_time] syntax
|
88
|
-
|
89
|
-
|
148
|
+
line.gsub! START_TIME_REGEX, "'#{RED.start_time}'"
|
149
|
+
# parse [end_time] syntax
|
150
|
+
line.gsub! END_TIME_REGEX, "'#{RED.end_time}'"
|
151
|
+
# parse [current_time] syntax
|
152
|
+
line.gsub! CURRENT_TIME_REGEX, "'#{RED.current_time}'"
|
153
|
+
|
90
154
|
# parse [3 days ago]
|
91
155
|
res = line.scan(TIME_OFFSET_REGEX).each do |res|
|
92
|
-
line
|
156
|
+
line.gsub! "[#{res[0]} days ago]", "'#{RED.date_days_ago(res[0].to_i)}'"
|
93
157
|
end
|
94
|
-
out.puts line
|
95
|
-
elsif line =~ CURRENT_TIME_REGEX
|
96
|
-
line = line.gsub "[current_time]", "#{RED.current_time}"
|
97
|
-
out.puts line
|
98
|
-
elsif line =~ LOCALS_REGEX
|
99
158
|
# parse [locals] syntax
|
100
159
|
line.scan(LOCALS_REGEX).each do |res|
|
101
160
|
key = res.first
|
102
161
|
Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
|
103
|
-
line
|
162
|
+
line.gsub! "[#{key}]", "'#{RED.locals[key.to_sym]}'"
|
104
163
|
end
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
def self.parse_convertor_file(in_file)
|
114
|
-
is_parsing_column = false
|
115
|
-
columns = []
|
116
|
-
source = ""
|
117
|
-
File.open(in_file).each.with_index do |line, index|
|
118
|
-
if line =~ CONV_TABLE_REGEX
|
119
|
-
# parse table declare
|
120
|
-
res = line.scan(CONV_TABLE_REGEX).first
|
121
|
-
source = res[0].gsub /\s+/, ''
|
122
|
-
is_parsing_column = false
|
123
|
-
elsif line =~ CONV_COLUMN_REGEX
|
124
|
-
is_parsing_column = true
|
125
|
-
elsif is_parsing_column
|
126
|
-
line.gsub! /\s+/, ''
|
127
|
-
if line =~ CONV_SWITCHDEF_REGEX
|
128
|
-
res = line.scan(CONV_SWITCHDEF_REGEX).first
|
129
|
-
res[1].gsub!("'", "\\\\'")
|
130
|
-
switches = res[1].scan CONV_SWITCH_REGEX
|
131
|
-
switches.map! do |m|
|
132
|
-
"when #{m[0]} then #{m[1]}"
|
133
|
-
end
|
134
|
-
columns.push "case #{res[0]} #{switches.join ' '} end as #{res[0]}"
|
135
|
-
elsif line =~ CONV_TIMESTAMP_REGEX
|
136
|
-
columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
|
137
|
-
columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
|
138
|
-
else
|
139
|
-
columns.push line.gsub("'", "\\\\'").gsub('NULL', "\\\\'NULL\\\\'") unless line.empty?
|
164
|
+
# parse [<local_list>] syntax
|
165
|
+
line.scan(LOCALS_LIST_REGEX).each do |res|
|
166
|
+
key = res.first
|
167
|
+
Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
|
168
|
+
line = line.gsub "[<#{key}>]", "(#{RED.locals[key.to_sym].split(',').map{|e| "'#{e}'"}.join(',')})"
|
140
169
|
end
|
170
|
+
|
171
|
+
out.puts line.gsub skip_char, ''
|
141
172
|
end
|
142
173
|
end
|
143
|
-
|
174
|
+
temp_tables
|
144
175
|
end
|
145
176
|
|
146
177
|
end
|
data/lib/redata/relation.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Redata
|
2
2
|
class Relation
|
3
|
-
attr_accessor :category, :name, :key, :file, :dir, :type
|
3
|
+
attr_accessor :category, :name, :key, :file, :dir, :type
|
4
4
|
def initialize(category, name, setting)
|
5
5
|
@category = category
|
6
6
|
@name = name
|
@@ -13,5 +13,31 @@ module Redata
|
|
13
13
|
@category == :main ? @key : "#{@category}_#{@key}".to_sym
|
14
14
|
end
|
15
15
|
|
16
|
+
def source_name
|
17
|
+
@category == :main ? @name : "#{@category}_#{@name}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def query_file
|
21
|
+
query_file = RED.root.join 'database', 'sources'
|
22
|
+
query_file = query_file.join @dir if @dir
|
23
|
+
query_file = query_file.join "#{@file}.red.sql"
|
24
|
+
query_file
|
25
|
+
end
|
26
|
+
|
27
|
+
def tmp_file_dir
|
28
|
+
RED.root.join 'tmp', "#{@category}_#{@name}"
|
29
|
+
end
|
30
|
+
|
31
|
+
def tmp_exec_file
|
32
|
+
self.tmp_file_dir.join "exec.sql"
|
33
|
+
end
|
34
|
+
|
35
|
+
def tmp_mkdir
|
36
|
+
Dir.mkdir self.tmp_file_dir unless self.tmp_file_dir.exist?
|
37
|
+
end
|
38
|
+
|
39
|
+
def tmp_rmdir
|
40
|
+
FileUtils.rm_r self.tmp_file_dir if !RED.keep_tmp? && self.tmp_file_dir.exist?
|
41
|
+
end
|
16
42
|
end
|
17
43
|
end
|
@@ -3,22 +3,6 @@ module Redata
|
|
3
3
|
def initialize(category, name, setting)
|
4
4
|
super category, name, setting
|
5
5
|
@type = :table
|
6
|
-
@update_type = setting[:update] || :renewal
|
7
|
-
end
|
8
|
-
|
9
|
-
def source_name
|
10
|
-
@category == :main ? @name : "#{@category}_#{@name}"
|
11
|
-
end
|
12
|
-
|
13
|
-
def query_file
|
14
|
-
query_file = RED.root.join 'database', 'sources'
|
15
|
-
query_file = query_file.join @dir if @dir
|
16
|
-
query_file = query_file.join "#{@file}.sql"
|
17
|
-
query_file
|
18
|
-
end
|
19
|
-
|
20
|
-
def tmp_script_file
|
21
|
-
RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
|
22
6
|
end
|
23
7
|
end
|
24
8
|
end
|
data/lib/redata/relation/view.rb
CHANGED
@@ -3,22 +3,14 @@ module Redata
|
|
3
3
|
def initialize(category, name, setting)
|
4
4
|
super category, name, setting
|
5
5
|
@type = :view
|
6
|
-
@update_type = :renewal
|
7
6
|
end
|
8
7
|
|
9
|
-
def
|
10
|
-
|
8
|
+
def bucket_file
|
9
|
+
"#{RED.end_time}/#{@category}/#{@name}.tsv"
|
11
10
|
end
|
12
11
|
|
13
|
-
def
|
14
|
-
|
15
|
-
query_file = query_file.join @dir if @dir
|
16
|
-
query_file = query_file.join "#{@file}.sql"
|
17
|
-
query_file
|
18
|
-
end
|
19
|
-
|
20
|
-
def tmp_script_file
|
21
|
-
RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
|
12
|
+
def tmp_data_file
|
13
|
+
self.tmp_file_dir.join "#{@name}.tsv"
|
22
14
|
end
|
23
15
|
end
|
24
16
|
end
|
data/lib/redata/requires.rb
CHANGED
data/lib/redata/ssh.rb
CHANGED
@@ -17,12 +17,16 @@ module Redata
|
|
17
17
|
return false
|
18
18
|
end
|
19
19
|
|
20
|
-
def
|
21
|
-
system "scp -i #{@ssh['IdentityFile']} #{
|
20
|
+
def upload_dir(dir)
|
21
|
+
system "scp -r -i #{@ssh['IdentityFile']} #{dir} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/"
|
22
22
|
end
|
23
23
|
|
24
24
|
def run_command(cmd)
|
25
25
|
system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"#{cmd}\""
|
26
26
|
end
|
27
|
+
|
28
|
+
def remove_dir(dir)
|
29
|
+
system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"rm -rf #{dir}\""
|
30
|
+
end
|
27
31
|
end
|
28
32
|
end
|
data/lib/redata/tasks.rb
CHANGED
@@ -8,59 +8,56 @@ module Redata
|
|
8
8
|
|
9
9
|
def self.create_datasource(key)
|
10
10
|
self.parse_key(key, [:table, :view]).each do |config|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
DATABASE.connect_with_file config.tmp_script_file
|
11
|
+
config.tmp_mkdir
|
12
|
+
Parser.gen_create_query config
|
13
|
+
if RED.is_append
|
14
|
+
dLog.action "APPEND<#{config.type}>: data(#{RED.start_time} ~ #{RED.end_time}) into [#{config.source_name}]"
|
16
15
|
else
|
17
|
-
|
18
|
-
Log.action "QUERY: Create #{config.type} [#{config.source_name}]"
|
19
|
-
DATABASE.connect_with_file config.tmp_script_file
|
16
|
+
Log.action "CREATE<#{config.type}>: [#{config.source_name}]"
|
20
17
|
end
|
18
|
+
DATABASE.connect_redshift config
|
19
|
+
config.tmp_rmdir
|
21
20
|
end
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.delete_datasource(key)
|
25
24
|
self.parse_key(key, [:table, :view]).reverse.each do |config|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
25
|
+
config.tmp_mkdir
|
26
|
+
Parser.gen_delete_query config
|
27
|
+
Log.action "DROP<#{config.type}>: [#{config.source_name}]"
|
28
|
+
Log.warning "WARNING: CASCADE mode will also drop other views that depend on this" if RED.is_forced
|
29
|
+
DATABASE.connect_redshift config
|
30
|
+
config.tmp_rmdir
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
def self.checkout_datasource(key)
|
35
|
-
self.parse_key(key, [:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
Parser.gen_export_query config
|
42
|
-
Log.action "QUERY: Checkout data to bucket [#{config.bucket_file}]"
|
43
|
-
end
|
44
|
-
DATABASE.connect_with_file config.tmp_script_file
|
35
|
+
self.parse_key(key, [:view]).each do |config|
|
36
|
+
config.tmp_mkdir
|
37
|
+
Parser.gen_checkout_query config
|
38
|
+
Log.action "CHECKOUT<#{config.category}>: to bucket [#{config.bucket_file}]"
|
39
|
+
DATABASE.connect_redshift config
|
40
|
+
|
45
41
|
bucket = S3Bucket.new
|
46
42
|
bucket.move "#{config.bucket_file}000", config.bucket_file
|
43
|
+
config.tmp_rmdir
|
47
44
|
end
|
48
45
|
end
|
49
46
|
|
50
|
-
def self.
|
51
|
-
self.parse_key(key, [:
|
52
|
-
|
47
|
+
def self.deploy_datasource(key, stage)
|
48
|
+
self.parse_key(key, [:view]).each do |config|
|
49
|
+
config.tmp_mkdir
|
53
50
|
bucket = S3Bucket.new
|
54
51
|
bucket.make_public config.bucket_file, true
|
55
52
|
|
56
|
-
Log.action "DOWNLOAD
|
53
|
+
Log.action "DOWNLOAD<bucket>: from [#{config.bucket_file}]"
|
57
54
|
system "wget #{RED.s3['host']}/#{config.bucket_file} -O #{config.tmp_data_file} --quiet"
|
58
55
|
|
59
|
-
Log.action "BUCKET: Make [#{config.bucket_file}] private"
|
60
56
|
bucket.make_public config.bucket_file, false
|
61
57
|
|
62
|
-
Log.action "
|
63
|
-
DATABASE.
|
58
|
+
Log.action "INJECT<#{config.category}>: with [#{config.name}] #{stage ? 'for stage '+stage : ''}"
|
59
|
+
DATABASE.inject_data config, stage
|
60
|
+
config.tmp_rmdir
|
64
61
|
end
|
65
62
|
end
|
66
63
|
|
@@ -73,7 +70,7 @@ module Redata
|
|
73
70
|
configs = @@schema.category_configs(key, types)
|
74
71
|
if configs.empty?
|
75
72
|
config = @@schema.config_with key if key
|
76
|
-
Log.error! "ERROR: Data source relation #{key} was not defined
|
73
|
+
Log.error! "ERROR: Data source relation #{key} was not defined" unless config
|
77
74
|
configs.push config
|
78
75
|
end
|
79
76
|
configs
|
data/lib/redata/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- goshan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,7 +136,6 @@ files:
|
|
136
136
|
- lib/redata/notice.rb
|
137
137
|
- lib/redata/parser.rb
|
138
138
|
- lib/redata/relation.rb
|
139
|
-
- lib/redata/relation/export.rb
|
140
139
|
- lib/redata/relation/table.rb
|
141
140
|
- lib/redata/relation/view.rb
|
142
141
|
- lib/redata/requires.rb
|
@@ -1,30 +0,0 @@
|
|
1
|
-
module Redata
|
2
|
-
class Export < Relation
|
3
|
-
def initialize(category, name, setting)
|
4
|
-
super category, name, setting
|
5
|
-
@type = :export
|
6
|
-
@update_type = setting[:update] || :renewal
|
7
|
-
end
|
8
|
-
|
9
|
-
def conv_file
|
10
|
-
conv_file = RED.root.join 'database', 'convertors'
|
11
|
-
conv_file = conv_file.join @dir if @dir
|
12
|
-
conv_file = conv_file.join "#{@file}.conv"
|
13
|
-
conv_file
|
14
|
-
end
|
15
|
-
|
16
|
-
def tmp_script_file
|
17
|
-
RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
|
18
|
-
end
|
19
|
-
|
20
|
-
def tmp_data_file
|
21
|
-
RED.root.join 'tmp', "data", "#{@name}.tsv"
|
22
|
-
end
|
23
|
-
|
24
|
-
def bucket_file
|
25
|
-
bucket_dir = RED.default_append_date
|
26
|
-
bucket_dir = RED.locals[:start_time] if RED.is_append && @update_type == :append && RED.locals[:start_time]
|
27
|
-
"#{bucket_dir}/#{@category}/#{@name}.tsv"
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|