redata 0.1.0 → 1.0.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58c04577fb55ac865dda405347811acf421dac3e
4
- data.tar.gz: a1a33fb66a93f198aca56731417c2a2d90b65192
3
+ metadata.gz: ba347eb745baeb8cf5ffee03842482bfcc2b98d7
4
+ data.tar.gz: cb86f147ae139d3e87af8afa6e74a823abe02c02
5
5
  SHA512:
6
- metadata.gz: 199b5da361e782c96ba4dbb1c5e465ca3fd7e9a8f4471776efb0915408ec9f43428aadfda25f5037e883c4798bbec4633c2e0aa7ddb76d59daafc3e8c0ed56dc
7
- data.tar.gz: 551db6a70bbcdfd29a39aed84d69258d883afcbede934bc5f7fc08ed165b8c3978671f2cba2e8d26ee3bc5a6a8349cc4cde86028824241b01dfce1fa6e6dd092
6
+ metadata.gz: 47cabaf3d9fcab6058a587c823eeee1a6bc39a33051b7675e176dcd7b812e33678ecd89e2f4b4463747813ade4f287947119623b7ec19b45cbf9dbf316a87dc4
7
+ data.tar.gz: 13991f7d3daa93f15f31bd1e6654147098c98f36f7d1eb19f2003f58792ede7c9b806be33571593d138db5f2425ef9e63444103b8ff9063d98778e14bfdd9dc7
data/README.md CHANGED
@@ -26,22 +26,25 @@ Or install it yourself as:
26
26
  + config `config/redata.yml` for general setting
27
27
 
28
28
  ```YAML
29
- start_date: "2016-04-04" # default data start date
29
+ create_interval: # default date for create mode
30
+ start_time: "2016-04-04"
31
+ end_time: 2 # days ago
32
+ append_interval: # date fetching interval for append mode
33
+ start_time: 3 # days ago
34
+ end_time: 2 # days ago
30
35
  timezone: "Asia/Tokyo"
36
+ keep_tmp: true # or false. whether to keep the temp query files in ./tmp after the query finishes
31
37
  s3:
32
- aws_access_key_id: {key_id}
33
- aws_secret_access_key: {key_secret}
34
- region: {s3_region}
35
- bucket:
36
- production: {bucket_name}
37
- development: {bucket_name}
38
+ bucket: bucket_name
39
+ aws_access_key_id: key_id
40
+ aws_secret_access_key: key_secret
38
41
  ssh: # this setting will be used in ssh mode when you access private database
39
- HostName: {gateway_host}
40
- IdentityFile: {~/.ssh/key.pem}
41
- User: {username}
42
+ HostName: gateway_host
43
+ IdentityFile: ~/.ssh/key.pem
44
+ User: username
42
45
  slack_bot: # this setting will be used for slack notice push
43
- token: {bot_token}
44
- channel: {slack_channel}
46
+ token: bot_token
47
+ channel: slack_channel
45
48
  ```
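
For reference, here is a minimal sketch (not the gem's own code) of how the interval values above can be resolved: a string such as `"2016-04-04"` is taken as a literal date, while an integer is treated as that many days ago.

```ruby
# Minimal sketch, assuming the keys shown above; the gem resolves these internally using the configured timezone.
def resolve_interval(value, now = Time.now.utc)
  return value if value.is_a?(String)               # literal date, e.g. "2016-04-04"
  (now - value * 24 * 3600).strftime('%Y-%m-%d')    # integer: N days ago
end

create_interval = { 'start_time' => '2016-04-04', 'end_time' => 2 }
resolve_interval(create_interval['start_time'])     # => "2016-04-04"
resolve_interval(create_interval['end_time'])       # => the date two days ago
```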
46
49
 
47
50
  + config `config/database.yml` for development and production environment in redshift database
@@ -53,12 +56,15 @@ development:
53
56
  username: user
54
57
  password: ''
55
58
  database: dev
56
- export: # target platform db(mysql) which export data to
57
- app: # platform name
58
- username: root
59
- password: ''
60
- host: localhost
61
- database: app
59
+ deploy: # target db (mysql) or file location which data is exported to
60
+ app: # category name, using database
61
+ pro: # stage name (you can also declare these settings directly under the category)
62
+ username: root
63
+ password: ''
64
+ host: localhost
65
+ database: app
66
+ file: # another category, using local file
67
+ local_dir: '~/data'
62
68
  ```
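
To make the lookup order explicit, here is a rough sketch (mirroring, not copying, the gem's deploy handling) of how a category and an optional stage select the target settings above:

```ruby
# Rough sketch of the deploy lookup: first by category, then by stage when one is given on the command line.
deploy = {
  'app'  => { 'pro' => { 'username' => 'root', 'password' => '', 'host' => 'localhost', 'database' => 'app' } },
  'file' => { 'local_dir' => '~/data' }
}

target = deploy['app']            # category
target = target['pro'] if target  # optional stage
# a 'local_dir' target is written to a local file; a 'database' target is imported via mysqlimport
```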
63
69
 
64
70
  + config `config/relations.rb` for data object in redshift and exporting process to mysql
@@ -69,60 +75,38 @@ Redata::Task.schema.config do
69
75
  # Example of declaring a global table
70
76
  table 'table_name'
71
77
  # This declaration means
72
- # query file: database/sources/table_name.sql
78
+ # query file: database/sources/table_name.red.sql
73
79
  # redshift table: table_name
74
- # update type: renewal, delete and re-create when update
75
80
  # key used in command line: table_name
76
-
81
+
77
82
  # Example of declaring a global table with customizing options
78
- table 'new_table_name', :dir => 'dir', :file => 'query_file', :update => :append, :as => :alias
83
+ table 'new_table_name', :dir => 'dir', :file => 'query_file', :as => :alias
79
84
  # This declaration means
80
- # query file: database/sources/dir/query_file.sql
85
+ # query file: database/sources/dir/query_file.red.sql
81
86
  # redshift table: new_table_name
82
- # update type: append, only appending to existing table
83
87
  # key used in command line: alias
84
-
85
- # view is same to table but the update type only has renewal mode
86
- table 'view_name'
87
- table 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
88
-
88
+
89
+ # view is the same as table, but it is still (re)created even in append mode
90
+ view 'view_name'
91
+ view 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
92
+
89
93
  # Example of declaring with category
90
94
  category :test_category do
91
95
  table 'test_table'
92
96
  # This declaration means
93
- # query file: database/sources/test_category/test_table.sql
97
+ # query file: database/sources/test_category/test_table.red.sql
94
98
  # redshift table: test_category_test_table
95
- # update type: renewal
96
99
  # key used in command line: test_category_test_table
97
-
98
- table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :update => append, :as => :alias_oth
100
+
101
+ table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
99
102
  # This declaration means
100
- # query file: database/sources/dir/query_file_oth.sql
103
+ # query file: database/sources/dir/query_file_oth.red.sql
101
104
  # redshift table: test_category_test_table_oth
102
- # update type: append
103
105
  # key used in command line: test_category_alias_oth
104
-
106
+
105
107
  # view is the same as table, just without an appending update type
106
108
  view 'test_view'
107
109
  view 'test_view_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_view_oth
108
-
109
- #Example of convertor declaration
110
- export 'test_export'
111
- # This declaration means
112
- # convertor file: database/convertors/test_category/test_export.conv
113
- # target mysql database name: test_category (Also see: export config in config/database.yml{:export})
114
- # target mysql table: test_export
115
- # update type: renewal, delete all records and insert new records
116
- # key used in command line: test_category_test_export
117
-
118
- #Example of convertor declaration
119
- export 'test_export', :dir => 'dir', :file => 'conv_file', :update => 'append', :as => 'alias_export'
120
- # This declaration means
121
- # convertor file: database/convertors/dir/conv_file.conv
122
- # target mysql database name: test_category
123
- # target mysql table: test_export
124
- # update type: append, append insert new records without deleting
125
- # key used in command line: test_category_alias_export
126
110
  end
127
111
 
128
112
  end
@@ -130,50 +114,40 @@ end
130
114
 
131
115
  ### Query file
132
116
 
133
- Query file was used for create table of view in redshift. It is almost like PostgreSQL file but with same new feature. And you have no need to write a create table/view query, the result after running query file will used to create a new table/view, for table, if you use append mode, the result will only be append-inserted to table.
117
+ Query files are used to create tables or views in redshift. A query file is almost a plain PostgreSQL file, but with some extra features. You do not need to write the create table/view statement yourself: the result of running the query file is used to create the new table/view. For a table in append mode, the result is append-inserted into the existing table instead.
134
118
  eg.
135
119
 
136
120
  ```SQL
137
- -- query file in data/sources/...
121
+ -- query file in database/sources/*.red.sql
138
122
 
139
- #include 'sub_query_a' --> :a -- include a sub query as object a from _sub_query_a.sql in same folder or database/shared/
140
- #include 'sub_query_b' --> :b
123
+ #load 'sub_query_a' --> :a -- load a sub query as object a from _sub_query_a.red.sql in the same folder or database/shared/
124
+ #load 'sub_query_b' --> :b
141
125
 
142
126
 
127
+ -- you can use if logic to control whether part of a query runs
128
+ -- 'endif' closes one or more consecutive if blocks above (a second if right after another acts like 'else if')
129
+ -- TIPS: 'else if', 'else' syntax and nested if logic are not supported yet
130
+ [if var is 'value1']
143
131
  select a.col1, a.col2, b.col1, b.col2, b.col3
132
+ [if var is 'value1']
133
+ select a.col3, b.col4
134
+ [endif]
144
135
  from {a} -- use object a loaded from sub query file '_sub_query_a.red.sql'
145
136
  join {b} on b.col1 = a.col1
146
- -- If in append mode and this table was setted appending update type, then start_time getting from command input such as `-start_time 2016-11-08` will be used here. When missing input this param, as default [2 days ago] will be used.
147
- -- Or if not append mode, start_date will be used as default (Also see config/redata.yml). set start_time when running command , if missing in command, default_start_date will be used
137
+ -- [start_time] and [end_time] are resolved in 3 ways:
138
+ -- command params are used when given
139
+ -- in append mode, [append_interval][start_time] / [append_interval][end_time] are used (see config/redata.yml)
140
+ -- in create mode, [create_interval][start_time] / [create_interval][end_time] are used (see config/redata.yml)
148
141
  where a.col1 >= [start_time]
149
- -- current time in setted timezone will be used (About timezon, also see config/redata.yml)
150
- and a.col1 <= [current_time]
142
+ and a.col1 < [end_time]
151
143
  -- some params come from command input such as `-param_from_command param_value`
152
144
  and a.col2 = [param_from_command]
145
+ -- the current time in the configured timezone will be used (about the timezone, also see config/redata.yml)
146
+ and b.col2 <= [current_time]
153
147
  -- x days before today, x must be an integer
154
148
  and b.col3 >= [x days ago]
155
149
  ```
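
As an illustration of what the bracketed placeholders expand to, here is a simplified substitution sketch (the real logic lives in the gem's parser, which also handles `#load` and the if blocks):

```ruby
# Simplified placeholder substitution; resolved values end up single-quoted in the generated SQL.
line   = "where a.col1 >= [start_time] and b.col3 >= [3 days ago]"
locals = { start_time: '2016-11-08' }   # e.g. from `-start_time 2016-11-08` on the command line

line = line.gsub(/\[start_time\]/, "'#{locals[:start_time]}'")
line = line.gsub(/\[(\d+) days ago\]/) { "'#{(Time.now.utc - $1.to_i * 24 * 3600).strftime('%Y-%m-%d')}'" }
puts line   # => where a.col1 >= '2016-11-08' and b.col3 >= '<date three days ago>'
```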
156
150
 
157
- ### Convertor config file
158
-
159
- Convertor file was used to generate a select query to get data from redshift and unload to S3. But you have no need to wirte a unload query. If you are using append mode, only data 2 days ago will be select.
160
- eg.
161
-
162
- ```
163
- source: redshift_source_table_or_view
164
- columns:
165
- cm_id
166
- segment_type{'C' => 0, 'T' => 1, 'M1' => 2, 'M2' => 3, 'M3' => 4, 'F1' => 5, 'F2' => 6, 'F3' => 7}
167
- v
168
- e
169
- base_ai
170
- sample_num
171
- grp
172
- ```
173
-
174
- > convertor config file in `data/convertors/...`
175
- > `source` means the source table in redshift
176
- > `columns` means the source columns in source table
177
151
 
178
152
  ### Command
179
153
 
@@ -184,32 +158,28 @@ There are 3 executable file in bin/
184
158
 
185
159
  #### redata
186
160
 
187
- Usage: `redata [-options] [action] [object key] {platform}`
161
+ Usage: `redata [-options] [action] [object key] {stage}`
188
162
  + action
189
163
  - create --> create a table/view or append data to table in redshift
190
164
  - delete --> delete a table/view in redshift
191
165
  - checkout --> export data in table/view of redshift into S3
192
- - inject --> import data into mysql table from S3
193
- + object key --> object will be create/delete/checkout/inject declared in `config/relation.rb`
194
- + platform --> when injecting data into mysql, there may be several platform declared in `config/database.yml{:export}` for same database, here is setting which platform to use. *If the platform here could not be found in `database.yml` or have not set platform, the default export will be used.*
166
+ - deploy --> deploy data from S3 to local db or file
167
+ + object key --> the object declared in `config/relations.rb` to create/delete/checkout/deploy
168
+ + stage --> when deploying data into mysql, several stages may be declared under `config/database.yml{:deploy}` for the same category; this chooses which stage to use.
195
169
  + options
196
170
  - -dir --> project directory; both absolute and relative paths are okay. default is the current directory.
197
171
  - -e --> environment: `production`, `development`, etc.
198
172
  - -f --> force mode, use `CASCADE` when removing a view or table in redshift
199
173
  - -ssh --> use ssh accessing to private database with ssh config in `config/redata.yml`
200
- - -append_mode --> use `append_mode`, the objects in relations.rb with appending update type will go to appending operation.
201
- + delete will only delete objects with renewal update type
202
- + create will append-insert data after `-start_time`(set in command) or default `2 days ago` for appending update type, still create table/view for renewal type
203
- + checkout will only fetch data after `-start_time` or default `2 days ago` to upload to S3, renewal type will still be uploaded all data
204
- + inject will insert data to mysql without `--delete` option, renewal still delete all firstly
174
+ - -append --> use append mode: append new data into the existing redshift table, or inject into the local db without deleting first. views have no append mode.
205
175
  - other options --> some params are used in query files when declared there, such as `start_time`
206
176
 
207
177
  #### adjust
208
178
 
209
- Use adjust when you just want to run a query file without declaring in `config/relations.rb`
179
+ Use adjust when you just want to run a query file without declaring it in `config/relations.rb`
210
180
  Usage: `adjust [-options] [database] [query file] {stage}`
211
- + database --> `redshift` or database declared in `config/database.yml{export}`
212
- + query file --> query file which will be run in `database/adjust/`, **without extends `.sql`**
181
+ + database --> `redshift` or database declared in `config/database.yml{:deploy}`
182
+ + query file --> the query file under `database/adjust/` to run, given **without the `.red.sql` extension**
213
183
  + stage --> same as in `redata`
214
184
  + options
215
185
  - -dir --> project directory; both absolute and relative paths are okay. default is the current directory.
@@ -220,9 +190,9 @@ Usage: `adjust [-options] [database] [query file] {platform}`
220
190
  #### notice
221
191
 
222
192
  Usage: `notice [-options] [action]`
223
- + action: currently, there is only `update` action which means send 'finish updating' message to slack
224
- + options
225
- - -e --> environment: `production`, `development`, etc. **Only production could send notice**
193
+ + action
194
+ - log --> send a message to slack with a log file
195
+ - mention --> send a message to slack mentioning someone
226
196
 
227
197
  ## Contributing
228
198
 
@@ -231,5 +201,25 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/goshan
231
201
 
232
202
  ## License
233
203
 
234
- The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
204
+ Copyright 2013, Han Qiu(goshan), All rights reserved.
205
+
206
+ MIT License
207
+
208
+ Permission is hereby granted, free of charge, to any person obtaining a copy
209
+ of this software and associated documentation files (the "Software"), to deal
210
+ in the Software without restriction, including without limitation the rights
211
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
212
+ copies of the Software, and to permit persons to whom the Software is
213
+ furnished to do so, subject to the following conditions:
214
+
215
+ The above copyright notice and this permission notice shall be included in
216
+ all copies or substantial portions of the Software.
217
+
218
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
219
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
220
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
221
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
222
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
223
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
224
+ THE SOFTWARE.
235
225
 
data/bin/adjust CHANGED
@@ -3,15 +3,20 @@
3
3
  require File.expand_path '../../lib/redata', __FILE__
4
4
 
5
5
 
6
- query_file = Redata::RED.root.join 'database', 'adjust', "#{Redata::RED.params[1]}.sql"
7
- tmp_script_file = Redata::RED.root.join 'tmp', 'queries', "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}.sql"
6
+ temp_config = OpenStruct.new
7
+ temp_config.query_file = Redata::RED.root.join 'database', 'adjust', "#{Redata::RED.params[1]}.sql"
8
+ temp_config.tmp_file_dir = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}"
9
+ temp_config.tmp_exec_file = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}", "exec.sql"
8
10
 
9
- Redata::Log.action "QUERY: Run query file [#{query_file.relative_path_from Redata::RED.root}] in #{Redata::RED.params[0]}"
10
- Redata::Parser.gen_adjust_file query_file, tmp_script_file
11
+ Dir.mkdir temp_config.tmp_file_dir unless temp_config.tmp_file_dir.exist?
12
+
13
+ Redata::Log.action "ADJUST<#{Redata::RED.params[0]}>: use [#{temp_config.query_file.relative_path_from Redata::RED.root}]"
14
+ Redata::Parser.gen_adjust_query temp_config
11
15
 
12
16
  if Redata::RED.params[0] == "redshift"
13
- Redata::DATABASE.connect_with_file tmp_script_file
17
+ Redata::DATABASE.connect_redshift temp_config
14
18
  else
15
- Redata::DATABASE.connect_mysql_with_file tmp_script_file, Redata::RED.params[0], Redata::RED.params[2]
19
+ Redata::DATABASE.connect_mysql temp_config.tmp_exec_file, Redata::RED.params[0], Redata::RED.params[2]
16
20
  end
17
21
 
22
+ FileUtils.rm_r temp_config.tmp_file_dir if temp_config.tmp_file_dir.exist?
data/bin/notice CHANGED
@@ -6,15 +6,13 @@ require File.expand_path '../../lib/redata/notice', __FILE__
6
6
 
7
7
 
8
8
 
9
- if ['update'].include? Redata::RED.params[0]
9
+ if ['log', 'mention'].include? Redata::RED.params[0]
10
10
  notice = Redata::Notice.new
11
11
  case Redata::RED.params[0]
12
- when 'update'
13
- if Redata::RED.production?
14
- notice.send "今日の自動更新を完了しました!\n今のデータ期間は `2015-07-29 ~ #{Redata::RED.date_days_ago 3}` になってます"
15
- else
16
- Redata::Log.warning "WARNING: Could send notice only in production env"
17
- end
12
+ when 'log'
13
+ notice.log Redata::RED.params[1], Redata::RED.params[2]
14
+ when 'mention'
15
+ notice.mention Redata::RED.params[1], Redata::RED.params[2]
18
16
  end
19
17
  end
20
18
 
data/bin/redata CHANGED
@@ -5,7 +5,7 @@ require File.expand_path '../../lib/redata', __FILE__
5
5
  require Redata::RED.root.join('config', 'relations.rb').to_s
6
6
 
7
7
 
8
- if ['create', 'delete', 'checkout', 'inject'].include? Redata::RED.params[0]
8
+ if ['create', 'delete', 'checkout', 'deploy'].include? Redata::RED.params[0]
9
9
  case Redata::RED.params[0]
10
10
  when 'create'
11
11
  Redata::Task.create_datasource Redata::RED.params[1]
@@ -13,8 +13,8 @@ if ['create', 'delete', 'checkout', 'inject'].include? Redata::RED.params[0]
13
13
  Redata::Task.delete_datasource Redata::RED.params[1]
14
14
  when 'checkout'
15
15
  Redata::Task.checkout_datasource Redata::RED.params[1]
16
- when 'inject'
17
- Redata::Task.inject Redata::RED.params[1], Redata::RED.params[2]
16
+ when 'deploy'
17
+ Redata::Task.deploy_datasource Redata::RED.params[1], Redata::RED.params[2]
18
18
  end
19
19
  end
20
20
 
data/lib/redata/config.rb CHANGED
@@ -22,7 +22,8 @@ module Redata
22
22
  # config file
23
23
  @config = YAML.load(ERB.new(File.read(@root.join 'config', 'redata.yml')).result(binding))
24
24
  @s3_config = @config['s3']
25
- @s3_config['bucket'] = @s3_config['bucket'][@env]
25
+ @s3_config['bucket'] += "-dev" unless @env == 'production'
26
+ @s3_config['region'] = 'ap-northeast-1'
26
27
  @s3_config['host'] = "https://s3-#{@s3_config['region']}.amazonaws.com/#{@s3_config['bucket']}"
27
28
  Aws.config.update({
28
29
  region: @s3_config['region'],
@@ -30,6 +31,7 @@ module Redata
30
31
  })
31
32
  @tz_local = Timezone[@config['timezone']]
32
33
  @slack_token = @config['slack_bot']
34
+ @keep_tmp = @config['keep_tmp']
33
35
  end
34
36
 
35
37
  def development?
@@ -40,8 +42,26 @@ module Redata
40
42
  @env == 'production'
41
43
  end
42
44
 
43
- def default_start_date
44
- @config['start_date']
45
+ def keep_tmp?
46
+ @keep_tmp
47
+ end
48
+
49
+ def start_time
50
+ return @locals[:start_time] if @locals[:start_time]
51
+ if @is_append
52
+ @tz_local.utc_to_local(Time.now.utc-@config['append_interval']['start_time']*24*3600).strftime('%Y-%m-%d')
53
+ else
54
+ @config['create_interval']['start_time']
55
+ end
56
+ end
57
+
58
+ def end_time
59
+ return @locals[:end_time] if @locals[:end_time]
60
+ if @is_append
61
+ @tz_local.utc_to_local(Time.now.utc-@config['append_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
62
+ else
63
+ @tz_local.utc_to_local(Time.now.utc-@config['create_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
64
+ end
45
65
  end
46
66
 
47
67
  def ssh
@@ -60,11 +80,6 @@ module Redata
60
80
  @tz_local.utc_to_local(Time.now.utc).strftime('%Y-%m-%d %H:%M:%S')
61
81
  end
62
82
 
63
- def default_append_date
64
- # 2 days ago bacause there is only data 2 days ago in redshift
65
- @tz_local.utc_to_local(Time.now.utc-2*24*3600).strftime('%Y-%m-%d')
66
- end
67
-
68
83
  def date_days_ago(days)
69
84
  @tz_local.utc_to_local(Time.now.utc-days*24*3600).strftime('%Y-%m-%d')
70
85
  end
@@ -76,9 +91,6 @@ module Redata
76
91
  i = 0
77
92
  while i < argv.count
78
93
  case argv[i]
79
- when '-dir'
80
- i += 1
81
- new_argv[:dir] = argv[i]
82
94
  when '-e'
83
95
  i += 1
84
96
  new_argv[:env] = argv[i]
@@ -86,11 +98,11 @@ module Redata
86
98
  new_argv[:force] = true
87
99
  when '-ssh'
88
100
  new_argv[:ssh] = true
89
- when '-append_mode'
101
+ when '-append'
90
102
  new_argv[:append_mode] = true
91
103
  else
92
- if argv[i] =~ /-(.+)/
93
- key = argv[i].match(/-(.+)/)[1]
104
+ if argv[i] =~ /\A-(.+)/
105
+ key = argv[i].match(/\A-(.+)/)[1]
94
106
  i += 1
95
107
  new_argv[:locals][key.to_sym] = argv[i]
96
108
  else
@@ -9,59 +9,44 @@ module Redata
9
9
  @ssh = Ssh.new
10
10
  end
11
11
 
12
- def connect_with_file(file)
12
+ def connect_redshift(config)
13
13
  cmd = make_redshift_cmd
14
14
  if @ssh.run_with_ssh?
15
- @ssh.upload_file file
16
- @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{file.basename}"
15
+ @ssh.upload_dir config.tmp_file_dir
16
+ @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{config.tmp_file_dir.basename}/exec.sql"
17
+ @ssh.remove_dir "~/tmp/#{config.tmp_file_dir.basename}"
17
18
  else
18
- system "#{cmd} -f #{file}"
19
+ system "#{cmd} -f #{config.tmp_exec_file}"
19
20
  end
20
21
  end
21
22
 
22
- def connect_with_query(query)
23
- cmd = make_redshift_cmd
24
- if @ssh.run_with_ssh?
25
- @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -c '#{query}'"
26
- else
27
- system "#{cmd} -c '#{query}'"
28
- end
29
- end
23
+ def inject_data(config, stage)
24
+ target_config = @config['deploy'][config.category.to_s]
25
+ Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
30
26
 
31
- def inject_to_mysql(config, platform)
32
- if @ssh.run_with_ssh?
33
- @ssh.upload_file config.tmp_data_file, config.name
34
- data_file = "~/tmp/#{config.name}"
35
- else
36
- data_file = config.tmp_data_file
37
- end
27
+ target_config = target_config[stage] if stage
28
+ Log.error! "ERROR: Export config of #{config.category} for stage #{stage} was not found" unless target_config
38
29
 
39
- is_append = RED.is_append && config.update_type == :append
40
- cmd = "mysqlimport #{make_mysql_cmd_config(config.category.to_s, platform)} #{data_file} --local #{is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"
41
-
42
- if @ssh.run_with_ssh?
43
- @ssh.run_command cmd
30
+ if target_config['local_dir']
31
+ cmd = "mv #{config.tmp_data_file} #{target_config['local_dir']}/#{config.source_name}.tsv"
32
+ elsif target_config['database']
33
+ import_params = "--local #{RED.is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"
34
+ cmd = "mysqlimport #{make_mysql_cmd_params(target_config)} #{config.tmp_data_file} #{import_params}"
44
35
  else
45
- system "#{cmd}"
36
+ Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
46
37
  end
47
-
38
+ system cmd
48
39
  end
49
40
 
50
- def connect_mysql_with_file(query_file, category, platform)
51
- if @ssh.run_with_ssh?
52
- @ssh.upload_file query_file, query_file.basename
53
- data_file = "~/tmp/#{query_file.basename}"
54
- else
55
- data_file = query_file
56
- end
41
+ def connect_mysql(query_file, category, stage)
42
+ target_config = @config['deploy'][category.to_s]
43
+ Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
57
44
 
58
- cmd = "mysql #{make_mysql_cmd_config(category, platform)} < #{data_file}"
45
+ target_config = target_config[stage] if stage
46
+ Log.error! "ERROR: Export config of #{config.category} for stage #{stage} was not found" unless target_config
59
47
 
60
- if @ssh.run_with_ssh?
61
- @ssh.run_command cmd
62
- else
63
- system cmd
64
- end
48
+ cmd = "mysql #{make_mysql_cmd_params(target_config)} < #{query_file}"
49
+ system cmd
65
50
  end
66
51
 
67
52
  private
@@ -70,18 +55,8 @@ module Redata
70
55
  return "psql -h #{@config['host']} -p #{REDSHIFT_PORT} -U #{@config['username']} -d #{@config['database']}"
71
56
  end
72
57
 
73
- def make_mysql_cmd_config(category, platform)
74
- export_db_config = @config['export'][category]
75
- Log.error! "ERROR: Export config of #{category} was not found in config/database.yml" unless export_db_config
76
- if platform
77
- if export_db_config[platform]
78
- export_db_config = export_db_config[platform]
79
- else
80
- Log.warning "WARNING: Platform #{platform} was not declared in config/database.yml, ignore platform setting"
81
- end
82
- end
83
-
84
- return "-h#{export_db_config['host']} -u#{export_db_config['username']} #{export_db_config['password'].empty? ? '' : '-p'+export_db_config['password']} #{export_db_config['database']}"
58
+ def make_mysql_cmd_params(db_config)
59
+ return "-h#{db_config['host']} -u#{db_config['username']} #{db_config['password'].empty? ? '' : '-p'+db_config['password']} #{db_config['database']}"
85
60
  end
86
61
 
87
62
  end
data/lib/redata/notice.rb CHANGED
@@ -10,12 +10,25 @@ module Redata
10
10
  Log.error! "ERROR: slack channel #{RED.slack['channel']} not exists" unless channel_exist
11
11
  end
12
12
 
13
- def send(msg)
13
+ def log(msg, log=nil)
14
+ log_content = "```\n#{File.read(log).split("\n").map{|line| line.gsub(/\[0;\d{2};\d{2}m/, '').gsub(/\[0m/, '')}.join("\n")}\n```" if log
14
15
  @slack.chat_postMessage({
15
16
  :channel => RED.slack['channel'],
16
- :text => "<!here> #{msg}",
17
+ :text => "#{msg}\n#{log_content}",
17
18
  :as_user => true
18
19
  })
19
20
  end
21
+
22
+ def mention(user_name, msg)
23
+ @slack.users_list['members'].each do |user|
24
+ if user['name'] == user_name
25
+ @slack.chat_postMessage({
26
+ :channel => RED.slack['channel'],
27
+ :text => "<@#{user['id']}> #{msg}",
28
+ :as_user => true
29
+ })
30
+ end
31
+ end
32
+ end
20
33
  end
21
34
  end
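
Taken together with `bin/notice` above, the reworked notice API boils down to something like the following sketch (the log path and user name here are made up for illustration; channel and token come from `config/redata.yml`):

```ruby
# Hypothetical usage sketch of the new Notice methods.
notice = Redata::Notice.new
notice.log "Nightly update finished", "log/update.log"   # message plus the log file contents in a code block
notice.mention "goshan", "please check today's load"     # message prefixed with <@user_id> for that user
```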
data/lib/redata/parser.rb CHANGED
@@ -1,146 +1,177 @@
1
1
  module Redata
2
2
  class Parser
3
- INCLUDE_REGEX = /#include (.*)-->(.*)/
4
- REF_REGEX = /{([^{}]+)}/
5
- REF_SPLIT_REGEX = /\s*{[^{}]+}\s*/
3
+ COMMENT_REGEX = /-{2}.*/
4
+ LOAD_REGEX = /#load (.*)->(.*)/
5
+ IF_REGEX = /\[\s*if ([^\s]*) is ([^\]]*)\]/
6
+ IFNUL_REGEX = /\[\s*if ([^\s]*) is null\s*\]/
7
+ ENDIF_REGEX = /\[\s*endif\s*\]/
6
8
  START_TIME_REGEX = /\[start_time\]/
9
+ END_TIME_REGEX = /\[end_time\]/
7
10
  TIME_OFFSET_REGEX = /\[(\d+) days ago\]/
8
11
  CURRENT_TIME_REGEX = /\[current_time\]/
9
- LOCALS_REGEX = /\[([^\[\]]+)\]/
12
+ LOCALS_REGEX = /\[([^\[\]<>\s]+)\]/
13
+ LOCALS_LIST_REGEX = /\[<([^\[\]<>\s]+)>\]/
10
14
 
11
- CONV_TABLE_REGEX = /source:(.*)/
12
- CONV_COLUMN_REGEX = /columns:\s*/
13
- CONV_SWITCHDEF_REGEX = /(.+){(.*)}/
14
- CONV_SWITCH_REGEX = /([^,]+)=>([^,]+)/
15
- CONV_TIMESTAMP_REGEX = /\[time_stamp\]/
16
-
17
- def self.gen_redshift_query(config, start_time=nil)
18
- Log.error! "ERROR: Query file '#{config.query_file.relative_path_from RED.root}' not exists" unless config.query_file.exist?
15
+ def self.gen_create_query(config)
16
+ if config.type == :table
17
+ self.gen_table_query config
18
+ elsif config.type == :view
19
+ self.gen_view_query config
20
+ end
21
+ end
19
22
 
20
- File.open config.tmp_script_file, 'w' do |f|
21
- if start_time && config.type == :table
22
- f.puts "INSERT INTO #{config.source_name} ("
23
- else
24
- start_time = RED.default_start_date
25
- f.puts "CREATE #{config.type} #{config.source_name} AS ("
26
- end
27
- self.parse_redshift_file config.query_file, f, start_time
28
- f.puts ");"
23
+ def self.gen_delete_query(config)
24
+ File.open config.tmp_exec_file, 'w' do |f|
25
+ f.puts "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'};"
29
26
  end
30
27
  end
31
28
 
32
- def self.gen_export_query(config, start_time=nil)
33
- Log.error! "ERROR: Convertor config '#{config.conv_file.relative_path_from RED.root}' not exists" unless config.conv_file.exist?
29
+ def self.gen_checkout_query(config)
30
+ Log.error! "ERROR: Only could checkout data from view" unless config.type == :view
34
31
 
35
- File.open config.tmp_script_file, 'w' do |f|
32
+ File.open config.tmp_exec_file, 'w' do |f|
36
33
  f.puts "UNLOAD ('"
37
- f.puts self.parse_convertor_file config.conv_file
38
- f.puts "where date >= \\'#{start_time}\\'" if start_time
34
+ f.puts "SELECT * FROM #{config.source_name}"
39
35
  f.puts "') to 's3://#{RED.s3['bucket']}/#{config.bucket_file}'"
40
36
  f.puts "CREDENTIALS 'aws_access_key_id=#{RED.s3['aws_access_key_id']};aws_secret_access_key=#{RED.s3['aws_secret_access_key']}'"
41
37
  f.puts "ESCAPE ALLOWOVERWRITE PARALLEL OFF DELIMITER AS '\\t';"
42
38
  end
43
39
  end
44
40
 
45
- def self.gen_adjust_file(query_file, tmp_script_file)
46
- Log.error! "ERROR: Query file '#{query_file.relative_path_from RED.root}' not exists" unless query_file.exist?
41
+ def self.gen_adjust_query(config)
42
+ self.parse config.query_file, config.tmp_exec_file, ''
43
+ end
44
+
45
+
46
+ private
47
+ def self.gen_table_query(config)
48
+ Log.error! "ERROR: Relation error" unless config.type == :table
47
49
 
48
- File.open tmp_script_file, 'w' do |f|
49
- self.parse_redshift_file query_file, f, RED.default_start_date
50
+ tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
51
+ temp_tables = self.parse config.query_file, tmp_file
52
+
53
+ File.open config.tmp_exec_file, 'w' do |f|
54
+ # print temp tables
55
+ temp_tables.each do |name|
56
+ f.puts "CREATE TEMP TABLE #{name} AS ("
57
+ f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
58
+ f.puts ");"
59
+ end
60
+
61
+ # print create or insert query
62
+ if RED.is_append
63
+ f.puts "INSERT INTO #{config.source_name} ("
64
+ else
65
+ f.puts "CREATE #{config.type} #{config.source_name} AS ("
66
+ end
67
+ f.puts File.read tmp_file
68
+ f.puts ");"
50
69
  end
51
70
  end
52
71
 
72
+ def self.gen_view_query(config)
73
+ Log.error! "ERROR: Relation error" unless config.type == :view
53
74
 
54
- private
55
- def self.parse_redshift_file(in_file, out, start_time)
56
- links = {}
57
- File.open(in_file).each.with_index do |line, index|
58
- if line =~ INCLUDE_REGEX
59
- # parse include syntax
60
- res = line.scan(INCLUDE_REGEX).first
61
- sub = res[0].gsub /[\s|\'|\"]+/, ''
62
- link = res[1].gsub /[\s|:]+/, ''
63
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: include query is missing file or alias" if sub.empty? || link.empty?
64
-
65
- sub_file = in_file.parent.join "_#{sub}.sql"
66
- sub_file = RED.root.join 'database', 'shared', "_#{sub}.sql" unless sub_file.exist?
67
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: included file _#{sub}.sql could not be found in ./ or {root}/database/shared/" unless sub_file.exist?
68
-
69
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: alias #{link} was declared multiple times" if links[link]
70
-
71
- links[link] = sub_file
72
- elsif line =~ REF_REGEX
73
- # parse {ref} syntax
74
- res = line.scan REF_REGEX
75
- refs = res.map{|r| r.first.gsub /\s+/, ''}
76
- origins = line.split REF_SPLIT_REGEX
77
-
78
- out.puts origins[0].gsub(';', '')
79
- refs.each_with_index do |ref, i|
80
- Log.error! "QUERY ERROR: #{in_file}:#{index+1}:\nsub query #{ref} not found." unless links[ref]
81
- out.puts "("
82
- self.parse_redshift_file links[ref], out, start_time
83
- out.puts ") as #{ref}"
84
- out.puts origins[i+1].gsub(';', '') if origins[i+1]
75
+ tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
76
+ temp_tables = self.parse config.query_file, tmp_file
77
+
78
+ File.open config.tmp_exec_file, 'w' do |f|
79
+ f.puts "CREATE #{config.type} #{config.source_name} AS ("
80
+ temp_tables.each_with_index do |name, index|
81
+ f.puts "#{index == 0 ? 'WITH' : ','} #{name} AS ("
82
+ f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
83
+ f.puts ")"
84
+ end
85
+
86
+ # print create query
87
+ main = File.read tmp_file
88
+ unless temp_tables.empty?
89
+ main.gsub! 'WITH', ','
90
+ main.gsub! 'with', ','
91
+ end
92
+ f.puts main
93
+ f.puts ");"
94
+ end
95
+ end
96
+
97
+ def self.parse(in_file, out_file, skip_char=';')
98
+ Log.error! "ERROR: Query file '#{in_file.relative_path_from RED.root}' not exists" unless in_file.exist?
99
+
100
+ temp_tables = []
101
+ parse_enable = true
102
+ File.open out_file, 'w' do |out|
103
+ File.open(in_file).each do |line|
104
+ # remove comments
105
+ line.gsub!(COMMENT_REGEX, '')
106
+ # remove skip_char
107
+ line.gsub!(skip_char, '')
108
+ # remove empty line
109
+ next if !line || line.empty? || line =~ /^\s*$/
110
+
111
+ # check if else condition
112
+ if line =~ IFNUL_REGEX
113
+ res = line.scan(IFNUL_REGEX).first
114
+ var = res[0]
115
+ parse_enable = RED.locals[var.to_sym].nil?
116
+ next
117
+ elsif line =~ IF_REGEX
118
+ res = line.scan(IF_REGEX).first
119
+ var = res[0]
120
+ val = res[1].gsub /[\s|\'|\"]+/, ''
121
+ parse_enable = (RED.locals[var.to_sym] == val)
122
+ next
123
+ elsif line =~ ENDIF_REGEX
124
+ parse_enable = true
125
+ next
126
+ end
127
+ next unless parse_enable
128
+
129
+ # compile sub file
130
+ if line =~ LOAD_REGEX
131
+ # parse load syntax
132
+ res = line.scan(LOAD_REGEX).first
133
+ sub = res[0].gsub /[\s|\'|\"]+/, ''
134
+ name = res[1].gsub /[\s|:]+/, ''
135
+ Log.error! "QUERY ERROR: syntax error for load query: #{line}" if sub.empty? || name.empty?
136
+
137
+ sub_file = in_file.parent.join "_#{sub}.red.sql"
138
+ sub_file = RED.root.join 'database', 'shared', "_#{sub}.red.sql" unless sub_file.exist?
139
+ sub_temp_tables = self.parse sub_file, out_file.dirname.join("#{name}.resql")
140
+ sub_temp_tables.each do |n|
141
+ temp_tables.push n unless temp_tables.include? n
142
+ end
143
+ temp_tables.push name unless temp_tables.include? name
144
+ next # load query line can not contain other content
85
145
  end
86
- elsif line =~ START_TIME_REGEX
146
+
87
147
  # parse [start_time] syntax
88
- out.puts line.gsub(START_TIME_REGEX, "'#{start_time}'").gsub(';', '')
89
- elsif line =~ TIME_OFFSET_REGEX
148
+ line.gsub! START_TIME_REGEX, "'#{RED.start_time}'"
149
+ # parse [end_time] syntax
150
+ line.gsub! END_TIME_REGEX, "'#{RED.end_time}'"
151
+ # parse [current_time] syntax
152
+ line.gsub! CURRENT_TIME_REGEX, "'#{RED.current_time}'"
153
+
90
154
  # parse [3 days ago]
91
155
  res = line.scan(TIME_OFFSET_REGEX).each do |res|
92
- line = line.gsub "[#{res[0]} days ago]", "#{RED.date_days_ago(res[0].to_i)}"
156
+ line.gsub! "[#{res[0]} days ago]", "'#{RED.date_days_ago(res[0].to_i)}'"
93
157
  end
94
- out.puts line
95
- elsif line =~ CURRENT_TIME_REGEX
96
- line = line.gsub "[current_time]", "#{RED.current_time}"
97
- out.puts line
98
- elsif line =~ LOCALS_REGEX
99
158
  # parse [locals] syntax
100
159
  line.scan(LOCALS_REGEX).each do |res|
101
160
  key = res.first
102
161
  Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
103
- line = line.gsub "[#{key}]", "'#{RED.locals[key.to_sym]}'"
162
+ line.gsub! "[#{key}]", "'#{RED.locals[key.to_sym]}'"
104
163
  end
105
- out.puts line.gsub ';', ''
106
- else
107
- # other, print absolutely
108
- out.puts line.gsub ';', ''
109
- end
110
- end
111
- end
112
-
113
- def self.parse_convertor_file(in_file)
114
- is_parsing_column = false
115
- columns = []
116
- source = ""
117
- File.open(in_file).each.with_index do |line, index|
118
- if line =~ CONV_TABLE_REGEX
119
- # parse table declare
120
- res = line.scan(CONV_TABLE_REGEX).first
121
- source = res[0].gsub /\s+/, ''
122
- is_parsing_column = false
123
- elsif line =~ CONV_COLUMN_REGEX
124
- is_parsing_column = true
125
- elsif is_parsing_column
126
- line.gsub! /\s+/, ''
127
- if line =~ CONV_SWITCHDEF_REGEX
128
- res = line.scan(CONV_SWITCHDEF_REGEX).first
129
- res[1].gsub!("'", "\\\\'")
130
- switches = res[1].scan CONV_SWITCH_REGEX
131
- switches.map! do |m|
132
- "when #{m[0]} then #{m[1]}"
133
- end
134
- columns.push "case #{res[0]} #{switches.join ' '} end as #{res[0]}"
135
- elsif line =~ CONV_TIMESTAMP_REGEX
136
- columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
137
- columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
138
- else
139
- columns.push line.gsub("'", "\\\\'").gsub('NULL', "\\\\'NULL\\\\'") unless line.empty?
164
+ # parse [<local_list>] syntax
165
+ line.scan(LOCALS_LIST_REGEX).each do |res|
166
+ key = res.first
167
+ Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
168
+ line = line.gsub "[<#{key}>]", "(#{RED.locals[key.to_sym].split(',').map{|e| "'#{e}'"}.join(',')})"
140
169
  end
170
+
171
+ out.puts line.gsub skip_char, ''
141
172
  end
142
173
  end
143
- "select #{columns.join ','} from #{source}"
174
+ temp_tables
144
175
  end
145
176
 
146
177
  end
@@ -1,6 +1,6 @@
1
1
  module Redata
2
2
  class Relation
3
- attr_accessor :category, :name, :key, :file, :dir, :type, :update_type
3
+ attr_accessor :category, :name, :key, :file, :dir, :type
4
4
  def initialize(category, name, setting)
5
5
  @category = category
6
6
  @name = name
@@ -13,5 +13,31 @@ module Redata
13
13
  @category == :main ? @key : "#{@category}_#{@key}".to_sym
14
14
  end
15
15
 
16
+ def source_name
17
+ @category == :main ? @name : "#{@category}_#{@name}"
18
+ end
19
+
20
+ def query_file
21
+ query_file = RED.root.join 'database', 'sources'
22
+ query_file = query_file.join @dir if @dir
23
+ query_file = query_file.join "#{@file}.red.sql"
24
+ query_file
25
+ end
26
+
27
+ def tmp_file_dir
28
+ RED.root.join 'tmp', "#{@category}_#{@name}"
29
+ end
30
+
31
+ def tmp_exec_file
32
+ self.tmp_file_dir.join "exec.sql"
33
+ end
34
+
35
+ def tmp_mkdir
36
+ Dir.mkdir self.tmp_file_dir unless self.tmp_file_dir.exist?
37
+ end
38
+
39
+ def tmp_rmdir
40
+ FileUtils.rm_r self.tmp_file_dir if !RED.keep_tmp? && self.tmp_file_dir.exist?
41
+ end
16
42
  end
17
43
  end
@@ -3,22 +3,6 @@ module Redata
3
3
  def initialize(category, name, setting)
4
4
  super category, name, setting
5
5
  @type = :table
6
- @update_type = setting[:update] || :renewal
7
- end
8
-
9
- def source_name
10
- @category == :main ? @name : "#{@category}_#{@name}"
11
- end
12
-
13
- def query_file
14
- query_file = RED.root.join 'database', 'sources'
15
- query_file = query_file.join @dir if @dir
16
- query_file = query_file.join "#{@file}.sql"
17
- query_file
18
- end
19
-
20
- def tmp_script_file
21
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
22
6
  end
23
7
  end
24
8
  end
@@ -3,22 +3,14 @@ module Redata
3
3
  def initialize(category, name, setting)
4
4
  super category, name, setting
5
5
  @type = :view
6
- @update_type = :renewal
7
6
  end
8
7
 
9
- def source_name
10
- @category == :main ? @name : "#{@category}_#{@name}"
8
+ def bucket_file
9
+ "#{RED.end_time}/#{@category}/#{@name}.tsv"
11
10
  end
12
11
 
13
- def query_file
14
- query_file = RED.root.join 'database', 'sources'
15
- query_file = query_file.join @dir if @dir
16
- query_file = query_file.join "#{@file}.sql"
17
- query_file
18
- end
19
-
20
- def tmp_script_file
21
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
12
+ def tmp_data_file
13
+ self.tmp_file_dir.join "#{@name}.tsv"
22
14
  end
23
15
  end
24
16
  end
@@ -18,7 +18,6 @@ require 'redata/bucket'
18
18
  require 'redata/relation'
19
19
  require 'redata/relation/table'
20
20
  require 'redata/relation/view'
21
- require 'redata/relation/export'
22
21
  require 'redata/schema'
23
22
  require 'redata/parser'
24
23
  require 'redata/tasks'
data/lib/redata/ssh.rb CHANGED
@@ -17,12 +17,16 @@ module Redata
17
17
  return false
18
18
  end
19
19
 
20
- def upload_file(file, target_file=nil)
21
- system "scp -i #{@ssh['IdentityFile']} #{file} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/#{target_file}"
20
+ def upload_dir(dir)
21
+ system "scp -r -i #{@ssh['IdentityFile']} #{dir} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/"
22
22
  end
23
23
 
24
24
  def run_command(cmd)
25
25
  system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"#{cmd}\""
26
26
  end
27
+
28
+ def remove_dir(dir)
29
+ system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"rm -rf #{dir}\""
30
+ end
27
31
  end
28
32
  end
data/lib/redata/tasks.rb CHANGED
@@ -8,59 +8,56 @@ module Redata
8
8
 
9
9
  def self.create_datasource(key)
10
10
  self.parse_key(key, [:table, :view]).each do |config|
11
- if RED.is_append && config.update_type == :append
12
- start_time = RED.locals[:start_time] || RED.default_append_date
13
- Parser.gen_redshift_query config, start_time
14
- Log.action "QUERY: Append data after #{start_time} into [#{config.source_name}]"
15
- DATABASE.connect_with_file config.tmp_script_file
11
+ config.tmp_mkdir
12
+ Parser.gen_create_query config
13
+ if RED.is_append
14
+ Log.action "APPEND<#{config.type}>: data(#{RED.start_time} ~ #{RED.end_time}) into [#{config.source_name}]"
16
15
  else
17
- Parser.gen_redshift_query config
18
- Log.action "QUERY: Create #{config.type} [#{config.source_name}]"
19
- DATABASE.connect_with_file config.tmp_script_file
16
+ Log.action "CREATE<#{config.type}>: [#{config.source_name}]"
20
17
  end
18
+ DATABASE.connect_redshift config
19
+ config.tmp_rmdir
21
20
  end
22
21
  end
23
22
 
24
23
  def self.delete_datasource(key)
25
24
  self.parse_key(key, [:table, :view]).reverse.each do |config|
26
- unless RED.is_append && config.update_type == :append
27
- Log.action "QUERY: Drop #{config.type} [#{config.source_name}]"
28
- Log.warning "WARNING: CASCADE mode will also drop other sources that depend on this #{config.type}" if RED.is_forced
29
- DATABASE.connect_with_query "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'}"
30
- end
25
+ config.tmp_mkdir
26
+ Parser.gen_delete_query config
27
+ Log.action "DROP<#{config.type}>: [#{config.source_name}]"
28
+ Log.warning "WARNING: CASCADE mode will also drop other views that depend on this" if RED.is_forced
29
+ DATABASE.connect_redshift config
30
+ config.tmp_rmdir
31
31
  end
32
32
  end
33
33
 
34
34
  def self.checkout_datasource(key)
35
- self.parse_key(key, [:export]).each do |config|
36
- if RED.is_append && config.update_type == :append
37
- start_time = RED.locals[:start_time] || RED.default_append_date
38
- Parser.gen_export_query config, start_time
39
- Log.action "QUERY: Checkout data after #{start_time} to bucket [#{config.bucket_file}]"
40
- else
41
- Parser.gen_export_query config
42
- Log.action "QUERY: Checkout data to bucket [#{config.bucket_file}]"
43
- end
44
- DATABASE.connect_with_file config.tmp_script_file
35
+ self.parse_key(key, [:view]).each do |config|
36
+ config.tmp_mkdir
37
+ Parser.gen_checkout_query config
38
+ Log.action "CHECKOUT<#{config.category}>: to bucket [#{config.bucket_file}]"
39
+ DATABASE.connect_redshift config
40
+
45
41
  bucket = S3Bucket.new
46
42
  bucket.move "#{config.bucket_file}000", config.bucket_file
43
+ config.tmp_rmdir
47
44
  end
48
45
  end
49
46
 
50
- def self.inject(key, platform=nil)
51
- self.parse_key(key, [:export]).each do |config|
52
- Log.action "BUCKET: Make [#{config.bucket_file}] public"
47
+ def self.deploy_datasource(key, stage)
48
+ self.parse_key(key, [:view]).each do |config|
49
+ config.tmp_mkdir
53
50
  bucket = S3Bucket.new
54
51
  bucket.make_public config.bucket_file, true
55
52
 
56
- Log.action "DOWNLOAD: Downlaod [#{config.bucket_file}] from bucket"
53
+ Log.action "DOWNLOAD<bucket>: from [#{config.bucket_file}]"
57
54
  system "wget #{RED.s3['host']}/#{config.bucket_file} -O #{config.tmp_data_file} --quiet"
58
55
 
59
- Log.action "BUCKET: Make [#{config.bucket_file}] private"
60
56
  bucket.make_public config.bucket_file, false
61
57
 
62
- Log.action "QUERY: Inject data to [#{config.name}] of #{config.category}"
63
- DATABASE.inject_to_mysql config, platform
58
+ Log.action "INJECT<#{config.category}>: with [#{config.name}] #{stage ? 'for stage '+stage : ''}"
59
+ DATABASE.inject_data config, stage
60
+ config.tmp_rmdir
64
61
  end
65
62
  end
66
63
 
@@ -73,7 +70,7 @@ module Redata
73
70
  configs = @@schema.category_configs(key, types)
74
71
  if configs.empty?
75
72
  config = @@schema.config_with key if key
76
- Log.error! "ERROR: Data source relation #{key} was not defined in config/relations.rb" unless config
73
+ Log.error! "ERROR: Data source relation #{key} was not defined" unless config
77
74
  configs.push config
78
75
  end
79
76
  configs
@@ -1,3 +1,3 @@
1
1
  module Redata
2
- VERSION = "0.1.0"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - goshan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-09 00:00:00.000000000 Z
11
+ date: 2017-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,7 +136,6 @@ files:
136
136
  - lib/redata/notice.rb
137
137
  - lib/redata/parser.rb
138
138
  - lib/redata/relation.rb
139
- - lib/redata/relation/export.rb
140
139
  - lib/redata/relation/table.rb
141
140
  - lib/redata/relation/view.rb
142
141
  - lib/redata/requires.rb
@@ -1,30 +0,0 @@
1
- module Redata
2
- class Export < Relation
3
- def initialize(category, name, setting)
4
- super category, name, setting
5
- @type = :export
6
- @update_type = setting[:update] || :renewal
7
- end
8
-
9
- def conv_file
10
- conv_file = RED.root.join 'database', 'convertors'
11
- conv_file = conv_file.join @dir if @dir
12
- conv_file = conv_file.join "#{@file}.conv"
13
- conv_file
14
- end
15
-
16
- def tmp_script_file
17
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
18
- end
19
-
20
- def tmp_data_file
21
- RED.root.join 'tmp', "data", "#{@name}.tsv"
22
- end
23
-
24
- def bucket_file
25
- bucket_dir = RED.default_append_date
26
- bucket_dir = RED.locals[:start_time] if RED.is_append && @update_type == :append && RED.locals[:start_time]
27
- "#{bucket_dir}/#{@category}/#{@name}.tsv"
28
- end
29
- end
30
- end