redata 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 58c04577fb55ac865dda405347811acf421dac3e
4
- data.tar.gz: a1a33fb66a93f198aca56731417c2a2d90b65192
3
+ metadata.gz: ba347eb745baeb8cf5ffee03842482bfcc2b98d7
4
+ data.tar.gz: cb86f147ae139d3e87af8afa6e74a823abe02c02
5
5
  SHA512:
6
- metadata.gz: 199b5da361e782c96ba4dbb1c5e465ca3fd7e9a8f4471776efb0915408ec9f43428aadfda25f5037e883c4798bbec4633c2e0aa7ddb76d59daafc3e8c0ed56dc
7
- data.tar.gz: 551db6a70bbcdfd29a39aed84d69258d883afcbede934bc5f7fc08ed165b8c3978671f2cba2e8d26ee3bc5a6a8349cc4cde86028824241b01dfce1fa6e6dd092
6
+ metadata.gz: 47cabaf3d9fcab6058a587c823eeee1a6bc39a33051b7675e176dcd7b812e33678ecd89e2f4b4463747813ade4f287947119623b7ec19b45cbf9dbf316a87dc4
7
+ data.tar.gz: 13991f7d3daa93f15f31bd1e6654147098c98f36f7d1eb19f2003f58792ede7c9b806be33571593d138db5f2425ef9e63444103b8ff9063d98778e14bfdd9dc7
data/README.md CHANGED
@@ -26,22 +26,25 @@ Or install it yourself as:
26
26
 + config `config/redata.yml` for general settings
27
27
 
28
28
  ```YAML
29
- start_date: "2016-04-04" # default data start date
29
+ create_interval: # default date range for create mode
30
+ start_time: "2016-04-04"
31
+ end_time: 2 # days ago
32
+ append_interval: # date fetching interval for append mode
33
+ start_time: 3 # days ago
34
+ end_time: 2 # days ago
30
35
  timezone: "Asia/Tokyo"
36
+ keep_tmp: true # or false, whether to keep the temp query files in ./tmp after the query finishes
31
37
  s3:
32
- aws_access_key_id: {key_id}
33
- aws_secret_access_key: {key_secret}
34
- region: {s3_region}
35
- bucket:
36
- production: {bucket_name}
37
- development: {bucket_name}
38
+ bucket: bucket_name
39
+ aws_access_key_id: key_id
40
+ aws_secret_access_key: key_secret
38
41
  ssh: # this setting will be used in ssh mode when you access private database
39
- HostName: {gateway_host}
40
- IdentityFile: {~/.ssh/key.pem}
41
- User: {username}
42
+ HostName: gateway_host
43
+ IdentityFile: ~/.ssh/key.pem
44
+ User: username
42
45
  slack_bot: # this setting will be used for slack notice push
43
- token: {bot_token}
44
- channel: {slack_channel}
46
+ token: bot_token
47
+ channel: slack_channel
45
48
  ```
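For example, with the settings above and a hypothetical run date of 2017-02-23, append mode would fetch the window from 2017-02-20 (3 days ago) to 2017-02-21 (2 days ago), while create mode would use 2016-04-04 to 2017-02-21; the dates are resolved in the configured timezone.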
46
49
 
47
50
 + config `config/database.yml` for the development and production environments of the redshift database
@@ -53,12 +56,15 @@ development:
53
56
  username: user
54
57
  password: ''
55
58
  database: dev
56
- export: # target platform db(mysql) which export data to
57
- app: # platform name
58
- username: root
59
- password: ''
60
- host: localhost
61
- database: app
59
+ deploy: # target db (mysql) or local file which data is exported to
60
+ app: # category name, using database
61
+ pro: # stage name (you can also declare the settings directly under the category)
62
+ username: root
63
+ password: ''
64
+ host: localhost
65
+ database: app
66
+ file: # another category, using local file
67
+ local_dir: '~/data'
62
68
  ```
63
69
 
64
70
  + config `config/relations.rb` for data object in redshift and exporting process to mysql
@@ -69,60 +75,38 @@ Redata::Task.schema.config do
69
75
  # Example of declaring a global table
70
76
  table 'table_name'
71
77
  # This declaration means
72
- # query file: database/sources/table_name.sql
78
+ # query file: database/sources/table_name.red.sql
73
79
  # redshift table: table_name
74
- # update type: renewal, delete and re-create when update
75
80
  # key used in command line: table_name
76
-
81
+
77
82
  # Example of declaring a global table with customizing options
78
- table 'new_table_name', :dir => 'dir', :file => 'query_file', :update => :append, :as => :alias
83
+ table 'new_table_name', :dir => 'dir', :file => 'query_file', :as => :alias
79
84
  # This declaration means
80
- # query file: database/sources/dir/query_file.sql
85
+ # query file: database/sources/dir/query_file.red.sql
81
86
  # redshift table: new_table_name
82
- # update type: append, only appending to existing table
83
87
  # key used in command line: alias
84
-
85
- # view is same to table but the update type only has renewal mode
86
- table 'view_name'
87
- table 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
88
-
88
+
89
+ # view is the same as table but will still be created in append mode
90
+ view 'view_name'
91
+ view 'new_view_name', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
92
+
89
93
  # Example of declaring with category
90
94
  category :test_category do
91
95
  table 'test_table'
92
96
  # This declaration means
93
- # query file: database/sources/test_category/test_table.sql
97
+ # query file: database/sources/test_category/test_table.red.sql
94
98
  # redshift table: test_category_test_table
95
- # update type: renewal
96
99
  # key used in command line: test_category_test_table
97
-
98
- table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :update => append, :as => :alias_oth
100
+
101
+ table 'test_table_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_oth
99
102
  # This declaration means
100
- # query file: database/sources/dir/query_file_oth.sql
103
+ # query file: database/sources/dir/query_file_oth.red.sql
101
104
  # redshift table: test_category_test_table
102
- # update type: append
103
105
  # key used in command line: test_category_alias_oth
104
-
106
+
105
107
 # view is the same as table but without the appending update type
106
108
  view 'test_view'
107
109
  view 'test_view_oth', :dir => 'dir', :file => 'query_file_oth', :as => :alias_view_oth
108
-
109
- #Example of convertor declaration
110
- export 'test_export'
111
- # This declaration means
112
- # convertor file: database/convertors/test_category/test_export.conv
113
- # target mysql database name: test_category (Also see: export config in config/database.yml{:export})
114
- # target mysql table: test_export
115
- # update type: renewal, delete all records and insert new records
116
- # key used in command line: test_category_test_export
117
-
118
- #Example of convertor declaration
119
- export 'test_export', :dir => 'dir', :file => 'conv_file', :update => 'append', :as => 'alias_export'
120
- # This declaration means
121
- # convertor file: database/convertors/dir/conv_file.conv
122
- # target mysql database name: test_category
123
- # target mysql table: test_export
124
- # update type: append, append insert new records without deleting
125
- # key used in command line: test_category_alias_export
126
110
  end
127
111
 
128
112
  end
@@ -130,50 +114,40 @@ end
130
114
 
131
115
  ### Query file
132
116
 
133
- Query file was used for create table of view in redshift. It is almost like PostgreSQL file but with same new feature. And you have no need to write a create table/view query, the result after running query file will used to create a new table/view, for table, if you use append mode, the result will only be append-inserted to table.
117
+ Query files are used to create tables or views in redshift. They are almost plain PostgreSQL files but with some new features. You do not need to write a create table/view query yourself: the result of running the query file is used to create the new table/view. For a table in append mode, the result is only append-inserted into the existing table.
134
118
  eg.
135
119
 
136
120
  ```SQL
137
- -- query file in data/sources/...
121
+ -- query file in database/sources/*.red.sql
138
122
 
139
- #include 'sub_query_a' --> :a -- include a sub query as object a from _sub_query_a.sql in same folder or database/shared/
140
- #include 'sub_query_b' --> :b
123
+ #load 'sub_query_a' --> :a -- include a sub query as object a from _sub_query_a.red.sql in same folder
124
+ #load 'sub_query_b' --> :b
141
125
 
142
126
 
127
+ -- you can use if logic to control whether part of a query is run
128
+ -- 'endif' closes one or more consecutive if blocks above (a second if before an endif behaves like 'else if')
129
+ -- TIPS: 'else if', 'else' syntax and nested if logic are not supported yet
130
+ [if var is 'value1']
143
131
  select a.col1, a.col2, b.col1, b.col2, b.col3
132
+ [if var is 'value1']
133
+ select a.col3, b.col4
134
+ [endif]
144
135
  from {a} -- use object a included from sub query file '_sub_query_a.sql'
145
136
  join {b} on b.col1 = a.col1
146
- -- If in append mode and this table was setted appending update type, then start_time getting from command input such as `-start_time 2016-11-08` will be used here. When missing input this param, as default [2 days ago] will be used.
147
- -- Or if not append mode, start_date will be used as default (Also see config/redata.yml). set start_time when running command , if missing in command, default_start_date will be used
137
+ -- For [start_time] and [end_time], there are 3 options.
138
+ -- use command params when set
139
+ -- in append mode, use [append_interval][start_time] or [append_interval][end_time] (See config/redata.yml).
140
+ -- in create mode, use [create_interval][start_time] or [create_interval][end_time] (See config/redata.yml).
148
141
  where a.col1 >= [start_time]
149
- -- current time in setted timezone will be used (About timezon, also see config/redata.yml)
150
- and a.col1 <= [current_time]
142
+ and a.col1 < [end_time]
151
143
 -- some params are taken from command input, such as `-param_from_command param_value`
152
144
  and a.col2 = [param_from_command]
145
+ -- the current time in the configured timezone will be used (about timezone, also see config/redata.yml)
146
+ and b.col2 <= [current_time]
153
147
 -- x days before today, x must be an integer
154
148
  and b.col3 >= [x days ago]
155
149
  ```
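A sub query file referenced by `#load` is itself a plain query file containing just a SELECT (no CREATE statement), and it may use the same `[...]` placeholders. A minimal sketch of what `_sub_query_a.red.sql` could look like, with a hypothetical source table name:

```SQL
-- _sub_query_a.red.sql (hypothetical example)
select col1, col2
from source_table_a
where col1 >= [start_time]
```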
156
150
 
157
- ### Convertor config file
158
-
159
- Convertor file was used to generate a select query to get data from redshift and unload to S3. But you have no need to wirte a unload query. If you are using append mode, only data 2 days ago will be select.
160
- eg.
161
-
162
- ```
163
- source: redshift_source_table_or_view
164
- columns:
165
- cm_id
166
- segment_type{'C' => 0, 'T' => 1, 'M1' => 2, 'M2' => 3, 'M3' => 4, 'F1' => 5, 'F2' => 6, 'F3' => 7}
167
- v
168
- e
169
- base_ai
170
- sample_num
171
- grp
172
- ```
173
-
174
- > convertor config file in `data/convertors/...`
175
- > `source` means the source table in redshift
176
- > `columns` means the source columns in source table
177
151
 
178
152
  ### Command
179
153
 
@@ -184,32 +158,28 @@ There are 3 executable file in bin/
184
158
 
185
159
  #### redata
186
160
 
187
- Usage: `redata [-options] [action] [object key] {platform}`
161
+ Usage: `redata [-options] [action] [object key] {stage}`
188
162
  + action
189
163
  - create --> create a table/view or append data to table in redshift
190
164
  - delete --> delete a table/view in redshift
191
165
  - checkout --> export data in table/view of redshift into S3
192
- - inject --> import data into mysql table from S3
193
- + object key --> object will be create/delete/checkout/inject declared in `config/relation.rb`
194
- + platform --> when injecting data into mysql, there may be several platform declared in `config/database.yml{:export}` for same database, here is setting which platform to use. *If the platform here could not be found in `database.yml` or have not set platform, the default export will be used.*
166
+ - deploy --> deploy data from S3 to local db or file
167
+ + object key --> the object declared in `config/relations.rb` to create/delete/checkout/deploy
168
+ + stage --> when injecting data into mysql, there may be several stages declared in `config/database.yml{:deploy}` for the same database; this chooses which stage to use.
195
169
  + options
196
170
 - -dir --> project directory, both absolute and relative paths are okay. default is the current directory.
197
171
  - -e --> environment: `production`, `development`, etc.
198
172
 - -f --> force mode, use `CASCADE` when removing a view or table in redshift
199
173
 - -ssh --> use ssh to access a private database, with the ssh config in `config/redata.yml`
200
- - -append_mode --> use `append_mode`, the objects in relations.rb with appending update type will go to appending operation.
201
- + delete will only delete objects with renewal update type
202
- + create will append-insert data after `-start_time`(set in command) or default `2 days ago` for appending update type, still create table/view for renewal type
203
- + checkout will only fetch data after `-start_time` or default `2 days ago` to upload to S3, renewal type will still be uploaded all data
204
- + inject will insert data to mysql without `--delete` option, renewal still delete all firstly
174
+ - -append --> use append mode: append new data into an existing redshift table, or inject into the local db without deleting. views have no append mode.
205
175
 - other options --> other params that will be used in the query file when declared, such as `start_time`
206
176
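A few invocation sketches, assuming the example objects declared in `config/relations.rb` above and the `pro` stage from `config/database.yml` (keys, environments and stage names are illustrative):

```
# create the declared table in the development environment
redata -e development create test_category_test_table
# append the configured interval of new data instead of re-creating the table
redata -e development -append create test_category_test_table
# export a declared view to S3, then deploy it to the `pro` mysql stage
redata -e production checkout test_category_test_view
redata -e production deploy test_category_test_view pro
```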
 
207
177
  #### adjust
208
178
 
209
- Use adjust when you just want to run a query file without declaring in `config/relations.rb`
179
+ Use adjust when you just want to run a query file without declaring it in `config/relations.rb`
210
180
  Usage: `adjust [-options] [database] [query file] {platform}`
211
- + database --> `redshift` or database declared in `config/database.yml{export}`
212
- + query file --> query file which will be run in `database/adjust/`, **without extends `.sql`**
181
+ + database --> `redshift` or database declared in `config/database.yml{:deploy}`
182
+ + query file --> the query file in `database/adjust/` to run, **without the `.red.sql` extension**
213
183
 + platform --> same as for `redata`
214
184
  + options
215
185
 - -dir --> project directory, both absolute and relative paths are okay. default is the current directory.
@@ -220,9 +190,9 @@ Usage: `adjust [-options] [database] [query file] {platform}`
220
190
  #### notice
221
191
 
222
192
  Usage: `notice [-options] [action]`
223
- + action: currently, there is only `update` action which means send 'finish updating' message to slack
224
- + options
225
- - -e --> environment: `production`, `development`, etc. **Only production could send notice**
193
+ + action
194
+ - log --> send a message to slack, optionally attaching the contents of a log file
195
+ - mention --> send a message to slack mentioning someone
226
196
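Judging from `bin/notice`, both actions take two further positional arguments; a sketch with illustrative values:

```
# post a message, attaching the contents of a log file as a code block
notice log "Nightly update finished" log/redata.log
# post a message that mentions a slack user by name
notice mention some_user "Nightly update finished"
```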
 
227
197
  ## Contributing
228
198
 
@@ -231,5 +201,25 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/goshan
231
201
 
232
202
  ## License
233
203
 
234
- The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
204
+ Copyright 2013, Han Qiu(goshan), All rights reserved.
205
+
206
+ MIT License
207
+
208
+ Permission is hereby granted, free of charge, to any person obtaining a copy
209
+ of this software and associated documentation files (the "Software"), to deal
210
+ in the Software without restriction, including without limitation the rights
211
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
212
+ copies of the Software, and to permit persons to whom the Software is
213
+ furnished to do so, subject to the following conditions:
214
+
215
+ The above copyright notice and this permission notice shall be included in
216
+ all copies or substantial portions of the Software.
217
+
218
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
219
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
220
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
221
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
222
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
223
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
224
+ THE SOFTWARE.
235
225
 
data/bin/adjust CHANGED
@@ -3,15 +3,20 @@
3
3
  require File.expand_path '../../lib/redata', __FILE__
4
4
 
5
5
 
6
- query_file = Redata::RED.root.join 'database', 'adjust', "#{Redata::RED.params[1]}.sql"
7
- tmp_script_file = Redata::RED.root.join 'tmp', 'queries', "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}.sql"
6
+ temp_config = OpenStruct.new
7
+ temp_config.query_file = Redata::RED.root.join 'database', 'adjust', "#{Redata::RED.params[1]}.sql"
8
+ temp_config.tmp_file_dir = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}"
9
+ temp_config.tmp_exec_file = Redata::RED.root.join "tmp", "adj_#{Redata::RED.params[0]}_#{Redata::RED.params[1]}", "exec.sql"
8
10
 
9
- Redata::Log.action "QUERY: Run query file [#{query_file.relative_path_from Redata::RED.root}] in #{Redata::RED.params[0]}"
10
- Redata::Parser.gen_adjust_file query_file, tmp_script_file
11
+ Dir.mkdir temp_config.tmp_file_dir unless temp_config.tmp_file_dir.exist?
12
+
13
+ Redata::Log.action "ADJUST<#{Redata::RED.params[0]}>: use [#{temp_config.query_file.relative_path_from Redata::RED.root}]"
14
+ Redata::Parser.gen_adjust_query temp_config
11
15
 
12
16
  if Redata::RED.params[0] == "redshift"
13
- Redata::DATABASE.connect_with_file tmp_script_file
17
+ Redata::DATABASE.connect_redshift temp_config
14
18
  else
15
- Redata::DATABASE.connect_mysql_with_file tmp_script_file, Redata::RED.params[0], Redata::RED.params[2]
19
+ Redata::DATABASE.connect_mysql temp_config.tmp_exec_file, Redata::RED.params[0], Redata::RED.params[2]
16
20
  end
17
21
 
22
+ FileUtils.rm_r temp_config.tmp_file_dir if temp_config.tmp_file_dir.exist?
data/bin/notice CHANGED
@@ -6,15 +6,13 @@ require File.expand_path '../../lib/redata/notice', __FILE__
6
6
 
7
7
 
8
8
 
9
- if ['update'].include? Redata::RED.params[0]
9
+ if ['log', 'mention'].include? Redata::RED.params[0]
10
10
  notice = Redata::Notice.new
11
11
  case Redata::RED.params[0]
12
- when 'update'
13
- if Redata::RED.production?
14
- notice.send "今日の自動更新を完了しました!\n今のデータ期間は `2015-07-29 ~ #{Redata::RED.date_days_ago 3}` になってます"
15
- else
16
- Redata::Log.warning "WARNING: Could send notice only in production env"
17
- end
12
+ when 'log'
13
+ notice.log Redata::RED.params[1], Redata::RED.params[2]
14
+ when 'mention'
15
+ notice.mention Redata::RED.params[1], Redata::RED.params[2]
18
16
  end
19
17
  end
20
18
 
data/bin/redata CHANGED
@@ -5,7 +5,7 @@ require File.expand_path '../../lib/redata', __FILE__
5
5
  require Redata::RED.root.join('config', 'relations.rb').to_s
6
6
 
7
7
 
8
- if ['create', 'delete', 'checkout', 'inject'].include? Redata::RED.params[0]
8
+ if ['create', 'delete', 'checkout', 'deploy'].include? Redata::RED.params[0]
9
9
  case Redata::RED.params[0]
10
10
  when 'create'
11
11
  Redata::Task.create_datasource Redata::RED.params[1]
@@ -13,8 +13,8 @@ if ['create', 'delete', 'checkout', 'inject'].include? Redata::RED.params[0]
13
13
  Redata::Task.delete_datasource Redata::RED.params[1]
14
14
  when 'checkout'
15
15
  Redata::Task.checkout_datasource Redata::RED.params[1]
16
- when 'inject'
17
- Redata::Task.inject Redata::RED.params[1], Redata::RED.params[2]
16
+ when 'deploy'
17
+ Redata::Task.deploy_datasource Redata::RED.params[1], Redata::RED.params[2]
18
18
  end
19
19
  end
20
20
 
data/lib/redata/config.rb CHANGED
@@ -22,7 +22,8 @@ module Redata
22
22
  # config file
23
23
  @config = YAML.load(ERB.new(File.read(@root.join 'config', 'redata.yml')).result(binding))
24
24
  @s3_config = @config['s3']
25
- @s3_config['bucket'] = @s3_config['bucket'][@env]
25
+ @s3_config['bucket'] += "-dev" unless @env == 'production'
26
+ @s3_config['region'] = 'ap-northeast-1'
26
27
  @s3_config['host'] = "https://s3-#{@s3_config['region']}.amazonaws.com/#{@s3_config['bucket']}"
27
28
  Aws.config.update({
28
29
  region: @s3_config['region'],
@@ -30,6 +31,7 @@ module Redata
30
31
  })
31
32
  @tz_local = Timezone[@config['timezone']]
32
33
  @slack_token = @config['slack_bot']
34
+ @keep_tmp = @config['keep_tmp']
33
35
  end
34
36
 
35
37
  def development?
@@ -40,8 +42,26 @@ module Redata
40
42
  @env == 'production'
41
43
  end
42
44
 
43
- def default_start_date
44
- @config['start_date']
45
+ def keep_tmp?
46
+ @keep_tmp
47
+ end
48
+
49
+ def start_time
50
+ return @locals[:start_time] if @locals[:start_time]
51
+ if @is_append
52
+ @tz_local.utc_to_local(Time.now.utc-@config['append_interval']['start_time']*24*3600).strftime('%Y-%m-%d')
53
+ else
54
+ @config['create_interval']['start_time']
55
+ end
56
+ end
57
+
58
+ def end_time
59
+ return @locals[:end_time] if @locals[:end_time]
60
+ if @is_append
61
+ @tz_local.utc_to_local(Time.now.utc-@config['append_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
62
+ else
63
+ @tz_local.utc_to_local(Time.now.utc-@config['create_interval']['end_time']*24*3600).strftime('%Y-%m-%d')
64
+ end
45
65
  end
46
66
 
47
67
  def ssh
@@ -60,11 +80,6 @@ module Redata
60
80
  @tz_local.utc_to_local(Time.now.utc).strftime('%Y-%m-%d %H:%M:%S')
61
81
  end
62
82
 
63
- def default_append_date
64
- # 2 days ago bacause there is only data 2 days ago in redshift
65
- @tz_local.utc_to_local(Time.now.utc-2*24*3600).strftime('%Y-%m-%d')
66
- end
67
-
68
83
  def date_days_ago(days)
69
84
  @tz_local.utc_to_local(Time.now.utc-days*24*3600).strftime('%Y-%m-%d')
70
85
  end
@@ -76,9 +91,6 @@ module Redata
76
91
  i = 0
77
92
  while i < argv.count
78
93
  case argv[i]
79
- when '-dir'
80
- i += 1
81
- new_argv[:dir] = argv[i]
82
94
  when '-e'
83
95
  i += 1
84
96
  new_argv[:env] = argv[i]
@@ -86,11 +98,11 @@ module Redata
86
98
  new_argv[:force] = true
87
99
  when '-ssh'
88
100
  new_argv[:ssh] = true
89
- when '-append_mode'
101
+ when '-append'
90
102
  new_argv[:append_mode] = true
91
103
  else
92
- if argv[i] =~ /-(.+)/
93
- key = argv[i].match(/-(.+)/)[1]
104
+ if argv[i] =~ /\A-(.+)/
105
+ key = argv[i].match(/\A-(.+)/)[1]
94
106
  i += 1
95
107
  new_argv[:locals][key.to_sym] = argv[i]
96
108
  else
@@ -9,59 +9,44 @@ module Redata
9
9
  @ssh = Ssh.new
10
10
  end
11
11
 
12
- def connect_with_file(file)
12
+ def connect_redshift(config)
13
13
  cmd = make_redshift_cmd
14
14
  if @ssh.run_with_ssh?
15
- @ssh.upload_file file
16
- @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{file.basename}"
15
+ @ssh.upload_dir config.tmp_file_dir
16
+ @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -f ~/tmp/#{config.tmp_file_dir.basename}/exec.sql"
17
+ @ssh.remove_dir "~/tmp/#{config.tmp_file_dir.basename}"
17
18
  else
18
- system "#{cmd} -f #{file}"
19
+ system "#{cmd} -f #{config.tmp_exec_file}"
19
20
  end
20
21
  end
21
22
 
22
- def connect_with_query(query)
23
- cmd = make_redshift_cmd
24
- if @ssh.run_with_ssh?
25
- @ssh.run_command "export PGPASSWORD='#{ENV['PGPASSWORD']}';#{cmd} -c '#{query}'"
26
- else
27
- system "#{cmd} -c '#{query}'"
28
- end
29
- end
23
+ def inject_data(config, stage)
24
+ target_config = @config['deploy'][config.category.to_s]
25
+ Log.error! "ERROR: Export config of #{config.category} was not found" unless target_config
30
26
 
31
- def inject_to_mysql(config, platform)
32
- if @ssh.run_with_ssh?
33
- @ssh.upload_file config.tmp_data_file, config.name
34
- data_file = "~/tmp/#{config.name}"
35
- else
36
- data_file = config.tmp_data_file
37
- end
27
+ target_config = target_config[stage] if stage
28
+ Log.error! "ERROR: Export config of #{config.category} for stage #{stage} was not found" unless target_config
38
29
 
39
- is_append = RED.is_append && config.update_type == :append
40
- cmd = "mysqlimport #{make_mysql_cmd_config(config.category.to_s, platform)} #{data_file} --local #{is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"
41
-
42
- if @ssh.run_with_ssh?
43
- @ssh.run_command cmd
30
+ if target_config['local_dir']
31
+ cmd = "mv #{config.tmp_data_file} #{target_config['local_dir']}/#{config.source_name}.tsv"
32
+ elsif target_config['database']
33
+ import_params = "--local #{RED.is_append ? '' : '--delete'} --fields-terminated-by='\\t' --fields-enclosed-by='\\\"' --lines-terminated-by='\\n'"
34
+ cmd = "mysqlimport #{make_mysql_cmd_params(target_config)} #{config.tmp_data_file} #{import_params}"
44
35
  else
45
- system "#{cmd}"
36
+ Log.error! "ERROR: Export config of #{config.category} has neither 'local_dir' nor 'database'"
46
37
  end
47
-
38
+ system cmd
48
39
  end
49
40
 
50
- def connect_mysql_with_file(query_file, category, platform)
51
- if @ssh.run_with_ssh?
52
- @ssh.upload_file query_file, query_file.basename
53
- data_file = "~/tmp/#{query_file.basename}"
54
- else
55
- data_file = query_file
56
- end
41
+ def connect_mysql(query_file, category, stage)
42
+ target_config = @config['deploy'][category.to_s]
43
+ Log.error! "ERROR: Export config of #{category} was not found" unless target_config
57
44
 
58
- cmd = "mysql #{make_mysql_cmd_config(category, platform)} < #{data_file}"
45
+ target_config = target_config[stage] if stage
46
+ Log.error! "ERROR: Export config of #{category} for stage #{stage} was not found" unless target_config
59
47
 
60
- if @ssh.run_with_ssh?
61
- @ssh.run_command cmd
62
- else
63
- system cmd
64
- end
48
+ cmd = "mysql #{make_mysql_cmd_params(target_config)} < #{query_file}"
49
+ system cmd
65
50
  end
66
51
 
67
52
  private
@@ -70,18 +55,8 @@ module Redata
70
55
  return "psql -h #{@config['host']} -p #{REDSHIFT_PORT} -U #{@config['username']} -d #{@config['database']}"
71
56
  end
72
57
 
73
- def make_mysql_cmd_config(category, platform)
74
- export_db_config = @config['export'][category]
75
- Log.error! "ERROR: Export config of #{category} was not found in config/database.yml" unless export_db_config
76
- if platform
77
- if export_db_config[platform]
78
- export_db_config = export_db_config[platform]
79
- else
80
- Log.warning "WARNING: Platform #{platform} was not declared in config/database.yml, ignore platform setting"
81
- end
82
- end
83
-
84
- return "-h#{export_db_config['host']} -u#{export_db_config['username']} #{export_db_config['password'].empty? ? '' : '-p'+export_db_config['password']} #{export_db_config['database']}"
58
+ def make_mysql_cmd_params(db_config)
59
+ return "-h#{db_config['host']} -u#{db_config['username']} #{db_config['password'].empty? ? '' : '-p'+db_config['password']} #{db_config['database']}"
85
60
  end
86
61
 
87
62
  end
data/lib/redata/notice.rb CHANGED
@@ -10,12 +10,25 @@ module Redata
10
10
  Log.error! "ERROR: slack channel #{RED.slack['channel']} not exists" unless channel_exist
11
11
  end
12
12
 
13
- def send(msg)
13
+ def log(msg, log=nil)
14
+ log_content = "```\n#{File.read(log).split("\n").map{|line| line.gsub(/\[0;\d{2};\d{2}m/, '').gsub(/\[0m/, '')}.join("\n")}\n```" if log
14
15
  @slack.chat_postMessage({
15
16
  :channel => RED.slack['channel'],
16
- :text => "<!here> #{msg}",
17
+ :text => "#{msg}\n#{log_content}",
17
18
  :as_user => true
18
19
  })
19
20
  end
21
+
22
+ def mention(user_name, msg)
23
+ @slack.users_list['members'].each do |user|
24
+ if user['name'] == user_name
25
+ @slack.chat_postMessage({
26
+ :channel => RED.slack['channel'],
27
+ :text => "<@#{user['id']}> #{msg}",
28
+ :as_user => true
29
+ })
30
+ end
31
+ end
32
+ end
20
33
  end
21
34
  end
data/lib/redata/parser.rb CHANGED
@@ -1,146 +1,177 @@
1
1
  module Redata
2
2
  class Parser
3
- INCLUDE_REGEX = /#include (.*)-->(.*)/
4
- REF_REGEX = /{([^{}]+)}/
5
- REF_SPLIT_REGEX = /\s*{[^{}]+}\s*/
3
+ COMMENT_REGEX = /-{2}.*/
4
+ LOAD_REGEX = /#load (.*)->(.*)/
5
+ IF_REGEX = /\[\s*if ([^\s]*) is ([^\]]*)\]/
6
+ IFNUL_REGEX = /\[\s*if ([^\s]*) is null\s*\]/
7
+ ENDIF_REGEX = /\[\s*endif\s*\]/
6
8
  START_TIME_REGEX = /\[start_time\]/
9
+ END_TIME_REGEX = /\[end_time\]/
7
10
  TIME_OFFSET_REGEX = /\[(\d+) days ago\]/
8
11
  CURRENT_TIME_REGEX = /\[current_time\]/
9
- LOCALS_REGEX = /\[([^\[\]]+)\]/
12
+ LOCALS_REGEX = /\[([^\[\]<>\s]+)\]/
13
+ LOCALS_LIST_REGEX = /\[<([^\[\]<>\s]+)>\]/
10
14
 
11
- CONV_TABLE_REGEX = /source:(.*)/
12
- CONV_COLUMN_REGEX = /columns:\s*/
13
- CONV_SWITCHDEF_REGEX = /(.+){(.*)}/
14
- CONV_SWITCH_REGEX = /([^,]+)=>([^,]+)/
15
- CONV_TIMESTAMP_REGEX = /\[time_stamp\]/
16
-
17
- def self.gen_redshift_query(config, start_time=nil)
18
- Log.error! "ERROR: Query file '#{config.query_file.relative_path_from RED.root}' not exists" unless config.query_file.exist?
15
+ def self.gen_create_query(config)
16
+ if config.type == :table
17
+ self.gen_table_query config
18
+ elsif config.type == :view
19
+ self.gen_view_query config
20
+ end
21
+ end
19
22
 
20
- File.open config.tmp_script_file, 'w' do |f|
21
- if start_time && config.type == :table
22
- f.puts "INSERT INTO #{config.source_name} ("
23
- else
24
- start_time = RED.default_start_date
25
- f.puts "CREATE #{config.type} #{config.source_name} AS ("
26
- end
27
- self.parse_redshift_file config.query_file, f, start_time
28
- f.puts ");"
23
+ def self.gen_delete_query(config)
24
+ File.open config.tmp_exec_file, 'w' do |f|
25
+ f.puts "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'};"
29
26
  end
30
27
  end
31
28
 
32
- def self.gen_export_query(config, start_time=nil)
33
- Log.error! "ERROR: Convertor config '#{config.conv_file.relative_path_from RED.root}' not exists" unless config.conv_file.exist?
29
+ def self.gen_checkout_query(config)
30
+ Log.error! "ERROR: Only could checkout data from view" unless config.type == :view
34
31
 
35
- File.open config.tmp_script_file, 'w' do |f|
32
+ File.open config.tmp_exec_file, 'w' do |f|
36
33
  f.puts "UNLOAD ('"
37
- f.puts self.parse_convertor_file config.conv_file
38
- f.puts "where date >= \\'#{start_time}\\'" if start_time
34
+ f.puts "SELECT * FROM #{config.source_name}"
39
35
  f.puts "') to 's3://#{RED.s3['bucket']}/#{config.bucket_file}'"
40
36
  f.puts "CREDENTIALS 'aws_access_key_id=#{RED.s3['aws_access_key_id']};aws_secret_access_key=#{RED.s3['aws_secret_access_key']}'"
41
37
  f.puts "ESCAPE ALLOWOVERWRITE PARALLEL OFF DELIMITER AS '\\t';"
42
38
  end
43
39
  end
44
40
 
45
- def self.gen_adjust_file(query_file, tmp_script_file)
46
- Log.error! "ERROR: Query file '#{query_file.relative_path_from RED.root}' not exists" unless query_file.exist?
41
+ def self.gen_adjust_query(config)
42
+ self.parse config.query_file, config.tmp_exec_file, ''
43
+ end
44
+
45
+
46
+ private
47
+ def self.gen_table_query(config)
48
+ Log.error! "ERROR: Relation error" unless config.type == :table
47
49
 
48
- File.open tmp_script_file, 'w' do |f|
49
- self.parse_redshift_file query_file, f, RED.default_start_date
50
+ tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
51
+ temp_tables = self.parse config.query_file, tmp_file
52
+
53
+ File.open config.tmp_exec_file, 'w' do |f|
54
+ # print temp tables
55
+ temp_tables.each do |name|
56
+ f.puts "CREATE TEMP TABLE #{name} AS ("
57
+ f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
58
+ f.puts ");"
59
+ end
60
+
61
+ # print create or insert query
62
+ if RED.is_append
63
+ f.puts "INSERT INTO #{config.source_name} ("
64
+ elsif
65
+ f.puts "CREATE #{config.type} #{config.source_name} AS ("
66
+ end
67
+ f.puts File.read tmp_file
68
+ f.puts ");"
50
69
  end
51
70
  end
52
71
 
72
+ def self.gen_view_query(config)
73
+ Log.error! "ERROR: Relation error" unless config.type == :view
53
74
 
54
- private
55
- def self.parse_redshift_file(in_file, out, start_time)
56
- links = {}
57
- File.open(in_file).each.with_index do |line, index|
58
- if line =~ INCLUDE_REGEX
59
- # parse include syntax
60
- res = line.scan(INCLUDE_REGEX).first
61
- sub = res[0].gsub /[\s|\'|\"]+/, ''
62
- link = res[1].gsub /[\s|:]+/, ''
63
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: include query is missing file or alias" if sub.empty? || link.empty?
64
-
65
- sub_file = in_file.parent.join "_#{sub}.sql"
66
- sub_file = RED.root.join 'database', 'shared', "_#{sub}.sql" unless sub_file.exist?
67
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: included file _#{sub}.sql could not be found in ./ or {root}/database/shared/" unless sub_file.exist?
68
-
69
- Log.error! "QUERY ERROR: #{in_file.relative_path_from RED.root}:#{index+1}: alias #{link} was declared multiple times" if links[link]
70
-
71
- links[link] = sub_file
72
- elsif line =~ REF_REGEX
73
- # parse {ref} syntax
74
- res = line.scan REF_REGEX
75
- refs = res.map{|r| r.first.gsub /\s+/, ''}
76
- origins = line.split REF_SPLIT_REGEX
77
-
78
- out.puts origins[0].gsub(';', '')
79
- refs.each_with_index do |ref, i|
80
- Log.error! "QUERY ERROR: #{in_file}:#{index+1}:\nsub query #{ref} not found." unless links[ref]
81
- out.puts "("
82
- self.parse_redshift_file links[ref], out, start_time
83
- out.puts ") as #{ref}"
84
- out.puts origins[i+1].gsub(';', '') if origins[i+1]
75
+ tmp_file = config.tmp_file_dir.join "#{config.source_name}.resql"
76
+ temp_tables = self.parse config.query_file, tmp_file
77
+
78
+ File.open config.tmp_exec_file, 'w' do |f|
79
+ f.puts "CREATE #{config.type} #{config.source_name} AS ("
80
+ temp_tables.each_with_index do |name, index|
81
+ f.puts "#{index == 0 ? 'WITH' : ','} #{name} AS ("
82
+ f.puts File.read(config.tmp_file_dir.join "#{name}.resql")
83
+ f.puts ")"
84
+ end
85
+
86
+ # print create query
87
+ main = File.read tmp_file
88
+ unless temp_tables.empty?
89
+ main.gsub! 'WITH', ','
90
+ main.gsub! 'with', ','
91
+ end
92
+ f.puts main
93
+ f.puts ");"
94
+ end
95
+ end
96
+
97
+ def self.parse(in_file, out_file, skip_char=';')
98
+ Log.error! "ERROR: Query file '#{in_file.relative_path_from RED.root}' not exists" unless in_file.exist?
99
+
100
+ temp_tables = []
101
+ parse_enable = true
102
+ File.open out_file, 'w' do |out|
103
+ File.open(in_file).each do |line|
104
+ # remove comments
105
+ line.gsub!(COMMENT_REGEX, '')
106
+ # remove skip_char
107
+ line.gsub!(skip_char, '')
108
+ # remove empty line
109
+ next if !line || line.empty? || line =~ /^\s*$/
110
+
111
+ # check if else condition
112
+ if line =~ IFNUL_REGEX
113
+ res = line.scan(IFNUL_REGEX).first
114
+ var = res[0]
115
+ parse_enable = RED.locals[var.to_sym].nil?
116
+ next
117
+ elsif line =~ IF_REGEX
118
+ res = line.scan(IF_REGEX).first
119
+ var = res[0]
120
+ val = res[1].gsub /[\s|\'|\"]+/, ''
121
+ parse_enable = (RED.locals[var.to_sym] == val)
122
+ next
123
+ elsif line =~ ENDIF_REGEX
124
+ parse_enable = true
125
+ next
126
+ end
127
+ next unless parse_enable
128
+
129
+ # compile sub file
130
+ if line =~ LOAD_REGEX
131
+ # parse load syntax
132
+ res = line.scan(LOAD_REGEX).first
133
+ sub = res[0].gsub /[\s|\'|\"]+/, ''
134
+ name = res[1].gsub /[\s|:]+/, ''
135
+ Log.error! "QUERY ERROR: syntax error for load query: #{line}" if sub.empty? || name.empty?
136
+
137
+ sub_file = in_file.parent.join "_#{sub}.red.sql"
138
+ sub_file = RED.root.join 'database', 'shared', "_#{sub}.red.sql" unless sub_file.exist?
139
+ sub_temp_tables = self.parse sub_file, out_file.dirname.join("#{name}.resql")
140
+ sub_temp_tables.each do |n|
141
+ temp_tables.push n unless temp_tables.include? n
142
+ end
143
+ temp_tables.push name unless temp_tables.include? name
144
+ next # load query line can not contain other content
85
145
  end
86
- elsif line =~ START_TIME_REGEX
146
+
87
147
  # parse [start_time] syntax
88
- out.puts line.gsub(START_TIME_REGEX, "'#{start_time}'").gsub(';', '')
89
- elsif line =~ TIME_OFFSET_REGEX
148
+ line.gsub! START_TIME_REGEX, "'#{RED.start_time}'"
149
+ # parse [end_time] syntax
150
+ line.gsub! END_TIME_REGEX, "'#{RED.end_time}'"
151
+ # parse [current_time] syntax
152
+ line.gsub! CURRENT_TIME_REGEX, "'#{RED.current_time}'"
153
+
90
154
  # parse [3 days ago]
91
155
  res = line.scan(TIME_OFFSET_REGEX).each do |res|
92
- line = line.gsub "[#{res[0]} days ago]", "#{RED.date_days_ago(res[0].to_i)}"
156
+ line.gsub! "[#{res[0]} days ago]", "'#{RED.date_days_ago(res[0].to_i)}'"
93
157
  end
94
- out.puts line
95
- elsif line =~ CURRENT_TIME_REGEX
96
- line = line.gsub "[current_time]", "#{RED.current_time}"
97
- out.puts line
98
- elsif line =~ LOCALS_REGEX
99
158
  # parse [locals] syntax
100
159
  line.scan(LOCALS_REGEX).each do |res|
101
160
  key = res.first
102
161
  Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
103
- line = line.gsub "[#{key}]", "'#{RED.locals[key.to_sym]}'"
162
+ line.gsub! "[#{key}]", "'#{RED.locals[key.to_sym]}'"
104
163
  end
105
- out.puts line.gsub ';', ''
106
- else
107
- # other, print absolutely
108
- out.puts line.gsub ';', ''
109
- end
110
- end
111
- end
112
-
113
- def self.parse_convertor_file(in_file)
114
- is_parsing_column = false
115
- columns = []
116
- source = ""
117
- File.open(in_file).each.with_index do |line, index|
118
- if line =~ CONV_TABLE_REGEX
119
- # parse table declare
120
- res = line.scan(CONV_TABLE_REGEX).first
121
- source = res[0].gsub /\s+/, ''
122
- is_parsing_column = false
123
- elsif line =~ CONV_COLUMN_REGEX
124
- is_parsing_column = true
125
- elsif is_parsing_column
126
- line.gsub! /\s+/, ''
127
- if line =~ CONV_SWITCHDEF_REGEX
128
- res = line.scan(CONV_SWITCHDEF_REGEX).first
129
- res[1].gsub!("'", "\\\\'")
130
- switches = res[1].scan CONV_SWITCH_REGEX
131
- switches.map! do |m|
132
- "when #{m[0]} then #{m[1]}"
133
- end
134
- columns.push "case #{res[0]} #{switches.join ' '} end as #{res[0]}"
135
- elsif line =~ CONV_TIMESTAMP_REGEX
136
- columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
137
- columns.push "\\'#{(Time.now+9*3600).strftime("%Y-%m-%d %H:%M:%S")}\\'"
138
- else
139
- columns.push line.gsub("'", "\\\\'").gsub('NULL', "\\\\'NULL\\\\'") unless line.empty?
164
+ # parse [<local_list>] syntax
165
+ line.scan(LOCALS_LIST_REGEX).each do |res|
166
+ key = res.first
167
+ Log.error! "QUERY ERROR: Local params #{key} was missing." unless RED.locals[key.to_sym]
168
+ line = line.gsub "[<#{key}>]", "(#{RED.locals[key.to_sym].split(',').map{|e| "'#{e}'"}.join(',')})"
140
169
  end
170
+
171
+ out.puts line.gsub skip_char, ''
141
172
  end
142
173
  end
143
- "select #{columns.join ','} from #{source}"
174
+ temp_tables
144
175
  end
145
176
 
146
177
  end
@@ -1,6 +1,6 @@
1
1
  module Redata
2
2
  class Relation
3
- attr_accessor :category, :name, :key, :file, :dir, :type, :update_type
3
+ attr_accessor :category, :name, :key, :file, :dir, :type
4
4
  def initialize(category, name, setting)
5
5
  @category = category
6
6
  @name = name
@@ -13,5 +13,31 @@ module Redata
13
13
  @category == :main ? @key : "#{@category}_#{@key}".to_sym
14
14
  end
15
15
 
16
+ def source_name
17
+ @category == :main ? @name : "#{@category}_#{@name}"
18
+ end
19
+
20
+ def query_file
21
+ query_file = RED.root.join 'database', 'sources'
22
+ query_file = query_file.join @dir if @dir
23
+ query_file = query_file.join "#{@file}.red.sql"
24
+ query_file
25
+ end
26
+
27
+ def tmp_file_dir
28
+ RED.root.join 'tmp', "#{@category}_#{@name}"
29
+ end
30
+
31
+ def tmp_exec_file
32
+ self.tmp_file_dir.join "exec.sql"
33
+ end
34
+
35
+ def tmp_mkdir
36
+ Dir.mkdir self.tmp_file_dir unless self.tmp_file_dir.exist?
37
+ end
38
+
39
+ def tmp_rmdir
40
+ FileUtils.rm_r self.tmp_file_dir if !RED.keep_tmp? && self.tmp_file_dir.exist?
41
+ end
16
42
  end
17
43
  end
@@ -3,22 +3,6 @@ module Redata
3
3
  def initialize(category, name, setting)
4
4
  super category, name, setting
5
5
  @type = :table
6
- @update_type = setting[:update] || :renewal
7
- end
8
-
9
- def source_name
10
- @category == :main ? @name : "#{@category}_#{@name}"
11
- end
12
-
13
- def query_file
14
- query_file = RED.root.join 'database', 'sources'
15
- query_file = query_file.join @dir if @dir
16
- query_file = query_file.join "#{@file}.sql"
17
- query_file
18
- end
19
-
20
- def tmp_script_file
21
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
22
6
  end
23
7
  end
24
8
  end
@@ -3,22 +3,14 @@ module Redata
3
3
  def initialize(category, name, setting)
4
4
  super category, name, setting
5
5
  @type = :view
6
- @update_type = :renewal
7
6
  end
8
7
 
9
- def source_name
10
- @category == :main ? @name : "#{@category}_#{@name}"
8
+ def bucket_file
9
+ "#{RED.end_time}/#{@category}/#{@name}.tsv"
11
10
  end
12
11
 
13
- def query_file
14
- query_file = RED.root.join 'database', 'sources'
15
- query_file = query_file.join @dir if @dir
16
- query_file = query_file.join "#{@file}.sql"
17
- query_file
18
- end
19
-
20
- def tmp_script_file
21
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
12
+ def tmp_data_file
13
+ self.tmp_file_dir.join "#{@name}.tsv"
22
14
  end
23
15
  end
24
16
  end
@@ -18,7 +18,6 @@ require 'redata/bucket'
18
18
  require 'redata/relation'
19
19
  require 'redata/relation/table'
20
20
  require 'redata/relation/view'
21
- require 'redata/relation/export'
22
21
  require 'redata/schema'
23
22
  require 'redata/parser'
24
23
  require 'redata/tasks'
data/lib/redata/ssh.rb CHANGED
@@ -17,12 +17,16 @@ module Redata
17
17
  return false
18
18
  end
19
19
 
20
- def upload_file(file, target_file=nil)
21
- system "scp -i #{@ssh['IdentityFile']} #{file} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/#{target_file}"
20
+ def upload_dir(dir)
21
+ system "scp -r -i #{@ssh['IdentityFile']} #{dir} #{@ssh['User']}@#{@ssh['HostName']}:~/tmp/"
22
22
  end
23
23
 
24
24
  def run_command(cmd)
25
25
  system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"#{cmd}\""
26
26
  end
27
+
28
+ def remove_dir(dir)
29
+ system "ssh -i #{@ssh['IdentityFile']} #{@ssh['User']}@#{@ssh['HostName']} \"rm -rf #{dir}\""
30
+ end
27
31
  end
28
32
  end
data/lib/redata/tasks.rb CHANGED
@@ -8,59 +8,56 @@ module Redata
8
8
 
9
9
  def self.create_datasource(key)
10
10
  self.parse_key(key, [:table, :view]).each do |config|
11
- if RED.is_append && config.update_type == :append
12
- start_time = RED.locals[:start_time] || RED.default_append_date
13
- Parser.gen_redshift_query config, start_time
14
- Log.action "QUERY: Append data after #{start_time} into [#{config.source_name}]"
15
- DATABASE.connect_with_file config.tmp_script_file
11
+ config.tmp_mkdir
12
+ Parser.gen_create_query config
13
+ if RED.is_append
14
+ Log.action "APPEND<#{config.type}>: data(#{RED.start_time} ~ #{RED.end_time}) into [#{config.source_name}]"
16
15
  else
17
- Parser.gen_redshift_query config
18
- Log.action "QUERY: Create #{config.type} [#{config.source_name}]"
19
- DATABASE.connect_with_file config.tmp_script_file
16
+ Log.action "CREATE<#{config.type}>: [#{config.source_name}]"
20
17
  end
18
+ DATABASE.connect_redshift config
19
+ config.tmp_rmdir
21
20
  end
22
21
  end
23
22
 
24
23
  def self.delete_datasource(key)
25
24
  self.parse_key(key, [:table, :view]).reverse.each do |config|
26
- unless RED.is_append && config.update_type == :append
27
- Log.action "QUERY: Drop #{config.type} [#{config.source_name}]"
28
- Log.warning "WARNING: CASCADE mode will also drop other sources that depend on this #{config.type}" if RED.is_forced
29
- DATABASE.connect_with_query "DROP #{config.type} #{config.source_name} #{RED.is_forced ? 'CASCADE' : 'RESTRICT'}"
30
- end
25
+ config.tmp_mkdir
26
+ Parser.gen_delete_query config
27
+ Log.action "DROP<#{config.type}>: [#{config.source_name}]"
28
+ Log.warning "WARNING: CASCADE mode will also drop other views that depend on this" if RED.is_forced
29
+ DATABASE.connect_redshift config
30
+ config.tmp_rmdir
31
31
  end
32
32
  end
33
33
 
34
34
  def self.checkout_datasource(key)
35
- self.parse_key(key, [:export]).each do |config|
36
- if RED.is_append && config.update_type == :append
37
- start_time = RED.locals[:start_time] || RED.default_append_date
38
- Parser.gen_export_query config, start_time
39
- Log.action "QUERY: Checkout data after #{start_time} to bucket [#{config.bucket_file}]"
40
- else
41
- Parser.gen_export_query config
42
- Log.action "QUERY: Checkout data to bucket [#{config.bucket_file}]"
43
- end
44
- DATABASE.connect_with_file config.tmp_script_file
35
+ self.parse_key(key, [:view]).each do |config|
36
+ config.tmp_mkdir
37
+ Parser.gen_checkout_query config
38
+ Log.action "CHECKOUT<#{config.category}>: to bucket [#{config.bucket_file}]"
39
+ DATABASE.connect_redshift config
40
+
45
41
  bucket = S3Bucket.new
46
42
  bucket.move "#{config.bucket_file}000", config.bucket_file
43
+ config.tmp_rmdir
47
44
  end
48
45
  end
49
46
 
50
- def self.inject(key, platform=nil)
51
- self.parse_key(key, [:export]).each do |config|
52
- Log.action "BUCKET: Make [#{config.bucket_file}] public"
47
+ def self.deploy_datasource(key, stage)
48
+ self.parse_key(key, [:view]).each do |config|
49
+ config.tmp_mkdir
53
50
  bucket = S3Bucket.new
54
51
  bucket.make_public config.bucket_file, true
55
52
 
56
- Log.action "DOWNLOAD: Downlaod [#{config.bucket_file}] from bucket"
53
+ Log.action "DOWNLOAD<bucket>: from [#{config.bucket_file}]"
57
54
  system "wget #{RED.s3['host']}/#{config.bucket_file} -O #{config.tmp_data_file} --quiet"
58
55
 
59
- Log.action "BUCKET: Make [#{config.bucket_file}] private"
60
56
  bucket.make_public config.bucket_file, false
61
57
 
62
- Log.action "QUERY: Inject data to [#{config.name}] of #{config.category}"
63
- DATABASE.inject_to_mysql config, platform
58
+ Log.action "INJECT<#{config.category}>: with [#{config.name}] #{stage ? 'for stage '+stage : ''}"
59
+ DATABASE.inject_data config, stage
60
+ config.tmp_rmdir
64
61
  end
65
62
  end
66
63
 
@@ -73,7 +70,7 @@ module Redata
73
70
  configs = @@schema.category_configs(key, types)
74
71
  if configs.empty?
75
72
  config = @@schema.config_with key if key
76
- Log.error! "ERROR: Data source relation #{key} was not defined in config/relations.rb" unless config
73
+ Log.error! "ERROR: Data source relation #{key} was not defined" unless config
77
74
  configs.push config
78
75
  end
79
76
  configs
@@ -1,3 +1,3 @@
1
1
  module Redata
2
- VERSION = "0.1.0"
2
+ VERSION = "1.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - goshan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-09 00:00:00.000000000 Z
11
+ date: 2017-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -136,7 +136,6 @@ files:
136
136
  - lib/redata/notice.rb
137
137
  - lib/redata/parser.rb
138
138
  - lib/redata/relation.rb
139
- - lib/redata/relation/export.rb
140
139
  - lib/redata/relation/table.rb
141
140
  - lib/redata/relation/view.rb
142
141
  - lib/redata/requires.rb
@@ -1,30 +0,0 @@
1
- module Redata
2
- class Export < Relation
3
- def initialize(category, name, setting)
4
- super category, name, setting
5
- @type = :export
6
- @update_type = setting[:update] || :renewal
7
- end
8
-
9
- def conv_file
10
- conv_file = RED.root.join 'database', 'convertors'
11
- conv_file = conv_file.join @dir if @dir
12
- conv_file = conv_file.join "#{@file}.conv"
13
- conv_file
14
- end
15
-
16
- def tmp_script_file
17
- RED.root.join 'tmp', "queries", "red#{@type}_#{@category}_#{@name}.sql"
18
- end
19
-
20
- def tmp_data_file
21
- RED.root.join 'tmp', "data", "#{@name}.tsv"
22
- end
23
-
24
- def bucket_file
25
- bucket_dir = RED.default_append_date
26
- bucket_dir = RED.locals[:start_time] if RED.is_append && @update_type == :append && RED.locals[:start_time]
27
- "#{bucket_dir}/#{@category}/#{@name}.tsv"
28
- end
29
- end
30
- end