gd_bam 0.1.35 → 0.1.36

Sign up to get free protection for your applications and to get access to all the features.
data/bin/bam CHANGED
@@ -21,6 +21,17 @@ default_value false
21
21
  arg_name 'logger'
22
22
  switch [:l,:logger]
23
23
 
24
+ desc 'Platform server'
25
+ default_value nil
26
+ arg_name 'server'
27
+ flag [:s,:server]
28
+
29
+ desc 'Platform uploads server'
30
+ default_value nil
31
+ arg_name 'web dav upload server'
32
+ flag [:w,:webdav_server]
33
+
34
+
24
35
  desc 'Generates clover project based on information in current directory. The default ouptut is the directory ./clover-project'
25
36
  # arg_name 'Describe arguments to new here'
26
37
  command :generate do |c|
@@ -99,7 +110,7 @@ command :run_validator do |c|
99
110
  end
100
111
  fail "You need to specify process" if process.blank?
101
112
 
102
- GoodData::Bam::Commands::connect_to_gd()
113
+ GoodData::Bam::Commands::connect_to_gd(global_options)
103
114
  GoodData::Bam::Commands::run_validator(process, files, PARAMS)
104
115
  end
105
116
  end
@@ -240,6 +251,62 @@ command :procs do |c|
240
251
  end
241
252
 
242
253
 
254
+ desc 'Replicate and join S3 backup locally. Use ONLY if you knwo what it does.'
255
+ command :clone_backup_locally do |c|
256
+
257
+ c.desc 'taps'
258
+ c.arg_name 'taps'
259
+ c.flag :taps
260
+
261
+
262
+ c.action do |global_options,options,args|
263
+ only = options[:only]
264
+ params = PARAMS.merge({
265
+ :project_name => "backup-restore-#{PARAMS[:project_name]}",
266
+ :graph_repos => [
267
+ GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
268
+ GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)],
269
+ :only => only
270
+ })
271
+ GoodData::Bam::Commands::clone_backup(".", params)
272
+ end
273
+
274
+ end
275
+ desc 'Restore data from S3 backup'
276
+ command :restore_backup do |c|
277
+
278
+ c.desc 'ES name'
279
+ c.arg_name 'es'
280
+ c.flag :es
281
+
282
+ c.desc 'specify tap to process only that one'
283
+ c.arg_name 'only'
284
+ c.flag :only
285
+
286
+
287
+ c.action do |global_options,options,args|
288
+ new_es = options[:es]
289
+ only = options[:only]
290
+ params = PARAMS.merge({
291
+ :project_name => "backup-restore-#{PARAMS[:project_name]}",
292
+ :graph_repos => [
293
+ GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
294
+ GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)],
295
+ :new_es => new_es,
296
+ :only => only
297
+ })
298
+
299
+ fail "You do not have backup information specified. I do not know where to look for the backup." unless PARAMS[:s3_backup]
300
+ fail "You did not specify an ES." if new_es.blank?
301
+ fail "ES cannot be the same as you currently have in params.json. The point is to use a new one. If anything goes wrong you can always go back." if new_es == (params[:additional_params] && params[:additional_params][:GDC_EVENTSTORE])
302
+
303
+ # puts "Let's reload backup"
304
+ GoodData::Bam::Commands::generate_backup_restoration(".", params)
305
+ end
306
+
307
+ end
308
+
309
+
243
310
  desc 'Creates project'
244
311
  command :project do |c|
245
312
 
@@ -253,20 +320,22 @@ command :project do |c|
253
320
 
254
321
  c.action do |global_options,options,args|
255
322
  fail "You need to specify token to create a project" if options[:token].nil?
256
- GoodData::Bam::Commands::connect_to_gd()
257
- pid = case options[:blueprint]
323
+ GoodData::Bam::Commands::connect_to_gd(global_options)
324
+ creation_info = case options[:blueprint]
258
325
  when "goodsales"
259
- "nt935rwzls50zfqwy6dh62tabu8h0ocy"
326
+ { :template => "/projectTemplates/GoodSalesAnalytics/2" }
260
327
  when "box"
261
- "n1zoyqvahupyxfxv35nzo0ernax69odm"
328
+ { :pid => "n1zoyqvahupyxfxv35nzo0ernax69odm" }
262
329
  end
263
330
 
264
331
  params = PARAMS.merge({:token => options[:token]})
265
332
 
266
- new_project = if pid
267
- GoodData::Bam::Commands.clone_project(pid, params)
268
- else
333
+ new_project = if creation_info && creation_info.has_key?(:template)
334
+ params = PARAMS.merge({:token => options[:token], :template => creation_info[:template]})
335
+
269
336
  GoodData::Bam::Commands.create_project(params)
337
+ else
338
+ GoodData::Bam::Commands.clone_project(pid, params)
270
339
  end
271
340
 
272
341
  puts "Your project pid is #{new_project}"
@@ -360,7 +429,7 @@ command :deploy do |c|
360
429
  fail "You have to specify directory to deploy as an argument" if dir.nil?
361
430
  fail "Specified directory does not exist" unless File.exist?(dir)
362
431
 
363
- GoodData::Bam::Commands::connect_to_gd()
432
+ GoodData::Bam::Commands::connect_to_gd(global_options)
364
433
  params = PARAMS.merge(global_options.merge(options))
365
434
  response = GoodData::Bam::Commands::deploy(dir, params)
366
435
  end
@@ -380,7 +449,7 @@ command :run do |c|
380
449
  fail "Specified directory does not exist" unless File.exist?(dir)
381
450
 
382
451
  params = PARAMS.merge(global_options.merge(options))
383
- GoodData::Bam::Commands::connect_to_gd()
452
+ GoodData::Bam::Commands::connect_to_gd(global_options)
384
453
  GoodData::Bam::Commands::run(dir, params)
385
454
  end
386
455
  end
@@ -1,3 +1,3 @@
1
1
  module Bam
2
- VERSION = '0.1.35'
2
+ VERSION = '0.1.36'
3
3
  end
@@ -45,7 +45,11 @@ module GoodData
45
45
  :token => "${SFDC_TOKEN}",
46
46
  :type => "SFDC",
47
47
  :username => "${SFDC_USERNAME}",
48
- :passwordEncrypted => "false"
48
+ :passwordEncrypted => "false",
49
+ :consumerKey => "${SFDC_CONSUMER_KEY}",
50
+ :consumerSecret => "${SFDC_CONSUMER_SECRET}",
51
+ :credentialsType => "${SFDC_CREDENTIALS_TYPE}",
52
+ :refreshToken => "${SFDC_REFRESH_TOKEN}"
49
53
  })
50
54
  end
51
55
  end
@@ -4,6 +4,10 @@ module GoodData
4
4
 
5
5
  include GoodData::CloudConnect
6
6
 
7
+ # Structural pattern
8
+ # loop_over_file will loop over every line provided in the file given as parameter
9
+ # it will run the graph given as options[:graph_to_run]
10
+ # it will make all arrangements so that the underlying graph will have access to only one line, thus making a loop
7
11
  def self.loop_over_file(file, options={})
8
12
  file_to_loop = options[:file_to_loop]
9
13
  token = options[:token]
@@ -103,6 +103,98 @@ module GoodData
103
103
  GoodData::Bam::Generators::Downloaders.generate_history_downloaders(home + DOWNLOADERS_HOME + "graphs", project, params)
104
104
  end
105
105
 
106
+ def self.generate_backup_restoration(home, params)
107
+ home = Pathname(home)
108
+ new_params = params.clone
109
+ only = params[:only]
110
+ new_params[:additional_params][:GDC_EVENTSTORE] = params[:new_es]
111
+ clobber_downloaders_project(home)
112
+ project = GoodData::Bam::Project.build_project(home, new_params)
113
+
114
+ require 'aws-sdk'
115
+ AWS.config(
116
+ :access_key_id => params[:additional_params][:S3_ACCESS_KEY_ID],
117
+ :secret_access_key => params[:additional_params][:S3_SECRET_ACCESS_KEY]
118
+ )
119
+
120
+ s3_interface_from = AWS::S3.new()
121
+ bucket_from = s3_interface_from.buckets[params[:additional_params][:S3_BUCKETNAME]]
122
+
123
+ files_in_from_bucket = bucket_from.objects.map {|o| o.key}
124
+
125
+ incremental_taps = Taps.get_incremental(project[:taps])
126
+
127
+ setup_downloaders_project(home, new_params)
128
+ GoodData::Bam::Generators::Downloaders.generate_backup_restoration_graph(home + DOWNLOADERS_HOME + "graphs", project, new_params)
129
+
130
+ end
131
+
132
+ def self.clone_backup(home, params)
133
+ home = Pathname(home)
134
+ new_params = params.clone
135
+ only = params[:only]
136
+ new_params[:additional_params][:GDC_EVENTSTORE] = params[:new_es]
137
+ clobber_downloaders_project(home)
138
+ project = GoodData::Bam::Project.build_project(home, new_params)
139
+
140
+ require 'aws-sdk'
141
+ AWS.config(
142
+ :access_key_id => params[:additional_params][:S3_ACCESS_KEY_ID],
143
+ :secret_access_key => params[:additional_params][:S3_SECRET_ACCESS_KEY]
144
+ )
145
+
146
+ s3_interface_from = AWS::S3.new()
147
+ bucket_from = s3_interface_from.buckets[params[:additional_params][:S3_BUCKETNAME]]
148
+
149
+ files_in_from_bucket = bucket_from.objects.map {|o| o.key}
150
+
151
+ binding.pry
152
+
153
+ incremental_taps = Taps.get_incremental(project[:taps])
154
+
155
+ taps_to_process = if only.blank?
156
+ incremental_taps
157
+ else
158
+ # binding.pry
159
+ temp = Taps.get_incremental([Project::find_tap_by_id(project, only)].reject {|x| x.nil?})
160
+ fail "There is no such tap \"#{only}\"" if temp.empty?
161
+ temp
162
+ end
163
+
164
+ taps_to_process.each do |tap|
165
+ what = tap[:id]
166
+ Dir.mktmpdir do |dir|
167
+ dir = Pathname(dir)
168
+ files_in_from_bucket.grep(/#{params[:project_pid]}\/#{what}\/#{what}/).each do |file_name|
169
+ puts "Downloading #{file_name}"
170
+ o = bucket_from.objects[file_name]
171
+ local_File_name = file_name.split("\/").last
172
+ # puts local_File_name
173
+ File.open(dir + local_File_name,"w") do |local_file|
174
+ local_file.write(o.read)
175
+ end
176
+ end
177
+
178
+ # FileUtils::cd(dir) do
179
+ full_file_name = "full_" + what + ".csv"
180
+ # TODO
181
+ system "\"line to be skipped\n\">> #{dir + full_file_name}"
182
+
183
+ system "for i in #{dir + what}*
184
+ do
185
+ if test -f \"$i\"
186
+ then
187
+ echo \"Doing somthing to $i\"
188
+ tail +2 \"$i\" >> #{dir + full_file_name}
189
+ fi
190
+ done"
191
+ FileUtils::cp(dir + full_file_name, full_file_name)
192
+ # binding.pry
193
+ # end
194
+ end
195
+ end
196
+ end
197
+
106
198
  def self.generate(home, params)
107
199
  # fail "The flow you specified was not found" if flows.empty? && !only_flow.nil?
108
200
  clobber_etl_project(home)
@@ -3,8 +3,9 @@ module GoodData
3
3
  module Commands
4
4
 
5
5
  def self.connect_to_gd(options={})
6
- GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass])
7
- GoodData.project = PARAMS[:project_pid] if !PARAMS[:project_pid].nil? && !PARAMS[:project_pid].empty?
6
+ server = options[:server]
7
+ GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass], server, options)
8
+ GoodData.project = PARAMS[:project_pid] if !PARAMS[:project_pid].nil? && !PARAMS[:project_pid].empty?
8
9
  end
9
10
 
10
11
  def self.set_up_debug(project, flow, graph)
@@ -73,15 +74,18 @@ module GoodData
73
74
  :project => {
74
75
  :content => {
75
76
  :guidedNavigation => 1,
76
- :driver => "Pg",
77
+ :driver => "Pg",
77
78
  :authorizationToken => options[:token]
78
79
  },
79
80
  :meta => {
80
81
  :title => project_name,
81
- :summary => "Testing Project"
82
+ :summary => "Testing Project",
83
+ :projectTemplate => options[:template]
82
84
  }
83
85
  }
84
86
  }
87
+
88
+ binding.pry
85
89
  result = GoodData.post("/gdc/projects/", pr)
86
90
  uri = result["uri"]
87
91
  while(GoodData.get(uri)["project"]["content"]["state"] == "LOADING")
@@ -139,68 +139,150 @@ HEREDOC
139
139
  end
140
140
 
141
141
  file_taps = Taps.get_file(taps)
142
+ generate_file_lists(file_taps, {
143
+ phase => phase,
144
+ :builder => builder,
145
+ :reformat_func => :webdav
146
+ })
147
+ # file_taps.each do |tap|
148
+ # source = tap[:source]
149
+ # id = tap[:id]
150
+ #
151
+ #
152
+ # reformat_func = <<HEREDOC
153
+ #
154
+ # function integer transform() {
155
+ # $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
156
+ # $out.0.fileName = $in.0.fileName;
157
+ # return ALL;
158
+ # }
159
+ # HEREDOC
160
+ #
161
+ # builder.Phase(:number => phase += 1) do
162
+ # Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
163
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
164
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
165
+ # Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
166
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
167
+ # end
168
+ #
169
+ # builder.Phase(:number => phase += 1) do
170
+ # ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
171
+ # Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
172
+ # Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
173
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
174
+ # end
175
+ #
176
+ # subgraph_reformat_func = <<HEREDOC
177
+ #
178
+ # function integer transform() {
179
+ # $out.0.all = "graphs/#{id}_loop.grf";
180
+ # return ALL;
181
+ # }
182
+ # HEREDOC
183
+ #
184
+ # fail_reformat = <<HEREDOC
185
+ # function integer transform() {
186
+ # raiseError("Loop failed");
187
+ # }
188
+ # HEREDOC
189
+ #
190
+ #
191
+ # builder.Phase(:number => phase += 1) do
192
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
193
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
194
+ # Core::build_node2(builder, Nodes.run_graph2({
195
+ # :guiName => id,
196
+ # :name => id,
197
+ # :id => "#{id}_run_graph"
198
+ # }))
199
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
200
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
201
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
202
+ # Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
203
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
204
+ # end
205
+ # end
142
206
 
143
- file_taps.each do |tap|
144
- source = tap[:source]
145
- id = tap[:id]
146
207
 
208
+ end
209
+ end
210
+ end
147
211
 
212
+ def self.generate_file_lists(taps, options={})
213
+ phase = options[:phase] || 0
214
+ builder = options[:builder] || fail("I need a builder")
215
+ reformat_func = nil
216
+ case options[:reformat_func]
217
+ when :webdav
148
218
  reformat_func = <<HEREDOC
149
219
 
150
220
  function integer transform() {
151
- $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
152
- $out.0.fileName = $in.0.fileName;
153
- return ALL;
221
+ $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
222
+ $out.0.fileName = $in.0.fileName;
223
+ return ALL;
154
224
  }
155
225
  HEREDOC
226
+ when :s3
227
+ reformat_func = <<HEREDOC
156
228
 
157
- builder.Phase(:number => phase += 1) do
158
- Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
159
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
160
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
161
- Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
162
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
163
- end
229
+ function integer transform() {
230
+
231
+ $out.0.filePath = replace($in.0.filePath, "https://", "https://${S3_ACCESS_KEY_ID}:${S3_SECRET_ACCESS_KEY}@");
232
+ $out.0.fileName = $in.0.fileName;
233
+ return ALL;
234
+ }
235
+ HEREDOC
236
+ else
237
+ reformat_func = Nodes::MAP_ALL
238
+ end
239
+ taps.each do |tap|
240
+ source = tap[:source]
241
+ id = tap[:id]
242
+
243
+ builder.Phase(:number => phase += 1) do
244
+ Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
245
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
246
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
247
+ Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
248
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
249
+ end
164
250
 
165
- builder.Phase(:number => phase += 1) do
166
- ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
167
- Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
168
- Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
169
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
170
- end
251
+ builder.Phase(:number => phase += 1) do
252
+ ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
253
+ Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
254
+ Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
255
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
256
+ end
171
257
 
172
- subgraph_reformat_func = <<HEREDOC
258
+ subgraph_reformat_func = <<HEREDOC
173
259
 
174
260
  function integer transform() {
175
- $out.0.all = "graphs/#{id}_loop.grf";
176
- return ALL;
261
+ $out.0.all = "graphs/#{id}_loop.grf";
262
+ return ALL;
177
263
  }
178
264
  HEREDOC
179
265
 
180
- fail_reformat = <<HEREDOC
266
+ fail_reformat = <<HEREDOC
181
267
  function integer transform() {
182
- raiseError("Loop failed");
268
+ raiseError("Loop failed");
183
269
  }
184
270
  HEREDOC
185
271
 
186
272
 
187
- builder.Phase(:number => phase += 1) do
188
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
189
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
190
- Core::build_node2(builder, Nodes.run_graph2({
191
- :guiName => id,
192
- :name => id,
193
- :id => "#{id}_run_graph"
194
- }))
195
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
196
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
197
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
198
- Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
199
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
200
- end
201
- end
202
-
203
-
273
+ builder.Phase(:number => phase += 1) do
274
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
275
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
276
+ Core::build_node2(builder, Nodes.run_graph2({
277
+ :guiName => id,
278
+ :name => id,
279
+ :id => "#{id}_run_graph"
280
+ }))
281
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
282
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
283
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
284
+ Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
285
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
204
286
  end
205
287
  end
206
288
  end
@@ -689,11 +771,78 @@ HEREDOC
689
771
  :file_to_loop => "data/#{id}_files_to_read.csv",
690
772
  :graph_to_run => "graphs/#{id}_download.grf"
691
773
  })
692
- create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], params)
774
+ create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], params.merge(:delete_after_processing => true))
693
775
 
694
776
  end
695
777
  end
696
778
 
779
+ def self.create_backup_restoration_run_graph(file, taps, options={})
780
+
781
+ File.open(file, "w") do |file|
782
+ builder = Builder::XmlMarkup.new(:target=>file, :indent=>2)
783
+ builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8"
784
+ builder.Graph({
785
+ :name => "Run graph"
786
+ }) do
787
+ builder.Global do
788
+ Helpers::property_file(builder, {:id => "workspace_params", :fileURL => "workspace.prm"})
789
+ Helpers::property_file(builder, {:id => "params_params", :fileURL => "params.prm"})
790
+ Helpers::create_trash_meta(builder)
791
+ Helpers::create_lookup_meta(builder)
792
+ Helpers::create_file_list_meta(builder)
793
+ Helpers::create_run_graph_failure_metadata(builder)
794
+
795
+ Core::build_node2(builder, Nodes.lookup2({:name => "gdLookup0", :id => "gdLookup0", :type => Nodes::GD_LOOKUP, :metadata => "lookup_metadata"}))
796
+
797
+ end
798
+
799
+ s3_taps = taps.map do |tap|
800
+ file = tap[:id]
801
+ # binding.pry
802
+ Tap.create(tap.merge({:source => "../full_#{file}.csv"}))
803
+ end
804
+ generate_file_lists(s3_taps, {
805
+ :builder => builder,
806
+ :reformat_func => :local
807
+ })
808
+ end
809
+ end
810
+ end
811
+
812
+ # Since everything that BAM touches is stored to S3 (if credentials are provided),
813
+ # it should be easy to restore it to any state
814
+ # if the timestamp is not provided in TAP it is nevertheless created so we are ensured a predictable state
815
+ def self.generate_backup_restoration_graph(home, project, params)
816
+ home = Pathname(home)
817
+ only = params[:only]
818
+
819
+ incremental_taps = Taps.get_incremental(project[:taps])
820
+
821
+ taps_to_process = if only.blank?
822
+ incremental_taps
823
+ else
824
+ temp = Taps.get_incremental([Project::find_tap_by_id(project, only)].reject {|x| x.nil?})
825
+ fail "There is no such tap \"#{only}\"" if temp.empty?
826
+ temp
827
+ end
828
+
829
+ # sf_taps = Taps.get_salesforce(incremental_taps)
830
+ # file_taps = Taps.get_file(incremental_taps)
831
+
832
+ create_backup_restoration_run_graph(home + "main.grf", taps_to_process)
833
+
834
+ taps_to_process.each do |tap|
835
+ id = tap[:id]
836
+ Helpers::loop_over_file(home + "#{tap[:id]}_loop.grf", {
837
+ :token => id,
838
+ :file_to_loop => "data/#{id}_files_to_read.csv",
839
+ :graph_to_run => "graphs/#{id}_download.grf"
840
+ })
841
+ # Do not back up since we are just using backup files. Also explicitly forbid deleting the files from S3 to prevent default behavior
842
+ restoration_params = params.merge(:s3_backup => false, :delete_after_processing => false)
843
+ create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], restoration_params)
844
+ end
845
+ end
697
846
 
698
847
  def self.generate_history_downloaders(home, project, params)
699
848
  home = Pathname(home)
@@ -799,8 +948,10 @@ HEREDOC
799
948
  Core::build_node2(builder, Nodes.edge2({:toNode => "#{file}_es:0", :fromNode => "#{file}_es_sort:0", :metadata => "#{file}_es_metadata"}))
800
949
  end
801
950
 
802
- builder.Phase(:number => phase += 1) do
803
- Core::build_node2(builder, Nodes.file_delete2({:guiName => "#{file}_file_delete", :name => "#{file}_file_delete", :id => "#{file}_file_delete", :baseURL => "${FILE}"}))
951
+ if options[:delete_after_processing] == true
952
+ builder.Phase(:number => phase += 1) do
953
+ Core::build_node2(builder, Nodes.file_delete2({:guiName => "#{file}_file_delete", :name => "#{file}_file_delete", :id => "#{file}_file_delete", :baseURL => "${FILE}"}))
954
+ end
804
955
  end
805
956
 
806
957
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gd_bam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.35
4
+ version: 0.1.36
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-10 00:00:00.000000000 Z
12
+ date: 2013-11-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -283,6 +283,22 @@ dependencies:
283
283
  - - ! '>='
284
284
  - !ruby/object:Gem::Version
285
285
  version: '0'
286
+ - !ruby/object:Gem::Dependency
287
+ name: aws-sdk
288
+ requirement: !ruby/object:Gem::Requirement
289
+ none: false
290
+ requirements:
291
+ - - ! '>='
292
+ - !ruby/object:Gem::Version
293
+ version: '0'
294
+ type: :runtime
295
+ prerelease: false
296
+ version_requirements: !ruby/object:Gem::Requirement
297
+ none: false
298
+ requirements:
299
+ - - ! '>='
300
+ - !ruby/object:Gem::Version
301
+ version: '0'
286
302
  - !ruby/object:Gem::Dependency
287
303
  name: rubyzip
288
304
  requirement: !ruby/object:Gem::Requirement