gd_bam 0.1.35 → 0.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/bam CHANGED
@@ -21,6 +21,17 @@ default_value false
21
21
  arg_name 'logger'
22
22
  switch [:l,:logger]
23
23
 
24
+ desc 'Platform server'
25
+ default_value nil
26
+ arg_name 'server'
27
+ flag [:s,:server]
28
+
29
+ desc 'Platform uploads server'
30
+ default_value nil
31
+ arg_name 'web dav upload server'
32
+ flag [:w,:webdav_server]
33
+
34
+
24
35
  desc 'Generates clover project based on information in current directory. The default ouptut is the directory ./clover-project'
25
36
  # arg_name 'Describe arguments to new here'
26
37
  command :generate do |c|
@@ -99,7 +110,7 @@ command :run_validator do |c|
99
110
  end
100
111
  fail "You need to specify process" if process.blank?
101
112
 
102
- GoodData::Bam::Commands::connect_to_gd()
113
+ GoodData::Bam::Commands::connect_to_gd(global_options)
103
114
  GoodData::Bam::Commands::run_validator(process, files, PARAMS)
104
115
  end
105
116
  end
@@ -240,6 +251,62 @@ command :procs do |c|
240
251
  end
241
252
 
242
253
 
254
+ desc 'Replicate and join S3 backup locally. Use ONLY if you knwo what it does.'
255
+ command :clone_backup_locally do |c|
256
+
257
+ c.desc 'taps'
258
+ c.arg_name 'taps'
259
+ c.flag :taps
260
+
261
+
262
+ c.action do |global_options,options,args|
263
+ only = options[:only]
264
+ params = PARAMS.merge({
265
+ :project_name => "backup-restore-#{PARAMS[:project_name]}",
266
+ :graph_repos => [
267
+ GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
268
+ GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)],
269
+ :only => only
270
+ })
271
+ GoodData::Bam::Commands::clone_backup(".", params)
272
+ end
273
+
274
+ end
275
+ desc 'Restore data from S3 backup'
276
+ command :restore_backup do |c|
277
+
278
+ c.desc 'ES name'
279
+ c.arg_name 'es'
280
+ c.flag :es
281
+
282
+ c.desc 'specify tap to process only that one'
283
+ c.arg_name 'only'
284
+ c.flag :only
285
+
286
+
287
+ c.action do |global_options,options,args|
288
+ new_es = options[:es]
289
+ only = options[:only]
290
+ params = PARAMS.merge({
291
+ :project_name => "backup-restore-#{PARAMS[:project_name]}",
292
+ :graph_repos => [
293
+ GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
294
+ GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)],
295
+ :new_es => new_es,
296
+ :only => only
297
+ })
298
+
299
+ fail "You do not have backup information specified. I do not know where to look for the backup." unless PARAMS[:s3_backup]
300
+ fail "You did not specify an ES." if new_es.blank?
301
+ fail "ES cannot be the same as you currently have in params.json. The point is to use a new one. If anything goes wrong you can always go back." if new_es == (params[:additional_params] && params[:additional_params][:GDC_EVENTSTORE])
302
+
303
+ # puts "Let's reload backup"
304
+ GoodData::Bam::Commands::generate_backup_restoration(".", params)
305
+ end
306
+
307
+ end
308
+
309
+
243
310
  desc 'Creates project'
244
311
  command :project do |c|
245
312
 
@@ -253,20 +320,22 @@ command :project do |c|
253
320
 
254
321
  c.action do |global_options,options,args|
255
322
  fail "You need to specify token to create a project" if options[:token].nil?
256
- GoodData::Bam::Commands::connect_to_gd()
257
- pid = case options[:blueprint]
323
+ GoodData::Bam::Commands::connect_to_gd(global_options)
324
+ creation_info = case options[:blueprint]
258
325
  when "goodsales"
259
- "nt935rwzls50zfqwy6dh62tabu8h0ocy"
326
+ { :template => "/projectTemplates/GoodSalesAnalytics/2" }
260
327
  when "box"
261
- "n1zoyqvahupyxfxv35nzo0ernax69odm"
328
+ { :pid => "n1zoyqvahupyxfxv35nzo0ernax69odm" }
262
329
  end
263
330
 
264
331
  params = PARAMS.merge({:token => options[:token]})
265
332
 
266
- new_project = if pid
267
- GoodData::Bam::Commands.clone_project(pid, params)
268
- else
333
+ new_project = if creation_info && creation_info.has_key?(:template)
334
+ params = PARAMS.merge({:token => options[:token], :template => creation_info[:template]})
335
+
269
336
  GoodData::Bam::Commands.create_project(params)
337
+ else
338
+ GoodData::Bam::Commands.clone_project(pid, params)
270
339
  end
271
340
 
272
341
  puts "Your project pid is #{new_project}"
@@ -360,7 +429,7 @@ command :deploy do |c|
360
429
  fail "You have to specify directory to deploy as an argument" if dir.nil?
361
430
  fail "Specified directory does not exist" unless File.exist?(dir)
362
431
 
363
- GoodData::Bam::Commands::connect_to_gd()
432
+ GoodData::Bam::Commands::connect_to_gd(global_options)
364
433
  params = PARAMS.merge(global_options.merge(options))
365
434
  response = GoodData::Bam::Commands::deploy(dir, params)
366
435
  end
@@ -380,7 +449,7 @@ command :run do |c|
380
449
  fail "Specified directory does not exist" unless File.exist?(dir)
381
450
 
382
451
  params = PARAMS.merge(global_options.merge(options))
383
- GoodData::Bam::Commands::connect_to_gd()
452
+ GoodData::Bam::Commands::connect_to_gd(global_options)
384
453
  GoodData::Bam::Commands::run(dir, params)
385
454
  end
386
455
  end
@@ -1,3 +1,3 @@
1
1
  module Bam
2
- VERSION = '0.1.35'
2
+ VERSION = '0.1.36'
3
3
  end
@@ -45,7 +45,11 @@ module GoodData
45
45
  :token => "${SFDC_TOKEN}",
46
46
  :type => "SFDC",
47
47
  :username => "${SFDC_USERNAME}",
48
- :passwordEncrypted => "false"
48
+ :passwordEncrypted => "false",
49
+ :consumerKey => "${SFDC_CONSUMER_KEY}",
50
+ :consumerSecret => "${SFDC_CONSUMER_SECRET}",
51
+ :credentialsType => "${SFDC_CREDENTIALS_TYPE}",
52
+ :refreshToken => "${SFDC_REFRESH_TOKEN}"
49
53
  })
50
54
  end
51
55
  end
@@ -4,6 +4,10 @@ module GoodData
4
4
 
5
5
  include GoodData::CloudConnect
6
6
 
7
+ # Structural pattern
8
+ # loop_over_file will loop over every line provided in the file given as parameter
9
+ # it will run the graph given as options[:graph_to_run]
10
+ # it will make all arrangements so that the underlying graph has access to only one line at a time, thus making a loop
7
11
  def self.loop_over_file(file, options={})
8
12
  file_to_loop = options[:file_to_loop]
9
13
  token = options[:token]
@@ -103,6 +103,98 @@ module GoodData
103
103
  GoodData::Bam::Generators::Downloaders.generate_history_downloaders(home + DOWNLOADERS_HOME + "graphs", project, params)
104
104
  end
105
105
 
106
+ def self.generate_backup_restoration(home, params)
107
+ home = Pathname(home)
108
+ new_params = params.clone
109
+ only = params[:only]
110
+ new_params[:additional_params][:GDC_EVENTSTORE] = params[:new_es]
111
+ clobber_downloaders_project(home)
112
+ project = GoodData::Bam::Project.build_project(home, new_params)
113
+
114
+ require 'aws-sdk'
115
+ AWS.config(
116
+ :access_key_id => params[:additional_params][:S3_ACCESS_KEY_ID],
117
+ :secret_access_key => params[:additional_params][:S3_SECRET_ACCESS_KEY]
118
+ )
119
+
120
+ s3_interface_from = AWS::S3.new()
121
+ bucket_from = s3_interface_from.buckets[params[:additional_params][:S3_BUCKETNAME]]
122
+
123
+ files_in_from_bucket = bucket_from.objects.map {|o| o.key}
124
+
125
+ incremental_taps = Taps.get_incremental(project[:taps])
126
+
127
+ setup_downloaders_project(home, new_params)
128
+ GoodData::Bam::Generators::Downloaders.generate_backup_restoration_graph(home + DOWNLOADERS_HOME + "graphs", project, new_params)
129
+
130
+ end
131
+
132
+ def self.clone_backup(home, params)
133
+ home = Pathname(home)
134
+ new_params = params.clone
135
+ only = params[:only]
136
+ new_params[:additional_params][:GDC_EVENTSTORE] = params[:new_es]
137
+ clobber_downloaders_project(home)
138
+ project = GoodData::Bam::Project.build_project(home, new_params)
139
+
140
+ require 'aws-sdk'
141
+ AWS.config(
142
+ :access_key_id => params[:additional_params][:S3_ACCESS_KEY_ID],
143
+ :secret_access_key => params[:additional_params][:S3_SECRET_ACCESS_KEY]
144
+ )
145
+
146
+ s3_interface_from = AWS::S3.new()
147
+ bucket_from = s3_interface_from.buckets[params[:additional_params][:S3_BUCKETNAME]]
148
+
149
+ files_in_from_bucket = bucket_from.objects.map {|o| o.key}
150
+
151
+ binding.pry
152
+
153
+ incremental_taps = Taps.get_incremental(project[:taps])
154
+
155
+ taps_to_process = if only.blank?
156
+ incremental_taps
157
+ else
158
+ # binding.pry
159
+ temp = Taps.get_incremental([Project::find_tap_by_id(project, only)].reject {|x| x.nil?})
160
+ fail "There is no such tap \"#{only}\"" if temp.empty?
161
+ temp
162
+ end
163
+
164
+ taps_to_process.each do |tap|
165
+ what = tap[:id]
166
+ Dir.mktmpdir do |dir|
167
+ dir = Pathname(dir)
168
+ files_in_from_bucket.grep(/#{params[:project_pid]}\/#{what}\/#{what}/).each do |file_name|
169
+ puts "Downloading #{file_name}"
170
+ o = bucket_from.objects[file_name]
171
+ local_File_name = file_name.split("\/").last
172
+ # puts local_File_name
173
+ File.open(dir + local_File_name,"w") do |local_file|
174
+ local_file.write(o.read)
175
+ end
176
+ end
177
+
178
+ # FileUtils::cd(dir) do
179
+ full_file_name = "full_" + what + ".csv"
180
+ # TODO
181
+ system "\"line to be skipped\n\">> #{dir + full_file_name}"
182
+
183
+ system "for i in #{dir + what}*
184
+ do
185
+ if test -f \"$i\"
186
+ then
187
+ echo \"Doing somthing to $i\"
188
+ tail +2 \"$i\" >> #{dir + full_file_name}
189
+ fi
190
+ done"
191
+ FileUtils::cp(dir + full_file_name, full_file_name)
192
+ # binding.pry
193
+ # end
194
+ end
195
+ end
196
+ end
197
+
106
198
  def self.generate(home, params)
107
199
  # fail "The flow you specified was not found" if flows.empty? && !only_flow.nil?
108
200
  clobber_etl_project(home)
@@ -3,8 +3,9 @@ module GoodData
3
3
  module Commands
4
4
 
5
5
  def self.connect_to_gd(options={})
6
- GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass])
7
- GoodData.project = PARAMS[:project_pid] if !PARAMS[:project_pid].nil? && !PARAMS[:project_pid].empty?
6
+ server = options[:server]
7
+ GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass], server, options)
8
+ GoodData.project = PARAMS[:project_pid] if !PARAMS[:project_pid].nil? && !PARAMS[:project_pid].empty?
8
9
  end
9
10
 
10
11
  def self.set_up_debug(project, flow, graph)
@@ -73,15 +74,18 @@ module GoodData
73
74
  :project => {
74
75
  :content => {
75
76
  :guidedNavigation => 1,
76
- :driver => "Pg",
77
+ :driver => "Pg",
77
78
  :authorizationToken => options[:token]
78
79
  },
79
80
  :meta => {
80
81
  :title => project_name,
81
- :summary => "Testing Project"
82
+ :summary => "Testing Project",
83
+ :projectTemplate => options[:template]
82
84
  }
83
85
  }
84
86
  }
87
+
88
+ binding.pry
85
89
  result = GoodData.post("/gdc/projects/", pr)
86
90
  uri = result["uri"]
87
91
  while(GoodData.get(uri)["project"]["content"]["state"] == "LOADING")
@@ -139,68 +139,150 @@ HEREDOC
139
139
  end
140
140
 
141
141
  file_taps = Taps.get_file(taps)
142
+ generate_file_lists(file_taps, {
143
+ phase => phase,
144
+ :builder => builder,
145
+ :reformat_func => :webdav
146
+ })
147
+ # file_taps.each do |tap|
148
+ # source = tap[:source]
149
+ # id = tap[:id]
150
+ #
151
+ #
152
+ # reformat_func = <<HEREDOC
153
+ #
154
+ # function integer transform() {
155
+ # $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
156
+ # $out.0.fileName = $in.0.fileName;
157
+ # return ALL;
158
+ # }
159
+ # HEREDOC
160
+ #
161
+ # builder.Phase(:number => phase += 1) do
162
+ # Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
163
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
164
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
165
+ # Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
166
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
167
+ # end
168
+ #
169
+ # builder.Phase(:number => phase += 1) do
170
+ # ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
171
+ # Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
172
+ # Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
173
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
174
+ # end
175
+ #
176
+ # subgraph_reformat_func = <<HEREDOC
177
+ #
178
+ # function integer transform() {
179
+ # $out.0.all = "graphs/#{id}_loop.grf";
180
+ # return ALL;
181
+ # }
182
+ # HEREDOC
183
+ #
184
+ # fail_reformat = <<HEREDOC
185
+ # function integer transform() {
186
+ # raiseError("Loop failed");
187
+ # }
188
+ # HEREDOC
189
+ #
190
+ #
191
+ # builder.Phase(:number => phase += 1) do
192
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
193
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
194
+ # Core::build_node2(builder, Nodes.run_graph2({
195
+ # :guiName => id,
196
+ # :name => id,
197
+ # :id => "#{id}_run_graph"
198
+ # }))
199
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
200
+ # Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
201
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
202
+ # Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
203
+ # Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
204
+ # end
205
+ # end
142
206
 
143
- file_taps.each do |tap|
144
- source = tap[:source]
145
- id = tap[:id]
146
207
 
208
+ end
209
+ end
210
+ end
147
211
 
212
+ def self.generate_file_lists(taps, options={})
213
+ phase = options[:phase] || 0
214
+ builder = options[:builder] || fail("I need a builder")
215
+ reformat_func = nil
216
+ case options[:reformat_func]
217
+ when :webdav
148
218
  reformat_func = <<HEREDOC
149
219
 
150
220
  function integer transform() {
151
- $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
152
- $out.0.fileName = $in.0.fileName;
153
- return ALL;
221
+ $out.0.filePath = replace($in.0.filePath, "${GDC_WEBDAV_HOST}", replace(replace(\"${GD_LOGIN}\",\"@\",\"%40\"),\"\\\\+\",\"%2B\") + ":${GD_PASSWORD}@${GDC_WEBDAV_HOST}");
222
+ $out.0.fileName = $in.0.fileName;
223
+ return ALL;
154
224
  }
155
225
  HEREDOC
226
+ when :s3
227
+ reformat_func = <<HEREDOC
156
228
 
157
- builder.Phase(:number => phase += 1) do
158
- Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
159
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
160
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
161
- Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
162
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
163
- end
229
+ function integer transform() {
230
+
231
+ $out.0.filePath = replace($in.0.filePath, "https://", "https://${S3_ACCESS_KEY_ID}:${S3_SECRET_ACCESS_KEY}@");
232
+ $out.0.fileName = $in.0.fileName;
233
+ return ALL;
234
+ }
235
+ HEREDOC
236
+ else
237
+ reformat_func = Nodes::MAP_ALL
238
+ end
239
+ taps.each do |tap|
240
+ source = tap[:source]
241
+ id = tap[:id]
242
+
243
+ builder.Phase(:number => phase += 1) do
244
+ Core::build_node2(builder, Nodes.file_list2({:id => "#{id}_file_list", :name => "#{id}_file_list", :dataPolicy => "Strict", :baseURL => "#{tap[:source]}", :output_mapping => Nodes::MAP_ALL}))
245
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat", :id => "#{id}_reformat", :transformation => reformat_func}))
246
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat:0", :fromNode => "#{id}_file_list:0", :metadata => "file_list"}))
247
+ Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_writer", :fileURL => "data/#{id}_files_to_read.csv"}))
248
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_writer:0", :fromNode => "#{id}_reformat:0", :metadata => "file_list"}))
249
+ end
164
250
 
165
- builder.Phase(:number => phase += 1) do
166
- ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
167
- Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
168
- Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
169
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
170
- end
251
+ builder.Phase(:number => phase += 1) do
252
+ ctl = "function integer generate() {$out.0.all = \"#{id}_SKIP_LINES=0\";return OK;}"
253
+ Core::build_node2(builder, Nodes.data_generator2({:name => "#{id}_generator", :id => "#{id}_generator", :generate => ctl}))
254
+ Core::build_node2(builder, Nodes.writer2({:name => "PARAMS CSV Writer", :id => "#{id}_csv_writer", :fileURL => "#{id}_counter.prm", :outputFieldNames => "false", :quotedStrings => "false"}))
255
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_csv_writer:0", :fromNode => "#{id}_generator:0", :metadata => "trash_metadata"}))
256
+ end
171
257
 
172
- subgraph_reformat_func = <<HEREDOC
258
+ subgraph_reformat_func = <<HEREDOC
173
259
 
174
260
  function integer transform() {
175
- $out.0.all = "graphs/#{id}_loop.grf";
176
- return ALL;
261
+ $out.0.all = "graphs/#{id}_loop.grf";
262
+ return ALL;
177
263
  }
178
264
  HEREDOC
179
265
 
180
- fail_reformat = <<HEREDOC
266
+ fail_reformat = <<HEREDOC
181
267
  function integer transform() {
182
- raiseError("Loop failed");
268
+ raiseError("Loop failed");
183
269
  }
184
270
  HEREDOC
185
271
 
186
272
 
187
- builder.Phase(:number => phase += 1) do
188
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
189
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
190
- Core::build_node2(builder, Nodes.run_graph2({
191
- :guiName => id,
192
- :name => id,
193
- :id => "#{id}_run_graph"
194
- }))
195
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
196
- Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
197
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
198
- Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
199
- Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
200
- end
201
- end
202
-
203
-
273
+ builder.Phase(:number => phase += 1) do
274
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat graph", :id => "#{id}_reformat_graph", :transformation => subgraph_reformat_func}))
275
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_graph:0", :fromNode => "#{id}_reformat:1", :metadata => "file_list"}))
276
+ Core::build_node2(builder, Nodes.run_graph2({
277
+ :guiName => id,
278
+ :name => id,
279
+ :id => "#{id}_run_graph"
280
+ }))
281
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_run_graph:0", :fromNode => "#{id}_reformat_graph:0", :metadata => "trash_metadata"}))
282
+ Core::build_node2(builder, Nodes.reformat2({:name => "#{id} Reformat fail", :id => "#{id}_reformat_fail", :transformation => fail_reformat}))
283
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_reformat_fail:0", :fromNode => "#{id}_run_graph:1", :metadata => "run_graph_failure_metadata"}))
284
+ Core::build_node2(builder, Nodes.trash2({:name => "#{id}_trash", :id => "#{id}_trash", :debugPrint => true}))
285
+ Core::build_node2(builder, Nodes.edge2({:toNode => "#{id}_trash:0", :fromNode => "#{id}_reformat_fail:0", :metadata => "run_graph_failure_metadata"}))
204
286
  end
205
287
  end
206
288
  end
@@ -689,11 +771,78 @@ HEREDOC
689
771
  :file_to_loop => "data/#{id}_files_to_read.csv",
690
772
  :graph_to_run => "graphs/#{id}_download.grf"
691
773
  })
692
- create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], params)
774
+ create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], params.merge(:delete_after_processing => true))
693
775
 
694
776
  end
695
777
  end
696
778
 
779
+ def self.create_backup_restoration_run_graph(file, taps, options={})
780
+
781
+ File.open(file, "w") do |file|
782
+ builder = Builder::XmlMarkup.new(:target=>file, :indent=>2)
783
+ builder.instruct! :xml, :version=>"1.0", :encoding=>"UTF-8"
784
+ builder.Graph({
785
+ :name => "Run graph"
786
+ }) do
787
+ builder.Global do
788
+ Helpers::property_file(builder, {:id => "workspace_params", :fileURL => "workspace.prm"})
789
+ Helpers::property_file(builder, {:id => "params_params", :fileURL => "params.prm"})
790
+ Helpers::create_trash_meta(builder)
791
+ Helpers::create_lookup_meta(builder)
792
+ Helpers::create_file_list_meta(builder)
793
+ Helpers::create_run_graph_failure_metadata(builder)
794
+
795
+ Core::build_node2(builder, Nodes.lookup2({:name => "gdLookup0", :id => "gdLookup0", :type => Nodes::GD_LOOKUP, :metadata => "lookup_metadata"}))
796
+
797
+ end
798
+
799
+ s3_taps = taps.map do |tap|
800
+ file = tap[:id]
801
+ # binding.pry
802
+ Tap.create(tap.merge({:source => "../full_#{file}.csv"}))
803
+ end
804
+ generate_file_lists(s3_taps, {
805
+ :builder => builder,
806
+ :reformat_func => :local
807
+ })
808
+ end
809
+ end
810
+ end
811
+
812
+ # Since everything that BAM touches is stored to S3 (if credentials are provided),
813
+ # it should be easy to restore it to any state
814
+ # if the timestamp is not provided in the TAP, it is nevertheless created so that we are ensured a predictable state
815
+ def self.generate_backup_restoration_graph(home, project, params)
816
+ home = Pathname(home)
817
+ only = params[:only]
818
+
819
+ incremental_taps = Taps.get_incremental(project[:taps])
820
+
821
+ taps_to_process = if only.blank?
822
+ incremental_taps
823
+ else
824
+ temp = Taps.get_incremental([Project::find_tap_by_id(project, only)].reject {|x| x.nil?})
825
+ fail "There is no such tap \"#{only}\"" if temp.empty?
826
+ temp
827
+ end
828
+
829
+ # sf_taps = Taps.get_salesforce(incremental_taps)
830
+ # file_taps = Taps.get_file(incremental_taps)
831
+
832
+ create_backup_restoration_run_graph(home + "main.grf", taps_to_process)
833
+
834
+ taps_to_process.each do |tap|
835
+ id = tap[:id]
836
+ Helpers::loop_over_file(home + "#{tap[:id]}_loop.grf", {
837
+ :token => id,
838
+ :file_to_loop => "data/#{id}_files_to_read.csv",
839
+ :graph_to_run => "graphs/#{id}_download.grf"
840
+ })
841
+ # Do not back up since we are just using backup files. Also explicitly forbid deleting the files from S3 to prevent default behavior
842
+ restoration_params = params.merge(:s3_backup => false, :delete_after_processing => false)
843
+ create_incremental_file_downloading_graph(home + "#{tap[:id]}_download.grf", [tap], restoration_params)
844
+ end
845
+ end
697
846
 
698
847
  def self.generate_history_downloaders(home, project, params)
699
848
  home = Pathname(home)
@@ -799,8 +948,10 @@ HEREDOC
799
948
  Core::build_node2(builder, Nodes.edge2({:toNode => "#{file}_es:0", :fromNode => "#{file}_es_sort:0", :metadata => "#{file}_es_metadata"}))
800
949
  end
801
950
 
802
- builder.Phase(:number => phase += 1) do
803
- Core::build_node2(builder, Nodes.file_delete2({:guiName => "#{file}_file_delete", :name => "#{file}_file_delete", :id => "#{file}_file_delete", :baseURL => "${FILE}"}))
951
+ if options[:delete_after_processing] == true
952
+ builder.Phase(:number => phase += 1) do
953
+ Core::build_node2(builder, Nodes.file_delete2({:guiName => "#{file}_file_delete", :name => "#{file}_file_delete", :id => "#{file}_file_delete", :baseURL => "${FILE}"}))
954
+ end
804
955
  end
805
956
 
806
957
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gd_bam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.35
4
+ version: 0.1.36
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-10-10 00:00:00.000000000 Z
12
+ date: 2013-11-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -283,6 +283,22 @@ dependencies:
283
283
  - - ! '>='
284
284
  - !ruby/object:Gem::Version
285
285
  version: '0'
286
+ - !ruby/object:Gem::Dependency
287
+ name: aws-sdk
288
+ requirement: !ruby/object:Gem::Requirement
289
+ none: false
290
+ requirements:
291
+ - - ! '>='
292
+ - !ruby/object:Gem::Version
293
+ version: '0'
294
+ type: :runtime
295
+ prerelease: false
296
+ version_requirements: !ruby/object:Gem::Requirement
297
+ none: false
298
+ requirements:
299
+ - - ! '>='
300
+ - !ruby/object:Gem::Version
301
+ version: '0'
286
302
  - !ruby/object:Gem::Dependency
287
303
  name: rubyzip
288
304
  requirement: !ruby/object:Gem::Requirement