gd_bam 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/bin/bam.orig +369 -0
  2. data/lib/bam/version.rb +1 -1
  3. data/lib/runtime.rb.orig +748 -0
  4. metadata +272 -284
@@ -0,0 +1,748 @@
1
+ require 'nodes/clover_gen'
2
+ require 'nodes/dependency'
3
+ require 'pathname'
4
+ require 'json'
5
+ require 'builder'
6
+ require 'erubis'
7
+ require 'salesforce'
8
+ require 'active_support/time'
9
+ require 'es'
10
+ require 'gooddata'
11
+ require 'logger'
12
+ require 'highline'
13
+
14
+ module GoodData
15
+
16
+ module CloverGenerator
17
+
18
+ GENERATOR_ROOT = Pathname(__FILE__).expand_path.dirname
19
+ BAM_ROOT = GENERATOR_ROOT.parent
20
+
21
+ BAM_DEFINED_GRAPHS_ROOT = GENERATOR_ROOT + "graphs"
22
+ REPO_ROOT = GENERATOR_ROOT + "repo"
23
+ TEMPLATE_ROOT = BAM_ROOT + "templates"
24
+
25
+ DEFINITION_ROOT = Pathname.new(Dir.pwd)
26
+ USER_DEFINED_GRAPHS_ROOT = DEFINITION_ROOT + "local_graphs"
27
+
28
+ CLOVER_PROJECT_ROOT = DEFINITION_ROOT + "clover-project"
29
+ CLOVER_DOWNLOADERS_ROOT = DEFINITION_ROOT + "downloader-project"
30
+ CONTRACT_PROJECT_ROOT = DEFINITION_ROOT + "contract-project"
31
+
32
+ PROJECT_GRAPHS_ROOT = "graphs"
33
+ PROJECT_METADATA_ROOT = "metadata"
34
+ PROJECT_DATA_ROOT = "data"
35
+
36
+ PARAM_FILE = DEFINITION_ROOT + 'params.json'
37
+ FLOWS_ROOT = DEFINITION_ROOT + "flows"
38
+ USER_DEFINED_PARAMS = File.exist?(PARAM_FILE) ? JSON.parse(File.read(PARAM_FILE), :symbolize_names => true) : {}
39
+
40
+ PARAMS = {:additional_params => {}}.merge(USER_DEFINED_PARAMS).merge(USER_DEFINED_PARAMS[:additional_params] || {})
41
+
42
+ # TODO: REMOVE WHEN WEBDAV cann use SST
43
+ PARAMS[:additional_params][:GD_LOGIN] = PARAMS[:gd_login]
44
+ PARAMS[:additional_params][:GD_PASSWORD] = PARAMS[:gd_pass]
45
+ PARAMS[:additional_params][:GDC_WEBDAV_HOST] = PARAMS[:web_dav_host] || "secure-di.gooddata.com"
46
+
47
49
+
50
+ def self.render_template(template, params, options={})
51
+ to_file = options[:to_file]
52
+ root = options[:root] || TEMPLATE_ROOT
53
+ t = Pathname(template)
54
+ output = Erubis::Eruby.new(File.read(root + t)).result(params)
55
+ if to_file.nil?
56
+ output
57
+ else
58
+ File.open(to_file, 'w') {|f| f.write(output)}
59
+ end
60
+ end
61
+
62
+ def self.set_up_debug(project, flow, graph)
63
+ fail "Project \"#{project}\" was not found" unless File.exist?(project)
64
+ fail "Project \"#{project}\" is not a directory" unless File.directory?(project)
65
+ # :TODO make the proram really check there is a flow and a graph etc before writing to the file
66
+ project = Pathname(project)
67
+ File.open(project + 'params.prm', 'w') do |f|
68
+ f << "FLOW=#{flow}\n"
69
+ f << "NAME=#{graph}\n"
70
+ end
71
+ end
72
+
73
+ def self.setup_clover_project(base, options={})
74
+
75
+ name = options[:name] || PARAMS[:project_name]
76
+
77
+ [PROJECT_GRAPHS_ROOT, PROJECT_METADATA_ROOT, PROJECT_DATA_ROOT].each do |dir|
78
+ FileUtils::mkdir_p base + dir
79
+ end
80
+ File.open(base + 'params.prm', 'w') do |f|
81
+ f << "TRASH=0"
82
+ end
83
+ render_template("project.erb", PARAMS.merge(:project_name => name), :to_file => base + '.project')
84
+ render_template("workspace.prm.erb", PARAMS, :to_file => base + 'workspace.prm')
85
+ end
86
+
87
+ def self.setup_bash_structure(name)
88
+ fail "Directory \"#{name}\" already exists. Please remove it if you want to move forward." if File.exist?(name)
89
+ FileUtils::mkdir_p name
90
+ FileUtils::cd(name) do
91
+ render_template("params.json.erb", PARAMS, :to_file => 'params.json')
92
+ ['flows', 'sinks', 'taps'].each do |dir|
93
+ FileUtils::mkdir_p dir
94
+ end
95
+
96
+ setup_flow('user')
97
+ # render_template("flow.rb.erb", PARAMS, :to_file => 'flows/flow_example.rb')
98
+ render_template("source.json.erb", PARAMS, :to_file => 'taps/source_example.json')
99
+ render_template("dataset.json.erb", PARAMS, :to_file => 'sinks/dataset_example.json')
100
+ end
101
+
102
+ end
103
+
104
+ def self.setup_flow(name)
105
+ render_template("flow.rb.erb", {:flow_name => name}, :to_file => "flows/#{name}.rb")
106
+ end
107
+
108
+ def self.setup_tap(name)
109
+ render_template("tap.json.erb", {:tap_name => name}, :to_file => "taps/#{name}.json")
110
+ end
111
+
112
+ def self.clobber_clover_project()
113
+ FileUtils::rm_rf(CLOVER_PROJECT_ROOT)
114
+ end
115
+
116
+ def self.clobber_downloader_project()
117
+ FileUtils::rm_rf(CLOVER_DOWNLOADERS_ROOT)
118
+ end
119
+
120
+ def self.build_project()
121
+ p = GoodData::CloverGenerator::DSL.project do |p|
122
+ project_name("Goodsales for ABCD")
123
+
124
+ use_dims ["GoodSales/opportunity", "GoodSales/user"]
125
+ use_usecase("GoodSales/quota_attainment")
126
+ end
127
+
128
+ repos = GoodData::CloverGenerator::Repo.load(REPO_ROOT).map {|config| N.new(config)}
129
+ p.run(repos)
130
+ p
131
+ end
132
+
133
+ def self.validate_datasets
134
+ project = build_project
135
+ datasets = project.get_datasets
136
+ GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass])
137
+ GoodData.project = PARAMS[:project_pid]
138
+ gd_datasets = GoodData.get("/gdc/md/#{PARAMS[:project_pid]}/query/datasets")['query']['entries'].map {|m| GoodData.get(m['link'])}
139
+ report1 = GoodData::CloverGenerator::validate_gd_datasets_metadata(datasets, gd_datasets)
140
+ report2 = GoodData::CloverGenerator::validate_gd_datasets(datasets, gd_datasets)
141
+ report1.merge(report2)
142
+ end
143
+
144
+ def self.procs_list(options={})
145
+ project = build_project
146
+ datasets = project.get_datasets
147
+ GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass])
148
+ procs = GoodData.get("/gdc/projects/#{PARAMS[:project_pid]}/dataload/processes")
149
+ procs["processes"]["items"].map {|p| [p["process"]["name"], p["process"]["links"]["self"]]}
150
+ end
151
+
152
+ def self.validate_taps
153
+ project = build_project
154
+ sources = project.get_sources
155
+ client = get_sf_client(PARAMS)
156
+ sf_sources = sources.find_all {|tap| tap[:source] == "salesforce"}
157
+ GoodData::CloverGenerator::validate_sf_metadata(client, sf_sources)
158
+ end
159
+
160
+ def self.sf_jack_in
161
+ client = get_sf_client(PARAMS)
162
+ client.pry
163
+ end
164
+
165
+ def self.generate_docs
166
+ project = build_project
167
+ sources = project.get_sources
168
+ datasets = project.get_datasets
169
+
170
+ taps = ""
171
+ sources.each do |source|
172
+ taps += "####{source[:object]}"
173
+ taps += "\n"
174
+ source[:fields].each do |f|
175
+ if f[:acts_as]
176
+ taps += " #{f[:name]} -> #{f[:acts_as].join(", ")}"
177
+ else
178
+ taps += " #{f[:name]}"
179
+ end
180
+ taps += "\n"
181
+ end
182
+
183
+ taps += "\n"
184
+ end
185
+
186
+ sinks = ""
187
+ datasets.each do |sink|
188
+ name = sink[:gd_name] || sink[:id]
189
+ sinks += "####{name}\n"
190
+ sink[:fields].each do |field|
191
+ name = field[:name] || "#{field[:schema]}:#{field[:ref]}"
192
+ type = field[:type]
193
+ sinks += " #{type.upcase} #{field[:meta]} => #{name}\n"
194
+ end
195
+ sinks += "\n"
196
+
197
+ end
198
+
199
+
200
+ render_template("README.md.erb", PARAMS.merge(:taps => taps, :sinks => sinks), :to_file => 'README.md', :root => DEFINITION_ROOT)
201
+ end
202
+
203
+ def self.model_sync(options)
204
+ dry_run = options[:dry]
205
+ project = build_project
206
+ datasets = project.get_datasets
207
+ model_update_dir = Pathname('model_update')
208
+ cl_home = ENV['CL_HOME'] || PARAMS['CL_HOME'] || fail("Home of cl tool cannot be found. Either set up CL_HOME in your env with 'export CL_HOME=path/to/cl or set it up in your params.json. Point to the directory of CL not to the bin dir.'")
209
+ cl_home = Pathname(cl_home) + 'bin/gdi.sh'
210
+
211
+ FileUtils::mkdir_p(model_update_dir)
212
+ File.open(model_update_dir + 'dummy', 'w')
213
+ FileUtils::cd(model_update_dir) do
214
+ datasets.each do |ds|
215
+ dataset_path = Pathname("cl_file_#{ds[:id]}")
216
+ File.open(dataset_path, "w") do |temp|
217
+ builder = Builder::XmlMarkup.new(:target=>temp, :indent=>2)
218
+ builder.schema do |builder|
219
+ builder.name(ds[:gd_name])
220
+ builder.title(ds[:gd_name])
221
+ builder.columns do |b|
222
+ ds[:fields].each do |f|
223
+ builder.column do |builder|
224
+ builder.title(f[:name])
225
+ builder.ldmType(f[:type].upcase)
226
+ builder.reference(f[:for]) if f.has_key?(:for)
227
+ builder.reference(f[:ref]) if f.has_key?(:ref)
228
+ builder.schemaReference(f[:schema]) if f.has_key?(:schema)
229
+ if f[:type] == "date"
230
+ builder.schemaReference("#{f[:dd]}")
231
+ builder.name("#{f[:name]}")
232
+ else
233
+ builder.name(f[:name] || f[:ref])
234
+ end
235
+ end
236
+ end
237
+ end
238
+ end
239
+ end
240
+ template_name = dry_run ? "update_dataset_dry.script.erb" : "update_dataset.script.erb"
241
+ render_template(template_name, PARAMS.merge({"config_file" => dataset_path.expand_path}), :to_file => 'update_dataset.script')
242
+ puts "Generate #{ds[:id]}"
243
+
244
+ system("#{cl_home} update_dataset.script --username #{PARAMS[:gd_login]} --password #{PARAMS[:gd_pass]}")
245
+ File.delete(dataset_path)
246
+ end
247
+ end
248
+ FileUtils::rm_rf(model_update_dir)
249
+ end
250
+
251
+ def self.generate_downloaders(options={})
252
+ setup_clover_project(CLOVER_DOWNLOADERS_ROOT, :name => "downloaders-#{PARAMS[:project_name]}")
253
+ project = build_project
254
+ sources = project.get_sources
255
+ sf_sources = sources.find_all {|tap| tap[:source] == "salesforce" && tap[:incremental] == true}
256
+ create_incremental_downloader_run_graph(CLOVER_DOWNLOADERS_ROOT + PROJECT_GRAPHS_ROOT + "main.grf", sf_sources)
257
+ s3_backup = PARAMS[:S3_SECRET_ACCESS_KEY] && PARAMS[:S3_ACCESS_KEY_ID] && PARAMS[:S3_BUCKETNAME]
258
+
259
+ GoodData::CloverGenerator::create_incremental_downloading_graph(CLOVER_DOWNLOADERS_ROOT + PROJECT_GRAPHS_ROOT + "incremental.grf", sf_sources, {
260
+ :password => PARAMS[:sf_password],
261
+ :token => PARAMS[:sf_token],
262
+ :login => PARAMS[:sf_login],
263
+ :sf_server => PARAMS[:sf_server],
264
+ :s3_backup => s3_backup
265
+ })
266
+ end
267
+
268
+
269
+ def self.run(dir, options={})
270
+ email = options[:email]
271
+ verbose = options[:v]
272
+
273
+ dir = Pathname(dir)
274
+
275
+ GoodData::CloverGenerator.deploy(dir, options.merge(:name => "Temporary deploy[#{dir}][#{PARAMS[:project_name]}]")) do |deploy_response|
276
+ puts HighLine::color("Executing", HighLine::BOLD) if verbose
277
+ if email.nil?
278
+ result = GoodData::CloverGenerator.execute_process(deploy_response["process"]["links"]["executions"], dir)
279
+ else
280
+ GoodData::CloverGenerator.create_email_channel(options) do |channel_response|
281
+ GoodData::CloverGenerator.subscribe_on_finish(:success, channel_response, deploy_response, options)
282
+ result = GoodData::CloverGenerator.execute_process(deploy_response["process"]["links"]["executions"], dir)
283
+ end
284
+ end
285
+ end
286
+ end
287
+
288
+ def self.execute_process(link, dir)
289
+ result = GoodData.post(link, {
290
+ :execution => {
291
+ :graph => "./#{dir}/graphs/main.grf",
292
+ :params => {}
293
+ }
294
+ })
295
+ begin
296
+ GoodData.poll(result, "executionTask")
297
+ rescue RestClient::RequestFailed => e
298
+
299
+ ensure
300
+ result = GoodData.get(result["executionTask"]["links"]["detail"])
301
+ if result["executionDetail"]["status"] == "ERROR"
302
+ fail "Runing process failed. You can look at a log here #{result["executionDetail"]["logFileName"]}"
303
+ end
304
+ end
305
+ result
306
+ end
307
+
308
+ def self.connect_to_gd(options={})
309
+ GoodData.connect(PARAMS[:gd_login], PARAMS[:gd_pass])
310
+ GoodData.project = PARAMS[:project_pid] if !PARAMS[:project_pid].nil? && !PARAMS[:project_pid].empty?
311
+ end
312
+
313
+ def self.subscribe_on_finish(event_type, channel, deploy, options={})
314
+ channel_uri = channel["channelConfiguration"]["meta"]["uri"]
315
+ process_id = deploy["process"]["links"]["self"].split('/').last
316
+ event_id = case event_type
317
+ when :success
318
+ "dataload.process.finish.ok"
319
+ when :failure
320
+ "dataload.process.finish.error"
321
+ else
322
+ fail "You specified unknown event \"#{event_type}\""
323
+ end
324
+
325
+ templates = {
326
+ :success => {
327
+ :message => "Just wanted to let you know that ETL for \"#{deploy["process"]["name"]}\" Succeeded",
328
+ :subject => "GoodData ETL SUCCESS: process with name \"#{deploy["process"]["name"]}\""
329
+ },
330
+ :failure => {
331
+ :message => "Just wanted to let you know that ETL for \"#{deploy["process"]["name"]}\" Failed",
332
+ :subject => "GoodData ETL FAILURE: process with name \"#{deploy["process"]["name"]}\""
333
+ }
334
+ }
335
+
336
+ templates_to_be_used = templates[event_type]
337
+
338
+ data = {
339
+ :subscription => {
340
+ :triggers => [
341
+ {
342
+ :projectEventTrigger => {
343
+ :types => [event_id]
344
+ }
345
+ }
346
+ ],
347
+ :condition => {
348
+ :condition => {
349
+ :expression => "params.PROCESS_ID==\"#{process_id}\""
350
+ }
351
+ },
352
+ :subject => {
353
+ :template => {
354
+ :expression => templates_to_be_used[:subject]
355
+ }
356
+ },
357
+ :message => {
358
+ :template => {
359
+ :expression => templates_to_be_used[:message]
360
+ }
361
+ },
362
+ :channels => [channel_uri],
363
+ :meta => {
364
+ :title => "Notification for process #{process_id}"
365
+ }
366
+ }
367
+ }
368
+ profile_id = GoodData.connection.user["profile"].split("/").last
369
+ GoodData.post("/gdc/projects/#{PARAMS[:project_pid]}/users/#{profile_id}/subscriptions", data)
370
+ end
371
+
372
+ def self.clone_project(options={})
373
+ pid = case options[:blueprint]
374
+ when "goodsales"
375
+ "nt935rwzls50zfqwy6dh62tabu8h0ocy"
376
+ when nil
377
+ fail "Empty project not supported now"
378
+ end
379
+
380
+ project_name = PARAMS[:project_name]
381
+ fail "project name has to be filled in" if project_name.blank?
382
+ with_users = options[:with_users]
383
+
384
+ export = {
385
+ :exportProject => {
386
+ :exportUsers => with_users ? 1 : 0,
387
+ :exportData => 1
388
+ }
389
+ }
390
+
391
+ result = GoodData.post("/gdc/md/#{pid}/maintenance/export", export)
392
+ token = result["exportArtifact"]["token"]
393
+ status_url = result["exportArtifact"]["status"]["uri"]
394
+
395
+ state = GoodData.get(status_url)["taskState"]["status"]
396
+ while state == "RUNNING"
397
+ sleep 5
398
+ result = GoodData.get(status_url)
399
+ state = result["taskState"]["status"]
400
+ end
401
+
402
+ old_project = GoodData::Project[pid]
403
+
404
+ pr = {
405
+ :project => {
406
+ :content => {
407
+ :guidedNavigation => 1,
408
+ :driver => "Pg",
409
+ :authorizationToken => options[:token]
410
+ },
411
+ :meta => {
412
+ :title => project_name,
413
+ :summary => "Testing Project"
414
+ }
415
+ }
416
+ }
417
+ result = GoodData.post("/gdc/projects/", pr)
418
+ uri = result["uri"]
419
+ while(GoodData.get(uri)["project"]["content"]["state"] == "LOADING")
420
+ sleep(5)
421
+ end
422
+
423
+ new_project = GoodData::Project[uri]
424
+
425
+ import = {
426
+ :importProject => {
427
+ :token => token
428
+ }
429
+ }
430
+
431
+ result = GoodData.post("/gdc/md/#{new_project.obj_id}/maintenance/import", import)
432
+ status_url = result["uri"]
433
+ state = GoodData.get(status_url)["taskState"]["status"]
434
+ while state == "RUNNING"
435
+ sleep 5
436
+ result = GoodData.get(status_url)
437
+ state = result["taskState"]["status"]
438
+ end
439
+ GoodData.post "/gdc/projects/#{new_project.obj_id}/eventStore/stores", {:store => {:storeId => "es_0"}}
440
+ new_project.obj_id
441
+ end
442
+
443
+ def self.create_email_channel(options={}, &block)
444
+ email = options[:email]
445
+
446
+ data = {
447
+ :channelConfiguration => {
448
+ :configuration => {
449
+ :emailConfiguration => {
450
+ :to => email
451
+ }
452
+ },
453
+ :meta => {
454
+ :title => "temporary email channel"
455
+ }
456
+ }
457
+ }
458
+ profile_id = GoodData.connection.user["profile"].split("/").last
459
+ res = GoodData.post("/gdc/account/profile/#{profile_id}/channelConfigurations", data)
460
+ self_link = res["channelConfiguration"]["meta"]["uri"]
461
+ if block
462
+ begin
463
+ block.call(res)
464
+ ensure
465
+ GoodData.delete(self_link)
466
+ end
467
+ else
468
+ res
469
+ end
470
+ end
471
+
472
+ def self.deploy_graph(dir, options={})
473
+ deploy_name = options[:name] || "#{PARAMS[:project_name]}"
474
+ verbose = options[:verbose] || false
475
+ puts HighLine::color("Deploying #{dir}", HighLine::BOLD) if verbose
476
+ res = nil
477
+
478
+ Tempfile.open("deploy-graph-archive") do |temp|
479
+ Zip::ZipOutputStream.open(temp.path) do |zio|
480
+ Dir.glob("./#{dir}/**/*") do |item|
481
+ puts "including #{item}" if verbose
482
+ unless File.directory?(item)
483
+ zio.put_next_entry(item)
484
+ zio.print IO.read(item)
485
+ end
486
+ end
487
+ end
488
+
489
+ GoodData.connection.upload(temp.path)
490
+ process_id = options[:process]
491
+
492
+ binding.pry
493
+
494
+ data = {
495
+ :process => {
496
+ :name => deploy_name,
497
+ :path => "/uploads/#{File.basename(temp.path)}"
498
+ }
499
+ }
500
+ res = if process_id.nil?
501
+ GoodData.post("/gdc/projects/#{PARAMS[:project_pid]}/dataload/processes", data)
502
+ else
503
+ GoodData.put("/gdc/projects/#{PARAMS[:project_pid]}/dataload/processes/#{process_id}", data)
504
+ end
505
+ end
506
+ puts HighLine::color("Deploy DONE #{dir}", HighLine::BOLD) if verbose
507
+ res
508
+ end
509
+
510
+ def self.deploy(dir, options={}, &block)
511
+ verbose = options[:verbose] || false
512
+ if block
513
+ begin
514
+ res = deploy_graph(dir, options)
515
+ block.call(res)
516
+ ensure
517
+ self_link = res["process"]["links"]["self"]
518
+ GoodData.delete(self_link)
519
+ end
520
+ else
521
+ deploy_graph(dir, options)
522
+ end
523
+ end
524
+
525
+ def self.generate_graph_template(name, target)
526
+ template_name = "#{name}_template.grf.erb"
527
+ render_template(template_name, PARAMS, :to_file => USER_DEFINED_GRAPHS_ROOT + target)
528
+ end
529
+
530
+
531
+ def self.generate(options)
532
+
533
+ only_flow = options[:only]
534
+ setup_clover_project(CLOVER_PROJECT_ROOT, :name => "etl-#{PARAMS[:project_name]}")
535
+ p = build_project
536
+ sources = p.get_sources
537
+ datasets = p.get_datasets
538
+ s3_backup = PARAMS[:S3_SECRET_ACCESS_KEY] && PARAMS[:S3_ACCESS_KEY_ID] && PARAMS[:S3_BUCKETNAME]
539
+
540
+ flows = []
541
+ FileUtils::cd FLOWS_ROOT do
542
+ flows_sources = Dir.glob("*.rb")
543
+ flows = flows_sources.map do |f|
544
+ instance_eval(File.read(f))
545
+ end
546
+ end
547
+
548
+ flows = flows.find_all {|flow| flow && flow.name == only_flow} unless only_flow.nil? || only_flow.empty?
549
+ fail "The flow you specified was not found" if flows.empty? && !only_flow.nil?
550
+ fail "There are no flows to generate from" if flows.empty?
551
+ super_flow = []
552
+ FileUtils::cd CLOVER_PROJECT_ROOT do
553
+
554
+ flows.each do |f|
555
+
556
+ current_metadata = {}
557
+ steps_to_be_wrapped = []
558
+ flow_sources = []
559
+ if f.nil?
560
+ puts "Flow skipped"
561
+ next
562
+ end
563
+ name = f.name
564
+ step_no = 0
565
+
566
+ begin
567
+ f.steps.each_with_index do |s, i|
568
+
569
+ if s[:type] == :tap
570
+ source_name = s[:source_name] || f.name
571
+ source = sources.find do |source|
572
+ source[:id] == source_name
573
+ end
574
+
575
+ fail "Tap \"#{source_name}\" was not found" if source.nil?
576
+
577
+ flow_sources << source
578
+
579
+ dataset_name = source[:dataset] || source[:id]
580
+
581
+ current_metadata[source_name] = GoodData::CloverGenerator::create_metadata(source)
582
+ graph_name = "graphs/#{dataset_name}_#{source[:source]}_#{source[:type]}.grf"
583
+ steps_to_be_wrapped << {
584
+ :name => "#{source_name}_download",
585
+ :file => graph_name,
586
+ :flow => source_name
587
+ }
588
+
589
+ if source[:incremental] == true
590
+ current_metadata[source_name] = GoodData::CloverGenerator::DSL::Metadata.new(current_metadata[source_name]).change do |m|
591
+ m.remove("timestamp")
592
+ end.to_hash
593
+
594
+ GoodData::CloverGenerator::create_es_downloading_graph(graph_name, [source], {
595
+ :metadata => current_metadata[source_name],
596
+ :s3_backup => s3_backup
597
+ })
598
+ else
599
+ GoodData::CloverGenerator::create_sf_downloading_graph(graph_name, [source], {
600
+ :password => PARAMS[:sf_password],
601
+ :token => PARAMS[:sf_token],
602
+ :login => PARAMS[:sf_login],
603
+ :sf_server => PARAMS[:sf_server],
604
+ :metadata => current_metadata[source_name],
605
+ :s3_backup => s3_backup
606
+ })
607
+ end
608
+
609
+ step_no += 1
610
+ elsif s[:type] == :upload
611
+ source_name = s[:id] || f.name
612
+ dataset = datasets.find {|d| d[:id] == source_name}
613
+
614
+ fail "Sink \"#{source_name}\" was not found" if dataset.nil?
615
+ fail "Sink needs to have id defined" if dataset[:id].nil?
616
+
617
+ unless current_metadata.has_key?(source_name)
618
+ fail("Tap #{source_name} was not found in current metadata")
619
+ end
620
+ fail "Sink \"#{f.name}\" was not found" if dataset.nil?
621
+ metadata = current_metadata[source_name]
622
+
623
+ graph_name = "graphs/#{dataset[:id]}_#{dataset[:type]}.grf"
624
+ steps_to_be_wrapped << {
625
+ :name => "#{name}_upload",
626
+ :file => graph_name,
627
+ :flow => source_name
628
+ }
629
+
630
+ GoodData::CloverGenerator::create_uploading_graph(graph_name, {
631
+ :datasets => dataset,
632
+ :metadata => current_metadata[source_name]
633
+ })
634
+ step_no += 1
635
+ elsif s[:type] == :user_provided
636
+ # check that what is getting in a step is check
637
+ s[:metadata_block] && s[:metadata_block].each do |val|
638
+ name = val[:name]
639
+ next if(name.nil?)
640
+ sources_names = flow_sources.map {|flow_source| flow_source[:id]}
641
+ included_in_flow = sources_names.include?(name)
642
+ unless included_in_flow
643
+ # binding.pry
644
+ # fail "Metadata \"#{name}\" is not in the defined by any source"
645
+ end
646
+ end
647
+ graph_name = s[:graph]
648
+ graph_filename = "#{s[:graph]}.grf"
649
+
650
+ if File.exist?(USER_DEFINED_GRAPHS_ROOT + graph_filename)
651
+ FileUtils::cp(USER_DEFINED_GRAPHS_ROOT + graph_filename, CLOVER_PROJECT_ROOT + PROJECT_GRAPHS_ROOT)
652
+ elsif File.exist?(BAM_DEFINED_GRAPHS_ROOT + graph_filename)
653
+ FileUtils::cp(BAM_DEFINED_GRAPHS_ROOT + graph_filename, CLOVER_PROJECT_ROOT + PROJECT_GRAPHS_ROOT)
654
+ else
655
+ fail("The graph \"#{graph_filename}\" was not found in any location (local, global)")
656
+ end
657
+ graph_filename = "graphs/#{graph_filename}"
658
+
659
+ step_no += 1
660
+ if s[:metadata_block].nil?
661
+ s[:metadata_block] = [{:name=> flow_sources.first[:id] }]
662
+ end
663
+
664
+ s[:metadata_block] && s[:metadata_block].each_with_index do |metadata, i|
665
+ j = i+1
666
+ bl = metadata[:block]
667
+ name = metadata[:name] || f.name
668
+ as = metadata[:out_as]
669
+ FileUtils::mkdir_p("metadata/#{f.name}/#{graph_name}")
670
+
671
+ input_meta = current_metadata[name].clone
672
+
673
+ input_meta[:name] = "in_#{j}"
674
+ GoodData::CloverGenerator::save_metadata("metadata/#{f.name}/#{graph_name}/#{j}_in.xml", input_meta)
675
+ m = GoodData::CloverGenerator::DSL::Metadata.new(current_metadata[name])
676
+ new_m = (bl && bl.call(m)) || current_metadata[name]
677
+
678
+ current_metadata[name] = new_m
679
+ unless as.nil?
680
+ as_metadata = new_m.clone
681
+ as_metadata[:name] = as
682
+ current_metadata[as] = as_metadata
683
+ end
684
+
685
+ output_meta = current_metadata[name].clone
686
+ output_meta[:name] = "out_#{j}"
687
+ GoodData::CloverGenerator::save_metadata("metadata/#{f.name}/#{graph_name}/#{j}_out.xml", output_meta)
688
+
689
+ GoodData::CloverGenerator::create_moving_graph("graphs/#{f.name}_#{graph_name}_move_in_#{j}.grf", {
690
+ :source => "${DATA}/#{name}.csv",
691
+ :target => "${DATA}/#{j}_in.csv",
692
+ :operation => "MOVE"
693
+ })
694
+
695
+ steps_to_be_wrapped << {
696
+ :name => "graphs/#{f.name}_#{graph_name}_move_in_#{j}.grf",
697
+ :file => "graphs/#{f.name}_#{graph_name}_move_in_#{j}.grf",
698
+ :flow => "kopirujeme"
699
+ }
700
+ end
701
+
702
+ steps_to_be_wrapped << {
703
+ :name => graph_name,
704
+ :file => graph_filename,
705
+ :flow => f.name
706
+ }
707
+
708
+ output_id = if s[:metadata_block] && s[:metadata_block].any? {|metadata| metadata[:out_as]}
709
+ s[:metadata_block].find {|metadata| metadata[:out_as]}[:out_as]
710
+ end
711
+ output_id = s[:metadata_block].first[:name] if output_id.nil? && s[:metadata_block] && s[:metadata_block].length == 1
712
+
713
+ GoodData::CloverGenerator::create_moving_graph("graphs/#{f.name}_#{graph_name}_move_out.grf", {
714
+ :source => "${DATA}/out.csv",
715
+ :target => "${DATA}/#{output_id}.csv",
716
+ :operation => "MOVE"
717
+ })
718
+
719
+ steps_to_be_wrapped << {
720
+ :name => "graphs/#{f.name}_#{graph_name}_move_out.grf",
721
+ :file => "graphs/#{f.name}_#{graph_name}_move_out.grf",
722
+ :flow => "kopirujeme"
723
+ }
724
+
725
+ end
726
+ end
727
+ rescue GoodData::CloverGenerator::DSL::RemoveMetadataFieldError => e
728
+ puts "Removing field \"#{e.field}\" from metadata \"#{e.metadata.name}\" in Flow \"#{f.name}\" there was a roblem with step X."
729
+ end
730
+
731
+ GoodData::CloverGenerator::create_run_graph("graphs/#{f.name}_main.grf", {
732
+ :subgraphs => steps_to_be_wrapped
733
+ })
734
+ super_flow << {
735
+ :name => name,
736
+ :file => "graphs/#{f.name}_main.grf",
737
+ :flow => f.name
738
+ }
739
+ end
740
+
741
+ GoodData::CloverGenerator::create_run_graph("graphs/main.grf", {
742
+ :subgraphs => super_flow
743
+ })
744
+ end
745
+ end
746
747
+ end
748
+ end