gd_bam 0.0.15 → 0.1.0

Files changed (81)
  1. data/README.md +313 -5
  2. data/bin/bam +126 -48
  3. data/lib/bam/version.rb +1 -1
  4. data/lib/bam.rb +51 -0
  5. data/lib/base/errors.rb +15 -0
  6. data/lib/base/flow.rb +37 -0
  7. data/lib/base/graph.rb +23 -0
  8. data/lib/base/metadata.rb +107 -0
  9. data/lib/base/project.rb +95 -0
  10. data/lib/base/repo.rb +35 -0
  11. data/lib/base/sink.rb +44 -0
  12. data/lib/base/step.rb +47 -0
  13. data/lib/base/tap.rb +167 -0
  14. data/lib/base/taps.rb +19 -0
  15. data/lib/cloud_connect/dsl/cc.rb +42 -0
  16. data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
  17. data/lib/cloud_connect/dsl/helpers.rb +199 -0
  18. data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
  19. data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
  20. data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
  21. data/lib/commands/commands.rb +110 -0
  22. data/lib/commands/deployment.rb +217 -0
  23. data/lib/commands/docs_commands.rb +41 -0
  24. data/lib/commands/gd_commands.rb +95 -0
  25. data/lib/commands/scaffold_commands.rb +103 -0
  26. data/lib/commands/sf_commands.rb +37 -0
  27. data/lib/commands/validators.rb +19 -0
  28. data/lib/compatibility.rb +19 -0
  29. data/lib/compiler/compiler.rb +76 -0
  30. data/lib/compiler/etl_visitor.rb +165 -0
  31. data/lib/dsl/dsl.rb +125 -0
  32. data/lib/generators/downloaders.rb +449 -0
  33. data/lib/generators/etl.rb +261 -0
  34. data/lib/generators/validators.rb +445 -0
  35. data/lib/graphs/docentize.grf +1 -1
  36. data/lib/graphs/dummy.grf +1 -1
  37. data/lib/graphs/goodsales_v2/docentize.grf +47 -0
  38. data/lib/graphs/goodsales_v2/dummy.grf +46 -0
  39. data/lib/graphs/goodsales_v2/load_history.grf +579 -0
  40. data/lib/graphs/goodsales_v2/process_account.grf +47 -0
  41. data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
  42. data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
  43. data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
  44. data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
  45. data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
  46. data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
  47. data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
  48. data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
  49. data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
  50. data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
  51. data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
  52. data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
  53. data/lib/graphs/process_account.grf +1 -1
  54. data/lib/graphs/process_activity.grf +1 -1
  55. data/lib/graphs/process_activity_dim.grf +1 -1
  56. data/lib/graphs/process_activity_owner.grf +1 -1
  57. data/lib/graphs/process_forecast.grf +1 -1
  58. data/lib/graphs/process_opp_records.grf +1 -1
  59. data/lib/graphs/process_opportunity.grf +1 -1
  60. data/lib/graphs/process_opportunity_line_item.grf +1 -1
  61. data/lib/graphs/process_opportunity_snapshot.grf +1 -1
  62. data/lib/graphs/process_owner.grf +1 -1
  63. data/lib/graphs/process_stage.grf +1 -1
  64. data/lib/graphs/process_stage_history.grf +1 -1
  65. data/lib/graphs/process_velocity_duration.grf +1 -1
  66. data/lib/nodes/clover_gen.rb +59 -946
  67. data/lib/nodes/dependency.rb +95 -96
  68. data/lib/runtime.rb +7 -648
  69. data/lib/utils/utils.rb +66 -0
  70. data/templates/flow.rb.erb +7 -6
  71. data/templates/join_template.grf.erb +1 -1
  72. data/templates/reformat_template.grf.erb +1 -1
  73. data/templates/sink.json.erb +28 -0
  74. data/templates/tap.json.erb +3 -5
  75. data/templates/workspace.prm.erb +4 -0
  76. metadata +50 -8
  77. data/lib/contract_checkers/contract_checkers.rb +0 -53
  78. data/lib/dsl/project_dsl.rb +0 -259
  79. data/lib/repo/1_config.json +0 -8
  80. data/templates/dataset.json.erb +0 -13
  81. data/templates/source.json.erb +0 -22
data/lib/compiler/compiler.rb ADDED
@@ -0,0 +1,76 @@
+ module GoodData
+   module Bam
+     module Compiler
+
+       def self.compile_dsl(project)
+         fail "There are no flows. Nothing to work on." if project[:flows_definitions].empty?
+         compiled_flows = project[:flows_definitions].map do |flow_description|
+           Compiler.compile_flow(project, flow_description)
+         end
+         Project.create(project.merge({:flows => compiled_flows}))
+       end
+
+       def self.change_metadata(state, metadata_change_description)
+         metadata = state[metadata_change_description[:id]]
+
+         # Enrich the metadata with everything from the descriptor. There may be additional keys we want to carry along as part of the metadata.
+         enriched_meta = metadata.merge(metadata_change_description.reject {|k, v| [:id, :steps, :type].include?(k)})
+
+         metadata_change_description[:steps].reduce(Metadata.create(enriched_meta)) do |meta_accumulator, step|
+           case step[:type]
+           when :field_add
+             Metadata.add_field(meta_accumulator, {:name => step[:name]})
+           when :field_remove
+             Metadata.remove_field(meta_accumulator, step[:name])
+           end
+         end
+       end
+
+       def self.check_metadata_existence(metadata_store, id, flow_description)
+         unless metadata_store.has_key?(id)
+           fail "Metadata with id \"#{id}\" were not found. Check that you have them included in flow \"#{flow_description[:name]}\"."
+         end
+       end
+
+       def self.compile_flow(project, flow_description)
+         flow = Flow.create({:name => flow_description[:name]})
+         # metadata_state = {}
+         out_port = nil
+
+         # The nil padding gives us visibility one step back and forward. Currently not used much, but it is useful for detecting the input to a sink.
+         flow = [nil].concat(flow_description[:steps]).concat([nil]).each_cons(3).reduce(flow) do |flow_memo, steps|
+           previous_step, current_step, next_step = steps
+           id = current_step[:id] || current_step[:flow_id]
+
+           case current_step[:type]
+           when :dummy_tap
+             Flow.add_step(flow_memo, Tap.create(current_step.merge(:source => :dummy, :id => :none)))
+           when :tap
+             tap = Project.find_tap_by_id(project, id)
+             fail "Tap \"#{id}\" which was used in flow \"#{current_step[:flow_id]}\" is not defined" if tap.nil?
+             Flow.add_step(flow_memo, Tap.create(tap.merge(current_step)))
+           when :sink
+             sink = Project.find_sink_by_id(project, id)
+             fail "Sink \"#{id}\" not found" if sink.nil?
+             out_step = Step.find_output_step(previous_step[:steps])
+             Flow.add_step(flow_memo, Sink.create(sink.merge(current_step).merge({:in => out_step})))
+           when :graph
+             graph = Project.find_graph_by_path(project, current_step[:path])
+             fail "Graph on the path \"#{current_step[:path]}\" could not be found. This was specified in step \"#{current_step}\"" if graph.nil?
+             g = Graph.create(graph.merge(current_step))
+
+             if !Step.has_output_step?(g[:steps]) && g[:steps].count > 1
+               fail "You need to specify output metadata."
+             end
+
+             Flow.add_step(flow_memo, Graph.create(g.merge({
+               :input_ports => g[:steps],
+               :output_port => Step.find_output_step(g[:steps])
+             })))
+           end
+         end
+       end
+     end
+   end
+ end
+
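For orientation, here is a minimal sketch (not part of the gem) of the kind of hash Compiler.compile_dsl expects. The flow, tap and sink ids are hypothetical; in practice this structure is produced by the DSL in data/lib/dsl/dsl.rb, and the project must also carry the taps, sinks and graphs that find_tap_by_id, find_sink_by_id and find_graph_by_path resolve against.

```ruby
# Illustrative input for Compiler.compile_dsl -- ids and paths are made up.
project = {
  :flows_definitions => [
    {
      :name  => "account",
      :steps => [
        {:type => :tap,   :id => "account_tap",  :flow_id => "account"},
        {:type => :graph, :path => "graphs/process_account.grf", :flow_id => "account", :steps => []},
        {:type => :sink,  :id => "account_sink", :flow_id => "account"}
      ]
    }
  ]
  # ...plus whatever the finder methods (find_tap_by_id, etc.) look up
}
# compiled = GoodData::Bam::Compiler.compile_dsl(project)
```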
data/lib/compiler/etl_visitor.rb ADDED
@@ -0,0 +1,165 @@
+ module GoodData
+   module Bam
+     module Compiler
+       class EtlVisitor
+
+         def visit(node, state)
+           case node[:type]
+           when :project
+             FileUtils::cd(GoodData::Bam::Commands::ETL_HOME) do
+               flows = if state[:only]
+                 node[:flows].find_all {|f| f[:name] == state[:only]}
+               else
+                 node[:flows]
+               end
+               result = flows.reduce({
+                 :flows => [],
+                 :params => state
+               }) {|state, node| visit(node, state)}
+               GoodData::CloudConnect::Helpers::create_run_graph("graphs/main.grf", {
+                 :subgraphs => result[:flows],
+                 :flow => "main"
+               })
+             end
+           when :flow
+             visit_flow(node, state)
+           when :tap
+             case node[:direct]
+             when false
+               visit_incremental_tap(node, state)
+             else
+               case node[:source_type]
+               when :salesforce
+                 visit_direct_sf_tap(node, state)
+               else
+                 visit_direct_file_tap(node, state)
+               end
+             end
+           when :sink
+             case node[:target_type]
+             when :file, :web
+               visit_file_sink(node, state)
+             else
+               visit_gd_sink(node, state)
+             end
+           when :graph
+             visit_graph(node, state)
+           else
+             state
+           end
+         end
+
+         def visit_flow(flow, state)
+           puts "Visiting flow"
+           result = flow[:steps].reduce(state.merge({
+             :graphs => [],
+             :metadata => {}
+           })) {|state, node| visit(node, state)}
+           GoodData::CloudConnect::Helpers::create_run_graph("graphs/#{flow[:name]}_main.grf", {
+             :subgraphs => result[:graphs],
+             :flow => flow[:name]
+           })
+           state.merge(:flows => state[:flows].concat(["graphs/#{flow[:name]}_main.grf"]))
+         end
+
+         def visit_graph(node, state)
+           puts "Visiting graph"
+           move_graphs = []
+           node[:input_ports].each_with_index do |input, i|
+             j = i + 1
+             name = input[:id]
+             graph_path = "graphs/#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
+             graph_name = "#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
+             state = state.merge({:graphs => state[:graphs].concat([graph_path])})
+             GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
+               :source => "${DATA}/#{name}.csv",
+               :target => "${DATA}/#{j}_in.csv",
+               :operation => "MOVE"
+             })
+
+             FileUtils.cp(Graph.get_path(node), (Pathname("graphs") + node[:path].basename).to_s)
+             changed_metadata = Compiler.change_metadata(state[:metadata], input)
+
+             metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_in.xml"
+             GoodData::CloudConnect::Helpers::save_metadata(metadata_path, state[:metadata][name])
+             metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_out.xml"
+             GoodData::CloudConnect::Helpers::save_metadata(metadata_path, changed_metadata)
+             state = state.merge({:metadata => state[:metadata].merge(changed_metadata[:id] => changed_metadata)})
+           end
+
+           state = state.merge({:graphs => state[:graphs].concat([(Pathname("graphs") + node[:path].basename).to_s])})
+
+           out_metadata = node[:output_port]
+           graph_path = "graphs/#{node[:flow_id]}_#{Step.step_name(node)}_move_out.grf"
+           state = state.merge({:graphs => state[:graphs].concat([graph_path])})
+           GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
+             :source => "${DATA}/out.csv",
+             :target => "${DATA}/#{out_metadata[:id]}.csv",
+             :operation => "MOVE"
+           })
+           state
+         end
+
+         def visit_file_sink(node, state)
+           graph_path = Step.get_graph_path(node)
+
+           sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
+           metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
+           fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?
+
+           GoodData::Bam::Generators::Etl::create_file_uploading_graph(graph_path, node, node[:in][:id], state)
+           state.merge({
+             :graphs => state[:graphs].concat([graph_path])
+           })
+         end
+
+
+         def visit_gd_sink(node, state)
+           graph_path = Step.get_graph_path(node)
+
+           sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
+           metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
+           fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?
+
+           GoodData::Bam::Generators::Etl::create_gd_uploading_graph(graph_path, node, node[:in][:id], state)
+           state.merge({
+             :graphs => state[:graphs].concat([graph_path])
+           })
+         end
+
+         def visit_direct_sf_tap(node, state)
+           puts "Visiting direct tap"
+           graph_path = Step.get_graph_path(node)
+           tap = Tap.prepare_for_sf_downloader(node)
+           GoodData::Bam::Generators::Etl::create_sf_downloading_graph(graph_path, tap, state)
+           state.merge({
+             :graphs => state[:graphs].concat([graph_path]),
+             :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
+           })
+         end
+
+         def visit_direct_file_tap(node, state)
+           graph_path = Step.get_graph_path(node)
+           tap = Tap.prepare_for_sf_downloader(node)
+           GoodData::Bam::Generators::Etl::create_file_downloading_graph(graph_path, node, state)
+           state.merge({
+             :graphs => state[:graphs].concat([graph_path]),
+             :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
+           })
+         end
+
+         def visit_incremental_tap(node, state)
+           puts "Visiting Incremental tap"
+           # Strip out the extras (timestamps, pull ids), check for snapshots, and pass the result downstream.
+           tap = Tap.prepare_for_es_downloader(node)
+           graph_path = Step.get_graph_path(tap)
+           GoodData::Bam::Generators::Etl::create_es_downloading_graph(graph_path, tap, state)
+           state.merge({
+             :graphs => state[:graphs].concat([graph_path]),
+             :metadata => state[:metadata].merge({tap[:id] => Metadata.get_target_metadata(tap)})
+           })
+         end
+       end
+     end
+   end
+ end
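The visitor is driven from the top-level :project node. Below is a hedged sketch of how it might be invoked over a compiled project; the :only key limits generation to a single flow, as the :project branch above shows, and everything outside the visit call is assumed.

```ruby
# Illustrative only -- assumes `compiled` is the hash returned by Compiler.compile_dsl
# and that it is tagged with :type => :project and carries a :flows array.
visitor = GoodData::Bam::Compiler::EtlVisitor.new
params  = {:only => "account"}   # hypothetical flow name; leave out to generate every flow
visitor.visit(compiled.merge(:type => :project), params)
```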
data/lib/dsl/dsl.rb ADDED
@@ -0,0 +1,125 @@
+ module GoodData
+   module Bam
+     module DSL
+
+       def self.flow(data={}, &bl)
+         FlowDSL.new(data, &bl)
+       end
+
+       class FlowDSL
+
+         attr_reader :steps, :name
+
+         def initialize(data={}, &bl)
+           fail "Flow params need to be a hash" unless data.is_a? Hash
+           name = data[:id]
+           fail "Flow needs to have a name" if name.blank?
+           @name = name
+           @steps = []
+           instance_eval(&bl)
+         end
+
+         def tap(data={})
+           fail "Params for tap need to be a hash" unless data.is_a? Hash
+           @steps << {:type => :tap, :flow_id => @name}.merge(data)
+         end
+
+         def dummy_tap(data={})
+           fail "Params for dummy_tap need to be a hash" unless data.is_a? Hash
+           @steps << {:type => :dummy_tap, :flow_id => @name}.merge(data)
+         end
+
+         def sink(data={})
+           fail "Params for sink need to be a hash" unless data.is_a? Hash
+           @steps << {:type => :sink, :flow_id => @name}.merge(data)
+         end
+
+         def graph(data, &bl)
+           fail "Params for graph need to be a hash" unless data.is_a? Hash
+           # @steps << {:type => :graph, :flow_id => @name}.merge(data)
+           if bl.nil?
+             @steps << {:type => :graph, :steps => [], :flow_id => @name}.merge(data)
+           else
+             metadata = GraphDSL.new(@name, &bl)
+             @steps << {:type => :graph, :steps => metadata.to_a, :flow_id => @name}.merge(data)
+           end
+         end
+
+         def to_hash
+           {
+             :name => @name,
+             :steps => @steps
+           }
+         end
+
+       end
+
+       class GraphDSL
+
+         def initialize(flow_name, &bl)
+           @name = flow_name
+           @steps = []
+           instance_eval(&bl)
+         end
+
+         def metadata(data, &bl)
+           fail "Params need to be a hash" unless data.is_a? Hash
+           fail "Metadata definition should have at least \"id\" defined. You provided \"#{data}\"." unless data.has_key?(:id)
+           if bl.nil?
+             @steps << {:type => :graph_metadata, :steps => [], :flow_id => @name}.merge(data)
+           else
+             metadata_changes = MetadataDSL.new(@name, &bl)
+             @steps << {:type => :graph_metadata, :steps => metadata_changes.to_a, :flow_id => @name}.merge(data)
+           end
+         end
+
+         def to_hash
+           {
+             :name => @name,
+             :steps => @steps
+           }
+         end
+
+         def to_a
+           @steps
+         end
+
+       end
+
+
+       class MetadataDSL
+
+         def initialize(flow_name, &bl)
+           @name = flow_name
+           @steps = []
+           instance_eval(&bl)
+         end
+
+         def add(data, &bl)
+           fail "Params need to be a hash. This means for example add(:name => \"value\")." unless data.is_a? Hash
+           fail "Field to add should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
+           @steps << {:type => :field_add}.merge(data)
+         end
+
+         def remove(data, &bl)
+           fail "Params need to be a hash. This means for example remove(:name => \"value\")." unless data.is_a? Hash
+           fail "Field to remove should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
+           @steps << {:type => :field_remove}.merge(data)
+         end
+
+         def to_hash
+           {
+             :name => @name,
+             :steps => @steps
+           }
+         end
+
+         def to_a
+           @steps
+         end
+
+       end
+
+     end
+   end
+ end
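Putting the three DSL classes together, here is a hedged example of what a flow definition written against this DSL could look like; every id, path and field name below is made up.

```ruby
flow = GoodData::Bam::DSL.flow(:id => "account") do
  tap(:id => "account_tap")                        # hypothetical tap id

  graph(:path => "graphs/process_account.grf") do  # hypothetical graph path
    metadata(:id => "account_tap") do
      add(:name => "computed_field")               # becomes a :field_add step
      remove(:name => "unused_field")              # becomes a :field_remove step
    end
  end

  sink(:id => "account_sink")                      # hypothetical sink id
end
```

FlowDSL#to_hash then yields the {:name, :steps} structure that presumably ends up in project[:flows_definitions], which is what Compiler.compile_dsl above consumes.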