gd_bam 0.0.15 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/README.md +313 -5
  2. data/bin/bam +126 -48
  3. data/lib/bam/version.rb +1 -1
  4. data/lib/bam.rb +51 -0
  5. data/lib/base/errors.rb +15 -0
  6. data/lib/base/flow.rb +37 -0
  7. data/lib/base/graph.rb +23 -0
  8. data/lib/base/metadata.rb +107 -0
  9. data/lib/base/project.rb +95 -0
  10. data/lib/base/repo.rb +35 -0
  11. data/lib/base/sink.rb +44 -0
  12. data/lib/base/step.rb +47 -0
  13. data/lib/base/tap.rb +167 -0
  14. data/lib/base/taps.rb +19 -0
  15. data/lib/cloud_connect/dsl/cc.rb +42 -0
  16. data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
  17. data/lib/cloud_connect/dsl/helpers.rb +199 -0
  18. data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
  19. data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
  20. data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
  21. data/lib/commands/commands.rb +110 -0
  22. data/lib/commands/deployment.rb +217 -0
  23. data/lib/commands/docs_commands.rb +41 -0
  24. data/lib/commands/gd_commands.rb +95 -0
  25. data/lib/commands/scaffold_commands.rb +103 -0
  26. data/lib/commands/sf_commands.rb +37 -0
  27. data/lib/commands/validators.rb +19 -0
  28. data/lib/compatibility.rb +19 -0
  29. data/lib/compiler/compiler.rb +76 -0
  30. data/lib/compiler/etl_visitor.rb +165 -0
  31. data/lib/dsl/dsl.rb +125 -0
  32. data/lib/generators/downloaders.rb +449 -0
  33. data/lib/generators/etl.rb +261 -0
  34. data/lib/generators/validators.rb +445 -0
  35. data/lib/graphs/docentize.grf +1 -1
  36. data/lib/graphs/dummy.grf +1 -1
  37. data/lib/graphs/goodsales_v2/docentize.grf +47 -0
  38. data/lib/graphs/goodsales_v2/dummy.grf +46 -0
  39. data/lib/graphs/goodsales_v2/load_history.grf +579 -0
  40. data/lib/graphs/goodsales_v2/process_account.grf +47 -0
  41. data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
  42. data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
  43. data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
  44. data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
  45. data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
  46. data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
  47. data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
  48. data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
  49. data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
  50. data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
  51. data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
  52. data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
  53. data/lib/graphs/process_account.grf +1 -1
  54. data/lib/graphs/process_activity.grf +1 -1
  55. data/lib/graphs/process_activity_dim.grf +1 -1
  56. data/lib/graphs/process_activity_owner.grf +1 -1
  57. data/lib/graphs/process_forecast.grf +1 -1
  58. data/lib/graphs/process_opp_records.grf +1 -1
  59. data/lib/graphs/process_opportunity.grf +1 -1
  60. data/lib/graphs/process_opportunity_line_item.grf +1 -1
  61. data/lib/graphs/process_opportunity_snapshot.grf +1 -1
  62. data/lib/graphs/process_owner.grf +1 -1
  63. data/lib/graphs/process_stage.grf +1 -1
  64. data/lib/graphs/process_stage_history.grf +1 -1
  65. data/lib/graphs/process_velocity_duration.grf +1 -1
  66. data/lib/nodes/clover_gen.rb +59 -946
  67. data/lib/nodes/dependency.rb +95 -96
  68. data/lib/runtime.rb +7 -648
  69. data/lib/utils/utils.rb +66 -0
  70. data/templates/flow.rb.erb +7 -6
  71. data/templates/join_template.grf.erb +1 -1
  72. data/templates/reformat_template.grf.erb +1 -1
  73. data/templates/sink.json.erb +28 -0
  74. data/templates/tap.json.erb +3 -5
  75. data/templates/workspace.prm.erb +4 -0
  76. metadata +50 -8
  77. data/lib/contract_checkers/contract_checkers.rb +0 -53
  78. data/lib/dsl/project_dsl.rb +0 -259
  79. data/lib/repo/1_config.json +0 -8
  80. data/templates/dataset.json.erb +0 -13
  81. data/templates/source.json.erb +0 -22
@@ -0,0 +1,76 @@
1
module GoodData
  module Bam
    # Compiles the raw flow definitions gathered from the DSL into fully
    # resolved Flow structures: taps, sinks and graphs referenced by id/path
    # are looked up in the project definition.
    module Compiler

      # Compiles every flow definition in +project+.
      #
      # @param project [Hash] project description containing :flows_definitions
      # @return a new project (via Project.create) with :flows filled in
      # @raise [RuntimeError] when there are no flow definitions to compile
      def self.compile_dsl(project)
        # BUGFIX: message used to read "There are no flow."
        fail "There are no flows. Nothing to work on." if project[:flows_definitions].empty?
        compiled_flows = project[:flows_definitions].map do |flow_description|
          Compiler.compile_flow(project, flow_description)
        end
        Project.create(project.merge({:flows => compiled_flows}))
      end

      # Applies a metadata change description (field additions/removals) to
      # the metadata stored in +state+ under the description's :id.
      #
      # @param state [Hash] metadata store keyed by metadata id
      # @param metadata_change_description [Hash] :id, :steps, plus any extra
      #   keys, which are merged into the metadata before the steps run
      # @return the metadata after all :steps have been applied
      def self.change_metadata(state, metadata_change_description)
        metadata = state[metadata_change_description[:id]]

        # enrich meta with everything from descriptor. There might be additional stuff so we want this to be part of metadata
        enriched_meta = metadata.merge(metadata_change_description.reject {|k, v| [:id, :steps, :type].include?(k)})

        metadata_change_description[:steps].reduce(Metadata.create(enriched_meta)) do |meta_accumulator, step|
          case step[:type]
          when :field_add
            Metadata.add_field(meta_accumulator, {:name => step[:name]})
          when :field_remove
            Metadata.remove_field(meta_accumulator, step[:name])
          else
            # BUGFIX: an unknown step type used to nil out the accumulator
            # (case with no match evaluates to nil); ignore it instead.
            meta_accumulator
          end
        end
      end

      # Fails with a descriptive message when +metadata_store+ has no
      # metadata registered under +id+.
      def self.check_metadata_existence(metadata_store, id, flow_description)
        unless metadata_store.has_key?(id)
          fail "Metadata with id \"#{id}\" were not found. Check that you have them included in flow \"#{flow_description[:name]}\"."
        end
      end

      # Compiles a single flow description into a Flow. Walks the steps with
      # a one-step lookbehind/lookahead window and resolves taps, sinks and
      # graphs against the project definitions.
      #
      # @param project [Hash] the project the flow belongs to
      # @param flow_description [Hash] with :name and :steps
      # @return the compiled flow
      # @raise [RuntimeError] when a referenced tap/sink/graph is missing
      def self.compile_flow(project, flow_description)
        flow = Flow.create({:name => flow_description[:name]})

        # this nil magic is done so we have visibility to one step back and forward. Currently not used much but it is useful for detectin input to sink
        flow = [nil].concat(flow_description[:steps]).concat([nil]).each_cons(3).reduce(flow) do |flow_memo, steps|
          previous_step, current_step, next_step = steps
          id = current_step[:id] || current_step[:flow_id]

          case current_step[:type]
          when :dummy_tap
            Flow.add_step(flow_memo, Tap.create(current_step.merge(:source => :dummy, :id => :none)))
          when :tap
            tap = Project.find_tap_by_id(project, id)
            fail "Tap \"#{id}\" which was used in flow \"#{current_step[:flow_id]}\" is not defined" if tap.nil?
            Flow.add_step(flow_memo, Tap.create(tap.merge(current_step)))
          when :sink
            sink = Project.find_sink_by_id(project, id)
            fail "Sink \"#{id}\" not found" if sink.nil?
            # NOTE(review): assumes a sink is never the first step of a flow —
            # a leading sink makes previous_step nil and crashes here. Confirm
            # whether the DSL guarantees this.
            out_step = Step.find_output_step(previous_step[:steps])
            Flow.add_step(flow_memo, Sink.create(sink.merge(current_step).merge({:in => out_step})))
          when :graph
            graph = Project.find_graph_by_path(project, current_step[:path])
            fail "Graph on the path \"#{current_step[:path]}\" could not be found. This was specified in step \"#{current_step}\"" if graph.nil?
            g = Graph.create(graph.merge(current_step))

            if !Step.has_output_step?(g[:steps]) && g[:steps].count > 1
              fail "You need to specify an output metadata."
            end

            Flow.add_step(flow_memo, Graph.create(g.merge({
              :input_ports => g[:steps],
              :output_port => Step.find_output_step(g[:steps])
            })))
          else
            # BUGFIX: an unrecognized step type used to nil out the
            # accumulated flow; pass the accumulator through untouched.
            flow_memo
          end
        end
      end
    end
  end
end
76
+
@@ -0,0 +1,165 @@
1
module GoodData
  module Bam
    module Compiler
      # Walks a compiled project tree and emits the CloudConnect ETL
      # artifacts (runner graphs, moving graphs, metadata XML) for every
      # node it visits. Each visit_* method takes the current state Hash and
      # returns a new state, accumulating :graphs, :flows and :metadata.
      class EtlVisitor

        # Dispatches on node[:type] (:project, :flow, :tap, :sink, :graph)
        # and returns the updated state. Unknown node types are ignored and
        # the state is returned unchanged.
        def visit(node, state)
          case node[:type]
          when :project
            FileUtils::cd(GoodData::Bam::Commands::ETL_HOME) do
              flows = if state[:only]
                node[:flows].find_all {|f| f[:name] == state[:only]}
              else
                node[:flows]
              end
              # Block params renamed so they do not shadow the outer
              # state/node locals (the original shadowed both).
              result = flows.reduce({
                :flows => [],
                :params => state
              }) {|acc, flow_node| visit(flow_node, acc)}
              GoodData::CloudConnect::Helpers::create_run_graph("graphs/main.grf", {
                :subgraphs => result[:flows],
                :flow => "main"
              })
            end
          when :flow
            visit_flow(node, state)
          when :tap
            case node[:direct]
            when false
              visit_incremental_tap(node, state)
            else
              case node[:source_type]
              when :salesforce
                visit_direct_sf_tap(node, state)
              else
                visit_direct_file_tap(node, state)
              end
            end
          when :sink
            case node[:target_type]
            when :file, :web
              visit_file_sink(node, state)
            else
              visit_gd_sink(node, state)
            end
          when :graph
            visit_graph(node, state)
          else
            state
          end
        end

        # Visits all steps of a flow, writes the flow's "<name>_main.grf"
        # runner graph and records it in state[:flows].
        def visit_flow(flow, state)
          puts "Visiting flow"
          result = flow[:steps].reduce(state.merge({
            :graphs => [],
            :metadata => {}
          })) {|acc, step| visit(step, acc)}
          GoodData::CloudConnect::Helpers::create_run_graph("graphs/#{flow[:name]}_main.grf", {
            :subgraphs => result[:graphs],
            :flow => flow[:name]
          })
          # NOTE(review): deliberately merges into the incoming state (not
          # `result`), so :graphs/:metadata accumulated inside this flow do
          # not leak to sibling flows — confirm this is intended.
          state.merge(:flows => state[:flows].concat(["graphs/#{flow[:name]}_main.grf"]))
        end

        # Emits the per-input moving graphs, copies the user graph into
        # place, writes in/out metadata XML for every input port, and adds a
        # trailing moving graph that renames out.csv to the output port id.
        def visit_graph(node, state)
          puts "Visiting graph"
          node[:input_ports].each_with_index do |input, i|
            j = i + 1
            name = input[:id]
            graph_path = "graphs/#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
            state = state.merge({:graphs => state[:graphs].concat([graph_path])})
            GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
              :source => "${DATA}/#{name}.csv",
              :target => "${DATA}/#{j}_in.csv",
              :operation => "MOVE"
            })

            # NOTE(review): assumes node[:path] is a Pathname (has #basename);
            # this copy runs once per input port even though it copies the
            # same file each time — confirm whether it can be hoisted.
            FileUtils.cp(Graph.get_path(node), (Pathname("graphs") + node[:path].basename).to_s)
            changed_metadata = Compiler.change_metadata(state[:metadata], input)

            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_in.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, state[:metadata][name])
            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_out.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, changed_metadata)
            state = state.merge({:metadata => state[:metadata].merge(changed_metadata[:id] => changed_metadata)})
          end

          state = state.merge({:graphs => state[:graphs].concat([(Pathname("graphs") + node[:path].basename).to_s])})

          out_metadata = node[:output_port]
          graph_path = "graphs/#{node[:flow_id]}_#{Step.step_name(node)}_move_out.grf"
          state = state.merge({:graphs => state[:graphs].concat([graph_path])})
          GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
            :source => "${DATA}/out.csv",
            :target => "${DATA}/#{out_metadata[:id]}.csv",
            :operation => "MOVE"
          })
          state
        end

        # Writes the uploading graph for a file/web sink and records it.
        def visit_file_sink(node, state)
          graph_path = Step.get_graph_path(node)
          check_sink_fields!(node, state)
          GoodData::Bam::Generators::Etl::create_file_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end

        # Writes the uploading graph for a GoodData sink and records it.
        def visit_gd_sink(node, state)
          graph_path = Step.get_graph_path(node)
          check_sink_fields!(node, state)
          GoodData::Bam::Generators::Etl::create_gd_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end

        # Writes the Salesforce downloading graph for a direct tap and
        # registers the tap's target metadata in the state.
        def visit_direct_sf_tap(node, state)
          puts "Visiting direct tap"
          graph_path = Step.get_graph_path(node)
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_sf_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
          })
        end

        # Writes the file downloading graph for a direct file tap.
        def visit_direct_file_tap(node, state)
          graph_path = Step.get_graph_path(node)
          # NOTE(review): the prepared tap is computed but never used — the
          # raw node is handed to the generator below. Looks like a
          # copy/paste from visit_direct_sf_tap; confirm whether `tap`
          # should be passed instead. Call kept in case it has side effects.
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_file_downloading_graph(graph_path, node, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
          })
        end

        # Writes the ES downloading graph for an incremental tap.
        def visit_incremental_tap(node, state)
          puts "Visiting Incremental tap"
          # Get rid of all the shit. Timestamps, pull ids check for snapshots. And shove them downstream.
          tap = Tap.prepare_for_es_downloader(node)
          graph_path = Step.get_graph_path(tap)
          GoodData::Bam::Generators::Etl::create_es_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({tap[:id] => Metadata.get_target_metadata(tap)})
          })
        end

        private

        # Shared validation extracted from the two sink visitors: fails when
        # the sink declares fields that the upstream metadata does not fill.
        def check_sink_fields!(node, state)
          sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
          metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
          missing = sink_fields - metadata_fields
          fail "You have fields \"#{missing.join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless missing.empty?
        end
      end
    end
  end
end
data/lib/dsl/dsl.rb ADDED
@@ -0,0 +1,125 @@
1
module GoodData
  module Bam
    # Small declarative DSL for describing flows (taps, graphs, sinks) that
    # the compiler later turns into executable ETL.
    module DSL

      # Entry point: builds a FlowDSL by instance-evaluating the given block.
      #
      # @param data [Hash] flow options; :id is required and becomes the name
      # @return [FlowDSL] the evaluated flow definition
      def self.flow(data={}, &bl)
        FlowDSL.new(data, &bl)
      end

      # Collects the steps of one flow. Every helper appends a step Hash
      # tagged with its :type and the owning :flow_id.
      class FlowDSL

        attr_reader :steps, :name

        # @param data [Hash] flow options; :id is required
        # @raise [RuntimeError] when data is not a Hash or :id is blank
        def initialize(data={}, &bl)
          fail "Flows params needs a hash" unless data.is_a? Hash
          name = data[:id]
          # NOTE(review): blank? comes from ActiveSupport, not core Ruby —
          # presumably required elsewhere in the gem; verify.
          fail "Flow needs to have a name" if name.blank?
          @name = name
          @steps = []
          instance_eval(&bl)
        end

        # Declares a tap (data source) step.
        def tap(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :tap, :flow_id => @name}.merge(data)
        end

        # Declares a dummy tap step (placeholder source).
        def dummy_tap(data={})
          # BUGFIX: error message used to say "tap" (copy/paste)
          fail "Params for dummy_tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :dummy_tap, :flow_id => @name}.merge(data)
        end

        # Declares a sink (data target) step.
        def sink(data={})
          # BUGFIX: error message used to say "tap" (copy/paste)
          fail "Params for sink need to be hash" unless data.is_a? Hash
          @steps << {:type => :sink, :flow_id => @name}.merge(data)
        end

        # Declares a graph (transformation) step. An optional block
        # describes the metadata flowing through the graph via GraphDSL.
        def graph(data, &bl)
          fail "Params for graph need to be hash" unless data.is_a? Hash
          if bl.nil?
            @steps << {:type => :graph, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata = GraphDSL.new(@name, &bl)
            @steps << {:type => :graph, :steps => metadata.to_a, :flow_id => @name}.merge(data)
          end
        end

        # @return [Hash] {:name, :steps} representation of the flow
        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

      end

      # Collects :graph_metadata steps declared inside a graph block.
      class GraphDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        # Declares metadata for the graph; an optional block lists field
        # changes (add/remove) via MetadataDSL. Requires :id.
        def metadata(data, &bl)
          fail "Params need to be hash" unless data.is_a? Hash
          fail "Metadata definition should have at least \"id\" defined. You provided \"#{data}\"." unless data.has_key?(:id)
          if bl.nil?
            @steps << {:type => :graph_metadata, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata_changes = MetadataDSL.new(@name, &bl)
            @steps << {:type => :graph_metadata, :steps => metadata_changes.to_a, :flow_id => @name}.merge(data)
          end
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end

      # Collects field-level metadata changes (:field_add / :field_remove).
      class MetadataDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        # Adds a field to the metadata. Requires :name.
        def add(data, &bl)
          # BUGFIX: both messages used to talk about "remove" (copy/paste)
          fail "Params need to be hash. This means for example add(:name => \"value\"). " unless data.is_a? Hash
          fail "Add field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_add}.merge(data)
        end

        # Removes a field from the metadata. Requires :name.
        def remove(data, &bl)
          fail "Params need to be hash. This means for example remove(:name => \"value\"). " unless data.is_a? Hash
          fail "Remove field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_remove}.merge(data)
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end

    end
  end
end