gd_bam 0.0.15 → 0.1.0
- data/README.md +313 -5
- data/bin/bam +126 -48
- data/lib/bam/version.rb +1 -1
- data/lib/bam.rb +51 -0
- data/lib/base/errors.rb +15 -0
- data/lib/base/flow.rb +37 -0
- data/lib/base/graph.rb +23 -0
- data/lib/base/metadata.rb +107 -0
- data/lib/base/project.rb +95 -0
- data/lib/base/repo.rb +35 -0
- data/lib/base/sink.rb +44 -0
- data/lib/base/step.rb +47 -0
- data/lib/base/tap.rb +167 -0
- data/lib/base/taps.rb +19 -0
- data/lib/cloud_connect/dsl/cc.rb +42 -0
- data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
- data/lib/cloud_connect/dsl/helpers.rb +199 -0
- data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
- data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
- data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
- data/lib/commands/commands.rb +110 -0
- data/lib/commands/deployment.rb +217 -0
- data/lib/commands/docs_commands.rb +41 -0
- data/lib/commands/gd_commands.rb +95 -0
- data/lib/commands/scaffold_commands.rb +103 -0
- data/lib/commands/sf_commands.rb +37 -0
- data/lib/commands/validators.rb +19 -0
- data/lib/compatibility.rb +19 -0
- data/lib/compiler/compiler.rb +76 -0
- data/lib/compiler/etl_visitor.rb +165 -0
- data/lib/dsl/dsl.rb +125 -0
- data/lib/generators/downloaders.rb +449 -0
- data/lib/generators/etl.rb +261 -0
- data/lib/generators/validators.rb +445 -0
- data/lib/graphs/docentize.grf +1 -1
- data/lib/graphs/dummy.grf +1 -1
- data/lib/graphs/goodsales_v2/docentize.grf +47 -0
- data/lib/graphs/goodsales_v2/dummy.grf +46 -0
- data/lib/graphs/goodsales_v2/load_history.grf +579 -0
- data/lib/graphs/goodsales_v2/process_account.grf +47 -0
- data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
- data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
- data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
- data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
- data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
- data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
- data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
- data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
- data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
- data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
- data/lib/graphs/process_account.grf +1 -1
- data/lib/graphs/process_activity.grf +1 -1
- data/lib/graphs/process_activity_dim.grf +1 -1
- data/lib/graphs/process_activity_owner.grf +1 -1
- data/lib/graphs/process_forecast.grf +1 -1
- data/lib/graphs/process_opp_records.grf +1 -1
- data/lib/graphs/process_opportunity.grf +1 -1
- data/lib/graphs/process_opportunity_line_item.grf +1 -1
- data/lib/graphs/process_opportunity_snapshot.grf +1 -1
- data/lib/graphs/process_owner.grf +1 -1
- data/lib/graphs/process_stage.grf +1 -1
- data/lib/graphs/process_stage_history.grf +1 -1
- data/lib/graphs/process_velocity_duration.grf +1 -1
- data/lib/nodes/clover_gen.rb +59 -946
- data/lib/nodes/dependency.rb +95 -96
- data/lib/runtime.rb +7 -648
- data/lib/utils/utils.rb +66 -0
- data/templates/flow.rb.erb +7 -6
- data/templates/join_template.grf.erb +1 -1
- data/templates/reformat_template.grf.erb +1 -1
- data/templates/sink.json.erb +28 -0
- data/templates/tap.json.erb +3 -5
- data/templates/workspace.prm.erb +4 -0
- metadata +50 -8
- data/lib/contract_checkers/contract_checkers.rb +0 -53
- data/lib/dsl/project_dsl.rb +0 -259
- data/lib/repo/1_config.json +0 -8
- data/templates/dataset.json.erb +0 -13
- data/templates/source.json.erb +0 -22
data/lib/compiler/compiler.rb
ADDED
@@ -0,0 +1,76 @@
module GoodData
  module Bam
    module Compiler

      def self.compile_dsl(project)
        fail "There are no flow. Nothing to work on." if project[:flows_definitions].empty?
        compiled_flows = project[:flows_definitions].map do |flow_description|
          Compiler.compile_flow(project, flow_description)
        end
        Project.create(project.merge({:flows => compiled_flows}))
      end

      def self.change_metadata(state, metadata_change_description)
        metadata = state[metadata_change_description[:id]]

        # enrich meta with everything from descriptor. There might be additional stuff so we want this to be part of metadata
        enriched_meta = metadata.merge(metadata_change_description.reject {|k,v| [:id, :steps, :type].include?(k)})

        metadata_change_description[:steps].reduce(Metadata.create(enriched_meta)) do |meta_accumulator, step|
          case step[:type]
          when :field_add
            Metadata.add_field(meta_accumulator, {:name => step[:name]})
          when :field_remove
            Metadata.remove_field(meta_accumulator, step[:name])
          end
        end
      end

      def self.check_metadata_existence(metadata_store, id, flow_description)
        unless metadata_store.has_key?(id)
          fail "Metadata with id \"#{id}\" were not found. Check that you have them included in flow \"#{flow_description[:name]}\"."
        end
      end

      def self.compile_flow(project, flow_description)
        flow = Flow.create({:name => flow_description[:name]})
        # metadata_state = {}
        out_port = nil

        # this nil magic is done so we have visibility to one step back and forward. Currently not used much but it is useful for detectin input to sink
        flow = [nil].concat(flow_description[:steps]).concat([nil]).each_cons(3).reduce(flow) do |flow_memo, steps|
          previous_step, current_step, next_step = steps
          id = current_step[:id] || current_step[:flow_id]

          case current_step[:type]
          when :dummy_tap
            Flow.add_step(flow_memo, Tap.create(current_step.merge(:source => :dummy, :id => :none)))
          when :tap
            tap = Project.find_tap_by_id(project, id)
            fail "Tap \"#{id}\" which was used in flow \"#{current_step[:flow_id]}\" is not defined" if tap.nil?
            Flow.add_step(flow_memo, Tap.create(tap.merge(current_step)))
          when :sink
            sink = Project.find_sink_by_id(project, id)
            fail "Sink \"#{id}\" not found" if sink.nil?
            out_step = Step.find_output_step(previous_step[:steps])
            Flow.add_step(flow_memo, Sink.create(sink.merge(current_step).merge({:in => out_step})))
          when :graph
            graph = Project.find_graph_by_path(project, current_step[:path])
            fail "Graph on the path \"#{current_step[:path]}\" could not be found. This was specified in step \"#{current_step}\"" if graph.nil?
            g = Graph.create(graph.merge(current_step))

            if !Step.has_output_step?(g[:steps]) && g[:steps].count > 1
              fail "You need to specify an output metadata."
            end

            Flow.add_step(flow_memo, Graph.create(g.merge({
              :input_ports => g[:steps],
              :output_port => Step.find_output_step(g[:steps])
            })))
          end
        end
      end
    end
  end
end
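For orientation, here is a minimal sketch of the flow-description hash that `compile_flow` consumes: the step types correspond to the branches of `case current_step[:type]` above, while the concrete names and the `require` path are assumptions for illustration (a flow containing only a `:dummy_tap` avoids the tap/sink/graph finders, which need a fully populated project hash).

    require 'bam'   # assumed entry point of the gd_bam gem (data/lib/bam.rb)

    # Shape of one flow definition; ids are made up for illustration.
    flow_description = {
      :name  => "owner",
      :steps => [
        {:type => :dummy_tap, :flow_id => "owner"}   # no project lookup needed
      ]
    }

    # compile_dsl maps compile_flow over project[:flows_definitions];
    # here we call compile_flow directly with an empty project hash.
    flow = GoodData::Bam::Compiler.compile_flow({}, flow_description)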
data/lib/compiler/etl_visitor.rb
ADDED
@@ -0,0 +1,165 @@
module GoodData
  module Bam
    module Compiler
      class EtlVisitor

        def visit(node, state)
          case node[:type]
          when :project
            FileUtils::cd(GoodData::Bam::Commands::ETL_HOME) do
              flows = if state[:only]
                node[:flows].find_all {|f| f[:name] == state[:only]}
              else
                node[:flows]
              end
              result = flows.reduce({
                :flows => [],
                :params => state
              }) {|state, node| visit(node, state)}
              GoodData::CloudConnect::Helpers::create_run_graph("graphs/main.grf", {
                :subgraphs => result[:flows],
                :flow => "main"
              })
            end
          when :flow
            visit_flow(node, state)
          when :tap
            case node[:direct]
            when false
              visit_incremental_tap(node, state)
            else
              case node[:source_type]
              when :salesforce
                visit_direct_sf_tap(node, state)
              else
                visit_direct_file_tap(node, state)
              end
            end
          when :sink
            case node[:target_type]
            when :file, :web
              visit_file_sink(node, state)
            else
              visit_gd_sink(node, state)
            end
          when :graph
            visit_graph(node, state)
          else
            state
          end
        end

        def visit_flow(flow, state)
          puts "Visiting flow"
          result = flow[:steps].reduce(state.merge({
            :graphs => [],
            :metadata => {}
          })) {|state, node| visit(node, state)}
          GoodData::CloudConnect::Helpers::create_run_graph("graphs/#{flow[:name]}_main.grf", {
            :subgraphs => result[:graphs],
            :flow => flow[:name]
          })
          state.merge(:flows => state[:flows].concat(["graphs/#{flow[:name]}_main.grf"]))
        end

        def visit_graph(node, state)
          puts "Visiting graph"
          move_graphs = []
          node[:input_ports].each_with_index do |input, i|
            j = i+1
            name = input[:id]
            graph_path = "graphs/#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
            graph_name = "#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
            state = state.merge({:graphs => state[:graphs].concat([graph_path])})
            GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
              :source => "${DATA}/#{name}.csv",
              :target => "${DATA}/#{j}_in.csv",
              :operation => "MOVE"
            })

            FileUtils.cp(Graph.get_path(node), (Pathname("graphs") + node[:path].basename).to_s)
            changed_metadata = Compiler.change_metadata(state[:metadata], input)

            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_in.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, state[:metadata][name])
            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_out.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, changed_metadata)
            state = state.merge({:metadata => state[:metadata].merge(changed_metadata[:id] => changed_metadata)})
          end

          state = state.merge({:graphs => state[:graphs].concat([(Pathname("graphs") + node[:path].basename).to_s])})

          out_metadata = node[:output_port]
          graph_path = "graphs/#{node[:flow_id]}_#{Step.step_name(node)}_move_out.grf"
          state = state.merge({:graphs => state[:graphs].concat([graph_path])})
          GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
            :source => "${DATA}/out.csv",
            :target => "${DATA}/#{out_metadata[:id]}.csv",
            :operation => "MOVE"
          })
          state
        end

        def visit_file_sink(node, state)
          graph_path = Step.get_graph_path(node)

          sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
          metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
          fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?

          GoodData::Bam::Generators::Etl::create_file_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end

        def visit_gd_sink(node, state)
          graph_path = Step.get_graph_path(node)

          sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
          metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
          fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?

          GoodData::Bam::Generators::Etl::create_gd_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end

        def visit_direct_sf_tap(node, state)
          puts "Visiting direct tap"
          graph_path = Step.get_graph_path(node)
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_sf_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
          })
        end

        def visit_direct_file_tap(node, state)
          graph_path = Step.get_graph_path(node)
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_file_downloading_graph(graph_path, node, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({ node[:id] => Metadata.get_target_metadata(node) })
          })
        end

        def visit_incremental_tap(node, state)
          puts "Visiting Incremental tap"
          # Get rid of all the shit. Timestamps, pull ids check for snapshots. And shove them downstream.
          tap = Tap.prepare_for_es_downloader(node)
          graph_path = Step.get_graph_path(tap)
          GoodData::Bam::Generators::Etl::create_es_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({tap[:id] => Metadata.get_target_metadata(tap)})
          })
        end
      end
    end
  end
end
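The visitor is functional over a state hash: each `visit_*` method returns a new state with `:graphs` and `:metadata` extended, and collections of nodes are folded with `reduce`. A self-contained sketch of that threading pattern, independent of the gem's helpers and with simplified node shapes of my own invention:

    # Stand-alone illustration of the reduce-over-state pattern used by EtlVisitor.
    nodes = [
      {:type => :tap,   :id => "user"},
      {:type => :graph, :id => "process_owner"},
      {:type => :sink,  :id => "dataset.owner"}
    ]

    initial_state = {:graphs => [], :metadata => {}}

    visit = lambda do |node, state|
      case node[:type]
      when :tap
        # taps register metadata for downstream steps
        state.merge(:metadata => state[:metadata].merge(node[:id] => {:fields => []}))
      when :graph, :sink
        # graphs and sinks append a generated graph path
        state.merge(:graphs => state[:graphs].concat(["graphs/#{node[:id]}.grf"]))
      else
        state
      end
    end

    final_state = nodes.reduce(initial_state) {|state, node| visit.call(node, state)}
    # final_state[:graphs]   => ["graphs/process_owner.grf", "graphs/dataset.owner.grf"]
    # final_state[:metadata] => {"user" => {:fields => []}}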
data/lib/dsl/dsl.rb
ADDED
@@ -0,0 +1,125 @@
module GoodData
  module Bam
    module DSL

      def self.flow(data={}, &bl)
        FlowDSL.new(data, &bl)
      end

      class FlowDSL

        attr_reader :steps, :name

        def initialize(data={}, &bl)
          fail "Flows params needs a hash" unless data.is_a? Hash
          name = data[:id]
          fail "Flow needs to have a name" if name.blank?
          @name = name
          @steps = []
          instance_eval(&bl)
        end

        def tap(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :tap, :flow_id => @name}.merge(data)
        end

        def dummy_tap(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :dummy_tap, :flow_id => @name}.merge(data)
        end

        def sink(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :sink , :flow_id => @name}.merge(data)
        end

        def graph(data, &bl)
          fail "Params for graph need to be hash" unless data.is_a? Hash
          # @steps << {:type => :graph , :flow_id => @name}.merge(data)
          if bl.nil?
            @steps << {:type => :graph, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata = GraphDSL.new(@name, &bl)
            @steps << {:type => :graph, :steps => metadata.to_a, :flow_id => @name}.merge(data)
          end
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

      end

      class GraphDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        def metadata(data, &bl)
          fail "Params need to be hash" unless data.is_a? Hash
          fail "Metadata definition should have at least \"id\" defined. You provided \"#{data}\"." unless data.has_key?(:id)
          if bl.nil?
            @steps << {:type => :graph_metadata, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata_changes = MetadataDSL.new(@name, &bl)
            @steps << {:type => :graph_metadata, :steps => metadata_changes.to_a, :flow_id => @name}.merge(data)
          end
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end

      class MetadataDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        def add(data, &bl)
          fail "Params need to be hash. This means for example remove(:name => \"value\"). " unless data.is_a? Hash
          fail "Remove field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_add}.merge(data)
        end

        def remove(data, &bl)
          fail "Params need to be hash. This means for example remove(:name => \"value\"). " unless data.is_a? Hash
          fail "Remove field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_remove}.merge(data)
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end

    end
  end
end
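A sketch of how a flow definition built with this DSL might look. Only the method names and the required keys (`:id` for the flow and for `metadata`, `:name` for `add`/`remove`) come from the code above; the specific ids, the graph path, and the step attributes are made-up values for illustration.

    # Hypothetical flow definition; values are illustrative only.
    flow = GoodData::Bam::DSL.flow(:id => "owner") do
      tap(:id => "user")

      graph(:path => "graphs/process_owner.grf") do
        metadata(:id => "user") do
          add(:name => "region")
          remove(:name => "ssn")
        end
      end

      sink(:id => "dataset.owner")
    end

    flow.to_hash
    # => {:name => "owner",
    #     :steps => [{:type => :tap, ...}, {:type => :graph, ...}, {:type => :sink, ...}]}

The resulting hash is what the compiler consumes as a flow description: each DSL call appends one step tagged with its `:type` and `:flow_id`, which `Compiler.compile_flow` then dispatches on.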