gd_bam 0.0.15 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +313 -5
- data/bin/bam +126 -48
- data/lib/bam/version.rb +1 -1
- data/lib/bam.rb +51 -0
- data/lib/base/errors.rb +15 -0
- data/lib/base/flow.rb +37 -0
- data/lib/base/graph.rb +23 -0
- data/lib/base/metadata.rb +107 -0
- data/lib/base/project.rb +95 -0
- data/lib/base/repo.rb +35 -0
- data/lib/base/sink.rb +44 -0
- data/lib/base/step.rb +47 -0
- data/lib/base/tap.rb +167 -0
- data/lib/base/taps.rb +19 -0
- data/lib/cloud_connect/dsl/cc.rb +42 -0
- data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
- data/lib/cloud_connect/dsl/helpers.rb +199 -0
- data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
- data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
- data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
- data/lib/commands/commands.rb +110 -0
- data/lib/commands/deployment.rb +217 -0
- data/lib/commands/docs_commands.rb +41 -0
- data/lib/commands/gd_commands.rb +95 -0
- data/lib/commands/scaffold_commands.rb +103 -0
- data/lib/commands/sf_commands.rb +37 -0
- data/lib/commands/validators.rb +19 -0
- data/lib/compatibility.rb +19 -0
- data/lib/compiler/compiler.rb +76 -0
- data/lib/compiler/etl_visitor.rb +165 -0
- data/lib/dsl/dsl.rb +125 -0
- data/lib/generators/downloaders.rb +449 -0
- data/lib/generators/etl.rb +261 -0
- data/lib/generators/validators.rb +445 -0
- data/lib/graphs/docentize.grf +1 -1
- data/lib/graphs/dummy.grf +1 -1
- data/lib/graphs/goodsales_v2/docentize.grf +47 -0
- data/lib/graphs/goodsales_v2/dummy.grf +46 -0
- data/lib/graphs/goodsales_v2/load_history.grf +579 -0
- data/lib/graphs/goodsales_v2/process_account.grf +47 -0
- data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
- data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
- data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
- data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
- data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
- data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
- data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
- data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
- data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
- data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
- data/lib/graphs/process_account.grf +1 -1
- data/lib/graphs/process_activity.grf +1 -1
- data/lib/graphs/process_activity_dim.grf +1 -1
- data/lib/graphs/process_activity_owner.grf +1 -1
- data/lib/graphs/process_forecast.grf +1 -1
- data/lib/graphs/process_opp_records.grf +1 -1
- data/lib/graphs/process_opportunity.grf +1 -1
- data/lib/graphs/process_opportunity_line_item.grf +1 -1
- data/lib/graphs/process_opportunity_snapshot.grf +1 -1
- data/lib/graphs/process_owner.grf +1 -1
- data/lib/graphs/process_stage.grf +1 -1
- data/lib/graphs/process_stage_history.grf +1 -1
- data/lib/graphs/process_velocity_duration.grf +1 -1
- data/lib/nodes/clover_gen.rb +59 -946
- data/lib/nodes/dependency.rb +95 -96
- data/lib/runtime.rb +7 -648
- data/lib/utils/utils.rb +66 -0
- data/templates/flow.rb.erb +7 -6
- data/templates/join_template.grf.erb +1 -1
- data/templates/reformat_template.grf.erb +1 -1
- data/templates/sink.json.erb +28 -0
- data/templates/tap.json.erb +3 -5
- data/templates/workspace.prm.erb +4 -0
- metadata +50 -8
- data/lib/contract_checkers/contract_checkers.rb +0 -53
- data/lib/dsl/project_dsl.rb +0 -259
- data/lib/repo/1_config.json +0 -8
- data/templates/dataset.json.erb +0 -13
- data/templates/source.json.erb +0 -22
data/lib/compiler/compiler.rb
ADDED
@@ -0,0 +1,76 @@
```ruby
module GoodData
  module Bam
    module Compiler

      def self.compile_dsl(project)
        fail "There are no flow. Nothing to work on." if project[:flows_definitions].empty?
        compiled_flows = project[:flows_definitions].map do |flow_description|
          Compiler.compile_flow(project, flow_description)
        end
        Project.create(project.merge({:flows => compiled_flows}))
      end

      def self.change_metadata(state, metadata_change_description)
        metadata = state[metadata_change_description[:id]]

        # enrich meta with everything from descriptor. There might be additional stuff so we want this to be part of metadata
        enriched_meta = metadata.merge(metadata_change_description.reject {|k,v| [:id, :steps, :type].include?(k)})

        metadata_change_description[:steps].reduce(Metadata.create(enriched_meta)) do |meta_accumulator, step|
          case step[:type]
          when :field_add
            Metadata.add_field(meta_accumulator, {:name => step[:name]})
          when :field_remove
            Metadata.remove_field(meta_accumulator, step[:name])
          end
        end
      end

      def self.check_metadata_existence(metadata_store, id, flow_description)
        unless metadata_store.has_key?(id)
          fail "Metadata with id \"#{id}\" were not found. Check that you have them included in flow \"#{flow_description[:name]}\"."
        end
      end

      def self.compile_flow(project, flow_description)
        flow = Flow.create({:name => flow_description[:name]})
        # metadata_state = {}
        out_port = nil

        # this nil magic is done so we have visibility to one step back and forward. Currently not used much but it is useful for detectin input to sink
        flow = [nil].concat(flow_description[:steps]).concat([nil]).each_cons(3).reduce(flow) do |flow_memo, steps|
          previous_step, current_step, next_step = steps
          id = current_step[:id] || current_step[:flow_id]

          case current_step[:type]
          when :dummy_tap
            Flow.add_step(flow_memo, Tap.create(current_step.merge(:source => :dummy, :id => :none)))
          when :tap
            tap = Project.find_tap_by_id(project, id)
            fail "Tap \"#{id}\" which was used in flow \"#{current_step[:flow_id]}\" is not defined" if tap.nil?
            Flow.add_step(flow_memo, Tap.create(tap.merge(current_step)))
          when :sink
            sink = Project.find_sink_by_id(project, id)
            fail "Sink \"#{id}\" not found" if sink.nil?
            out_step = Step.find_output_step(previous_step[:steps])
            Flow.add_step(flow_memo, Sink.create(sink.merge(current_step).merge({:in => out_step})))
          when :graph
            graph = Project.find_graph_by_path(project, current_step[:path])
            fail "Graph on the path \"#{current_step[:path]}\" could not be found. This was specified in step \"#{current_step}\"" if graph.nil?
            g = Graph.create(graph.merge(current_step))

            if !Step.has_output_step?(g[:steps]) && g[:steps].count > 1
              fail "You need to specify an output metadata."
            end

            Flow.add_step(flow_memo, Graph.create(g.merge({
              :input_ports => g[:steps],
              :output_port => Step.find_output_step(g[:steps])
            })))
          end
        end
      end
    end
  end
end
```
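The hash shapes that `compile_flow` and `change_metadata` consume are not spelled out anywhere in this diff; the sketch below is inferred from the code above and from the `FlowDSL` in data/lib/dsl/dsl.rb further down. The ids and field names are hypothetical placeholders.

```ruby
# Hypothetical flow description as consumed by Compiler.compile_flow.
# The step hashes mirror what FlowDSL emits (see dsl.rb below); ids are made up.
flow_description = {
  :name  => "my_flow",
  :steps => [
    {:type => :tap,   :id => "account", :flow_id => "my_flow"},
    {:type => :graph, :path => "process_account.grf", :flow_id => "my_flow",
     # each entry in :steps doubles as a metadata change descriptor
     # handed to Compiler.change_metadata by the ETL visitor
     :steps => [
       {:type => :graph_metadata, :id => "account", :flow_id => "my_flow",
        :steps => [
          {:type => :field_add,    :name => "Region"},
          {:type => :field_remove, :name => "LegacyId"}
        ]}
     ]},
    {:type => :sink,  :id => "account", :flow_id => "my_flow"}
  ]
}
```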
data/lib/compiler/etl_visitor.rb
ADDED
@@ -0,0 +1,165 @@
```ruby
module GoodData
  module Bam
    module Compiler
      class EtlVisitor

        def visit(node, state)
          case node[:type]
          when :project
            FileUtils::cd(GoodData::Bam::Commands::ETL_HOME) do
              flows = if state[:only]
                node[:flows].find_all {|f| f[:name] == state[:only]}
              else
                node[:flows]
              end
              result = flows.reduce({
                :flows => [],
                :params => state
              }) {|state, node| visit(node, state)}
              GoodData::CloudConnect::Helpers::create_run_graph("graphs/main.grf", {
                :subgraphs => result[:flows],
                :flow => "main"
              })
            end
          when :flow
            visit_flow(node, state)
          when :tap
            case node[:direct]
            when false
              visit_incremental_tap(node, state)
            else
              case node[:source_type]
              when :salesforce
                visit_direct_sf_tap(node, state)
              else
                visit_direct_file_tap(node, state)
              end
            end
          when :sink
            case node[:target_type]
            when :file, :web
              visit_file_sink(node, state)
            else
              visit_gd_sink(node, state)
            end
          when :graph
            visit_graph(node, state)
          else
            state
          end
        end

        def visit_flow(flow, state)
          puts "Visiting flow"
          result = flow[:steps].reduce(state.merge({
            :graphs => [],
            :metadata => {}
          })) {|state, node| visit(node, state)}
          GoodData::CloudConnect::Helpers::create_run_graph("graphs/#{flow[:name]}_main.grf", {
            :subgraphs => result[:graphs],
            :flow => flow[:name]
          })
          state.merge(:flows => state[:flows].concat(["graphs/#{flow[:name]}_main.grf"]))
        end

        def visit_graph(node, state)
          puts "Visiting graph"
          move_graphs = []
          node[:input_ports].each_with_index do |input, i|
            j = i+1
            name = input[:id]
            graph_path = "graphs/#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
            graph_name = "#{node[:flow_id]}_#{input[:id]}_move_in_#{j}.grf"
            state = state.merge({:graphs => state[:graphs].concat([graph_path])})
            GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
              :source => "${DATA}/#{name}.csv",
              :target => "${DATA}/#{j}_in.csv",
              :operation => "MOVE"
            })

            FileUtils.cp(Graph.get_path(node), (Pathname("graphs") + node[:path].basename).to_s)
            changed_metadata = Compiler.change_metadata(state[:metadata], input)

            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_in.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, state[:metadata][name])
            metadata_path = "./metadata/#{node[:flow_id]}/#{Step.step_name(node)}/#{j}_out.xml"
            GoodData::CloudConnect::Helpers::save_metadata(metadata_path, changed_metadata)
            state = state.merge({:metadata => state[:metadata].merge(changed_metadata[:id] => changed_metadata)})
          end

          state = state.merge({:graphs => state[:graphs].concat([(Pathname("graphs") + node[:path].basename).to_s])})

          out_metadata = node[:output_port]
          graph_path = "graphs/#{node[:flow_id]}_#{Step.step_name(node)}_move_out.grf"
          state = state.merge({:graphs => state[:graphs].concat([graph_path])})
          GoodData::CloudConnect::Helpers::create_moving_graph(graph_path, {
            :source => "${DATA}/out.csv",
            :target => "${DATA}/#{out_metadata[:id]}.csv",
            :operation => "MOVE"
          })
          state
        end

        def visit_file_sink(node, state)
          graph_path = Step.get_graph_path(node)

          sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
          metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
          fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?

          GoodData::Bam::Generators::Etl::create_file_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end


        def visit_gd_sink(node, state)
          graph_path = Step.get_graph_path(node)

          sink_fields = Metadata.get_source_metadata(node)[:fields].map {|f| f[:name]}
          metadata_fields = state[:metadata][node[:in][:id]][:fields].map {|f| f[:name]}
          fail "You have fields \"#{(sink_fields - metadata_fields).join(', ')}\" in sink \"#{node[:id]}\" that are not filled in by data" unless (sink_fields - metadata_fields).empty?

          GoodData::Bam::Generators::Etl::create_gd_uploading_graph(graph_path, node, node[:in][:id], state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path])
          })
        end

        def visit_direct_sf_tap(node, state)
          puts "Visiting direct tap"
          graph_path = Step.get_graph_path(node)
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_sf_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({node[:id] => Metadata.get_target_metadata(node)})
          })
        end

        def visit_direct_file_tap(node, state)
          graph_path = Step.get_graph_path(node)
          tap = Tap.prepare_for_sf_downloader(node)
          GoodData::Bam::Generators::Etl::create_file_downloading_graph(graph_path, node, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({ node[:id] => Metadata.get_target_metadata(node) })
          })
        end

        def visit_incremental_tap(node, state)
          puts "Visiting Incremental tap"
          # Get rid of all the shit. Timestamps, pull ids check for snapshots. And shove them downstream.
          tap = Tap.prepare_for_es_downloader(node)
          graph_path = Step.get_graph_path(tap)
          GoodData::Bam::Generators::Etl::create_es_downloading_graph(graph_path, tap, state)
          state.merge({
            :graphs => state[:graphs].concat([graph_path]),
            :metadata => state[:metadata].merge({tap[:id] => Metadata.get_target_metadata(tap)})
          })
        end
      end
    end
  end
end
```
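How this visitor is actually driven lives in data/lib/commands, which is not expanded in this diff. A minimal sketch of the entry point, assuming the compiled project hash carries `:type => :project` alongside its `:flows`, and that the working directory expected by `ETL_HOME` already contains the generated project layout:

```ruby
# Hypothetical driver; the real invocation in lib/commands may differ.
compiled = GoodData::Bam::Compiler.compile_dsl(project)            # project with compiled :flows
visitor  = GoodData::Bam::Compiler::EtlVisitor.new
# :only restricts generation to a single flow, as handled in the :project branch above
visitor.visit(compiled.merge(:type => :project), {:only => "my_flow"})
```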
data/lib/dsl/dsl.rb
ADDED
@@ -0,0 +1,125 @@
```ruby
module GoodData
  module Bam
    module DSL

      def self.flow(data={}, &bl)
        FlowDSL.new(data, &bl)
      end

      class FlowDSL

        attr_reader :steps, :name

        def initialize(data={}, &bl)
          fail "Flows params needs a hash" unless data.is_a? Hash
          name = data[:id]
          fail "Flow needs to have a name" if name.blank?
          @name = name
          @steps = []
          instance_eval(&bl)
        end

        def tap(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :tap, :flow_id => @name}.merge(data)
        end

        def dummy_tap(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :dummy_tap, :flow_id => @name}.merge(data)
        end

        def sink(data={})
          fail "Params for tap need to be hash" unless data.is_a? Hash
          @steps << {:type => :sink , :flow_id => @name}.merge(data)
        end

        def graph(data, &bl)
          fail "Params for graph need to be hash" unless data.is_a? Hash
          # @steps << {:type => :graph , :flow_id => @name}.merge(data)
          if bl.nil?
            @steps << {:type => :graph, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata = GraphDSL.new(@name, &bl)
            @steps << {:type => :graph, :steps => metadata.to_a, :flow_id => @name}.merge(data)
          end
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

      end

      class GraphDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        def metadata(data, &bl)
          fail "Params need to be hash" unless data.is_a? Hash
          fail "Metadata definition should have at least \"id\" defined. You provided \"#{data}\"." unless data.has_key?(:id)
          if bl.nil?
            @steps << {:type => :graph_metadata, :steps => [], :flow_id => @name}.merge(data)
          else
            metadata_changes = MetadataDSL.new(@name, &bl)
            @steps << {:type => :graph_metadata, :steps => metadata_changes.to_a, :flow_id => @name}.merge(data)
          end
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end


      class MetadataDSL

        def initialize(flow_name, &bl)
          @name = flow_name
          @steps = []
          instance_eval(&bl)
        end

        def add(data, &bl)
          fail "Params need to be hash. This means for example remove(:name => \"value\"). " unless data.is_a? Hash
          fail "Remove field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_add}.merge(data)
        end

        def remove(data, &bl)
          fail "Params need to be hash. This means for example remove(:name => \"value\"). " unless data.is_a? Hash
          fail "Remove field should have at least \"name\" defined. You provided \"#{data}\"." unless data.has_key?(:name)
          @steps << {:type => :field_remove}.merge(data)
        end

        def to_hash
          {
            :name => @name,
            :steps => @steps
          }
        end

        def to_a
          @steps
        end

      end

    end
  end
end
```
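Taken together with the compiler above, a flow definition written against this DSL would look roughly like the sketch below. The tap, graph, and sink ids and the graph path are hypothetical placeholders and would have to exist in the project for compilation to succeed.

```ruby
# Hypothetical flow definition; ids and the graph path are placeholders.
GoodData::Bam::DSL.flow(:id => "account") do
  tap(:id => "account")                      # emits a {:type => :tap, ...} step

  graph(:path => "process_account.grf") do   # block builds the graph's metadata steps
    metadata(:id => "account") do
      add(:name => "Region")                 # {:type => :field_add,    :name => "Region"}
      remove(:name => "LegacyId")            # {:type => :field_remove, :name => "LegacyId"}
    end
  end

  sink(:id => "account")                     # emits a {:type => :sink, ...} step
end
```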