gd_bam 0.0.15 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +313 -5
- data/bin/bam +126 -48
- data/lib/bam/version.rb +1 -1
- data/lib/bam.rb +51 -0
- data/lib/base/errors.rb +15 -0
- data/lib/base/flow.rb +37 -0
- data/lib/base/graph.rb +23 -0
- data/lib/base/metadata.rb +107 -0
- data/lib/base/project.rb +95 -0
- data/lib/base/repo.rb +35 -0
- data/lib/base/sink.rb +44 -0
- data/lib/base/step.rb +47 -0
- data/lib/base/tap.rb +167 -0
- data/lib/base/taps.rb +19 -0
- data/lib/cloud_connect/dsl/cc.rb +42 -0
- data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
- data/lib/cloud_connect/dsl/helpers.rb +199 -0
- data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
- data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
- data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
- data/lib/commands/commands.rb +110 -0
- data/lib/commands/deployment.rb +217 -0
- data/lib/commands/docs_commands.rb +41 -0
- data/lib/commands/gd_commands.rb +95 -0
- data/lib/commands/scaffold_commands.rb +103 -0
- data/lib/commands/sf_commands.rb +37 -0
- data/lib/commands/validators.rb +19 -0
- data/lib/compatibility.rb +19 -0
- data/lib/compiler/compiler.rb +76 -0
- data/lib/compiler/etl_visitor.rb +165 -0
- data/lib/dsl/dsl.rb +125 -0
- data/lib/generators/downloaders.rb +449 -0
- data/lib/generators/etl.rb +261 -0
- data/lib/generators/validators.rb +445 -0
- data/lib/graphs/docentize.grf +1 -1
- data/lib/graphs/dummy.grf +1 -1
- data/lib/graphs/goodsales_v2/docentize.grf +47 -0
- data/lib/graphs/goodsales_v2/dummy.grf +46 -0
- data/lib/graphs/goodsales_v2/load_history.grf +579 -0
- data/lib/graphs/goodsales_v2/process_account.grf +47 -0
- data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
- data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
- data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
- data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
- data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
- data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
- data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
- data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
- data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
- data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
- data/lib/graphs/process_account.grf +1 -1
- data/lib/graphs/process_activity.grf +1 -1
- data/lib/graphs/process_activity_dim.grf +1 -1
- data/lib/graphs/process_activity_owner.grf +1 -1
- data/lib/graphs/process_forecast.grf +1 -1
- data/lib/graphs/process_opp_records.grf +1 -1
- data/lib/graphs/process_opportunity.grf +1 -1
- data/lib/graphs/process_opportunity_line_item.grf +1 -1
- data/lib/graphs/process_opportunity_snapshot.grf +1 -1
- data/lib/graphs/process_owner.grf +1 -1
- data/lib/graphs/process_stage.grf +1 -1
- data/lib/graphs/process_stage_history.grf +1 -1
- data/lib/graphs/process_velocity_duration.grf +1 -1
- data/lib/nodes/clover_gen.rb +59 -946
- data/lib/nodes/dependency.rb +95 -96
- data/lib/runtime.rb +7 -648
- data/lib/utils/utils.rb +66 -0
- data/templates/flow.rb.erb +7 -6
- data/templates/join_template.grf.erb +1 -1
- data/templates/reformat_template.grf.erb +1 -1
- data/templates/sink.json.erb +28 -0
- data/templates/tap.json.erb +3 -5
- data/templates/workspace.prm.erb +4 -0
- metadata +50 -8
- data/lib/contract_checkers/contract_checkers.rb +0 -53
- data/lib/dsl/project_dsl.rb +0 -259
- data/lib/repo/1_config.json +0 -8
- data/templates/dataset.json.erb +0 -13
- data/templates/source.json.erb +0 -22
data/README.md
CHANGED
@@ -3,6 +3,11 @@
|
|
3
3
|
BAM is a tool that helps you be more productive while creating and maintaining projects.
|
4
4
|
|
5
5
|
|
6
|
+
###TODO
|
7
|
+
* Note to user if he generates downloaders and has no incremental taps
|
8
|
+
* Clean docs so there is no reference to dataset as in sink
|
9
|
+
* make 1:1 tap:sink easier
|
10
|
+
|
6
11
|
##Installation
|
7
12
|
|
8
13
|
make sure you have ruby (1.9 and 1.8.7 are currently supported) and that you have gem installed.
|
@@ -73,7 +78,10 @@ In the log there should be something like
|
|
73
78
|
|
74
79
|
Worker task failed: Missing mandatory fields
|
75
80
|
|
76
|
-
This means that some of your fields are either not accessible or not in your SF project.
|
81
|
+
This means that some of your fields are either not accessible or not in your SF project. You can check several things
|
82
|
+
|
83
|
+
* first make sure that you can actually connect to sf. For this run `bam sf_validate_connection`
|
84
|
+
* If that is correct, use `bam taps_validate` to identify fields that are inaccessible. It will mark in red those that are not there; orange marks those that are not there but are marked as not mandatory in taps.
|
77
85
|
|
78
86
|
##Next steps
|
79
87
|
Ok so by now you hopefully have your project up and running. Before we dive into modifications you have to understand key concepts that BAM builds on. Once you are comfortable with those we will get back to modifications.
|
@@ -168,8 +176,60 @@ Fail early. There is nothing more frustrating than when the ETL fails during exe
|
|
168
176
|
####Mandatory fields
|
169
177
|
Sometimes it is necessary to move fields around in SF. In such a case the tap will fail. If you know this upfront you can tell BAM that this field is not mandatory and it will silently go along, filling the missing field with ''. If it is marked as mandatory — which all fields are by default — it will fail if it cannot access the field.
|
170
178
|
|
179
|
+
###CSV
|
180
|
+
|
181
|
+
{
|
182
|
+
"source" : "/some/path/to/file.csv"
|
183
|
+
,"id" : "user"
|
184
|
+
,"fields" : [
|
185
|
+
{
|
186
|
+
"name" : "Id"
|
187
|
+
},
|
188
|
+
{
|
189
|
+
"name" : "FirstName"
|
190
|
+
},
|
191
|
+
{
|
192
|
+
"name" : "LastName"
|
193
|
+
},
|
194
|
+
{
|
195
|
+
"name" : "Region"
|
196
|
+
},
|
197
|
+
{
|
198
|
+
"name" : "Department"
|
199
|
+
}
|
200
|
+
]
|
201
|
+
}
|
202
|
+
|
203
|
+
###CSV on GoodData WEBDav
|
204
|
+
|
205
|
+
{
|
206
|
+
"source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/HERE_PUT_YOUR_PROJECT_ID/validated/account.csv"
|
207
|
+
,"id" : "user"
|
208
|
+
,"fields" : [
|
209
|
+
{
|
210
|
+
"name" : "Id"
|
211
|
+
},
|
212
|
+
{
|
213
|
+
"name" : "FirstName"
|
214
|
+
},
|
215
|
+
{
|
216
|
+
"name" : "LastName"
|
217
|
+
},
|
218
|
+
{
|
219
|
+
"name" : "Region"
|
220
|
+
},
|
221
|
+
{
|
222
|
+
"name" : "Department"
|
223
|
+
}
|
224
|
+
]
|
225
|
+
}
|
226
|
+
|
227
|
+
####Note
|
228
|
+
If you wonder about the password we do not like it either it should go away soon. It is a workaround for a bug.
|
229
|
+
|
230
|
+
|
171
231
|
##Flows
|
172
|
-
Flow is an abstraction that should connect a tap with a sink creating a .. well a flow.
|
232
|
+
Flow is an abstraction that should connect a tap(s) with a sink creating a .. well a flow.
|
173
233
|
|
174
234
|
Probably better to show you a simple example. This flow will download users from sf (you have to provide credentials in params.json). It then runs a graph called "process user" (this is part of the distribution but we can act as if it were an arbitrary graph). This graph concatenates first name and last name together. It then feeds data to the sink.
|
175
235
|
|
@@ -195,6 +255,151 @@ Note couple of things.
|
|
195
255
|
* there might be one or more metadata statements after graph definition. Each graph might expect numerous inputs so the order of these `metadata` statements is telling you which input goes where. Second purpose is actually telling what is going to change in those metadata. Here we are saying "*Ok the user is going in as input number one (there is no number two in this case). At the output user will have one more field and that is Name. On top of that we are removing two fields FirstName and LastName*".
|
196
256
|
* The last thing we specify is the sink. Again as in tap you can specify id so you tell BAM for which sink it should look. If you do not fill it in, by default it looks for the same id as your flow.
|
197
257
|
|
258
|
+
###Creating your own graph - short tutorial
|
259
|
+
Rarely do you want to put into a sink exactly what comes from a tap. Often you want to do some mangling with it. Let's have a look at how BAM tackles this. BAM currently does not try to do any of this by itself (might change) and relies on CloudConnect graphs to do the heavy lifting. So from the example above let's redo the Owner example ourselves.
|
260
|
+
|
261
|
+
Let's pretend we start completely from scratch so first we need a flow.
|
262
|
+
|
263
|
+
bam scaffold flow my_owner
|
264
|
+
|
265
|
+
There is going to be a new flow created for us (accidentally it is actually the old owner flow but no worries we will change it soon). If you go ahead and try to generate it right away it should throw an error
|
266
|
+
|
267
|
+
bam generate --only my_owner
|
268
|
+
|
269
|
+
The error says "error: Tap "user" was not found" and it is because our tap is named "owner". Go ahead and change it. The same should repeat for the sink so change it to "owner" as well. Now the interesting part. When BAM hits the expression `graph(xy)` in the flow it goes and tries to find a graph that it incorporates in the generated output (it just copies it, no magic). It currently looks at two places. First is the `local_graphs` directory of the current graph. So what we need to do is create a graph, put it into a `local_graphs` folder and then change the flow accordingly. BAM comes with a couple of templates to make this easier so we are going to use one of those. What we want to do is basically a reformat type of job. There is going to be one input and one output. Luckily there is a reformat template. Go ahead and run
|
270
|
+
|
271
|
+
bam scaffold graph_template reformat my_reformat.grf
|
272
|
+
|
273
|
+
Now you could go ahead and edit the graph by hand if you are an advanced user but usually it is much easier to edit the graph in CloudConnect. I will show you how to do it currently (will change hopefully :-)).
|
274
|
+
|
275
|
+
First let's open the flow again and change the graph name from "process_user" to "my_reformat". Then go ahead and generate this flow again.
|
276
|
+
|
277
|
+
bam generate --only my_owner
|
278
|
+
|
279
|
+
import the clover-project as a CC project (remember this is a fully functional CC project. Also during import uncheck the checkbox that says "Copy the project to workspace"; this will allow us to regenerate at will without importing or copying files). Go ahead and in CloudConnect open the graph that says "my_reformat.grf". It looks something like this.
|
280
|
+
|
281
|
+
![my_reformat.grf graph](https://www.dropbox.com/s/aolhvwjfpb21bm5/reformat.png?dl=1)
|
282
|
+
|
283
|
+
Notice couple of things.
|
284
|
+
|
285
|
+
* Input is named generically "1_in.csv"
|
286
|
+
* Output is named "out.csv"
|
287
|
+
* If you try doubleclicking the edge. It does not open the metadata but throws an error. If you hover over the edge that goes to reformat the Metadata path will be something like "${PROJECT}/metadata/${FLOW}/${NAME}/1_in.xml". Similarly if you hover over the edge that goes out of reformat it will say "${PROJECT}/metadata/${FLOW}/${NAME}/1_out.xml"
|
288
|
+
|
289
|
+
Generally the graph you create cannot be arbitrary. The BAM framework expects something from you so it can give you something back. We will walk through that in more detail later.
|
290
|
+
|
291
|
+
If you know enough CloudConnect you know that ${SOMETHING} stands for a parameter. The metadata path "${PROJECT}/metadata/${FLOW}/${NAME}/1_out.xml" thus expects 3 parameters. One is a global ${PROJECT} and is just the path to your current project and automatically provided by the project and CloudConnect. More BAM related are the other two. They are dynamically generated during runtime so when a graph is run it reaches to the correct metadata.
|
292
|
+
|
293
|
+
Short intermezzo - Why do all this?
|
294
|
+
The thinking behind this came from the GoodSales project. There is a default implementation of GoodSales and majority of the customers share some of the pieces. If you share those pieces you do not have to create your own on top of that if somebody finds a bug in that piece and fixes it you will get the fix for free. So it is a way of sharing updates. If you have something specific you can either change the graph altogether or you can prepend some other graph that will for example normalize the data so the standard can be used etc.
|
295
|
+
|
296
|
+
Since we want to debug things we need to provide the clover-graph the same information it would have during runtime. This basically means filling the proper values to the FLOW and NAME params. There is a bam command to help you. Run
|
297
|
+
|
298
|
+
bam debug clover-project/ my_owner my_reformat
|
299
|
+
|
300
|
+
It says for the project that resides inside 'clover-project' I want to debug the my_owner flow and my_reformat graph. Now go ahead and close and reopen the graph in CC again (to let CC reload the parameter files). Now if you click on metadata it should open just fine. Take note that in and out metadata are different, following the specification we made inside our flow. Go ahead and open the reformat and open the source tab. You will see something like this
|
301
|
+
|
302
|
+
function integer transform() {
|
303
|
+
$out.0.* = $in.0.*;
|
304
|
+
|
305
|
+
return ALL;
|
306
|
+
}
|
307
|
+
|
308
|
+
You can go ahead and dig deeper into this language called CTL2 on the CloudConnect documentation page but what we need to worry about for now is the line
|
309
|
+
|
310
|
+
$out.0.* = $in.0.*;
|
311
|
+
|
312
|
+
It says for each field that comes in an input record find a field with the same name on the output and copy the value. This is great since our input metadata share a lot of fields. There are also a couple of fields that are on the output but not the input. Like URL. We need to say specifically what to do with those. For example like this
|
313
|
+
|
314
|
+
$out.0.Url = "This will be same for all fields"
|
315
|
+
|
316
|
+
Or maybe somewhat more useful
|
317
|
+
|
318
|
+
$out.0.Url = "This is a person named " + $out.0.Name
|
319
|
+
|
320
|
+
The result should look something like this
|
321
|
+
|
322
|
+
$out.0.* = $in.0.*;
|
323
|
+
$out.0.Url = "This is a person named " + $out.0.Name
|
324
|
+
|
325
|
+
Again let's note couple of things.
|
326
|
+
|
327
|
+
* the * notation is what makes the BAM tick. It maps the fields by name automatically at runtime so if we add let's say Region to tap this reformat would still work letting Region through
|
328
|
+
* Some of the fields are used by name. In our case it is URL and Id. If these are not on the input/output it will crash.
|
329
|
+
* This is important to understand. Usually it is not a big deal. If you are doing your custom implementation you will just write the custom graphs that would suite your situation but sometimes you will (and always should strive to) implement projects that will reuse certain parts. Like we are doing with goodsales. This is ETL that use tens of customers. Then you have a problem when they do not have Id in their data and you have to deal with this situation.
|
330
|
+
|
331
|
+
You are almost done. Just save the work you have done and let's think about what you have done. What you did is that you set up the graph that is part of the generated project. If you would regenerate it now the changes you did by hand would go away. You need to copy the changed graph somewhere where it would be found by BAM. This place is the `local_graphs` directory that we talked about before. You can do it in linux/mac easily with
|
332
|
+
|
333
|
+
cp ./clover_project/graphs/my_reformat ./local_graphs
|
334
|
+
|
335
|
+
Now go ahead and regenerate the project. If you reopen the graph in CloudConnect the changes you made should be there.
|
336
|
+
|
337
|
+
###Creating graphs - some facts
|
338
|
+
|
339
|
+
####Reading data
|
340
|
+
Data are moved around in CSVs so if you want an input in your graph create a CSV Reader. Framework will provide them in files n_in.csv where n is number from 1 up based on the order of the metadata statements after the graph call in your flow definition.
|
341
|
+
|
342
|
+
tap(:id => 'user')
|
343
|
+
|
344
|
+
graph('my_graph')
|
345
|
+
metadata('user')
|
346
|
+
metadata('account')
|
347
|
+
|
348
|
+
This means that `my_graph.grf` should have two CSV readers one with source `${DATA_DIR}/1_in.csv` and that will be fed the data from User tap and other with source `${DATA_DIR}/2_in.csv` and that will be fed data from tap account.
|
349
|
+
|
350
|
+
####Writing data
|
351
|
+
The graph is expected to output one output and that should be as a CSV to file `${DATA_DIR}/out.csv`.
|
352
|
+
|
353
|
+
####Metadata
|
354
|
+
There are 2 metadata files for each tap. Input and output. This is similar to the situation with files. The metadata are numbered by the "port number". This is determined again by the order of metadata statements after graph in the flow definition.
|
355
|
+
|
356
|
+
Imagine you have this Tap.
|
357
|
+
|
358
|
+
{
|
359
|
+
"source" : "salesforce"
|
360
|
+
,"object" : "User"
|
361
|
+
,"id" : "user"
|
362
|
+
,"fields" : [
|
363
|
+
{
|
364
|
+
"name" : "Id"
|
365
|
+
},
|
366
|
+
{
|
367
|
+
"name" : "Name"
|
368
|
+
}
|
369
|
+
]
|
370
|
+
}
|
371
|
+
|
372
|
+
GoodData::CloverGenerator::DSL::flow("user") do |f|
|
373
|
+
tap(:id => "user")
|
374
|
+
tap(:id => "account")
|
375
|
+
|
376
|
+
graph("my_graph")
|
377
|
+
metadata("user") do |m|
|
378
|
+
m.add(:name => "Url")
|
379
|
+
end
|
380
|
+
metadata("account")
|
381
|
+
|
382
|
+
# other stuff
|
383
|
+
end
|
384
|
+
|
385
|
+
This means that in the graph the first "port" will get data from the user tap in file `1_in.csv` (we talked about this). Two metadata files for this tap are going to be created in `clover_project/user/my_graph/1_in.xml` and `clover_project/user/my_graph/1_out.xml`. The first one will have
|
386
|
+
|
387
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
388
|
+
<Record fieldDelimiter="," name="in_1" recordDelimiter="\n" type="delimited">
|
389
|
+
<Field name="Id" type="string" nullable="true"/>
|
390
|
+
<Field name="Name" type="string" nullable="true"/>
|
391
|
+
</Record>
|
392
|
+
|
393
|
+
the other
|
394
|
+
|
395
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
396
|
+
<Record fieldDelimiter="," name="in_1" recordDelimiter="\n" type="delimited">
|
397
|
+
<Field name="Id" type="string" nullable="true"/>
|
398
|
+
<Field name="Name" type="string" nullable="true"/>
|
399
|
+
<Field name="Url" type="string" nullable="true"/>
|
400
|
+
</Record>
|
401
|
+
|
402
|
+
Take note that this is in sync with what we have defined in the flow.
|
198
403
|
|
199
404
|
##Sinks
|
200
405
|
|
@@ -234,11 +439,74 @@ Sink is a definition of where data goes. Currently there is only one sink type a
|
|
234
439
|
|
235
440
|
GoodData sink is currently just mimicking the CL tool definition + some shortcuts on top of that. If you are familiar with the CL tool you should be right at home if I tell you that the only additional thing you have to provide is telling BAM which metadata field is pulled in to a given field.
|
236
441
|
|
442
|
+
##Deploying and running your work
|
443
|
+
When you have finished work on your masterpiece it is time to run it. As we have already seen there is a command `run` in BAM. You can use it like this
|
444
|
+
|
445
|
+
bam run clover_project
|
446
|
+
|
447
|
+
It will deploy the directory (remember this is a fully functional CloudConnect project) to the secure server. It will potentially create a channel with your email and run it. When the run ends it will delete both the channel and the deploy. This is useful for one off jobs when you want to take advantage of the remote stack. Please take note that when you run this what is in the directory is deployed. This might bite you if you previously ran this locally and there are some data or parameter files. I recommend regenerating the graph before running.
|
448
|
+
|
449
|
+
If you want something more permanent use deploy. This is the same as the first step during run. When deployed you can schedule it through the administration UI https://secure.gooddata.com/admin/dataload/ . The same goes here as well. If you ran it locally there probably are some files or parameters which might break your run. Regenerating is recommended.
|
450
|
+
|
451
|
+
Note: Both `run` and `deploy` will change in the future. Run still does not have the emails completely done. For deploy we will probably try to prepare schedule and email channels as well.
|
237
452
|
|
238
453
|
##Adding a field
|
239
454
|
Ok let's say you have a basic GoodSales
|
240
455
|
|
241
456
|
|
457
|
+
## Data validation
|
458
|
+
BAM tries to teach you the right way to do projects and one of the pain points is the data you are getting from a customer. If you have ever got an invalid CSV file from a customer and had to write a custom script to find the error, read on. We believe this pain can be automated. In a tap you can annotate the fields with validation rules. These rules are for example saying "This field is a number", "This field is a URI", "This is a date in format", "This cannot be empty". From this description you can generate a validation graph that sits in between the customer's data and your ETL. If a new upload is validated and passes it is handed to ETL. If not it is ignored so your ETL can work only on data that passes certain checks. So how to do this?
|
459
|
+
|
460
|
+
{
|
461
|
+
"type" : "tap"
|
462
|
+
,"source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/d2uopvzruqsc9mwuili714h0g6sl8h5y/validated/account*.csv"
|
463
|
+
,"validation_source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/d2uopvzruqsc9mwuili714h0g6sl8h5y/account*.csv"
|
464
|
+
,"incremental" : true
|
465
|
+
,"id" : "account"
|
466
|
+
,"fields" : [
|
467
|
+
{
|
468
|
+
"name" : "Id",
|
469
|
+
"validates_as" : {
|
470
|
+
"type" : "integer"
|
471
|
+
}
|
472
|
+
},
|
473
|
+
{
|
474
|
+
"name" : "Name"
|
475
|
+
},
|
476
|
+
{
|
477
|
+
"name" : "Date",
|
478
|
+
"validates_as" : {
|
479
|
+
"type" : "date",
|
480
|
+
"format" : "yyyy/MM/dd"
|
481
|
+
}
|
482
|
+
},
|
483
|
+
{
|
484
|
+
"name": "OtherField"
|
485
|
+
}
|
486
|
+
]
|
487
|
+
// ,"limit": "10"
|
488
|
+
}
|
489
|
+
|
490
|
+
|
491
|
+
Now you can generate data validator. This is a standard graph so you can deploy it to the production so it sits in GoodData.
|
492
|
+
|
493
|
+
bam generate_validator
|
494
|
+
bam -vl deploy validator-project
|
495
|
+
|
496
|
+
This is going to deploy it in verbose mode. You need to get the deploy process number so you can use it later. Since you ran it in verbose mode one of the last lines should look like
|
497
|
+
|
498
|
+
=>"/gdc/projects/d2uopvzruqsc9mwuili714h0g6sl8h5y/dataload/processes/663136fa-f996-4b35-828a-60dd154ff71a", "executions"=>"/gdc/projects/d2uopvzruqsc9mwuili714h0g6sl8h5y/dataload/processes/663136fa-f996-4b35-828a-60dd154ff71a/executions"}}}
|
499
|
+
|
500
|
+
The process number is this 663136fa-f996-4b35-828a-60dd154ff71a in this case.
|
501
|
+
|
502
|
+
Now you need to upload the data and tell the validator to check it. Either you can do it yourself (once we document the things that must be followed) or you can use one of our agents. There is a Java one maturing but if it is development time you can easily use the one in BAM (we do not recommend using it in production though; there are lots of nifty features missing).
|
503
|
+
|
504
|
+
bam -vl run_validator --process 663136fa-f996-4b35-828a-60dd154ff71a account.csv
|
505
|
+
|
506
|
+
You can see that the `run_validator` command needs to have the process parameter passed in. It also consumes list of files to upload. After it does it runs the validator. If everything goes ok it just moves the files to other dir where ETL or downloaders can pick it up then it quits silently if not it tells you where to look for human readable report what went wrong.
|
507
|
+
|
508
|
+
|
509
|
+
|
242
510
|
##Runtime commands
|
243
511
|
Part of the distribution is the bam executable which lets you do several neat things on the commandline
|
244
512
|
|
@@ -263,10 +531,10 @@ deploys the directory to the server.
|
|
263
531
|
|
264
532
|
bam deploy clover_project --process 1231jkadjk123k
|
265
533
|
|
266
|
-
###
|
534
|
+
###model_sync
|
267
535
|
This will go through the sinks and updates the model. It relies on the CL tool to do this, which also describes the limitations. It is very useful for adding additional fields, not changing the model altogether.
|
268
536
|
|
269
|
-
###
|
537
|
+
###run
|
270
538
|
Runs the project on the server. This is achieved by deploying it there and deleting it after the run finishes.
|
271
539
|
|
272
540
|
bam run clover-project
|
@@ -306,8 +574,48 @@ Currently works only for SF. Validates that the target SF instance has all the f
|
|
306
574
|
### sinks_validate
|
307
575
|
TBD
|
308
576
|
|
577
|
+
###sf_jack_in
|
578
|
+
|
579
|
+
Note: Before we start, if you want to exit the interactive session just type `exit`. If there is output of the command that is larger than the screen the session enters a different "viewing" mode; you can exit it by pressing `q`
|
580
|
+
|
581
|
+
This will log you into Salesforce project and starts up interactive client. You can do several useful things for example validate fields while talking to the customer. I will show you couple of things
|
582
|
+
|
583
|
+
####You can list fields
|
584
|
+
|
585
|
+
fields('Opportunity')
|
586
|
+
|
587
|
+
and do a lot of interesting stuff with it like searching
|
588
|
+
|
589
|
+
fields('Opportunity').grep /__c/
|
590
|
+
|
591
|
+
counting
|
592
|
+
|
593
|
+
fields('Opportunity').count
|
594
|
+
|
595
|
+
and basically anything you can do with ruby like writing those fields to a CSV file
|
596
|
+
|
597
|
+
CSV.open('list_of_opportunity_fields.csv', 'w') do |csv|
|
598
|
+
fields('Opportunity').map {|f| f.upcase}.each do |f|
|
599
|
+
csv << [f]
|
600
|
+
end
|
601
|
+
end
|
602
|
+
|
603
|
+
####You can make a query
|
604
|
+
|
605
|
+
query("SELECT SUM(Amount) FROM Opportunity")
|
606
|
+
|
607
|
+
or
|
608
|
+
|
609
|
+
query("SELECT Id, Name, StageName FROM Opportunity LIMIT 10")
|
610
|
+
|
611
|
+
again you can access the results in many ways like summing amount on Closed Won opportunities
|
612
|
+
|
613
|
+
query("SELECT Id, Amount, StageName FROM Opportunity LIMIT 10").find_all do |line|
|
614
|
+
line[:StageName] == "Closed Won"
|
615
|
+
end.reduce(0) {|memo, line| memo += line[:Amount].to_i}
|
616
|
+
|
309
617
|
##The why
|
310
|
-
For those that are interested in reading why we actually bothered developing this. Read on.
|
618
|
+
For those that are interested in reading why we actually bothered developing this and what decisions we made. Read on and let us know if you like them or not.
|
311
619
|
|
312
620
|
###Metadata management
|
313
621
|
Key pain that I had with CloudConnect is that I did not like the management of metadata. Every project I saw was just pile of metadata definition that has to be constantly changed and tweaked. This is caused by couple of choices that creators of underlying Clover engine made in the beginning and probably will not be changed easily. While I am trying to make it better I am still bound by these choices and sometimes the wiring stick out - sorry for that.
|
data/bin/bam
CHANGED
@@ -21,7 +21,6 @@ default_value false
|
|
21
21
|
arg_name 'logger'
|
22
22
|
switch [:l,:logger]
|
23
23
|
|
24
|
-
|
25
24
|
desc 'Generates clover project based on information in current directory. The default ouptut is the directory ./clover-project'
|
26
25
|
# arg_name 'Describe arguments to new here'
|
27
26
|
command :generate do |c|
|
@@ -31,8 +30,14 @@ command :generate do |c|
|
|
31
30
|
c.flag :only
|
32
31
|
|
33
32
|
c.action do |global_options,options,args|
|
34
|
-
GoodData::
|
35
|
-
GoodData::
|
33
|
+
GoodData::Bam::Commands::clobber_etl_project('.')
|
34
|
+
GoodData::Bam::Commands::setup_etl_project('.', PARAMS.merge(options).merge({:project_name => "etl-#{PARAMS[:project_name]}"}))
|
35
|
+
GoodData::Bam::Commands::generate('.', PARAMS.merge(options).merge({
|
36
|
+
:project_name => "etl-#{PARAMS[:project_name]}",
|
37
|
+
:graph_repos => [
|
38
|
+
GoodData::Bam::Repository.create(:type => :file, :base => Pathname('./local_graphs').expand_path),
|
39
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)
|
40
|
+
]}))
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
@@ -41,10 +46,66 @@ desc 'Jacks into SF.'
|
|
41
46
|
command :sf_jack_in do |c|
|
42
47
|
|
43
48
|
c.action do |global_options,options,args|
|
44
|
-
GoodData::
|
49
|
+
GoodData::Bam::Commands::sf_jack_in(PARAMS)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
desc 'Jacks into project.'
|
54
|
+
# arg_name 'Describe arguments to new here'
|
55
|
+
command :project_jack_in do |c|
|
56
|
+
|
57
|
+
c.action do |global_options,options,args|
|
58
|
+
GoodData::Bam::Commands::project_jack_in(PARAMS)
|
45
59
|
end
|
46
60
|
end
|
47
61
|
|
62
|
+
|
63
|
+
desc 'Validatates connection to SalesForce.'
|
64
|
+
# arg_name 'Describe arguments to new here'
|
65
|
+
command :sf_validate_connection do |c|
|
66
|
+
|
67
|
+
c.action do |global_options,options,args|
|
68
|
+
GoodData::Bam::Commands::sf_validate_connection(PARAMS)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
desc 'Generate data validator.'
|
74
|
+
# arg_name 'Describe arguments to new here'
|
75
|
+
command :generate_validator do |c|
|
76
|
+
c.action do |global_options,options,args|
|
77
|
+
GoodData::Bam::Commands::generate_validators('.', PARAMS.merge(options).merge({
|
78
|
+
:project_name => "validator-#{PARAMS[:project_name]}",
|
79
|
+
:graph_repos => [
|
80
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
81
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)
|
82
|
+
]
|
83
|
+
}))
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
desc 'Run data validator.'
|
88
|
+
# arg_name 'Describe arguments to new here'
|
89
|
+
command :run_validator do |c|
|
90
|
+
|
91
|
+
c.desc 'Checker process ID'
|
92
|
+
c.arg_name 'process'
|
93
|
+
c.flag :process
|
94
|
+
|
95
|
+
c.action do |global_options,options,args|
|
96
|
+
process = options[:process]
|
97
|
+
files = args.map {|f| Pathname(f)}
|
98
|
+
files.each do |f|
|
99
|
+
fail "Provded file \"#{f}\" does not exist." unless File.exist?(f)
|
100
|
+
end
|
101
|
+
fail "You need to specify process" if process.blank?
|
102
|
+
|
103
|
+
GoodData::Bam::Commands::connect_to_gd()
|
104
|
+
GoodData::Bam::Commands::run_validator(process, files, PARAMS)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
|
48
109
|
desc 'Generates clover project for downloaders.'
|
49
110
|
# arg_name 'Describe arguments to new here'
|
50
111
|
command :generate_downloaders do |c|
|
@@ -54,17 +115,27 @@ command :generate_downloaders do |c|
|
|
54
115
|
c.flag :backup
|
55
116
|
|
56
117
|
c.action do |global_options,options,args|
|
57
|
-
|
58
|
-
|
118
|
+
params = global_options[:user_params]
|
119
|
+
params = PARAMS.merge({
|
120
|
+
:project_name => "downloaders-#{PARAMS[:project_name]}",
|
121
|
+
:graph_repos => [
|
122
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
123
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)]
|
124
|
+
})
|
125
|
+
GoodData::Bam::Commands::generate_downloaders(".", params)
|
59
126
|
end
|
60
127
|
end
|
61
128
|
|
62
129
|
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
63
130
|
# arg_name 'Describe arguments to new here'
|
64
|
-
command :
|
131
|
+
command :sf_taps_validate do |c|
|
65
132
|
c.action do |global_options,options,args|
|
66
133
|
verbose = global_options[:v]
|
67
|
-
|
134
|
+
params = PARAMS.merge({
|
135
|
+
:graph_repos => [
|
136
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
137
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)]})
|
138
|
+
result = GoodData::Bam::Commands::validate_sf_taps(params)
|
68
139
|
|
69
140
|
error = false
|
70
141
|
result.each_pair do |obj, fields|
|
@@ -79,11 +150,19 @@ command :taps_validate do |c|
|
|
79
150
|
end
|
80
151
|
end
|
81
152
|
|
153
|
+
desc "Prepares params.prm file for debugging that particular graph"
|
154
|
+
command :debug do |c|
|
155
|
+
c.action do |global_options,options,args|
|
156
|
+
fail "Arguments should be passed as \"project\" \"flow\" \"graph\"" if args.length < 3
|
157
|
+
GoodData::Bam::Commands::set_up_debug(args.first, args[1], args[2])
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
82
161
|
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
83
162
|
# arg_name 'Describe arguments to new here'
|
84
163
|
command :docs do |c|
|
85
164
|
c.action do |global_options,options,args|
|
86
|
-
GoodData::
|
165
|
+
GoodData::Bam::Commands::generate_docs
|
87
166
|
end
|
88
167
|
end
|
89
168
|
|
@@ -96,7 +175,7 @@ command :procs do |c|
|
|
96
175
|
c.switch :all
|
97
176
|
|
98
177
|
c.action do |global_options,options,args|
|
99
|
-
out = GoodData::
|
178
|
+
out = GoodData::Bam::Commands::procs_list(options)
|
100
179
|
out.each do |proc|
|
101
180
|
puts proc.join(',')
|
102
181
|
end
|
@@ -104,21 +183,13 @@ command :procs do |c|
|
|
104
183
|
end
|
105
184
|
|
106
185
|
|
107
|
-
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
108
|
-
# arg_name 'Describe arguments to new here'
|
109
|
-
command :sinks_validate do |c|
|
110
|
-
c.action do |global_options,options,args|
|
111
|
-
x = GoodData::CloverGenerator.validate_datasets
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
186
|
desc 'Creates project'
|
116
187
|
command :project do |c|
|
117
|
-
|
188
|
+
|
118
189
|
c.desc 'blueprint name. Currently support goodsales'
|
119
190
|
c.arg_name 'blueprint'
|
120
191
|
c.flag :blueprint
|
121
|
-
|
192
|
+
|
122
193
|
c.desc 'token'
|
123
194
|
c.arg_name 'token'
|
124
195
|
c.flag :token
|
@@ -126,9 +197,19 @@ command :project do |c|
|
|
126
197
|
c.action do |global_options,options,args|
|
127
198
|
fail "You need to specify token to create a project" if options[:token].nil?
|
128
199
|
|
129
|
-
|
130
|
-
|
131
|
-
|
200
|
+
GoodData::Bam::Commands::connect_to_gd()
|
201
|
+
pid = case options[:blueprint]
|
202
|
+
when "goodsales"
|
203
|
+
"nt935rwzls50zfqwy6dh62tabu8h0ocy"
|
204
|
+
end
|
205
|
+
|
206
|
+
params = PARAMS.merge({:token => options[:token]})
|
207
|
+
|
208
|
+
new_project = if pid
|
209
|
+
GoodData::Bam::Commands.clone_project(pid, params)
|
210
|
+
else
|
211
|
+
GoodData::Bam::Commands.create_project(params)
|
212
|
+
end
|
132
213
|
|
133
214
|
puts "Your project pid is #{new_project}"
|
134
215
|
end
|
@@ -137,7 +218,7 @@ end
|
|
137
218
|
|
138
219
|
|
139
220
|
desc 'Generates structures'
|
140
|
-
arg_name 'what you want to generate project, tap, flow,
|
221
|
+
arg_name 'what you want to generate project, tap, flow, sink'
|
141
222
|
command :scaffold do |c|
|
142
223
|
|
143
224
|
c.desc 'blueprint name. Currently support goodsales'
|
@@ -152,7 +233,7 @@ command :scaffold do |c|
|
|
152
233
|
directory = args[1]
|
153
234
|
fail "Directory has to be provided as an argument. See help" if directory.nil?
|
154
235
|
if options[:blueprint].nil?
|
155
|
-
GoodData::
|
236
|
+
GoodData::Bam::Commands::setup_bash_structure(directory)
|
156
237
|
else
|
157
238
|
case options[:blueprint]
|
158
239
|
when "goodsales"
|
@@ -162,21 +243,21 @@ command :scaffold do |c|
|
|
162
243
|
when "flow"
|
163
244
|
name = args[1]
|
164
245
|
fail "Name of the flow has to be provided as an argument. See help" if name.nil?
|
165
|
-
GoodData::
|
246
|
+
GoodData::Bam::Commands::setup_flow(name)
|
166
247
|
when "tap"
|
167
248
|
name = args[1]
|
168
249
|
fail "Name of the tap has to be provided as an argument. See help" if name.nil?
|
169
|
-
GoodData::
|
250
|
+
GoodData::Bam::Commands::setup_tap(name)
|
170
251
|
when "sink"
|
171
252
|
name = args[1]
|
172
253
|
fail "Name of the sink has to be provided as an argument. See help" if name.nil?
|
173
|
-
GoodData::
|
254
|
+
GoodData::Bam::Commands::setup_sink(name)
|
174
255
|
when "graph_template"
|
175
256
|
name = args[1]
|
176
257
|
target = args[2]
|
177
258
|
fail "Name of the template has to be provided as an argument. See help" if name.nil?
|
178
259
|
fail "Name of the target has to be provided as an argument. See help" if target.nil?
|
179
|
-
GoodData::
|
260
|
+
GoodData::Bam::Commands::generate_graph_template(name, target)
|
180
261
|
end
|
181
262
|
end
|
182
263
|
end
|
@@ -189,7 +270,7 @@ command :model_sync do |c|
|
|
189
270
|
c.switch :dry
|
190
271
|
|
191
272
|
c.action do |global_options,options,args|
|
192
|
-
GoodData::
|
273
|
+
GoodData::Bam::Commands::model_sync(options)
|
193
274
|
end
|
194
275
|
end
|
195
276
|
|
@@ -209,10 +290,10 @@ command :deploy do |c|
|
|
209
290
|
dir = args.first
|
210
291
|
fail "You have to specify directory to deploy as an argument" if dir.nil?
|
211
292
|
fail "Specified directory does not exist" unless File.exist?(dir)
|
212
|
-
|
213
|
-
GoodData::
|
214
|
-
options = global_options.merge(
|
215
|
-
response = GoodData::
|
293
|
+
|
294
|
+
GoodData::Bam::Commands::connect_to_gd()
|
295
|
+
options = global_options.merge(options)
|
296
|
+
response = GoodData::Bam::Commands::deploy(dir, options)
|
216
297
|
end
|
217
298
|
end
|
218
299
|
|
@@ -229,24 +310,21 @@ command :run do |c|
|
|
229
310
|
fail "You have to specify directory to deploy as an argument" if dir.nil?
|
230
311
|
fail "Specified directory does not exist" unless File.exist?(dir)
|
231
312
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
GoodData::CloverGenerator.connect_to_gd(:logger => logger)
|
236
|
-
options = global_options.merge({:name => "temporary"})
|
237
|
-
GoodData::CloverGenerator.deploy(dir, options) do |deploy_response|
|
238
|
-
puts HighLine::color("Executing", HighLine::BOLD) if verbose
|
239
|
-
GoodData::CloverGenerator.create_email_channel(options) do |channel_response|
|
240
|
-
GoodData::CloverGenerator.subscribe_on_finish(:success, channel_response["channelConfiguration"]["meta"]["uri"], deploy_response["process"]["links"]["self"].split('/').last)
|
241
|
-
result = GoodData::CloverGenerator.execute_process(deploy_response["process"]["links"]["executions"], dir)
|
242
|
-
|
243
|
-
end
|
244
|
-
end
|
313
|
+
options = global_options.merge(options)
|
314
|
+
GoodData::Bam::Commands::connect_to_gd()
|
315
|
+
GoodData::Bam::Commands::run(dir, options)
|
245
316
|
end
|
246
317
|
end
|
247
318
|
|
248
319
|
|
249
|
-
pre do |
|
320
|
+
pre do |global_options,command,options,args|
|
321
|
+
|
322
|
+
logger = Logger.new(STDOUT) if global_options[:l]
|
323
|
+
GoodData.logger = logger
|
324
|
+
params = GoodData::Bam::Utils::get_user_params('.')
|
325
|
+
s3_backup = GoodData::Bam::Utils::should_backup_to_s3?(params)
|
326
|
+
PARAMS = params.merge({:s3_backup => s3_backup})
|
327
|
+
|
250
328
|
# Pre logic here
|
251
329
|
# Return true to proceed; false to abort and not call the
|
252
330
|
# chosen command
|