gd_bam 0.0.15 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +313 -5
- data/bin/bam +126 -48
- data/lib/bam/version.rb +1 -1
- data/lib/bam.rb +51 -0
- data/lib/base/errors.rb +15 -0
- data/lib/base/flow.rb +37 -0
- data/lib/base/graph.rb +23 -0
- data/lib/base/metadata.rb +107 -0
- data/lib/base/project.rb +95 -0
- data/lib/base/repo.rb +35 -0
- data/lib/base/sink.rb +44 -0
- data/lib/base/step.rb +47 -0
- data/lib/base/tap.rb +167 -0
- data/lib/base/taps.rb +19 -0
- data/lib/cloud_connect/dsl/cc.rb +42 -0
- data/lib/cloud_connect/dsl/es_helpers.rb +49 -0
- data/lib/cloud_connect/dsl/helpers.rb +199 -0
- data/lib/{nodes → cloud_connect/dsl}/nodes.rb +106 -16
- data/lib/cloud_connect/dsl/sf_helpers.rb +39 -0
- data/lib/cloud_connect/dsl/structure_helpers.rb +94 -0
- data/lib/commands/commands.rb +110 -0
- data/lib/commands/deployment.rb +217 -0
- data/lib/commands/docs_commands.rb +41 -0
- data/lib/commands/gd_commands.rb +95 -0
- data/lib/commands/scaffold_commands.rb +103 -0
- data/lib/commands/sf_commands.rb +37 -0
- data/lib/commands/validators.rb +19 -0
- data/lib/compatibility.rb +19 -0
- data/lib/compiler/compiler.rb +76 -0
- data/lib/compiler/etl_visitor.rb +165 -0
- data/lib/dsl/dsl.rb +125 -0
- data/lib/generators/downloaders.rb +449 -0
- data/lib/generators/etl.rb +261 -0
- data/lib/generators/validators.rb +445 -0
- data/lib/graphs/docentize.grf +1 -1
- data/lib/graphs/dummy.grf +1 -1
- data/lib/graphs/goodsales_v2/docentize.grf +47 -0
- data/lib/graphs/goodsales_v2/dummy.grf +46 -0
- data/lib/graphs/goodsales_v2/load_history.grf +579 -0
- data/lib/graphs/goodsales_v2/process_account.grf +47 -0
- data/lib/graphs/goodsales_v2/process_activity.grf +222 -0
- data/lib/graphs/goodsales_v2/process_activity_dim.grf +88 -0
- data/lib/graphs/goodsales_v2/process_activity_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_forecast.grf +20 -0
- data/lib/graphs/goodsales_v2/process_opp_records.grf +84 -0
- data/lib/graphs/goodsales_v2/process_opportunity.grf +46 -0
- data/lib/graphs/goodsales_v2/process_opportunity_line_item.grf +171 -0
- data/lib/graphs/goodsales_v2/process_opportunity_snapshot.grf +94 -0
- data/lib/graphs/goodsales_v2/process_owner.grf +48 -0
- data/lib/graphs/goodsales_v2/process_stage.grf +51 -0
- data/lib/graphs/goodsales_v2/process_stage_history.grf +184 -0
- data/lib/graphs/goodsales_v2/process_velocity_duration.grf +140 -0
- data/lib/graphs/process_account.grf +1 -1
- data/lib/graphs/process_activity.grf +1 -1
- data/lib/graphs/process_activity_dim.grf +1 -1
- data/lib/graphs/process_activity_owner.grf +1 -1
- data/lib/graphs/process_forecast.grf +1 -1
- data/lib/graphs/process_opp_records.grf +1 -1
- data/lib/graphs/process_opportunity.grf +1 -1
- data/lib/graphs/process_opportunity_line_item.grf +1 -1
- data/lib/graphs/process_opportunity_snapshot.grf +1 -1
- data/lib/graphs/process_owner.grf +1 -1
- data/lib/graphs/process_stage.grf +1 -1
- data/lib/graphs/process_stage_history.grf +1 -1
- data/lib/graphs/process_velocity_duration.grf +1 -1
- data/lib/nodes/clover_gen.rb +59 -946
- data/lib/nodes/dependency.rb +95 -96
- data/lib/runtime.rb +7 -648
- data/lib/utils/utils.rb +66 -0
- data/templates/flow.rb.erb +7 -6
- data/templates/join_template.grf.erb +1 -1
- data/templates/reformat_template.grf.erb +1 -1
- data/templates/sink.json.erb +28 -0
- data/templates/tap.json.erb +3 -5
- data/templates/workspace.prm.erb +4 -0
- metadata +50 -8
- data/lib/contract_checkers/contract_checkers.rb +0 -53
- data/lib/dsl/project_dsl.rb +0 -259
- data/lib/repo/1_config.json +0 -8
- data/templates/dataset.json.erb +0 -13
- data/templates/source.json.erb +0 -22
data/README.md
CHANGED
@@ -3,6 +3,11 @@
|
|
3
3
|
BAM is a tool that helps you be more productive while creating and maintaining projects.
|
4
4
|
|
5
5
|
|
6
|
+
###TODO
|
7
|
+
* Note to user if he generates downloaders and has no incremental taps
|
8
|
+
* Clean docs so there is no reference to dataset as in sink
|
9
|
+
* make 1:1 tap:sink easier
|
10
|
+
|
6
11
|
##Installation
|
7
12
|
|
8
13
|
make sure you have ruby (1.9 and 1.8.7 are currently supported) and that you have gem installed.
|
@@ -73,7 +78,10 @@ In the log there should be something like
|
|
73
78
|
|
74
79
|
Worker task failed: Missing mandatory fields
|
75
80
|
|
76
|
-
This means that some of your fields are either not accessible or not in your SF project.
|
81
|
+
This means that some of your fields are either not accessible or not in your SF project. You can check several things
|
82
|
+
|
83
|
+
* first make sure that you can actually connect to sf. For this run `bam sf_validate_connection`
|
84
|
+
* If that is correct, use `bam taps_validate` to identify fields that are inaccessible. It will mark in red those that are not there; orange marks those that are not there but are marked as not mandatory in taps.
|
77
85
|
|
78
86
|
##Next steps
|
79
87
|
Ok so by now you hopefully have your project up and running. Before we dive into modifications you have to understand key concepts that BAM builds on. Once you are comfortable with those we will get back to modifications.
|
@@ -168,8 +176,60 @@ Fail early. There is nothing more frustrating than when the ETL fails during exe
|
|
168
176
|
####Mandatory fields
|
169
177
|
Sometimes it is necessary to move fields around in SF. In such a case the tap will fail. If you know this upfront you can tell BAM that this field is not mandatory and it will silently go along, filling the missing field with ''. If it is marked as mandatory — which all fields are by default — it will fail if it cannot access the field.
|
170
178
|
|
179
|
+
###CSV
|
180
|
+
|
181
|
+
{
|
182
|
+
"source" : "/some/path/to/file.csv"
|
183
|
+
,"id" : "user"
|
184
|
+
,"fields" : [
|
185
|
+
{
|
186
|
+
"name" : "Id"
|
187
|
+
},
|
188
|
+
{
|
189
|
+
"name" : "FirstName"
|
190
|
+
},
|
191
|
+
{
|
192
|
+
"name" : "LastName"
|
193
|
+
},
|
194
|
+
{
|
195
|
+
"name" : "Region"
|
196
|
+
},
|
197
|
+
{
|
198
|
+
"name" : "Department"
|
199
|
+
}
|
200
|
+
]
|
201
|
+
}
|
202
|
+
|
203
|
+
###CSV on GoodData WEBDav
|
204
|
+
|
205
|
+
{
|
206
|
+
"source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/HERE_PUT_YOUR_PROJECT_ID/validated/account.csv"
|
207
|
+
,"id" : "user"
|
208
|
+
,"fields" : [
|
209
|
+
{
|
210
|
+
"name" : "Id"
|
211
|
+
},
|
212
|
+
{
|
213
|
+
"name" : "FirstName"
|
214
|
+
},
|
215
|
+
{
|
216
|
+
"name" : "LastName"
|
217
|
+
},
|
218
|
+
{
|
219
|
+
"name" : "Region"
|
220
|
+
},
|
221
|
+
{
|
222
|
+
"name" : "Department"
|
223
|
+
}
|
224
|
+
]
|
225
|
+
}
|
226
|
+
|
227
|
+
####Note
|
228
|
+
If you wonder about the password we do not like it either it should go away soon. It is a workaround for a bug.
|
229
|
+
|
230
|
+
|
171
231
|
##Flows
|
172
|
-
Flow is an abstraction that should connect a tap with a sink creating a .. well a flow.
|
232
|
+
Flow is an abstraction that should connect a tap(s) with a sink creating a .. well a flow.
|
173
233
|
|
174
234
|
Probably better to show you a simple example. This flow will download users from sf (you have to provide credentials in params.json). It then runs a graph called "process user" (this is part of the distribution but we can act as if it were an arbitrary graph). This graph concatenates first name and last name together. It then feeds data to the sink.
|
175
235
|
|
@@ -195,6 +255,151 @@ Note couple of things.
|
|
195
255
|
* there might be one or more metadata statements after graph definition. Each graph might expect numerous inputs so the order of these `metadata` statements is telling you which input goes where. Second purpose is actually telling what is going to change in those metadata. Here we are saying "*Ok the user is going in as input number one (there is no number two in this case). At the output user will have one more field and that is Name. On top of that we are removing two fields FirstName and LastName*".
|
196
256
|
* The last thing we specify is the sink. Again as in tap you can specify id so you tell BAM for which sink it should look. If you do not fill it in, by default it looks for the same id as your flow.
|
197
257
|
|
258
|
+
###Creating your own graph - short tutorial
|
259
|
+
Rarely do you want to put into a sink exactly what comes from a tap. Often you want to do some mangling with it. Let's have a look at how BAM tackles this. BAM currently does not try to do any of this by itself (might change) and relies on CloudConnect graphs to do the heavy lifting. So from the example above let's redo the Owner example ourselves.
|
260
|
+
|
261
|
+
Let's pretend we start completely from scratch so first we need a flow.
|
262
|
+
|
263
|
+
bam scaffold flow my_owner
|
264
|
+
|
265
|
+
There is going to be a new flow created for us (accidentally it is actually the old owner flow but no worries we will change it soon). If you go ahead and try to generate it right away it should throw an error
|
266
|
+
|
267
|
+
bam generate --only my_owner
|
268
|
+
|
269
|
+
The error says "error: Tap "user" was not found" and it is because our tap is named "owner". Go ahead and change it. The same should repeat for the sink so change it to "owner" as well. Now the interesting part. When BAM hits the expression `graph(xy)` in the flow it goes and tries to find a graph that it incorporates in the generated output (it just copies it, no magic). It currently looks at two places. First is the `local_graphs` directory of the current graph. So what we need to do is create a graph, put it into a `local_graphs` folder and then change the flow accordingly. BAM comes with a couple of templates to make this easier so we are going to use one of those. What we want to do is basically a reformat type of job. There is going to be one input and one output. Luckily there is a reformat template. Go ahead and run
|
270
|
+
|
271
|
+
bam scaffold graph_template reformat my_reformat.grf
|
272
|
+
|
273
|
+
Now you could go ahead and edit the graph by hand if you are an advanced user but usually it is much easier to edit the graph in CloudConnect. I will show you how to do it currently (will change hopefully :-)).
|
274
|
+
|
275
|
+
First let's open the flow again and change the graph name from "process_user" to "my_reformat". Then go ahead and generate this flow again.
|
276
|
+
|
277
|
+
bam generate --only my_owner
|
278
|
+
|
279
|
+
import the clover-project as a CC project (remember this is a fully functional CC project. Also during import uncheck the checkbox that says "Copy the project to workspace"; this will allow us to regenerate at will without importing or copying files). Go ahead and in CloudConnect open the graph that says "my_reformat.grf". It looks something like this.
|
280
|
+
|
281
|
+
![my_reformat.grf graph](https://www.dropbox.com/s/aolhvwjfpb21bm5/reformat.png?dl=1)
|
282
|
+
|
283
|
+
Notice couple of things.
|
284
|
+
|
285
|
+
* Input is named generically "1_in.csv"
|
286
|
+
* Output is named "out.csv"
|
287
|
+
* If you try doubleclicking the edge. It does not open the metadata but throws an error. If you hover over the edge that goes to reformat the Metadata path will be something like "${PROJECT}/metadata/${FLOW}/${NAME}/1_in.xml". Similarly if you hover over the edge that goes out of reformat it will say "${PROJECT}/metadata/${FLOW}/${NAME}/1_out.xml"
|
288
|
+
|
289
|
+
Generally the graph you create cannot be arbitrary. The BAM framework expects something from you so it can give you something back. We will walk through that in more detail later.
|
290
|
+
|
291
|
+
If you know enough CloudConnect you know that ${SOMETHING} stands for a parameter. The metadata path "${PROJECT}/metadata/${FLOW}/${NAME}/1_out.xml" thus expects 3 parameters. One is a global ${PROJECT} and is just the path to your current project and automatically provided by the project and CloudConnect. More BAM related are the other two. They are dynamically generated during runtime so when a graph is run it reaches to the correct metadata.
|
292
|
+
|
293
|
+
Short intermezzo - Why do all this?
|
294
|
+
The thinking behind this came from the GoodSales project. There is a default implementation of GoodSales and majority of the customers share some of the pieces. If you share those pieces you do not have to create your own on top of that if somebody finds a bug in that piece and fixes it you will get the fix for free. So it is a way of sharing updates. If you have something specific you can either change the graph altogether or you can prepend some other graph that will for example normalize the data so the standard can be used etc.
|
295
|
+
|
296
|
+
Since we want to debug things we need to provide the clover-graph the same information it would have during runtime. This basically means filling the proper values to the FLOW and NAME params. There is a bam command to help you. Run
|
297
|
+
|
298
|
+
bam debug clover-project/ my_owner my_reformat
|
299
|
+
|
300
|
+
It says for the project that resides inside 'clover-project' I want to debug the my_owner flow and my_reformat graph. Now go ahead and close and reopen the graph in CC again (to let CC reload the parameter files). Now if you click on metadata it should open just fine. Take note that in and out metadata are different, following the specification we made inside our flow. Go ahead and open the reformat and open the source tab. You will see something like this
|
301
|
+
|
302
|
+
function integer transform() {
|
303
|
+
$out.0.* = $in.0.*;
|
304
|
+
|
305
|
+
return ALL;
|
306
|
+
}
|
307
|
+
|
308
|
+
You can go ahead and dig deeper into this language called CTL2 on the CloudConnect documentation page but what we need to worry about for now is the line
|
309
|
+
|
310
|
+
$out.0.* = $in.0.*;
|
311
|
+
|
312
|
+
It says for each field that comes in an input record find a field with the same name on the output and copy the value. This is great since our input metadata share a lot of fields. There are also a couple of fields that are on the output but not the input. Like URL. We need to say specifically what to do with those. For example like this
|
313
|
+
|
314
|
+
$out.0.Url = "This will be same for all fields"
|
315
|
+
|
316
|
+
Or maybe somewhat more useful
|
317
|
+
|
318
|
+
$out.0.Url = "This is a person named " + $out.0.Name
|
319
|
+
|
320
|
+
The result should look something like this
|
321
|
+
|
322
|
+
$out.0.* = $in.0.*;
|
323
|
+
$out.0.Url = "This is a person named " + $out.0.Name
|
324
|
+
|
325
|
+
Again let's note couple of things.
|
326
|
+
|
327
|
+
* the * notation is what makes the BAM tick. It maps the fields by name automatically at runtime so if we add let's say Region to tap this reformat would still work letting Region through
|
328
|
+
* Some of the fields are used by name. In our case it is URL and Id. If these are not on the input/output it will crash.
|
329
|
+
* This is important to understand. Usually it is not a big deal. If you are doing your custom implementation you will just write the custom graphs that would suite your situation but sometimes you will (and always should strive to) implement projects that will reuse certain parts. Like we are doing with goodsales. This is ETL that use tens of customers. Then you have a problem when they do not have Id in their data and you have to deal with this situation.
|
330
|
+
|
331
|
+
You are almost done. Just save the work you have done and let's think about what you have done. What you did is that you set up the graph that is part of the generated project. If you would regenerate it now the changes you did by hand would go away. You need to copy the changed graph somewhere where it would be found by BAM. This place is the `local_graphs` directory that we talked about before. You can do it in linux/mac easily with
|
332
|
+
|
333
|
+
cp ./clover_project/graphs/my_reformat ./local_graphs
|
334
|
+
|
335
|
+
Now go ahead and regenerate the project. If you reopen the graph in CloudConnect the changes you made should be there.
|
336
|
+
|
337
|
+
###Creating graphs - some facts
|
338
|
+
|
339
|
+
####Reading data
|
340
|
+
Data are moved around in CSVs so if you want an input in your graph create a CSV Reader. Framework will provide them in files n_in.csv where n is number from 1 up based on the order of the metadata statements after the graph call in your flow definition.
|
341
|
+
|
342
|
+
tap(:id => 'user')
|
343
|
+
|
344
|
+
graph('my_graph')
|
345
|
+
metadata('user')
|
346
|
+
metadata('account')
|
347
|
+
|
348
|
+
This means that `my_graph.grf` should have two CSV readers one with source `${DATA_DIR}/1_in.csv` and that will be fed the data from User tap and other with source `${DATA_DIR}/2_in.csv` and that will be fed data from tap account.
|
349
|
+
|
350
|
+
####Writing data
|
351
|
+
The graph is expected to output one output and that should be as a CSV to file `${DATA_DIR}/out.csv`.
|
352
|
+
|
353
|
+
####Metadata
|
354
|
+
There are 2 metadata files for each tap. Input and output. This is similar to the situation with files. The metadata are numbered by the "port number". This is determined again by the order of metadata statements after graph in the flow definition.
|
355
|
+
|
356
|
+
Imagine you have this Tap.
|
357
|
+
|
358
|
+
{
|
359
|
+
"source" : "salesforce"
|
360
|
+
,"object" : "User"
|
361
|
+
,"id" : "user"
|
362
|
+
,"fields" : [
|
363
|
+
{
|
364
|
+
"name" : "Id"
|
365
|
+
},
|
366
|
+
{
|
367
|
+
"name" : "Name"
|
368
|
+
}
|
369
|
+
]
|
370
|
+
}
|
371
|
+
|
372
|
+
GoodData::CloverGenerator::DSL::flow("user") do |f|
|
373
|
+
tap(:id => "user")
|
374
|
+
tap(:id => "account")
|
375
|
+
|
376
|
+
graph("my_graph")
|
377
|
+
metadata("user") do |m|
|
378
|
+
m.add(:name => "Url")
|
379
|
+
end
|
380
|
+
metadata("account")
|
381
|
+
|
382
|
+
# other stuff
|
383
|
+
end
|
384
|
+
|
385
|
+
This means that in the graph the first "port" will get data from the user tap in file `1_in.csv` (we talked about this). Two metadata files for this tap are going to be created in `clover_project/user/my_graph/1_in.xml` and `clover_project/user/my_graph/1_out.xml`. The first one will have
|
386
|
+
|
387
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
388
|
+
<Record fieldDelimiter="," name="in_1" recordDelimiter="\n" type="delimited">
|
389
|
+
<Field name="Id" type="string" nullable="true"/>
|
390
|
+
<Field name="Name" type="string" nullable="true"/>
|
391
|
+
</Record>
|
392
|
+
|
393
|
+
the other
|
394
|
+
|
395
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
396
|
+
<Record fieldDelimiter="," name="in_1" recordDelimiter="\n" type="delimited">
|
397
|
+
<Field name="Id" type="string" nullable="true"/>
|
398
|
+
<Field name="Name" type="string" nullable="true"/>
|
399
|
+
<Field name="Url" type="string" nullable="true"/>
|
400
|
+
</Record>
|
401
|
+
|
402
|
+
Take note that this is in sync with what we have defined in the flow.
|
198
403
|
|
199
404
|
##Sinks
|
200
405
|
|
@@ -234,11 +439,74 @@ Sink is a definition of where data goes. Currently there is only one sink type a
|
|
234
439
|
|
235
440
|
GoodData sink is currently just mimicking the CL tool definition + some shortcuts on top of that. If you are familiar with the CL tool you should be right at home if I tell you that the only additional thing you have to provide is telling BAM which metadata field is pulled in to a given field.
|
236
441
|
|
442
|
+
##Deploying and running your work
|
443
|
+
When you have finished work on your masterpiece it is time to run it. As we have already seen there is a command `run` in BAM. You can use it like this
|
444
|
+
|
445
|
+
bam run clover_project
|
446
|
+
|
447
|
+
It will deploy the directory (remember this is a fully functional CloudConnect project) to the secure server. It will potentially create a channel with your email and run it. When the run ends it will delete both the channel and the deploy. This is useful for one off jobs when you want to take advantage of the remote stack. Please take note that when you run this what is in the directory is deployed. This might bite you if you previously ran this locally and there are some data or parameter files. I recommend regenerating the graph before running.
|
448
|
+
|
449
|
+
If you want something more permanent use deploy. This is the same as the first step during run. When deployed you can schedule it through the administration UI https://secure.gooddata.com/admin/dataload/ . The same goes here as well. If you ran it locally there probably are some files or parameters which might break your run. Regenerating is recommended.
|
450
|
+
|
451
|
+
Note: Both `run` and `deploy` will change in the future. Run still does not have the emails completely done. For deploy we will probably try to prepare schedule and email channels as well.
|
237
452
|
|
238
453
|
##Adding a field
|
239
454
|
Ok let's say you have a basic GoodSales
|
240
455
|
|
241
456
|
|
457
|
+
## Data validation
|
458
|
+
BAM tries to teach you the right way to do projects and one of the pain points is the data you are getting from a customer. If you have ever got an invalid CSV file from a customer and had to write a custom script to find the error, read on. We believe this pain can be automated. In a tap you can annotate the fields with validation rules. These rules are for example saying "This field is a number", "This field is a URI", "This is a date in format", "This cannot be empty". From this description you can generate a validation graph that sits in between the customer's data and your ETL. If a new upload is validated and passes it is handed to ETL. If not it is ignored so your ETL can work only on data that passes certain checks. So how to do this?
|
459
|
+
|
460
|
+
{
|
461
|
+
"type" : "tap"
|
462
|
+
,"source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/d2uopvzruqsc9mwuili714h0g6sl8h5y/validated/account*.csv"
|
463
|
+
,"validation_source" : "https://svarovsky%40gooddata.com:password@secure-di.gooddata.com/project-uploads/d2uopvzruqsc9mwuili714h0g6sl8h5y/account*.csv"
|
464
|
+
,"incremental" : true
|
465
|
+
,"id" : "account"
|
466
|
+
,"fields" : [
|
467
|
+
{
|
468
|
+
"name" : "Id",
|
469
|
+
"validates_as" : {
|
470
|
+
"type" : "integer"
|
471
|
+
}
|
472
|
+
},
|
473
|
+
{
|
474
|
+
"name" : "Name"
|
475
|
+
},
|
476
|
+
{
|
477
|
+
"name" : "Date",
|
478
|
+
"validates_as" : {
|
479
|
+
"type" : "date",
|
480
|
+
"format" : "yyyy/MM/dd"
|
481
|
+
}
|
482
|
+
},
|
483
|
+
{
|
484
|
+
"name": "OtherField"
|
485
|
+
}
|
486
|
+
]
|
487
|
+
// ,"limit": "10"
|
488
|
+
}
|
489
|
+
|
490
|
+
|
491
|
+
Now you can generate data validator. This is a standard graph so you can deploy it to the production so it sits in GoodData.
|
492
|
+
|
493
|
+
bam generate_validator
|
494
|
+
bam -vl deploy validator-project
|
495
|
+
|
496
|
+
This is going to deploy it in verbose mode. You need to get the deploy process number so you can use it later. Since you ran it in verbose mode one of the last lines should look like
|
497
|
+
|
498
|
+
=>"/gdc/projects/d2uopvzruqsc9mwuili714h0g6sl8h5y/dataload/processes/663136fa-f996-4b35-828a-60dd154ff71a", "executions"=>"/gdc/projects/d2uopvzruqsc9mwuili714h0g6sl8h5y/dataload/processes/663136fa-f996-4b35-828a-60dd154ff71a/executions"}}}
|
499
|
+
|
500
|
+
The process number is this 663136fa-f996-4b35-828a-60dd154ff71a in this case.
|
501
|
+
|
502
|
+
Now you need to upload the data and tell the validator to check it. Either you can do it yourself (once we document the things that must be followed) or you can use one of our agents. There is a Java one maturing but if it is development time you can easily use the one in BAM (we do not recommend using it in production though; there are lots of nifty features missing).
|
503
|
+
|
504
|
+
bam -vl run_validator --process 663136fa-f996-4b35-828a-60dd154ff71a account.csv
|
505
|
+
|
506
|
+
You can see that the `run_validator` command needs to have the process parameter passed in. It also consumes list of files to upload. After it does it runs the validator. If everything goes ok it just moves the files to other dir where ETL or downloaders can pick it up then it quits silently if not it tells you where to look for human readable report what went wrong.
|
507
|
+
|
508
|
+
|
509
|
+
|
242
510
|
##Runtime commands
|
243
511
|
Part of the distribution is the bam executable which lets you do several neat things on the commandline
|
244
512
|
|
@@ -263,10 +531,10 @@ deploys the directory to the server.
|
|
263
531
|
|
264
532
|
bam deploy clover_project --process 1231jkadjk123k
|
265
533
|
|
266
|
-
###
|
534
|
+
###model_sync
|
267
535
|
This will go through the sinks and updates the model. It relies on the CL tool to do this, which also describes the limitations. It is very useful for adding additional fields, not changing the model altogether.
|
268
536
|
|
269
|
-
###
|
537
|
+
###run
|
270
538
|
Runs the project on the server. This is achieved by deploying it there and deleting it after the run finishes.
|
271
539
|
|
272
540
|
bam run clover-project
|
@@ -306,8 +574,48 @@ Currently works only for SF. Validates that the target SF instance has all the f
|
|
306
574
|
### sinks_validate
|
307
575
|
TBD
|
308
576
|
|
577
|
+
###sf_jack_in
|
578
|
+
|
579
|
+
Note: Before we start, if you want to exit the interactive session just type `exit`. If there is output of the command that is larger than the screen the session enters a different "viewing" mode; you can exit it by pressing `q`
|
580
|
+
|
581
|
+
This will log you into Salesforce project and starts up interactive client. You can do several useful things for example validate fields while talking to the customer. I will show you couple of things
|
582
|
+
|
583
|
+
####You can list fields
|
584
|
+
|
585
|
+
fields('Opportunity')
|
586
|
+
|
587
|
+
and do a lot of interesting stuff with it like searching
|
588
|
+
|
589
|
+
fields('Opportunity').grep /__c/
|
590
|
+
|
591
|
+
counting
|
592
|
+
|
593
|
+
fields('Opportunity').count
|
594
|
+
|
595
|
+
and basically anything you can do with ruby like writing those fields to a CSV file
|
596
|
+
|
597
|
+
CSV.open('list_of_opportunity_fields.csv', 'w') do |csv|
|
598
|
+
fields('Opportunity').map {|f| f.upcase}.each do |f|
|
599
|
+
csv << [f]
|
600
|
+
end
|
601
|
+
end
|
602
|
+
|
603
|
+
####You can make a query
|
604
|
+
|
605
|
+
query("SELECT SUM(Amount) FROM Opportunity")
|
606
|
+
|
607
|
+
or
|
608
|
+
|
609
|
+
query("SELECT Id, Name, StageName FROM Opportunity LIMIT 10")
|
610
|
+
|
611
|
+
again you can access the results in many ways like summing amount on Closed Won opportunities
|
612
|
+
|
613
|
+
query("SELECT Id, Amount, StageName FROM Opportunity LIMIT 10").find_all do |line|
|
614
|
+
line[:StageName] == "Closed Won"
|
615
|
+
end.reduce(0) {|memo, line| memo += line[:Amount].to_i}
|
616
|
+
|
309
617
|
##The why
|
310
|
-
For those that are interested in reading why we actually bothered developing this. Read on.
|
618
|
+
For those that are interested in reading why we actually bothered developing this and what decisions we made. Read on and let us know if you like them or not.
|
311
619
|
|
312
620
|
###Metadata management
|
313
621
|
Key pain that I had with CloudConnect is that I did not like the management of metadata. Every project I saw was just pile of metadata definition that has to be constantly changed and tweaked. This is caused by couple of choices that creators of underlying Clover engine made in the beginning and probably will not be changed easily. While I am trying to make it better I am still bound by these choices and sometimes the wiring stick out - sorry for that.
|
data/bin/bam
CHANGED
@@ -21,7 +21,6 @@ default_value false
|
|
21
21
|
arg_name 'logger'
|
22
22
|
switch [:l,:logger]
|
23
23
|
|
24
|
-
|
25
24
|
desc 'Generates clover project based on information in current directory. The default ouptut is the directory ./clover-project'
|
26
25
|
# arg_name 'Describe arguments to new here'
|
27
26
|
command :generate do |c|
|
@@ -31,8 +30,14 @@ command :generate do |c|
|
|
31
30
|
c.flag :only
|
32
31
|
|
33
32
|
c.action do |global_options,options,args|
|
34
|
-
GoodData::
|
35
|
-
GoodData::
|
33
|
+
GoodData::Bam::Commands::clobber_etl_project('.')
|
34
|
+
GoodData::Bam::Commands::setup_etl_project('.', PARAMS.merge(options).merge({:project_name => "etl-#{PARAMS[:project_name]}"}))
|
35
|
+
GoodData::Bam::Commands::generate('.', PARAMS.merge(options).merge({
|
36
|
+
:project_name => "etl-#{PARAMS[:project_name]}",
|
37
|
+
:graph_repos => [
|
38
|
+
GoodData::Bam::Repository.create(:type => :file, :base => Pathname('./local_graphs').expand_path),
|
39
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)
|
40
|
+
]}))
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
@@ -41,10 +46,66 @@ desc 'Jacks into SF.'
|
|
41
46
|
command :sf_jack_in do |c|
|
42
47
|
|
43
48
|
c.action do |global_options,options,args|
|
44
|
-
GoodData::
|
49
|
+
GoodData::Bam::Commands::sf_jack_in(PARAMS)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
desc 'Jacks into project.'
|
54
|
+
# arg_name 'Describe arguments to new here'
|
55
|
+
command :project_jack_in do |c|
|
56
|
+
|
57
|
+
c.action do |global_options,options,args|
|
58
|
+
GoodData::Bam::Commands::project_jack_in(PARAMS)
|
45
59
|
end
|
46
60
|
end
|
47
61
|
|
62
|
+
|
63
|
+
desc 'Validatates connection to SalesForce.'
|
64
|
+
# arg_name 'Describe arguments to new here'
|
65
|
+
command :sf_validate_connection do |c|
|
66
|
+
|
67
|
+
c.action do |global_options,options,args|
|
68
|
+
GoodData::Bam::Commands::sf_validate_connection(PARAMS)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
desc 'Generate data validator.'
|
74
|
+
# arg_name 'Describe arguments to new here'
|
75
|
+
command :generate_validator do |c|
|
76
|
+
c.action do |global_options,options,args|
|
77
|
+
GoodData::Bam::Commands::generate_validators('.', PARAMS.merge(options).merge({
|
78
|
+
:project_name => "validator-#{PARAMS[:project_name]}",
|
79
|
+
:graph_repos => [
|
80
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
81
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)
|
82
|
+
]
|
83
|
+
}))
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
desc 'Run data validator.'
|
88
|
+
# arg_name 'Describe arguments to new here'
|
89
|
+
command :run_validator do |c|
|
90
|
+
|
91
|
+
c.desc 'Checker process ID'
|
92
|
+
c.arg_name 'process'
|
93
|
+
c.flag :process
|
94
|
+
|
95
|
+
c.action do |global_options,options,args|
|
96
|
+
process = options[:process]
|
97
|
+
files = args.map {|f| Pathname(f)}
|
98
|
+
files.each do |f|
|
99
|
+
fail "Provded file \"#{f}\" does not exist." unless File.exist?(f)
|
100
|
+
end
|
101
|
+
fail "You need to specify process" if process.blank?
|
102
|
+
|
103
|
+
GoodData::Bam::Commands::connect_to_gd()
|
104
|
+
GoodData::Bam::Commands::run_validator(process, files, PARAMS)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
|
48
109
|
desc 'Generates clover project for downloaders.'
|
49
110
|
# arg_name 'Describe arguments to new here'
|
50
111
|
command :generate_downloaders do |c|
|
@@ -54,17 +115,27 @@ command :generate_downloaders do |c|
|
|
54
115
|
c.flag :backup
|
55
116
|
|
56
117
|
c.action do |global_options,options,args|
|
57
|
-
|
58
|
-
|
118
|
+
params = global_options[:user_params]
|
119
|
+
params = PARAMS.merge({
|
120
|
+
:project_name => "downloaders-#{PARAMS[:project_name]}",
|
121
|
+
:graph_repos => [
|
122
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
123
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)]
|
124
|
+
})
|
125
|
+
GoodData::Bam::Commands::generate_downloaders(".", params)
|
59
126
|
end
|
60
127
|
end
|
61
128
|
|
62
129
|
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
63
130
|
# arg_name 'Describe arguments to new here'
|
64
|
-
command :
|
131
|
+
command :sf_taps_validate do |c|
|
65
132
|
c.action do |global_options,options,args|
|
66
133
|
verbose = global_options[:v]
|
67
|
-
|
134
|
+
params = PARAMS.merge({
|
135
|
+
:graph_repos => [
|
136
|
+
GoodData::Bam::Repository.create(:type => :file, :base => './local_graphs'),
|
137
|
+
GoodData::Bam::Repository.create(:type => :file, :base => GoodData::CloverGenerator::BAM_DEFINED_GRAPHS_ROOT)]})
|
138
|
+
result = GoodData::Bam::Commands::validate_sf_taps(params)
|
68
139
|
|
69
140
|
error = false
|
70
141
|
result.each_pair do |obj, fields|
|
@@ -79,11 +150,19 @@ command :taps_validate do |c|
|
|
79
150
|
end
|
80
151
|
end
|
81
152
|
|
153
|
+
desc "Prepares params.prm file for debugging that particular graph"
|
154
|
+
command :debug do |c|
|
155
|
+
c.action do |global_options,options,args|
|
156
|
+
fail "Arguments should be passed as \"project\" \"flow\" \"graph\"" if args.length < 3
|
157
|
+
GoodData::Bam::Commands::set_up_debug(args.first, args[1], args[2])
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
82
161
|
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
83
162
|
# arg_name 'Describe arguments to new here'
|
84
163
|
command :docs do |c|
|
85
164
|
c.action do |global_options,options,args|
|
86
|
-
GoodData::
|
165
|
+
GoodData::Bam::Commands::generate_docs
|
87
166
|
end
|
88
167
|
end
|
89
168
|
|
@@ -96,7 +175,7 @@ command :procs do |c|
|
|
96
175
|
c.switch :all
|
97
176
|
|
98
177
|
c.action do |global_options,options,args|
|
99
|
-
out = GoodData::
|
178
|
+
out = GoodData::Bam::Commands::procs_list(options)
|
100
179
|
out.each do |proc|
|
101
180
|
puts proc.join(',')
|
102
181
|
end
|
@@ -104,21 +183,13 @@ command :procs do |c|
|
|
104
183
|
end
|
105
184
|
|
106
185
|
|
107
|
-
desc 'Validates that the tap has the fields it is claimed it should have. This is supposed to make the mitigate errors during deploy.'
|
108
|
-
# arg_name 'Describe arguments to new here'
|
109
|
-
command :sinks_validate do |c|
|
110
|
-
c.action do |global_options,options,args|
|
111
|
-
x = GoodData::CloverGenerator.validate_datasets
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
186
|
desc 'Creates project'
|
116
187
|
command :project do |c|
|
117
|
-
|
188
|
+
|
118
189
|
c.desc 'blueprint name. Currently support goodsales'
|
119
190
|
c.arg_name 'blueprint'
|
120
191
|
c.flag :blueprint
|
121
|
-
|
192
|
+
|
122
193
|
c.desc 'token'
|
123
194
|
c.arg_name 'token'
|
124
195
|
c.flag :token
|
@@ -126,9 +197,19 @@ command :project do |c|
|
|
126
197
|
c.action do |global_options,options,args|
|
127
198
|
fail "You need to specify token to create a project" if options[:token].nil?
|
128
199
|
|
129
|
-
|
130
|
-
|
131
|
-
|
200
|
+
GoodData::Bam::Commands::connect_to_gd()
|
201
|
+
pid = case options[:blueprint]
|
202
|
+
when "goodsales"
|
203
|
+
"nt935rwzls50zfqwy6dh62tabu8h0ocy"
|
204
|
+
end
|
205
|
+
|
206
|
+
params = PARAMS.merge({:token => options[:token]})
|
207
|
+
|
208
|
+
new_project = if pid
|
209
|
+
GoodData::Bam::Commands.clone_project(pid, params)
|
210
|
+
else
|
211
|
+
GoodData::Bam::Commands.create_project(params)
|
212
|
+
end
|
132
213
|
|
133
214
|
puts "Your project pid is #{new_project}"
|
134
215
|
end
|
@@ -137,7 +218,7 @@ end
|
|
137
218
|
|
138
219
|
|
139
220
|
desc 'Generates structures'
|
140
|
-
arg_name 'what you want to generate project, tap, flow,
|
221
|
+
arg_name 'what you want to generate project, tap, flow, sink'
|
141
222
|
command :scaffold do |c|
|
142
223
|
|
143
224
|
c.desc 'blueprint name. Currently support goodsales'
|
@@ -152,7 +233,7 @@ command :scaffold do |c|
|
|
152
233
|
directory = args[1]
|
153
234
|
fail "Directory has to be provided as an argument. See help" if directory.nil?
|
154
235
|
if options[:blueprint].nil?
|
155
|
-
GoodData::
|
236
|
+
GoodData::Bam::Commands::setup_bash_structure(directory)
|
156
237
|
else
|
157
238
|
case options[:blueprint]
|
158
239
|
when "goodsales"
|
@@ -162,21 +243,21 @@ command :scaffold do |c|
|
|
162
243
|
when "flow"
|
163
244
|
name = args[1]
|
164
245
|
fail "Name of the flow has to be provided as an argument. See help" if name.nil?
|
165
|
-
GoodData::
|
246
|
+
GoodData::Bam::Commands::setup_flow(name)
|
166
247
|
when "tap"
|
167
248
|
name = args[1]
|
168
249
|
fail "Name of the tap has to be provided as an argument. See help" if name.nil?
|
169
|
-
GoodData::
|
250
|
+
GoodData::Bam::Commands::setup_tap(name)
|
170
251
|
when "sink"
|
171
252
|
name = args[1]
|
172
253
|
fail "Name of the sink has to be provided as an argument. See help" if name.nil?
|
173
|
-
GoodData::
|
254
|
+
GoodData::Bam::Commands::setup_sink(name)
|
174
255
|
when "graph_template"
|
175
256
|
name = args[1]
|
176
257
|
target = args[2]
|
177
258
|
fail "Name of the template has to be provided as an argument. See help" if name.nil?
|
178
259
|
fail "Name of the target has to be provided as an argument. See help" if target.nil?
|
179
|
-
GoodData::
|
260
|
+
GoodData::Bam::Commands::generate_graph_template(name, target)
|
180
261
|
end
|
181
262
|
end
|
182
263
|
end
|
@@ -189,7 +270,7 @@ command :model_sync do |c|
|
|
189
270
|
c.switch :dry
|
190
271
|
|
191
272
|
c.action do |global_options,options,args|
|
192
|
-
GoodData::
|
273
|
+
GoodData::Bam::Commands::model_sync(options)
|
193
274
|
end
|
194
275
|
end
|
195
276
|
|
@@ -209,10 +290,10 @@ command :deploy do |c|
|
|
209
290
|
dir = args.first
|
210
291
|
fail "You have to specify directory to deploy as an argument" if dir.nil?
|
211
292
|
fail "Specified directory does not exist" unless File.exist?(dir)
|
212
|
-
|
213
|
-
GoodData::
|
214
|
-
options = global_options.merge(
|
215
|
-
response = GoodData::
|
293
|
+
|
294
|
+
GoodData::Bam::Commands::connect_to_gd()
|
295
|
+
options = global_options.merge(options)
|
296
|
+
response = GoodData::Bam::Commands::deploy(dir, options)
|
216
297
|
end
|
217
298
|
end
|
218
299
|
|
@@ -229,24 +310,21 @@ command :run do |c|
|
|
229
310
|
fail "You have to specify directory to deploy as an argument" if dir.nil?
|
230
311
|
fail "Specified directory does not exist" unless File.exist?(dir)
|
231
312
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
GoodData::CloverGenerator.connect_to_gd(:logger => logger)
|
236
|
-
options = global_options.merge({:name => "temporary"})
|
237
|
-
GoodData::CloverGenerator.deploy(dir, options) do |deploy_response|
|
238
|
-
puts HighLine::color("Executing", HighLine::BOLD) if verbose
|
239
|
-
GoodData::CloverGenerator.create_email_channel(options) do |channel_response|
|
240
|
-
GoodData::CloverGenerator.subscribe_on_finish(:success, channel_response["channelConfiguration"]["meta"]["uri"], deploy_response["process"]["links"]["self"].split('/').last)
|
241
|
-
result = GoodData::CloverGenerator.execute_process(deploy_response["process"]["links"]["executions"], dir)
|
242
|
-
|
243
|
-
end
|
244
|
-
end
|
313
|
+
options = global_options.merge(options)
|
314
|
+
GoodData::Bam::Commands::connect_to_gd()
|
315
|
+
GoodData::Bam::Commands::run(dir, options)
|
245
316
|
end
|
246
317
|
end
|
247
318
|
|
248
319
|
|
249
|
-
pre do |
|
320
|
+
pre do |global_options,command,options,args|
|
321
|
+
|
322
|
+
logger = Logger.new(STDOUT) if global_options[:l]
|
323
|
+
GoodData.logger = logger
|
324
|
+
params = GoodData::Bam::Utils::get_user_params('.')
|
325
|
+
s3_backup = GoodData::Bam::Utils::should_backup_to_s3?(params)
|
326
|
+
PARAMS = params.merge({:s3_backup => s3_backup})
|
327
|
+
|
250
328
|
# Pre logic here
|
251
329
|
# Return true to proceed; false to abort and not call the
|
252
330
|
# chosen command
|