gooddata_marketo 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +9 -0
  3. data/Gemfile.lock +131 -0
  4. data/README.md +207 -0
  5. data/bin/Gemfile +10 -0
  6. data/bin/auth.json +17 -0
  7. data/bin/main.rb +0 -0
  8. data/bin/process.rbx +541 -0
  9. data/examples/all_lead_changes.rb +119 -0
  10. data/examples/all_leads.rb +249 -0
  11. data/examples/lead_changes_to_ads.rb +63 -0
  12. data/gooddata_marketo.gemspec +24 -0
  13. data/gooddata_marketo_gem.zip +0 -0
  14. data/lib/gooddata_marketo.rb +24 -0
  15. data/lib/gooddata_marketo/adapters/rest.rb +287 -0
  16. data/lib/gooddata_marketo/client.rb +373 -0
  17. data/lib/gooddata_marketo/data/activity_types.rb +104 -0
  18. data/lib/gooddata_marketo/data/reserved_sql_keywords.rb +205 -0
  19. data/lib/gooddata_marketo/helpers/s3.rb +141 -0
  20. data/lib/gooddata_marketo/helpers/stringwizard.rb +32 -0
  21. data/lib/gooddata_marketo/helpers/table.rb +323 -0
  22. data/lib/gooddata_marketo/helpers/webdav.rb +118 -0
  23. data/lib/gooddata_marketo/loads.rb +235 -0
  24. data/lib/gooddata_marketo/models/campaigns.rb +57 -0
  25. data/lib/gooddata_marketo/models/channels.rb +30 -0
  26. data/lib/gooddata_marketo/models/child/activity.rb +104 -0
  27. data/lib/gooddata_marketo/models/child/criteria.rb +17 -0
  28. data/lib/gooddata_marketo/models/child/lead.rb +118 -0
  29. data/lib/gooddata_marketo/models/child/mobj.rb +68 -0
  30. data/lib/gooddata_marketo/models/etl.rb +75 -0
  31. data/lib/gooddata_marketo/models/leads.rb +493 -0
  32. data/lib/gooddata_marketo/models/load.rb +17 -0
  33. data/lib/gooddata_marketo/models/mobjects.rb +121 -0
  34. data/lib/gooddata_marketo/models/streams.rb +137 -0
  35. data/lib/gooddata_marketo/models/tags.rb +35 -0
  36. data/lib/gooddata_marketo/models/validate.rb +46 -0
  37. metadata +177 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cac617b71a6f470a15bb65b9c9f95335c3024546
4
+ data.tar.gz: 1dfe0328779ef33026080e6af6ec7609421e0e0f
5
+ SHA512:
6
+ metadata.gz: fca9b55c97b89799b048337ac0e10e161ab47f21c822ee5d5b610a77cbdaff50c28b7389211559a54796d130e7dd291501d129a7246fd556f7387c3600c635c7
7
+ data.tar.gz: 60cd618ed5620155e2211985e045d0967395f4a72e5b5802603b36f40f49272416b7fe5f8eada927e5afab1aa0b6f33bc840599fa6737675b59f299876fa1683
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'savon', '2.8.0'
4
+ gem 'rubyntlm', '0.3.2'
5
+ gem 'gooddata', '0.6.11'
6
+ gem 'rest-client', '1.7.2'
7
+ gem 'aws-sdk', '1.61.0'
8
+ gem 'pmap', '1.0.2'
9
+ gem 'gooddata_datawarehouse', '0.0.5'
@@ -0,0 +1,131 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (4.2.0)
5
+ i18n (~> 0.7)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.3, >= 0.3.4)
9
+ tzinfo (~> 1.1)
10
+ akami (1.2.2)
11
+ gyoku (>= 0.4.0)
12
+ nokogiri
13
+ aws-sdk (1.61.0)
14
+ aws-sdk-v1 (= 1.61.0)
15
+ aws-sdk-v1 (1.61.0)
16
+ json (~> 1.4)
17
+ nokogiri (>= 1.4.4)
18
+ builder (3.2.2)
19
+ coderay (1.1.0)
20
+ docile (1.1.5)
21
+ erubis (2.7.0)
22
+ faraday (0.9.1)
23
+ multipart-post (>= 1.2, < 3)
24
+ faraday_middleware (0.9.1)
25
+ faraday (>= 0.7.4, < 0.10)
26
+ ffi (1.9.6-java)
27
+ gli (2.12.2)
28
+ gooddata (0.6.11)
29
+ activesupport (~> 4.1, >= 4.1.0)
30
+ aws-sdk (~> 1.45)
31
+ bundler (~> 1.7, >= 1.7.3)
32
+ docile (~> 1.1, >= 1.1.5)
33
+ erubis (~> 2.7, >= 2.7.0)
34
+ gli (~> 2.12, >= 2.12.2)
35
+ highline (~> 1.6, >= 1.6.21)
36
+ i18n (~> 0.6, >= 0.6.9)
37
+ json_pure (~> 1.8, >= 1.8.1)
38
+ multi_json (~> 1.10, >= 1.10.0)
39
+ parseconfig (~> 1.0, >= 1.0.4)
40
+ pmap (~> 1.0, >= 1.0.1)
41
+ pry (~> 0.9.12.6)
42
+ rest-client (~> 1.7, >= 1.7.2)
43
+ restforce (~> 1.5, >= 1.5.0)
44
+ rubyzip (~> 1.1, >= 1.1.0)
45
+ salesforce_bulk_query (~> 0.0)
46
+ terminal-table (~> 1.4, >= 1.4.5)
47
+ gyoku (1.2.2)
48
+ builder (>= 2.1.2)
49
+ hashie (3.3.2)
50
+ highline (1.6.21)
51
+ httpi (2.3.0)
52
+ rack
53
+ i18n (0.7.0)
54
+ json (1.8.2)
55
+ json (1.8.2-java)
56
+ json_pure (1.8.2)
57
+ macaddr (1.7.1)
58
+ systemu (~> 2.6.2)
59
+ method_source (0.8.2)
60
+ mime-types (1.25.1)
61
+ mini_portile (0.6.2)
62
+ minitest (5.5.0)
63
+ multi_json (1.10.1)
64
+ multipart-post (2.0.0)
65
+ netrc (0.10.2)
66
+ nokogiri (1.6.5)
67
+ mini_portile (~> 0.6.0)
68
+ nokogiri (1.6.5-java)
69
+ nori (2.4.0)
70
+ parseconfig (1.0.6)
71
+ pmap (1.0.2)
72
+ pry (0.9.12.6)
73
+ coderay (~> 1.0)
74
+ method_source (~> 0.8)
75
+ slop (~> 3.4)
76
+ pry (0.9.12.6-java)
77
+ coderay (~> 1.0)
78
+ method_source (~> 0.8)
79
+ slop (~> 3.4)
80
+ spoon (~> 0.0)
81
+ rack (1.6.0)
82
+ rest-client (1.7.2)
83
+ mime-types (>= 1.16, < 3.0)
84
+ netrc (~> 0.7)
85
+ restforce (1.5.1)
86
+ faraday (~> 0.9.0)
87
+ faraday_middleware (>= 0.8.8)
88
+ hashie (>= 1.2.0, < 4.0)
89
+ json (>= 1.7.5, < 1.9.0)
90
+ rubyntlm (0.3.2)
91
+ rubyzip (1.1.6)
92
+ salesforce_bulk_query (0.0.6)
93
+ json (~> 1.8)
94
+ xml-simple (~> 1.1)
95
+ savon (2.8.0)
96
+ akami (~> 1.2)
97
+ builder (>= 2.1.2)
98
+ gyoku (~> 1.2)
99
+ httpi (~> 2.3)
100
+ nokogiri (>= 1.4.0)
101
+ nori (~> 2.4)
102
+ uuid (~> 2.3.7)
103
+ wasabi (= 3.3.0)
104
+ slop (3.6.0)
105
+ spoon (0.0.4)
106
+ ffi
107
+ systemu (2.6.4)
108
+ terminal-table (1.4.5)
109
+ thread_safe (0.3.4)
110
+ thread_safe (0.3.4-java)
111
+ tzinfo (1.2.2)
112
+ thread_safe (~> 0.1)
113
+ uuid (2.3.7)
114
+ macaddr (~> 1.0)
115
+ wasabi (3.3.0)
116
+ httpi (~> 2.0)
117
+ mime-types (< 2.0.0)
118
+ nokogiri (>= 1.4.0)
119
+ xml-simple (1.1.4)
120
+
121
+ PLATFORMS
122
+ java
123
+ ruby
124
+
125
+ DEPENDENCIES
126
+ aws-sdk (= 1.61.0)
127
+ gooddata (= 0.6.11)
128
+ pmap (= 1.0.2)
129
+ rest-client (= 1.7.2)
130
+ rubyntlm (= 0.3.2)
131
+ savon (= 2.8.0)
@@ -0,0 +1,207 @@
1
+ GOODDATA MARKETO CONNECTOR
2
+ ==========================
3
+ Marketo SOAP/REST services to GoodData ADS
4
+
5
+ ## Overview
6
+ With the Marketo Connector a complete transfer of all available lead data is automatically moved *and* kept in sync with GoodData ADS. Supporting both the Marketo REST & SOAP the GoodData Marketo Connector adapts to the source data making installation as simple as entering your login information.
7
+
8
+ ### Marketo
9
+ The central concept of the Marketo API is a "Lead". Each lead contains default attributes like City or Email in addition to custom attributes added by the client. The Marketo Connector downloads and synchronizes all leads and lead changes for a given Marketo client. The connector utilizes two main calls, *get_lead_changes* and *get_multiple_leads*, which feed into the ADS tables *marketo_changes* and *marketo_leads* respectively. For examples, take a look at the code within the *examples* directory.
10
+
11
+ ### Getting Started
12
+
13
+ 1. Install jruby `rvm install jruby` and then `rvm use jruby` (tested on jruby 1.7.16.1)
14
+ 2. [Install DSS for Ruby](https://confluence.intgdc.com/pages/viewpage.action?title=DSS+Alpha+Version+-+User+Guide&spaceKey=plat)
15
+ 3. Clone this repository.
16
+ 4. Configure authentication in bin/auth.json.
17
+ 5. Zip the entire "bin" folder to a new zip file.
18
+ 6. Upload the zip file as a Ruby process and schedule it to run twice daily (every 12 hours).
19
+
20
+ ### Configuration & Keys
21
+ To use the Marketo Connector you must have credentials for the following services.
22
+
23
+ - [Marketo REST API](http://developers.marketo.com/blog/quick-start-guide-for-marketo-rest-api/).
24
+ - [Marketo SOAP API](http://developers.marketo.com/documentation/soap/).
25
+ - [GoodData Username, Password, & Project ID](https://na1.secure.gooddata.com/account.html?clicked_trial_link=1#/registration/projectTemplate/urn%3Agooddata%3AOnboarding).
26
+ - [AWS S3](http://aws.amazon.com/s3/).
27
+
28
+ #### File Tree
29
+
30
+ ├── bin
31
+ │   ├── Gemfile
32
+ │   ├── auth.json
33
+ │   ├── main.rb
34
+ │   └── process.rbx
35
+
36
+ ├── examples
37
+ │   ├── all_lead_changes.rb
38
+ │   ├── all_leads.rb
39
+ │   └── lead_changes_to_ads.rb
40
+
41
+ ├── lib
42
+ │   ├── gooddata_marketo
43
+ │   │   ├── adapters
44
+ │   │   │   └── rest.rb
45
+ │   │   ├── client.rb
46
+ │   │   ├── data
47
+ │   │   │   ├── activity_types.rb
48
+ │   │   │   └── reserved_sql_keywords.rb
49
+ │   │   ├── helpers
50
+ │   │   │   ├── s3.rb
51
+ │   │   │   ├── stringwizard.rb
52
+ │   │   │   ├── table.rb
53
+ │   │   │   └── webdav.rb
54
+ │   │   ├── loads.rb
55
+ │   │   └── models
56
+ │   │   ├── campaigns.rb
57
+ │   │   ├── channels.rb
58
+ │   │   ├── child
59
+ │   │   │   ├── activity.rb
60
+ │   │   │   ├── criteria.rb
61
+ │   │   │   ├── lead.rb
62
+ │   │   │   └── mobj.rb
63
+ │   │   ├── etl.rb
64
+ │   │   ├── leads.rb
65
+ │   │   ├── load.rb
66
+ │   │   ├── mobjects.rb
67
+ │   │   ├── streams.rb
68
+ │   │   ├── tags.rb
69
+ │   │   └── validate.rb
70
+ │   └── gooddata_marketo.rb
71
+
72
+ └── tests
73
+ └── test.rb
74
+
75
+ ## Custom Scripts
76
+ In addition to running the main process you can also write scripts for exploration and small transformations with the Marketo API. To start, require the Marketo Connector and create a client.
77
+
78
+ require 'gooddata_marketo'
79
+
80
+ user = ""
81
+ key = ""
82
+ subdomain = ""
83
+
84
+ client = GoodDataMarketo.connect(:user_id => user,
85
+ :encryption_key => key,
86
+ :api_subdomain => subdomain)
87
+
88
+ From the client you can get a specific lead or a stream of ids.
89
+
90
+ lead = client.leads.get_by_email('email@email.com')
91
+ leads = client.leads.get_multiple ['23590','2930','9209'], :filters => 'Merge Leads', :type => 'IDNUM'
92
+
93
+ ## Customizing Calls
94
+ Marketo API queries can be very time consuming, in the hours or days range, before a response returns. This is why the Marketo API allows for very complex filtration options like creation date, data-type, request size, etc. Filtering your queries to some degree is **required** due to the size and slower speed of Marketo. Keep in mind that, as of this writing, Ruby processes on the GoodData platform automatically terminate after 5 hours. Setting up configuration options for the two main calls, `client.leads.get_changes` and `client.leads.get_multiple`, is almost identical; you can learn more about them [here](http://developers.marketo.com/documentation/soap/getmultipleleads/).
95
+
96
+ #### Important Configurations
97
+
98
+ * ` :timeout => Int `
99
+
100
+ Specifies the amount of time before you want a call to the API to fail. Currently the library-wide default is 120, which is also the time allocated to the Savon gem in the initialization of the client. To go past that you will need to change the timeout when the client is first initialized.
101
+
102
+ * ` :filters => Array `
103
+
104
+ A list of activity types which are translated into the *activity_name_filters*. Due to changes in the Marketo API, be careful to write these in complete form, like 'Merge Leads' instead of 'MergeLeads' or 'Webpage Clicks' instead of 'webpageClicks'.
105
+
106
+ * ` :type => String `
107
+
108
+ Defines the type of your query. For instance, `:type => 'IDNUM'` would mean you are searching for data using a Marketo ID, or you might use `:type => 'EMAIL'` if you are querying using an email. In specific cases like `client.leads.get_by_id` the type is set for you.
109
+
110
+ If the given call you are looking for is not fully implemented, you can always build the complete SOAP request yourself by calling `client.call` directly.
111
+ Outside of the authentication options, the default configuration for the client is in *lib/gooddata_marketo/client.rb* under the parameter *DEFAULT_CONFIG*.
112
+
113
+ ## Leads
114
+ All calls to Marketo return objects that contain attributes. Currently the Marketo Connector supports *Lead* and *Activity* where Activity is a child of lead containing the ID and values pertaining to the lead's activity. Custom attributes can be added to leads which include values, accessing them looks like this:
115
+
116
+ leads = client.leads.get_changes :last_created_at => '12/02/2013'
117
+ lead = leads.first
118
+ custom_form = lead.attributes['c__Custom_Form']
119
+
120
+ Where `custom_form` will now contain the given value of the lead. Lead and Activity objects can also be flattened to save to CSV for parsing in the future.
121
+
122
+ lead = client.leads.get_by_email('john@smith.com')
123
+
124
+ CSV.open('leads.csv', 'wb') do |csv|
125
+
126
+ csv << lead.headers # => In case you want the attribute headers saved to the CSV as well.
127
+ csv << lead.to_row
128
+
129
+ end
130
+
131
+ You can also extract the activities of a given lead through `lead.activities` or changes of a given lead with `lead.changes`. For example if you wanted to write the attributes for leads which included the City 'San Francisco' to csv...
132
+
133
+ leads = client.leads.get_changes :filters => ['Email Opens', 'Merge Leads'], :latest_created_at => '10/11/2014'
134
+
135
+ san_francisco_leads = leads.select {|lead| lead.attributes['City'] == 'San Francisco'}
136
+
137
+ CSV.open('leads.csv', 'wb') do |csv|
138
+ san_francisco_leads.each do |lead|
139
+ csv << lead.to_row
140
+ end
141
+ end
142
+
143
+ In addition, you can always work with the raw response of the lead object with `lead.raw`.
144
+
145
+ ## Streams
146
+ Specific to getMultipleLeads, getLeadChanges, getMObjects, and getLeadActivity calls, the Marketo SOAP API supports "Streams" which allow you to increment through large responses by offset stamp or stream ID (similar to pagination).
147
+ Within the client script (*lib/gooddata_marketo/client.rb*) are two calls: a direct call (`client.call`) and a streaming function (`client.stream`) which automatically works through the paged API responses, building a complete object.
148
+
149
+ In the event that Marketo times out **AND** you gave specific date configurations like *:started_at => TIME* or *:oldest_created_at => TIME*, the stream will break your calls into smaller time windows until it does receive a response within the given timeout configuration. This helps ensure that the most relevant data is always in a client's account. You can also deactivate this by passing the configuration symbol *:safe* and setting it to **false**.
150
+
151
+ ## Custom Objects
152
+ Marketo allows you to query the API using what it calls *criteria*, which is a hash comprised of the attribute name and value with a comparison statement like *EQ* "Equals" or *LT* "Less Than". Many criteria hashes can be sent with a given request, and due to the size of these queries it is suggested that you do so. Rather than forcing you to build large arrays of criteria hashes, you simply add criteria to *client.mobjects* like so:
153
+
154
+ criteria_one = {
155
+ :attr_name => "Id", # See the types of content it can search above.
156
+ :comparison => "LE",
157
+ :attr_value => "1010"
158
+ }
159
+
160
+ >> client.mobjects.add(criteria_one)
161
+ [{:attr_name => 'Id', :comparison => 'LE', :attr_value => '1010'}]
162
+
163
+ criteria_two = {
164
+ :attr_name => "Zip", # See the types of content it can search above.
165
+ :comparison => "EQ",
166
+ :attr_value => "94104"
167
+ }
168
+
169
+ >> client.mobjects.add(criteria_two)
170
+ [{:attr_name => 'Id', :comparison => 'LE', :attr_value => '1010'}, {:attr_name => 'Zip', :comparison => 'EQ', :attr_value => '94104'}]
171
+
172
+ The data is stored in a class array which allows you to do this:
173
+
174
+ objects = client.mobjects.get
175
+
176
+ You can also make requests with one criteria hash like this:
177
+
178
+ client.mobjects.get :criteria => HASH
179
+
180
+ Finally, criteria can be removed by name with: ` client.mobjects.remove_criteria 'NAME' `.
181
+
182
+ ## Supported Calls
183
+
184
+ * [` client.leads.get_changes `](http://developers.marketo.com/documentation/soap/getleadchanges/) - Optional config. Streaming is required.
185
+
186
+ * [` client.leads.get_multiple `](http://developers.marketo.com/documentation/soap/getmultipleleads/) - Requires a query Array, optional config.
187
+
188
+ * [` client.leads.get_lead `](http://developers.marketo.com/documentation/soap/getlead/) - Requires query String, optional config.
189
+
190
+ * [` client.mobjects `](http://developers.marketo.com/documentation/soap/getcustomobjects/) - Requires criteria to be set (See Custom Objects above)
191
+
192
+ * [` client.usage `](http://developers.marketo.com/documentation/rest/get-daily-usage/) - Requires the API key to be set within the REST config.
193
+
194
+ * [` client.channels `](http://developers.marketo.com/documentation/soap/getchannels/) - Requires query value, optional config.
195
+
196
+ * [` client.tags `](http://developers.marketo.com/documentation/soap/gettags/) - Requires the Lead email or ID for the query, optional config.
197
+
198
+ ## Custom Requests
199
+ You can call custom requests should you need more specific control or use of a feature.
200
+
201
+ #### Direct Call
202
+ A single call is ` client.call(WEB_METHOD, MESSAGE) ` where WEB_METHOD is the specific SOAP function you are attempting to use and message is the SOAP request message (as a hash).
203
+
204
+ #### Streaming Calls
205
+ In addition, for web methods that support streaming, you can use `client.stream(WEB_METHOD, MESSAGE)` to automatically iterate through either *offset* or *start_position* and return an array of results in the correct type.
206
+
207
+
@@ -0,0 +1,10 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'savon', '2.8.0'
4
+ gem 'rubyntlm', '0.3.2'
5
+ gem 'gooddata', '0.6.11'
6
+ gem 'rest-client', '1.7.2'
7
+ gem 'aws-sdk', '1.61.0'
8
+ gem 'pmap', '1.0.2'
9
+ gem 'gooddata_datawarehouse', '0.0.5'
10
+ gem 'gooddata_marketo', '0.0.1'
@@ -0,0 +1,17 @@
1
+ {
2
+ "MARKETO_SOAP_USER" : "lionbridge1_954603944F16DBA4B8DCE3",
3
+ "MARKETO_SOAP_KEY" : "248763534270886944FF1166DDBBAA56BB9915989447",
4
+ "MARKETO_REST_ID" : "Ayc9WFfqN1SHrDNVTcw3PoaulwBomGSr",
5
+ "MARKETO_REST_SECRET" : "2581e771-9030-4ecd-8960-9a16759166d2",
6
+ "MARKETO_SUBDOMAIN" : "972-PWS-816",
7
+ "MARKETO_API_LIMIT" : 10000,
8
+ "LEAD_LIST_DUMP_CSV" : "marketo_lead_dump.csv",
9
+ "GOODDATA_USER" : "patrick.mcconlogue@gooddata.com",
10
+ "GOODDATA_PASSWORD" : "dxdemo1",
11
+ "GOODDATA_PROJECT" : "wj8uljb34dmz0mp92dsypuksv2j8xw5m",
12
+ "GOODDATA_ADS" : "t4d9806bbea8656285664cf0160d0f07",
13
+ "S3_PUBLIC_KEY" : "AKIAISTOSUAVASUBEUSQ",
14
+ "S3_PRIVATE_KEY" : "KbV1sAkB3EhgLoljUs8V21SLYF0RALPgaNZb5W+P",
15
+ "S3_BUCKET" : "marketo_connector_lionbridge"
16
+ }
17
+
File without changes
@@ -0,0 +1,541 @@
1
+ #!/usr/bin/ruby
2
+
3
+ ###################################################################
4
+ # #
5
+ # #
6
+ # GOODDATA MARKETO CONNECTOR #
7
+ # #
8
+ # Ruby process for download client data from Marketo. #
9
+ # https://github.com/gooddata/app_store/marketo_connector #
10
+ # #
11
+ # #
12
+ ###################################################################
13
+
14
+ require 'gooddata_marketo'
15
+
16
# Read credentials and connection settings from auth.json (resolved against the
# current working directory). Keys are symbolized so they can be looked up below.
auth = JSON.parse(File.read('auth.json'), :symbolize_names => true)

# Marketo SOAP / REST settings.
MARKETO_SOAP_USER   = auth[:MARKETO_SOAP_USER]
MARKETO_SOAP_KEY    = auth[:MARKETO_SOAP_KEY]
MARKETO_REST_ID     = auth[:MARKETO_REST_ID]
MARKETO_REST_SECRET = auth[:MARKETO_REST_SECRET]
MARKETO_SUBDOMAIN   = auth[:MARKETO_SUBDOMAIN]
MARKETO_API_LIMIT   = auth[:MARKETO_API_LIMIT]

# Name of the local CSV that holds the lead id list.
LEAD_LIST_DUMP_CSV = auth[:LEAD_LIST_DUMP_CSV]

# GoodData account, project and ADS instance.
GOODDATA_USER     = auth[:GOODDATA_USER]
GOODDATA_PASSWORD = auth[:GOODDATA_PASSWORD]
GOODDATA_PROJECT  = auth[:GOODDATA_PROJECT]
GOODDATA_ADS      = auth[:GOODDATA_ADS]

# S3 bucket used for the queue, the id dump and the connector configuration.
S3_PUBLIC_KEY  = auth[:S3_PUBLIC_KEY]
S3_PRIVATE_KEY = auth[:S3_PRIVATE_KEY]
S3_BUCKET      = auth[:S3_BUCKET]

# No need to configure beyond this point. #
# Service clients are kept in top-level instance variables so the methods
# defined later in this script can reach them.
@s3 = S3Helper.new(
  :public_key => S3_PUBLIC_KEY,
  :private_key => S3_PRIVATE_KEY,
  :bucket => S3_BUCKET
)

@webdav = WebDAV.new(
  :user => GOODDATA_USER,
  :pass => GOODDATA_PASSWORD,
  :project => GOODDATA_PROJECT
)

@dwh = GoodData::Datawarehouse.new(GOODDATA_USER, GOODDATA_PASSWORD, GOODDATA_ADS)

@marketo = GoodDataMarketo.connect(
  :user_id => MARKETO_SOAP_USER,
  :encryption_key => MARKETO_SOAP_KEY,
  :api_subdomain => MARKETO_SUBDOMAIN,
  :webdav => @webdav
)

# Progress messages throughout this script are printed only when this is set.
GoodDataMarketo.logging = true

# Test services
# @marketo.test_rest
# @marketo.test_soap
# @s3.test
# @dwh.test
# @webdav.test
61
+
62
# Works through a queue of load jobs until none are left.
#
# On every pass the method asks the Marketo client for its saved loads. If one
# is available it is executed, its records are written to a local CSV, the CSV
# is imported into an ADS table, and the job is either advanced to its next
# time window (get_changes) or handed to determine_loads_state (get_multiple).
# If no load is available, the next job is popped from the queue and created;
# an exhausted queue ends the loop.
#
# config keys read here:
#   :queue          - Array of job hashes (required unless a non-empty
#                     queue.json exists on S3, which takes precedence)
#   :marketo_client - Marketo client (required)
#   :ads_table      - ADS table name (required)
#   :index          - used only to name @lead_dump_file (default 1)
#   :increment      - stored in @increment (default 12 hours, in seconds)
#
# Raises RuntimeError when the queue is missing/empty, the client is missing,
# :ads_table is missing, or a job's :method is neither 'get_changes' nor
# 'get_multiple'.
def run_load(config = {})
  index = config[:index] || 1
  @increment = config[:increment] || (12 * 60 * 60)
  @marketo = config[:marketo_client]
  @ads_target_table_name = config[:ads_table]
  @lead_dump_file = "get_load_chunk_#{index}"

  if @s3.exists? 'queue.json'
    # NOTE(review): a queue restored from S3 is parsed without symbolized keys,
    # while queues passed in by callers use symbol keys - confirm loads.create
    # accepts both.
    @queue = JSON.parse(@s3.download('queue.json'))

    # An empty stored queue is discarded in favour of the one passed in.
    if @queue.empty?
      puts 'WARNING: Empty queue array was extracted from S3. Using queue passed in method.' if GoodDataMarketo.logging
      @s3.delete('queue.json')
      @queue = config[:queue]
    end
  else
    @queue = config[:queue]
  end

  # The nil check makes a missing :queue fail with the intended message
  # rather than a NoMethodError on nil.
  raise "You must pass an array of job hashs :queue_array when using run_load AND define a :marketo_client." unless @queue && @queue.length > 0 && @marketo
  raise ":ads_table param is required with using run_load." unless @ads_target_table_name

  loop do
    loads = @marketo.loads(:user => GOODDATA_USER,
                           :pass => GOODDATA_PASSWORD,
                           :project => GOODDATA_PROJECT,
                           :marketo_client => @marketo)

    if loads.available?
      file = loads.available.first
      job = loads.create :name => file

      @job_name = file
      @id = job.id
      @ads_table = job.json[:ads_table]

      # Run the load from local or remote; results are then in job.storage.
      job.execute

      unless job.storage.empty?
        # Union of the columns of every record in this load.
        @columns_load_aggregate = ['sys_capture_date']
        job.storage.each do |raw_json_lead|
          lead = GoodDataMarketo::Activity.new raw_json_lead if job.json[:method] == 'get_changes'
          lead = GoodDataMarketo::Lead.new raw_json_lead if job.json[:method] == 'get_multiple'

          print "\r#{Time.now} => Resolving columns: #{lead.id}\s" if GoodDataMarketo.logging

          @columns_load_aggregate = @columns_load_aggregate | lead.columns
        end
        @columns_load_aggregate.map! { |column| column.downcase.gsub('-', '_') }

        # DEFAULTS: pick the ADS table by job method. This must compare with
        # `==`; an assignment here would overwrite the job's method and make
        # the 'dump' fallback unreachable.
        if job.json[:method] == 'get_multiple'
          ads_target_table_name = 'marketo_leads'
        elsif job.json[:method] == 'get_changes'
          ads_target_table_name = 'marketo_changes'
        else
          ads_target_table_name = 'dump'
        end

        # The table named in the job wins over the per-method default. The same
        # name is used for the import and for the row count reported below.
        target_table = @ads_table || ads_target_table_name

        # Set up a new Table/Automatically loads current table if exists.
        table = Table.new :client => @dwh, :name => target_table, :columns => ['id', 'sys_capture_date']

        if @columns_load_aggregate.length > 0
          table.merge_columns :merge_with => @columns_load_aggregate
        end

        @csv = CSV.open("#{@id}.csv", 'w')

        updated_columns = table.columns

        if @columns_load_aggregate.length > 0
          @csv << updated_columns
        end

        count = 0
        ids_for_get_multiple_load = []

        puts "#{Time.now} => Building objects from stream cache." if GoodDataMarketo.logging

        total_leads = job.storage.length
        total_leads_index = 0

        # pmap (from the pmap gem) is expected to run this block on several
        # threads, so everything shared between iterations - the counters, the
        # id list and the CSV handle - is only touched while holding this lock.
        write_lock = Mutex.new

        job.storage.pmap do |raw_lead|
          lead = GoodDataMarketo::Activity.new raw_lead if job.json[:method] == 'get_changes'
          lead = GoodDataMarketo::Lead.new raw_lead if job.json[:method] == 'get_multiple'

          row_to_save_csv = updated_columns.map do |column|
            value =
              if lead.columns.include? column
                lead.values[column]
              elsif lead.columns.include? "#{column}_m" # Check for anything that was removed by SQL
                lead.values["#{column}_m"]
              elsif column == 'sys_capture_date'
                Time.now.to_s
              end

            # An Array value is reduced to its first element (nil when empty);
            # any other value is written unchanged.
            [column, value].flatten[1]
          end

          write_lock.synchronize do
            total_leads_index += 1
            percentage = ((100 * total_leads_index.to_f) / total_leads.to_f).round(1)
            print "\r#{Time.now} => Transforming #{lead.id} #{percentage}%\s" if GoodDataMarketo.logging

            # Get any new lead or merge lead ids and queue them for a load with get multiple.
            ids_for_get_multiple_load << lead.values['merge_id'] if lead.values['merge_id']
            ids_for_get_multiple_load << lead.values['lead_id'] if lead.values['lead_id']

            count += 1
            @csv << row_to_save_csv
          end
        end

        # Closing flushes the buffered rows and releases the file handle before
        # the CSV is imported and deleted.
        @csv.close

        table.import_csv("#{@id}.csv")

        puts "#{Time.now} => #{@dwh.execute_select("SELECT id FROM #{target_table}").length} rows in ADS." if GoodDataMarketo.logging
        puts "#{Time.now} => Rows extracted from CSV: #{count}" if GoodDataMarketo.logging
        puts "#{Time.now} => New and merged ids queued: #{ids_for_get_multiple_load.length}" if GoodDataMarketo.logging

        save_ids_for_get_multiple ids_for_get_multiple_load, 'a'

        # File.exist? is used because File.exists? was removed in Ruby 3.2.
        File.delete("#{job.json[:name]}_load.json") if File.exist?("#{job.json[:name]}_load.json")
        File.delete("#{@id}.csv") if File.exist?("#{@id}.csv")

        puts "#{Time.now} => Ads import of \"#{@lead_dump_file}\" complete." if GoodDataMarketo.logging
        puts "#{Time.now} => Arguments: #{job.arguments}" if GoodDataMarketo.logging
      end

      case job.json[:method]
      when 'get_changes'
        # Slide the time window forward by its own width.
        oca = job.arguments[:oldest_created_at]
        lca = job.arguments[:latest_created_at]

        increment = Time.parse(lca) - Time.parse(oca)
        total_time_range = Time.now - Time.parse(oca)

        job.arguments[:oldest_created_at] = Time.parse(lca).to_s
        job.arguments[:latest_created_at] = (Time.parse(lca) + increment).to_s

        puts "#{Time.now} => API calls until current time: #{(total_time_range/increment).round}" if GoodDataMarketo.logging

        if Time.parse(job.arguments[:latest_created_at]) > Time.now
          # The next window would end in the future: this load is finished.
          job.terminate
        else
          # Otherwise save the load so the next pass picks it up again.
          job.save
        end
      when 'get_multiple'
        determine_loads_state
      else
        raise 'Unable to determine lead type ("get_multiple"/"get_changes")!'
      end
    else
      job = @queue.pop

      puts "#{Time.now} => #{@queue.length} loads remaining."

      # Persist what is left of the queue, locally and on S3.
      if @queue.length > 0
        File.open('queue.json', 'w') { |f| JSON.dump(@queue, f) }
        @s3.upload('queue.json')
      end

      if job
        loads.create job
      else
        # Queue exhausted: remove the stored queue and stop.
        @s3.delete('queue.json')
        File.delete('queue.json') if File.exist?('queue.json')
        break
      end
    end
  end
end
301
+
302
+ #####################################
303
+ # #
304
+ # INITIAL GET ALL MULTIPLE LEADS #
305
+ # #
306
+ #####################################
307
+ # Downloads all current lead ids with REST ID.
308
+ # Uses SOAP API to rotate through ids with get multiple.
309
+ # Runs once, boolean value set in initial_load_get_multiple at marketo_connector_config.json
310
+
311
# Initial bulk load of leads.
#
# Asks the Marketo client to dump all lead ids to LEAD_LIST_DUMP_CSV, reads the
# first column of that file, and feeds the ids to run_load in batches of up to
# 1000 using the 'get_multiple' method and the 'marketo_leads' table. After
# every batch the ids that remain are written back with
# save_ids_for_get_multiple.
#
# NOTE(review): batches are cut from index 1, so the entry at index 0 is never
# loaded and is written back each time - presumably a header row; confirm.
# NOTE(review): nothing here records completion in the connector configuration
# (initial_load_get_changes calls @s3.set_config for its own flag) - confirm
# whether that is handled elsewhere.
def initial_load_get_multiple
  @marketo.write_all_lead_ids_to_csv # Large bulk download to CSV of leads over REST API.

  # CSV.read closes the file once it has been parsed.
  ids = CSV.read(LEAD_LIST_DUMP_CSV).map { |row| row[0] }

  puts "#{Time.now} => #{ids.length} imported from local CSV." if GoodDataMarketo.logging

  counter = 0

  loop do
    counter += 1

    # slice! returns nil when the id list is empty; treat that as "no batch".
    batch = ids.slice!(1..1000) || []

    break if batch.empty?

    get_multiple_leads_configuration = {
      :name => "get_all_leads_chunk",
      :type => 'leads',
      :method => 'get_multiple',
      :ads_table => 'marketo_leads',
      :arguments => {
        :ids => batch, # Notice the addition of the IDS box
        :type => 'IDNUM'
      }
    }

    puts "#{Time.now} => Id count in batch:#{batch.length} (Req:#{counter})" if GoodDataMarketo.logging

    run_load :batch => batch,
             :counter => counter,
             :ads_table => 'marketo_leads',
             :marketo_client => @marketo,
             :queue => [get_multiple_leads_configuration]

    # Persist the ids still waiting to be loaded.
    save_ids_for_get_multiple ids, 'w'
  end
end
352
+
353
+ ########################################
354
+ # #
355
+ # INITIAL GET ALL LEAD CHANGES #
356
+ # #
357
+ ########################################
358
+ # Starts January 1st 2004 and increments by the day until present day.
359
+ # All Activity Types included unless specified.
360
+ # Runs once, boolean value set in initial_load_get_changes at marketo_connector_config.json
361
+
362
# Initial load of lead changes.
#
# Builds one 'get_changes' job per activity type reported by the Marketo
# client, each starting with the window January 1st 2004 - January 2nd 2004,
# passes the jobs to run_load against the 'marketo_changes' table, and then
# sets :initial_load_get_changes to true through @s3.set_config.
def initial_load_get_changes
  # "oldest_created_at" and "latest_created_at" is the size of the increment it will to current time.
  first_window = {
    :oldest_created_at => 'January 1st 2004',
    :latest_created_at => 'January 2nd 2004',
    :filters => []
  }

  job_template = {
    :name => 'get_lead_changes_chunk',
    :type => 'leads',
    :method => 'get_changes',
    :ads_table => 'marketo_changes',
    :arguments => first_window
  }.freeze

  # One job per activity type: a copy of the template whose arguments carry
  # that single type as the filter.
  jobs = []
  @marketo.activity_types.each do |activity_type|
    arguments_for_type = job_template[:arguments].merge(:filters => [activity_type])
    jobs << job_template.merge(:arguments => arguments_for_type)
  end

  run_load(
    :ads_table => job_template[:ads_table],
    :queue => jobs,
    :marketo_client => @marketo,
    :index => 1,
    :counter => 1
  )

  if GoodDataMarketo.logging
    puts "#{Time.now} => Updating initial load changes to true in connector configuration."
  end

  @s3.set_config(:initial_load_get_changes => true)
end
407
+
408
+ ########################################
409
+ # #
410
+ # SYNC MULTIPLE LEADS #
411
+ # #
412
+ ########################################
413
+
414
# Loads the leads whose ids are queued in the id dump stored on S3.
#
# Downloads LEAD_LIST_DUMP_CSV from S3 into a local file of the same name,
# reads the first column, and feeds the ids to run_load in batches of up to
# 1000 using the 'get_multiple' method and the 'marketo_leads' table. After
# every batch the ids that remain are written back with
# save_ids_for_get_multiple.
#
# NOTE(review): batches are cut from index 1, so the entry at index 0 is never
# loaded and is written back each time - presumably a header row; confirm.
def update_get_multiple_leads
  # The block form closes (and therefore flushes) the file before it is read
  # back below; otherwise the download may still sit in the write buffer.
  File.open(LEAD_LIST_DUMP_CSV, 'w') do |file|
    file.puts @s3.download(LEAD_LIST_DUMP_CSV)
  end

  # CSV.read closes the file once it has been parsed.
  ids = CSV.read(LEAD_LIST_DUMP_CSV).map { |row| row[0] }

  puts "#{Time.now} => #{ids.length} imported from local CSV." if GoodDataMarketo.logging

  counter = 0

  loop do
    counter += 1

    # slice! returns nil when the id list is empty; treat that as "no batch".
    batch = ids.slice!(1..1000) || []

    break if batch.empty?

    get_multiple_leads_configuration = {
      :name => "get_all_leads_chunk",
      :type => 'leads',
      :method => 'get_multiple',
      :ads_table => 'marketo_leads',
      :arguments => {
        :ids => batch, # Notice the addition of the IDS box
        :type => 'IDNUM'
      }
    }

    puts "#{Time.now} => Id count in batch:#{batch.length} (Req:#{counter})" if GoodDataMarketo.logging

    run_load :batch => batch,
             :counter => counter,
             :ads_table => 'marketo_leads',
             :marketo_client => @marketo,
             :queue => [get_multiple_leads_configuration]

    # Persist the ids still waiting to be loaded.
    save_ids_for_get_multiple ids, 'w'
  end
end
456
+
457
+ ########################################
458
+ # #
459
+ # SYNC LEAD CHANGES #
460
+ # #
461
+ ########################################
462
+ # Synchronizes last 12 hours from process execution.
463
+ # Runs continuously.
464
+
465
# Queues a 'get_changes' job for every activity type reported by the Marketo
# client and hands the queue to run_load.
#
# Each job starts with the window from 12 hours ago to 6 hours ago and targets
# the 'marketo_changes_date_platform' table. Returns whatever run_load returns.
def update_lead_changes
  # Changing activity types from the default set to just Visit Webpage type.
  #@marketo.activity_types = ['Visit Webpage']
  #@marketo.activity_types = ['New SFDC Opportunity','Remove from Opportunity','Add to Opportunity', 'Update Opportunity']

  # Both bounds are derived from one clock reading so the window is exactly
  # six hours wide.
  now = Time.now
  twelve_hours_ago = (now - (12 * 60 * 60)).to_s
  six_hours_ago = (now - (6 * 60 * 60)).to_s

  get_lead_changes_configuration = {
    :name => 'get_lead_changes_chunk',
    :type => 'leads',
    :method => 'get_changes',
    :ads_table => 'marketo_changes_date_platform',
    :arguments => {
      # "oldest_created_at" and "latest_created_at" is the size of the increment it will to current time.
      :oldest_created_at => twelve_hours_ago,
      :latest_created_at => six_hours_ago,
      :filters => []
    }
  }.freeze

  # One job per activity type: a copy of the template whose arguments carry
  # that single type as the filter.
  queue = @marketo.activity_types.map do |type|
    get_lead_changes_configuration.merge(
      :arguments => get_lead_changes_configuration[:arguments].merge(:filters => [type])
    )
  end

  run_load :ads_table => get_lead_changes_configuration[:ads_table],
           :queue => queue,
           :marketo_client => @marketo,
           :increment => (24 * 60 * 60),
           :counter => 1
end
515
+
516
# Writes the given ids to LEAD_LIST_DUMP_CSV, one id per row, and uploads the
# file through @s3.
#
# ids        - Enumerable of id values
# write_type - file mode handed to CSV.open; callers in this script use 'w'
#              and 'a'
#
# Returns the result of @s3.upload.
def save_ids_for_get_multiple(ids, write_type)
  # The block form closes the file, so every row is on disk and the handle is
  # released before the upload starts.
  CSV.open(LEAD_LIST_DUMP_CSV, write_type) do |csv|
    ids.each { |id| csv << [id] }
  end

  @s3.upload(LEAD_LIST_DUMP_CSV)
end
522
+
523
# Entry point: fetch the connector configuration through the S3 helper and pick
# the job to run. (Per the original note, get_config is expected to create the
# configuration file when none exists yet.)
config = @s3.get_config

if config[:initial_load_get_multiple]
  # Initial lead load already recorded as done: sync recent lead changes.
  update_lead_changes
  #
  # #Once the most recent changes downloaded, create and save a load for update_get_multiple_leads
  # update_get_multiple_leads
else
  # Initial lead load not recorded yet: run it first.
  initial_load_get_multiple

  # Disabled step, previously an intermediate branch of this conditional:
  # elsif !config[:initial_load_get_changes]
  #
  # initial_load_get_changes
end