gooddata_marketo 0.0.1-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +9 -0
  3. data/Gemfile.lock +131 -0
  4. data/README.md +207 -0
  5. data/bin/Gemfile +10 -0
  6. data/bin/auth.json +16 -0
  7. data/bin/main.rb +24 -0
  8. data/bin/process.rbx +541 -0
  9. data/examples/all_lead_changes.rb +119 -0
  10. data/examples/all_leads.rb +249 -0
  11. data/examples/lead_changes_to_ads.rb +63 -0
  12. data/gooddata_marketo.gemspec +25 -0
  13. data/lib/gooddata_marketo/adapters/rest.rb +287 -0
  14. data/lib/gooddata_marketo/client.rb +373 -0
  15. data/lib/gooddata_marketo/data/activity_types.rb +104 -0
  16. data/lib/gooddata_marketo/data/reserved_sql_keywords.rb +205 -0
  17. data/lib/gooddata_marketo/helpers/s3.rb +141 -0
  18. data/lib/gooddata_marketo/helpers/stringwizard.rb +32 -0
  19. data/lib/gooddata_marketo/helpers/table.rb +323 -0
  20. data/lib/gooddata_marketo/helpers/webdav.rb +118 -0
  21. data/lib/gooddata_marketo/loads.rb +235 -0
  22. data/lib/gooddata_marketo/models/campaigns.rb +57 -0
  23. data/lib/gooddata_marketo/models/channels.rb +30 -0
  24. data/lib/gooddata_marketo/models/child/activity.rb +104 -0
  25. data/lib/gooddata_marketo/models/child/criteria.rb +17 -0
  26. data/lib/gooddata_marketo/models/child/lead.rb +118 -0
  27. data/lib/gooddata_marketo/models/child/mobj.rb +68 -0
  28. data/lib/gooddata_marketo/models/etl.rb +75 -0
  29. data/lib/gooddata_marketo/models/leads.rb +493 -0
  30. data/lib/gooddata_marketo/models/load.rb +17 -0
  31. data/lib/gooddata_marketo/models/mobjects.rb +121 -0
  32. data/lib/gooddata_marketo/models/streams.rb +137 -0
  33. data/lib/gooddata_marketo/models/tags.rb +35 -0
  34. data/lib/gooddata_marketo/models/validate.rb +46 -0
  35. data/lib/gooddata_marketo.rb +24 -0
  36. data/process.rb +517 -0
  37. metadata +177 -0
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'gooddata_marketo'
4
+
5
# Credentials and settings for this example. Fill these in before running.
MARKETO_USER = ""
MARKETO_KEY = ""
MARKETO_SUBDOMAIN = ''

MARKETO_AUTHORIZED_USER = ''
MARKETO_REST_SECRET = ''
MARKETO_REST_ID = ''

MARKETO_API_LIMIT = 10000 # Total number (SOAP+REST) of API calls to make in a given day.
LEAD_LIST_DUMP_CSV = 'marketo_leads_dump.csv' # Name of file to store all lead IDs.

GOODDATA_USER = 'YOU@gooddata.com'
GOODDATA_PASSWORD = ''
GOODDATA_PROJECT = ''
GOODDATA_ADS = ''

@webdav = WebDAV.new(:user => GOODDATA_USER,
                     :pass => GOODDATA_PASSWORD,
                     :project => GOODDATA_PROJECT)

@dwh = GoodData::Datawarehouse.new(GOODDATA_USER,
                                   GOODDATA_PASSWORD,
                                   GOODDATA_ADS)

@marketo = GoodDataMarketo.connect(:user_id => MARKETO_USER,
                                   :encryption_key => MARKETO_KEY,
                                   :api_subdomain => MARKETO_SUBDOMAIN,
                                   :webdav => @webdav)

GoodDataMarketo.logging = true

###############################
#                             #
#  STAGE I: GET ALL LEAD IDS  #
#                             #
###############################
# REST API download of all available leads.

# The client lives in @marketo; a bare `marketo` is an undefined name here.
@marketo.write_all_lead_ids_to_csv :file => LEAD_LIST_DUMP_CSV

# CSV.read opens, parses and closes the file; only the first column (the id) is kept.
ids = CSV.read(LEAD_LIST_DUMP_CSV).map { |row| row[0] }

puts "#{Time.now} => #{ids.length} imported from local CSV." if GoodDataMarketo.logging
48
+
49
# Queues one 'get_multiple' load for the given lead ids and runs the load
# loop until nothing is left to do.
#
# batch - Array of lead ids, passed to the load as :ids (type 'IDNUM').
# index - Value interpolated into the load name ("get_all_leads_chunk_<index>").
#
# Sets @lead_dump_file (base name of the temporary .csv/.json files) and
# @queue (loads still to be created).
def get_all_leads_batch(batch, index)
  # STAGE II: CONFIGURE LOADS (SOAP API)
  # Configure additional loads as hashes and add them to @queue.
  get_all_leads = {
    :name => "get_all_leads_chunk_#{index}",
    :type => 'leads',
    :method => 'get_multiple',
    :arguments => {
      :ids => batch, # Notice the addition of the ids for this batch
      :type => 'IDNUM'
    }
  }

  @lead_dump_file = get_all_leads[:name]

  # If there are additional loads, add them to this array.
  @queue = [get_all_leads]

  # STAGE III: LOAD (SOAP API). No need to configure beyond this point.
  determine_loads_state
end

# Runs available loads one at a time; when none is available, creates the next
# queued load. Returns once no load is available and @queue is empty.
#
# Written as a loop rather than self-recursion so a long series of loads
# cannot exhaust the call stack. `config` is accepted but not used.
def determine_loads_state(config = {})
  loop do
    loads = @marketo.loads(:user => GOODDATA_USER,
                           :pass => GOODDATA_PASSWORD,
                           :project => GOODDATA_PROJECT,
                           :marketo_client => @marketo)

    if loads.available?
      job = loads.create :name => loads.available.first

      # Run the load from local or remote; results are then in job.storage.
      job.execute

      export_load_to_ads(job)
      reschedule_load(job)
    else
      pending = @queue.pop
      break unless pending

      loads.create pending
    end
  end
end

# Writes job.storage to "<@lead_dump_file>.csv", imports that file into the
# 'marketo_leads' table and deletes the temporary .json and .csv files.
def export_load_to_ads(job)
  # Union of every lead's columns plus the capture timestamp column.
  @columns_load_aggregate = ['sys_capture_date']
  job.storage.each { |lead| @columns_load_aggregate |= lead.columns }
  @columns_load_aggregate.map! { |column| column.downcase.gsub('-', '_') }

  # Set up a new Table / automatically loads the current table if it exists.
  table = Table.new :client => @dwh, :name => 'marketo_leads', :columns => ['id', 'email']
  table.merge_columns :merge_with => @columns_load_aggregate
  updated_columns = table.columns

  csv_path = "#{@lead_dump_file}.csv"
  count = 0

  # Block form closes the file, so everything is on disk before the import.
  CSV.open(csv_path, 'wb') do |csv|
    csv << updated_columns

    job.storage.each do |lead|
      # NOTE(review): table columns were normalised (downcase, '-' => '_') while
      # lead.columns are compared as-is; confirm the two use the same naming.
      row = updated_columns.map do |column|
        if lead.columns.include?(column)
          lead.values[column]
        elsif column == 'sys_capture_date'
          Time.now.to_s
        else
          'NULL'
        end
      end

      csv << row
      count += 1
    end
  end

  table.import_csv(csv_path)

  puts "#{Time.now} => CSV:Rows:#{count}"
  File.delete("#{@lead_dump_file}.json")
  File.delete(csv_path)

  puts "#{Time.now} => ADS:TableComplete:#{@lead_dump_file}" if GoodDataMarketo.logging
end

# Decides what happens to a finished load, based on job.json[:method].
#
# Raises RuntimeError when the method is neither 'get_changes' nor 'get_multiple'.
def reschedule_load(job)
  case job.json[:method]
  when 'get_changes'
    # Shift the load window forward by 12 hours.
    time_increment = 12 * 60 * 60
    oca = job.arguments[:oldest_created_at]
    lca = job.arguments[:latest_created_at]

    job.arguments[:oldest_created_at] = (Time.parse(oca) + time_increment).to_s
    job.arguments[:latest_created_at] = (Time.parse(lca) + time_increment).to_s

    # If the window that was just loaded already ended in the future, kill the
    # load; otherwise save it so it runs again with the shifted window.
    if Time.parse(lca) > Time.now
      job.terminate
    else
      job.save
    end
  when 'get_multiple'
    # One-shot load: nothing to reschedule.
  else
    raise 'Unable to determine lead type ("get_multiple"/"get_changes")!'
  end
end
219
+
220
counter = 0

# Process the ids in batches of up to 1000 until none are left.
# Array#shift(n) takes from the front (index 0 included) and returns [] on an
# empty array, so no id is skipped and an empty list simply ends the loop.
until ids.empty?
  counter += 1

  batch = ids.shift(1000)

  puts "#{Time.now} => Batch:Downloader:Length:#{batch.length} (Req:#{counter})" if GoodDataMarketo.logging
  get_all_leads_batch batch, counter

  # Rewrite the dump file with the ids that are still pending.
  CSV.open(LEAD_LIST_DUMP_CSV, 'w') do |csv|
    ids.each { |row| csv << [row] }
  end
end

# binding.pry # Debugging aid: uncomment (with pry loaded) to inspect the results.
247
+ # table.select("SELECT * FROM #{table_name} ORDER BY id")
248
+ # SELECT * FROM marketo_leads ORDER BY id
249
+
@@ -0,0 +1,63 @@
1
+ # encoding: UTF-8
2
+ # !!! JRUBY 1.7 REQUIRED !!!
3
+
4
+ # MARKETO CONNECTOR EXAMPLE
5
+ # Gets changes for Merge Leads since March 26th 2014. Creates the target table in ADS if it does not already exist, then BULK INSERTS the changed leads into that table.
6
+
7
+
8
+ require 'gooddata_marketo'
9
+
10
MARKETO_USER = ''
MARKETO_KEY = ''
MARKETO_SUBDOMAIN = ''

GOODDATA_USER = ''
GOODDATA_PASSWORD = ''
GOODDATA_PROJECT = ''
GOODDATA_ADS = ''

#############
#           #
#  MARKETO  #
#           #
#############

client = GoodDataMarketo.connect(:user_id => MARKETO_USER, :encryption_key => MARKETO_KEY, :api_subdomain => MARKETO_SUBDOMAIN)

changes = client.leads.get_changes(:oldest_created_at => 'March 26th 2014', :filters => ['Merge Leads'])

changed_ids = changes.map { |c| c.id }

leads = client.leads.get_multiple(:ids => changed_ids, :type => 'IDNUM')

# Temporary CSV holding the changed leads; the same path is imported below.
file = "tmp_leads_#{Process.pid}.csv"

CSV.open(file, 'wb') do |csv|
  csv << leads.first.headers
  leads.each do |lead|
    csv << lead.to_row
  end
end

#############
#           #
#    ADS    #
#           #
#############

dwh = GoodData::Datawarehouse.new(GOODDATA_USER, GOODDATA_PASSWORD, GOODDATA_ADS)

# Set up a Table & Import BULK CSV
table_name = 'changes'
table = Table.new :client => dwh, :name => table_name
table.import_csv(file)

# File.delete(file) # Clean up

# Print the ADS additions (an empty answer counts as yes).
puts 'Print tables?'
input = gets.chomp
if ['y', 'yes', ''].include?(input)
  table.select("SELECT * FROM #{table_name} ORDER BY id")
end
63
+
@@ -0,0 +1,25 @@
1
# Gem specification for gooddata_marketo (built for the 'java' platform).
Gem::Specification.new do |s|
  s.name = 'gooddata_marketo'
  s.version = '0.0.1'
  s.date = '2015-01-01'
  s.summary = "Marketo SOAP/REST wrapper to CSV/GoodData ADS"
  s.description = "A gem."
  s.authors = ["Patrick McConlogue"]
  s.email = 'patrick.mcconlogue@gooddata.com'
  s.files = `git ls-files`.split("\n")
  s.homepage = 'https://github.com/thnkr/connectors/tree/master/marketo'

  s.required_ruby_version = '>= 1.9.3'
  s.platform = 'java'

  # NOTE(review): these are declared development-only; if any lib/ file
  # requires them at load time they belong with the runtime dependencies below.
  s.add_development_dependency 'aws-sdk', '~> 1.61.0'
  s.add_development_dependency 'rubyntlm', '~> 0.3.2'
  s.add_development_dependency 'pmap', '~> 1.0.2'

  # lib/gooddata_marketo/adapters/rest.rb does `require 'rest_client'`, so
  # rest-client must be installed for users of the gem, not just developers.
  s.add_runtime_dependency 'rest-client', '~> 1.7.2'
  s.add_runtime_dependency 'savon', '= 2.8.0'
  s.add_runtime_dependency 'gooddata', '= 0.6.11'
  s.add_runtime_dependency 'gooddata_datawarehouse', '= 0.0.5'

  s.license = 'MIT'
end
@@ -0,0 +1,287 @@
1
+ require 'gooddata_marketo/models/child/lead'
2
+
3
+ require 'rest_client'
4
+ require 'json'
5
+ require 'csv'
6
+
7
+ class GoodDataMarketo::RESTAdapter
8
+
9
+ attr_accessor :token
10
+ attr_accessor :token_timer
11
+
12
# Builds the adapter from `config`, falling back to top-level constants.
#
# config keys read here:
#   :file      - path of the lead id dump CSV
#   :subdomain - Marketo subdomain used to build "https://<subdomain>.mktorest.com"
#   :secret    - REST client secret (falls back to :user, then MARKETO_REST_SECRET)
#   :id        - REST client id (falls back to :user, then MARKETO_REST_ID)
#   :webdav    - object stored in @webdav and used by the load/clean helpers
#   :token     - existing access token; when absent a new one is requested
def initialize(config = {})
  @api_limit = MARKETO_API_LIMIT

  # Only touch LEAD_LIST_DUMP_CSV when it exists, so the literal default is reachable.
  default_dump = defined?(LEAD_LIST_DUMP_CSV) ? LEAD_LIST_DUMP_CSV : nil
  @lead_list_dump = config[:file] || default_dump || 'marketo_leads_dump.csv'

  @marketo_subdomain = config[:subdomain] || MARKETO_SUBDOMAIN
  @marketo_rest_secret = config[:secret] || config[:user] || MARKETO_REST_SECRET
  @marketo_rest_id = config[:id] || config[:user] || MARKETO_REST_ID
  @marketo_domain = "https://#{@marketo_subdomain}.mktorest.com"
  @webdav = config[:webdav]
  @lead_list_ids = []

  if config[:token]
    # Use the supplied token instead of discarding it.
    # NOTE(review): its real age is unknown, so the timer starts now — confirm.
    @token = config[:token]
    @token_timer = Time.now
  else
    @token = get_token
  end

  @token_uri = "?access_token=#{@token}"

  # Example endpoint = "/rest/v1/lists.json"
end
28
+
29
# Requests a new token when the current one is older than 6000 seconds, or
# when no fetch time has been recorded at all.
#
# Returns true if get_token was called, false otherwise.
# NOTE(review): confirm 6000 s against the `expires_in` value Marketo returns.
def refresh_token?
  max_age = 6000
  return false if @token_timer && (@token_timer + max_age) >= Time.now

  get_token
  true
end
38
+
39
# Performs a GET through RestClient and decrements the remaining-call counter.
#
# url - full request URL (printed when GoodDataMarketo.logging is on).
#
# Returns the RestClient response, or nil when the request raised a
# StandardError (the error is reported on stdout). Interrupts and exits are
# no longer swallowed.
def get(url)
  @api_limit -= 1
  begin
    puts "#{Time.now} => REST:GET (#{url})" if GoodDataMarketo.logging
    RestClient.get url
  rescue StandardError => exc
    puts exc if GoodDataMarketo.logging
    puts "#{Time.now} => Possible API Limit reached, last request was ##{@api_limit} of #{MARKETO_API_LIMIT}"
    nil
  end
end
49
+
50
# Collects the ids of all lead lists into @lead_list_ids, following
# nextPageToken until the API stops returning one.
#
# next_page_token - optional token to start from a later page.
#
# Returns @lead_list_ids. Pages are fetched in a loop (not by recursion) and a
# response without a :result entry contributes no ids.
def get_lead_lists(next_page_token = nil)
  loop do
    url = @marketo_domain + "/rest/v1/lists.json" + @token_uri
    url += "&nextPageToken=" + next_page_token if next_page_token

    response = get(url)
    json = JSON.parse(response.body, :symbolize_names => true)

    (json[:result] || []).each { |m| @lead_list_ids << m[:id] }

    next_page_token = json[:nextPageToken]
    break unless next_page_token
  end

  @lead_list_ids
end
75
+
76
# Fetches several leads by id in one request to /rest/v1/leads.json.
#
# config[:ids] (or config[:values]) - Array of lead ids for filterValues.
#
# Returns whatever `get` returns for the request.
# NOTE(review): with no ids given, a hard-coded list of five ids is used;
# this looks like leftover sample data — confirm before relying on it.
def get_multiple_leads(config = {})
  refresh_token?

  lead_ids = config[:ids] || config[:values] || [28567, 30885, 32240, 37161, 40832]
  query = "&filterType=id&filterValues=#{lead_ids.join(',')}"

  get("#{@marketo_domain}/rest/v1/leads.json#{@token_uri}#{query}")
end
89
+
90
# Streams the leads of every pending lead list into the dump CSV.
#
# config[:fields] - value for the `fields` query parameter (default 'id').
#
# Steps: refresh the token if needed, load the pending list ids, the saved
# page token and any id dump held on WebDAV, then download each list page by
# page. Finishes with `clean` and `resolve_with_etl_controller`, whose result
# is returned.
def get_all_leads(config = {})
  refresh_token?

  # Check if we already have a leads list; if so, download it.
  load_lists

  # Check if a stream was broken while downloading a specific list.
  load_last_page

  # Check if a partial id dump is available on WebDAV; if so, download it.
  load_id_dump

  @fields = config[:fields] || 'id'
  @csv = CSV.open(@lead_list_dump, 'a+')

  begin
    # Take lists off the front one at a time. Shifting inside `each` over the
    # same array skipped lists and paired a URL with the wrong list id.
    until @lead_list_ids.empty?
      list = @lead_list_ids.shift
      url = @marketo_domain + "/rest/v1/list/#{list}/leads.json" + @token_uri

      if @next_page_token_list == list
        # This list was interrupted earlier: resume from the saved page token.
        puts "#{Time.now} => REST(Resumed):leadList:#{list}:NextPageToken:#{@next_page_token}" if GoodDataMarketo.logging
        url += "&nextPageToken=" + @next_page_token
      else
        url += "&fields=#{@fields}"
      end

      rest_lead_stream url, list
    end
  ensure
    # Close the dump so every row is on disk before it is uploaded by `clean`.
    @csv.close
  end

  # Remove lists from WebDAV & local, upload the Marketo id dump to WebDAV.
  clean

  # Tell the ETL controller that an update has occurred.
  resolve_with_etl_controller
end

# Downloads one lead list page by page, appending each result's :id to @csv.
#
# url     - URL of the first page to fetch.
# list_id - id of the list being downloaded.
#
# Before each further page the next page token is saved through
# `remember_next_page`. When the list is exhausted, the remaining
# @lead_list_ids are written to lead_list_ids.json and uploaded to WebDAV.
def rest_lead_stream(url, list_id)
  loop do
    response = get(url)
    json = JSON.parse(response.body, :symbolize_names => true)
    results = json[:result]

    if results
      results.each { |result| @csv << [result[:id]] }
      @csv.flush
      puts "#{Time.now} => REST:leadList:#{list_id}:Results:#{results.length}" if GoodDataMarketo.logging
    end

    next_page_token = json[:nextPageToken]
    break unless next_page_token

    # Save the *page* token (not the access token) so the list can be resumed.
    remember_next_page :token => next_page_token, :list => list_id
    url = @marketo_domain + "/rest/v1/list/#{list_id}/leads.json" + @token_uri +
          "&fields=#{@fields}" + "&nextPageToken=" + next_page_token
  end

  # Update the local and remote lead lists.
  File.open('lead_list_ids.json', 'w') { |f| JSON.dump(@lead_list_ids, f) }
  @webdav.upload('lead_list_ids.json')
end
180
+
181
# Runs get_all_leads with :fields forced to 'id'.
# Works on a copy of `config`, so the caller's hash is left untouched.
def get_all_lead_ids(config = {})
  get_all_leads config.merge(:fields => 'id')
end
185
+
186
# Runs get_all_leads with :fields forced to 'email'.
# Works on a copy of `config`, so the caller's hash is left untouched.
def get_all_lead_emails(config = {})
  get_all_leads config.merge(:fields => 'email')
end
190
+
191
# Restores the saved pagination state from lead_list_next_page_token.json,
# preferring the WebDAV copy over a local file.
#
# Sets @next_page_token and @next_page_token_list; both are nil when no saved
# state exists. Keys are symbolized on parse, since the :token / :list lookups
# below would otherwise never match the string keys JSON.parse produces.
def load_last_page
  page_file = 'lead_list_next_page_token.json'

  json =
    if @webdav.include? page_file
      JSON.parse(@webdav.download(page_file), :symbolize_names => true)
    elsif File.exist? page_file
      JSON.parse(IO.read(page_file), :symbolize_names => true)
    else
      {}
    end

  @next_page_token = json[:token]
  @next_page_token_list = json[:list]
end
206
+
207
# If an id dump already exists on WebDAV, downloads it and writes it to the
# local dump path so new ids are appended to it.
#
# The block form of File.open closes the file, so the content is on disk
# before anything else opens the same path.
def load_id_dump
  # Check if the lists' leads have already been dumped.
  if @webdav.exists? @lead_list_dump
    file = @webdav.download @lead_list_dump
    puts "#{Time.now} => WEBDAV:ID_DUMP:Load" if GoodDataMarketo.logging
    File.open(@lead_list_dump, 'w') { |f| f.write(file) }
  end
end
216
+
217
# Fills @lead_list_ids: from lead_list_ids.json on WebDAV when that file is
# present, otherwise by calling get_lead_lists.
def load_lists
  ids_file = 'lead_list_ids.json'
  return get_lead_lists unless @webdav.include? ids_file

  @lead_list_ids = JSON.parse(@webdav.download(ids_file))
  puts "#{Time.now} => WEBDAV:ListsLoaded:#{@lead_list_ids.length}:lead_list_ids.json" if GoodDataMarketo.logging
end
226
+
227
# Saves `config` as JSON in lead_list_next_page_token.json and uploads that
# file to WebDAV. Returns the result of the upload call.
def remember_next_page(config = {})
  marker = 'lead_list_next_page_token.json'
  File.open(marker, 'w') { |io| JSON.dump(config, io) }
  @webdav.upload(marker)
end
231
+
232
# Deletes the list-id and page-token state files from WebDAV and from the
# local directory, then uploads the lead id dump to WebDAV as a backup.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def clean
  lli = 'lead_list_ids.json'
  llnpt = 'lead_list_next_page_token.json'

  # Remove the state files from WebDAV...
  @webdav.delete(lli) if @webdav.exists?(lli)
  @webdav.delete(llnpt) if @webdav.exists?(llnpt)

  # ...and from the local working directory.
  File.delete(lli) if File.exist? lli
  File.delete(llnpt) if File.exist? llnpt

  # Upload the id dump to WebDAV for backup.
  @webdav.upload(@lead_list_dump)
end
253
+
254
# Requests a new access token from /identity/oauth/token using the
# client-credentials grant.
#
# Stores the token in @token, records the fetch time in @token_timer and
# rebuilds @token_uri so later requests use the new token instead of the old.
#
# Returns the access token string.
# NOTE(review): the request goes through `get`, which prints the URL
# (including client_secret) when logging is on — confirm that is acceptable.
def get_token
  url = @marketo_domain + "/identity/oauth/token?grant_type=client_credentials" +
        "&client_id=" + @marketo_rest_id + "&client_secret=" + @marketo_rest_secret

  response = get(url)
  access_token = JSON.parse(response.body)["access_token"]

  @token_timer = Time.now
  @token = access_token
  @token_uri = "?access_token=#{access_token}"

  access_token
end
268
+
269
# Fetches /rest/v1/stats/usage.json and returns the 'result' entry of the
# parsed response.
#
# Calls RestClient directly rather than `get`, so the URL (which carries the
# access token) is never printed to the log.
def usage
  url = "#{@marketo_domain}/rest/v1/stats/usage.json#{@token_uri}"
  payload = RestClient.get(url).body
  JSON.parse(payload)['result']
end
281
# Placeholder hook: prints 'Complete'. This is where the log would be updated
# to record that a full upload has occurred.
def resolve_with_etl_controller
  $stdout.puts 'Complete'
end
286
+
287
+ end