mobilize-base 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,309 @@
1
+ module Mobilize
2
# Google Drive account helpers. Account settings come from the 'gdrive'
# section of the Base config for the current environment; sessions are opened
# through GoogleDrive.login.
module Gdriver
  # Returns the 'gdrive' config hash for the current Base.env.
  def self.config
    Base.config('gdrive')[Base.env]
  end

  # Email address of the configured owner account.
  def self.owner_email
    config['owner']['email']
  end

  # Password for +email+: the owner's when +email+ is the owner address,
  # otherwise the matching worker's. Returns nil when no worker matches.
  def self.password(email)
    return config['owner']['pw'] if email == owner_email

    account = workers(email)
    account['pw'] if account
  end

  # Configured admin entries.
  def self.admins
    config['admins']
  end

  # With no argument, returns every configured worker entry; with an email,
  # returns the first worker entry carrying that email (nil if none).
  def self.workers(email=nil)
    roster = config['workers']
    return roster if email.nil?

    roster.find { |entry| entry['email'] == email }
  end

  # Email addresses of all configured workers.
  def self.worker_emails
    workers.map { |entry| entry['email'] }
  end

  # Email addresses of all configured admins.
  def self.admin_emails
    admins.map { |entry| entry['email'] }
  end

  # Email management - keeps too many jobs from using the same account at once.
  # Picks, in random order, a worker email that no 'working' Resque job is
  # using, records it against +mongo_id+ and returns it.
  # Returns false when every worker email is already in use.
  def self.get_worker_email_by_mongo_id(mongo_id)
    busy = Mobilize::Resque.jobs('working').map { |job| job['email'] || nil }.compact
    pick = workers.sort_by { rand }.find { |entry| !busy.include?(entry['email']) }
    return false unless pick

    Mobilize::Resque.update_job_email(mongo_id, pick['email'])
    pick['email']
  end

  # Logs in to Google Drive as +email+ (the owner when omitted) and returns
  # whatever GoogleDrive.login returns.
  def self.root(email=nil)
    email ||= owner_email
    GoogleDrive.login(email, password(email))
  end

  # Files visible to +email+, filtered by +params+.
  def self.files(email=nil,params={})
    root(email).files(params)
  end

  # Subset of .files whose class is exactly GoogleDrive::Spreadsheet.
  def self.books(email=nil,params={})
    files(email, params).select { |file| file.instance_of?(GoogleDrive::Spreadsheet) }
  end
end
68
+
69
# Lookup and ACL helpers for Google Drive files, addressed either by title
# or by the id of a Dataset whose #path is the file title.
class Gfiler
  # First file visible to +email+ whose title equals +title+, or nil.
  def self.find_by_title(title,email=nil)
    Gdriver.files(email).find { |f| f.title == title }
  end

  # Same as .find_by_title, using the path of the Dataset with id +dst_id+.
  def self.find_by_dst_id(dst_id,email=nil)
    dst = Dataset.find(dst_id)
    find_by_title(dst.path,email)
  end

  # Calls add_admin_acl on the file behind Dataset +dst_id+.
  # Raises RuntimeError when the file cannot be found. Returns true.
  def self.add_admin_acl_by_dst_id(dst_id)
    file = find_by_dst_id(dst_id)
    raise "File for dataset #{dst_id} not found" unless file
    file.add_admin_acl
    true
  end

  # Calls add_admin_acl on the file titled +title+.
  # Raises RuntimeError when the file cannot be found. Returns true.
  def self.add_admin_acl_by_title(title)
    file = find_by_title(title)
    raise "File #{title} not found" unless file
    file.add_admin_acl
    true
  end

  # Calls add_worker_acl on the file titled +title+.
  # Raises RuntimeError when the file cannot be found. Returns true.
  def self.add_worker_acl_by_title(title)
    file = find_by_title(title)
    raise "File #{title} not found" unless file
    file.add_worker_acl
    true
  end

  # Resolves Dataset +dst_id+ to its path and delegates to .update_acl_by_title.
  def self.update_acl_by_dst_id(dst_id,email,role="writer",edit_email=nil)
    dst = Dataset.find(dst_id)
    update_acl_by_title(dst.path,email,role,edit_email)
  end

  # Gives +email+ the +role+ on the file titled +title+; the file is looked
  # up through the +edit_email+ account (owner when nil).
  # Raises RuntimeError when the file cannot be found.
  def self.update_acl_by_title(title,email,role="writer",edit_email=nil)
    file = find_by_title(title,edit_email)
    raise "File #{title} not found" unless file
    file.update_acl(email,role)
  end
end
109
+
110
# Spreadsheet ("book") helpers built on Gdriver.
module Gbooker
  # All books visible to +email+ whose title is exactly +title+.
  def self.find_all_by_title(title,email=nil)
    Gdriver.books(email,{"title"=>title,"title-exact"=>"true"})
  end

  # Returns the single book titled +title+, creating it under the owner
  # account when none exists, and always calls add_admin_acl on it.
  def self.find_or_create_by_title(title,email)
    matches = Gdriver.books(email,{"title"=>title,"title-exact"=>"true"})
    keeper = nil
    if matches.length <= 1
      keeper = matches.first
    else
      # Duplicates exist: keep the book whose key matches the url stored on
      # the 'gbooker' dataset entry and delete every other one.
      record = Dataset.find_by_handler_and_name('gbooker',title)
      wanted_key = record.url.split("key=").last
      matches.each do |candidate|
        if candidate.resource_id.split(":").last == wanted_key
          keeper = candidate
        else
          candidate.delete
          "Deleted duplicate book #{title}".oputs
        end
      end
    end
    if keeper.nil?
      # no usable book: create one with the owner account (remote call)
      keeper = Gdriver.root.create_spreadsheet(title)
      "Created book #{title} at #{Time.now.utc.to_s}".oputs
    end
    # drop "Sheet1" once the book has other sheets (remote calls)
    tabs = keeper.worksheets
    if tabs.length > 1
      default_tab = tabs.find { |tab| tab.title == "Sheet1" }
      default_tab.delete if default_tab
    end
    # books always get the admin acl
    keeper.add_admin_acl
    keeper
  end

  # Finds the book behind Dataset +dst_id+ by its stored url, falling back to
  # find-or-create by dataset name (and storing the new url) when the url is
  # missing or the document id is reported invalid. Finally calls update_acl
  # for the dataset's requestor email and returns the book.
  def self.find_or_create_by_dst_id(dst_id,email=nil)
    dst = Dataset.find(dst_id)
    requestor = Requestor.find(dst.requestor_id)
    book = nil
    book = Gdriver.root.spreadsheet_by_url(dst.url) if dst.url
    # validate the book at most 5 times; any other error just triggers a retry
    5.times do
      begin
        book.resource_id
        break
      rescue => err
        next unless book.nil? || err.to_s.index('Invalid document id')

        # missing or invalid document: switch to a book found/created by
        # name, remember its url, and stop retrying
        book = find_or_create_by_title(dst.name,email)
        dst.update_attributes(:url=>book.human_url)
        break
      end
    end
    # add requestor write access
    book.update_acl(requestor.email)
    book
  end
end
181
+
182
# Worksheet helpers. A sheet name has the form "book title/sheet title".
module Gsheeter

  # Cell limit constant exposed to callers.
  def self.max_cells
    400000
  end

  # Returns the TSV contents of sheet +name+, creating the sheet if needed.
  def self.read(name,email=nil)
    find_or_create_by_name(name,email).to_tsv
  end

  # Writes +tsv+ to sheet +name+, creating the sheet if needed.
  def self.write(name,tsv,email=nil)
    find_or_create_by_name(name,email).write(tsv)
  end

  # Every worksheet titled like the sheet part of +name+ across all books
  # whose title exactly matches the book part.
  def self.find_all_by_name(name,email)
    book_title,sheet_title = name.split("/")
    books = Gdriver.books(email,{"title"=>book_title,"title-exact"=>"true"})
    books.map{|b| b.worksheets}.flatten.select{|w| w.title == sheet_title }
  end

  # Returns the worksheet for +name+, adding it to its book (found or created
  # through Gbooker) with +rows+ x +cols+ cells when it does not exist yet.
  def self.find_or_create_by_name(name,email=nil,rows=100,cols=20)
    book_title,sheet_title = name.split("/")
    book = Gbooker.find_or_create_by_title(book_title,email)
    # remote call
    sheet = book.worksheets.find{|w| w.title==sheet_title}
    if sheet.nil?
      # remote call
      sheet = book.add_worksheet(sheet_title,rows,cols)
      ("Created sheet #{name} at #{Time.now.utc.to_s}").oputs
    end
    sheet
  end

  # Returns the worksheet named by Dataset +dst_id+, after asking the
  # dataset's requestor to find or create the book.
  def self.find_or_create_by_dst_id(dst_id,email=nil)
    dst = Dataset.find(dst_id)
    r = Requestor.find(dst.requestor_id)
    name = dst.name
    book_title = name.split("/").first
    # make sure book exists and is assigned to this user
    r.find_or_create_gbook_by_title(book_title,email)
    # NOTE(review): +email+ is not forwarded here, so the sheet lookup runs
    # with the default (owner) account - confirm this is intended.
    find_or_create_by_name(name)
  end

  # Reads the source sheet of Job +job_id+ as TSV using a reserved worker
  # account. Returns false when no worker email is available.
  def self.read_by_job_id(job_id)
    j = Job.find(job_id)
    r = j.requestor
    # reserve email account for read
    email = Gdriver.get_worker_email_by_mongo_id(job_id)
    return false unless email
    source = j.param_source
    _book,sheet = source.split("/")
    # assume jobspec source if none given
    source = [r.jobspec_title,source].join("/") if sheet.nil?
    find_or_create_by_name(source,email).to_tsv
  end

  # Returns the TSV contents of the sheet named by Dataset +dst_id+.
  def self.read_by_dst_id(dst_id,email=nil)
    dst = Dataset.find(dst_id)
    find_or_create_by_name(dst.name,email).to_tsv
  end

  # Replaces the sheet named by Dataset +dst_id+ with +tsv+: the data is
  # written to a "<name>_temp" sheet, the current sheet is deleted and the
  # temp sheet is renamed to take its place.
  # Returns false (doing nothing) when +email+ is not given, true otherwise.
  def self.write_by_dst_id(dst_id,tsv,email=nil)
    dst = Dataset.find(dst_id)
    name = dst.name
    return false unless email
    # NOTE(review): +email+ only gates the write; the sheet lookups below run
    # with the default account, as in the original - confirm this is intended.
    tempsheet = find_or_create_by_name("#{name}_temp")
    tempsheet.write(tsv)
    # delete current sheet, replace it with temp one
    sheet = find_or_create_by_name(name)
    title = sheet.title
    # remote call
    sheet.delete
    begin
      tempsheet.rename(title)
    rescue
      # deliberately ignored: the rename is sometimes attempted twice
    end
    # was "#{write_name}", an undefined local that raised NameError
    "Write successful for #{name}".oputs
    true
  end

  # Writes the prior task's output of Job +job_id+ to the job's destination
  # sheet through a temp sheet, then stores the book url on the destination
  # dataset. Returns false when no worker email is available, true otherwise.
  def self.write_by_job_id(job_id)
    j = Job.find(job_id)
    r = j.requestor
    # a bare sheet title is taken to live in the requestor's jobspec book
    dest_name = if j.destination.split("/").length==1
                  "#{r.jobspec_title}/#{j.destination}"
                else
                  j.destination
                end
    sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',dest_name)
    sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
    email = Gdriver.get_worker_email_by_mongo_id(job_id)
    # return false if there are no emails available
    return false unless email
    # create temp tab and write the data to it
    tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',"#{dest_name}_temp")
    tempsheet_dst.update_attributes(:requestor_id=>r.id.to_s) if tempsheet_dst.requestor_id.nil?
    tempsheet = find_or_create_by_dst_id(tempsheet_dst.id.to_s)
    # tsv is the second to last stage's output (the last is the write)
    tsv = Dataset.find(j.tasks[j.prior_task]['output_dst_id']).read
    tempsheet.write(tsv,true,job_id)
    # delete current sheet, replace it with temp one
    sheet = find_or_create_by_name(dest_name,email)
    title = sheet.title
    # remote call
    sheet.delete
    tempsheet.title = title
    tempsheet.save
    sheet_dst.update_attributes(:url=>tempsheet.spreadsheet.human_url)
    "Write successful for #{dest_name}".oputs
    true
  end
end
309
+ end
@@ -0,0 +1,32 @@
1
+ module Mobilize
2
# Stores zlib-compressed strings in a Mongo GridFS, keyed by filename.
class Mongoer

  # Builds a GridFileSystem on the database and first host of Mongoid's
  # 'default' session. A new connection is opened on every call.
  def self.grid
    settings = ::Mongoid.configure.sessions['default']
    db_name = settings['database']
    host, port = settings['hosts'].first.split(":")
    connection = ::Mongo::Connection.new(host, port)
    ::Mongo::GridFileSystem.new(connection.db(db_name))
  end

  # Returns the inflated contents stored under +filename+.
  # Any StandardError is logged via oputs and turned into a nil result.
  def self.read_by_filename(filename)
    deflated = grid.open(filename, 'r').read
    ::Zlib::Inflate.inflate(deflated)
  rescue
    "failed Mongo read for filename #{filename}".oputs
    nil
  end

  # Deflates +string+ and stores it under +filename+, replacing any older
  # version (:delete_old). Returns true.
  def self.write_by_filename(filename,string)
    deflated = ::Zlib::Deflate.deflate(string)
    grid.open(filename, 'w', :delete_old => true) { |io| io.write(deflated) }
    true
  end

  # Deletes the file stored under +filename+. Returns true.
  def self.delete_by_filename(filename)
    grid.delete(filename)
    true
  end
end
32
+ end
@@ -0,0 +1,208 @@
1
+ module Mobilize
2
# Long-running Resque job that repeatedly enqueues due requestors and sends
# failure / long-running-worker notifications. Settings come from the
# 'jobtracker' section of the Base config for the current environment.
module Jobtracker
  # Returns the 'jobtracker' config hash for the current Base.env.
  def Jobtracker.config
    Base.config('jobtracker')[Base.env]
  end

  # modify this to increase the frequency of request cycles
  def Jobtracker.cycle_freq
    Jobtracker.config['cycle_freq']
  end

  # frequency of notifications
  def Jobtracker.notification_freq
    Jobtracker.config['notification_freq']
  end

  def Jobtracker.requestor_refresh_freq
    Jobtracker.config['requestor_refresh_freq']
  end

  # long running tolerance
  def Jobtracker.max_run_time
    Jobtracker.config['max_run_time']
  end

  # Configured admin entries.
  def Jobtracker.admins
    Jobtracker.config['admins']
  end

  # Email addresses of the configured admins.
  def Jobtracker.admin_emails
    Jobtracker.admins.map{|a| a['email']}
  end

  # The Resque worker registered under the id "jobtracker".
  def Jobtracker.worker
    Resque.find_worker_by_mongo_id("jobtracker")
  end

  # Resque workers in the given +state+.
  def Jobtracker.workers(state="all")
    Resque.workers(state)
  end

  # Status stored in the worker args when present; otherwise 'queued' when a
  # jobtracker job sits on the queue, else 'stopped'.
  def Jobtracker.status
    args = Jobtracker.get_args
    return args['status'] if args
    job = Resque.jobs.find{|j| j['args'].first=='jobtracker'}
    return 'queued' if job
    'stopped'
  end

  # Jobtracker has no persistent database state, so the status is stored on
  # the Resque job. Returns true.
  def Jobtracker.update_status(msg)
    Resque.update_job_status("jobtracker",msg)
    true
  end

  # Same as restart!; kept for callers that use the non-bang name.
  def Jobtracker.restart
    Jobtracker.restart!
  end

  # Stores +args+ on the jobtracker worker. Returns true.
  def Jobtracker.set_args(args)
    Resque.set_worker_args(Jobtracker.worker,args)
    true
  end

  # Args stored on the jobtracker worker.
  def Jobtracker.get_args
    Resque.get_worker_args(Jobtracker.worker)
  end

  # Kills Resque workers, passing +count+ through to Resque.kill_workers.
  # The file used to define this method twice (without and with +count+);
  # the second definition replaced the first, so only that one is kept.
  def Jobtracker.kill_workers(count=nil)
    Resque.kill_workers(count)
  end

  def Jobtracker.kill_idle_workers
    Resque.kill_idle_workers
  end

  def Jobtracker.prep_workers
    Resque.prep_workers
  end

  def Jobtracker.failures
    Resque.failures
  end

  # Enqueues the jobtracker job. Raises RuntimeError unless the current
  # status is 'stopped'. Returns true.
  def Jobtracker.start
    current = Jobtracker.status
    raise "Jobtracker still #{current}" if current!='stopped'
    # queue up the jobtracker (starts the perform method)
    Jobtracker.enqueue!
    true
  end

  def Jobtracker.enqueue!
    ::Resque::Job.create(Resque.queue_name, Jobtracker, 'jobtracker',{'status'=>'working'})
  end

  # Stops the running jobtracker, then starts a new one. Returns true.
  def Jobtracker.restart!
    Jobtracker.stop!
    Jobtracker.start
    true
  end

  def Jobtracker.restart_workers!
    Jobtracker.kill_workers
    sleep 5
    Jobtracker.prep_workers
  end

  # Sets the status to 'stopping' (the signal perform checks for) and blocks,
  # polling every 5 seconds, until the status changes. Returns true.
  def Jobtracker.stop!
    Jobtracker.update_status('stopping')
    sleep 5
    while Jobtracker.status=='stopping'
      puts "#{Jobtracker.to_s} still on queue, waiting"
      sleep 5
    end
    true
  end

  # Timestamp string of the last notification, or nil when no args are stored.
  def Jobtracker.last_notification
    args = Jobtracker.get_args
    args["last_notification"] if args
  end

  def Jobtracker.last_notification=(time)
    Jobtracker.set_args({"last_notification"=>time})
  end

  # True when no notification was recorded yet or the last one is older than
  # notification_freq.
  def Jobtracker.notif_due?
    last_duetime = Time.now.utc - Jobtracker.notification_freq
    last = Jobtracker.last_notification
    last.to_s.length==0 || last.to_datetime < last_duetime
  end

  # Working workers whose job 'runat' is more than max_run_time in the past.
  def Jobtracker.max_run_time_workers
    Jobtracker.workers('working').select do |w|
      w.job['runat'].to_s.length>0 &&
        (Time.now.utc - Time.parse(w.job['runat'])) > Jobtracker.max_run_time
    end
  end

  def Jobtracker.start_worker(count=nil)
    Resque.start_workers(count)
  end

  # Points the process at the test environment: test MOBILIZE_ENV, local
  # redis on port 9736 and the project's mongoid.yml.
  def Jobtracker.set_test_env
    ENV['MOBILIZE_ENV']='test'
    ::Resque.redis="localhost:9736"
    mongoid_config_path = "#{Mobilize::Base.root}/config/mongoid.yml"
    Mongoid.load!(mongoid_config_path, Mobilize::Base.env)
  end

  # When a notification is due, emails one message for failed jobs and one
  # for workers over max_run_time (each only if there is something to
  # report) and records the send time. Returns true.
  def Jobtracker.run_notifications
    return true unless Jobtracker.notif_due?
    notifs = []
    if Jobtracker.failures.length>0
      jfcs = Resque.failure_report
      failure_count = jfcs.values.map{|v| v.values}.flatten.sum
      notifs << {
        'subj' => "#{jfcs.keys.length.to_s} failed jobs, #{failure_count.to_s} failures",
        # one row per exception type, with the job name
        'body' => jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
      }
    end
    lws = Jobtracker.max_run_time_workers
    if lws.length>0
      notifs << {
        'subj' => "#{lws.length.to_s} max run time jobs",
        # NOTE(review): max_run_time_workers reads w.job['runat'] while this
        # reads w['runat'] etc. directly on the worker - confirm which is right.
        'body' => lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
      }
    end
    notifs.each do |notif|
      # use each notification's own subject (previously every mail was sent
      # with the subject of the last notification built above)
      Emailer.write(notif['subj'],notif['body']).deliver
      Jobtracker.last_notification=Time.now.utc.to_s
      "Sent notification at #{Jobtracker.last_notification}".oputs
    end
    true
  end

  # Resque entry point: until the status becomes 'stopping', runs
  # notifications and enqueues every due requestor, pausing 5 seconds between
  # passes. Returns true once told to stop.
  def Jobtracker.perform(id,*args)
    while Jobtracker.status != 'stopping'
      requestors = Requestor.all
      Jobtracker.run_notifications
      requestors.each do |r|
        Jobtracker.update_status("Running requestor #{r.name}")
        if r.is_due?
          r.enqueue!
          Jobtracker.update_status("Enqueued requestor #{r.name}")
        end
      end
      sleep 5
    end
    Jobtracker.update_status("told to stop")
    true
  end
end
208
+ end
@@ -0,0 +1,70 @@
1
+ module Mobilize
2
# Mongoid document describing a named piece of data owned by a handler
# (e.g. 'gsheeter'), with an optional compressed cache stored via Mongoer
# under the document id.
class Dataset
  include Mongoid::Document
  include Mongoid::Timestamps
  field :requestor_id, type: String
  field :handler, type: String
  field :name, type: String
  field :url, type: String
  # Integer instead of Fixnum: Fixnum was folded into Integer in Ruby 2.4 and
  # the constant was removed in Ruby 3.2.
  field :size, type: Integer
  field :last_cached_at, type: Time
  field :last_read_at, type: Time
  field :cache_expire_at, type: Time

  index({ requestor_id: 1})
  index({ handler: 1})
  index({ name: 1})

  # The callback used to name :destroy_cache, which this class never defined;
  # the cache-removal method here is delete_cache.
  before_destroy :delete_cache

  # Returns the dataset contents: the cached copy when one exists and has
  # not expired, otherwise whatever the handler's read_by_dst_id returns.
  def read
    if last_cached_at and (cache_expire_at.nil? or cache_expire_at > Time.now.utc)
      read_cache
    else
      handler.humanize.constantize.read_by_dst_id(id.to_s)
    end
  end

  # First dataset with the given handler and name, or nil.
  def Dataset.find_by_handler_and_name(handler,name)
    Dataset.where(handler: handler, name: name).first
  end

  # First dataset with the given handler and name, created when missing.
  def Dataset.find_or_create_by_handler_and_name(handler,name)
    Dataset.where(handler: handler, name: name).first ||
      Dataset.create(handler: handler, name: name)
  end

  # First dataset with the given requestor, handler and name, created when
  # missing.
  def Dataset.find_or_create_by_requestor_id_and_handler_and_name(requestor_id,handler,name)
    Dataset.where(requestor_id: requestor_id, handler: handler, name: name).first ||
      Dataset.create(requestor_id: requestor_id, handler: handler, name: name)
  end

  # Hands +data+ to the handler's write_by_dst_id, then saves the document.
  # Returns true.
  def write(data)
    handler.humanize.constantize.write_by_dst_id(id.to_s,data)
    save!
    true
  end

  # Stamps last_read_at and returns the cached string (nil if the read fails).
  def read_cache
    update_attributes(:last_read_at=>Time.now.utc)
    Mongoer.read_by_filename(id.to_s)
  end

  # Stores +string+ as the cache and records cache time, expiry and size.
  # Returns true.
  def write_cache(string,expire_at=nil)
    Mongoer.write_by_filename(id.to_s,string)
    update_attributes(:last_cached_at=>Time.now.utc,:cache_expire_at=>expire_at,:size=>string.length)
    true
  end

  # Removes the cached copy. Returns true.
  # (Previously referenced an undefined local `dst` and raised NameError.)
  def delete_cache
    Mongoer.delete_by_filename(id.to_s)
  end

end
70
+ end