mobilize-base 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,309 @@
1
+ module Mobilize
2
# Google Drive account configuration and login helpers.
#
# Reads the 'gdrive' section of the Base configuration and exposes the
# owner/admin/worker accounts declared there, plus thin wrappers around
# GoogleDrive.login for listing files and spreadsheets.
module Gdriver
  # @return [Hash] the 'gdrive' configuration for the current Base.env
  def self.config
    Base.config('gdrive')[Base.env]
  end

  # @return [String] email of the configured owner account
  def self.owner_email
    config['owner']['email']
  end

  # Looks up the password for an account.
  #
  # @param email [String] owner or worker email
  # @return [String, nil] the configured password, or nil when the email
  #   matches neither the owner nor any worker
  def self.password(email)
    return config['owner']['pw'] if email == owner_email

    worker = workers(email)
    worker['pw'] if worker
  end

  # @return [Array<Hash>] the configured admin entries
  def self.admins
    config['admins']
  end

  # @param email [String, nil] when given, restricts the result to one worker
  # @return [Array<Hash>, Hash, nil] all worker entries when email is nil,
  #   otherwise the first worker entry with that email (nil if none)
  def self.workers(email = nil)
    everyone = config['workers']
    return everyone if email.nil?

    everyone.find { |entry| entry['email'] == email }
  end

  # @return [Array<String>] emails of all configured workers
  def self.worker_emails
    workers.map { |entry| entry['email'] }
  end

  # @return [Array<String>] emails of all configured admins
  def self.admin_emails
    admins.map { |entry| entry['email'] }
  end

  # Email management - used to make sure not too many emails get used at
  # the same time. Picks, in random order, the first worker account that is
  # not attached to a job currently in the 'working' state and records it
  # on the given job.
  #
  # @param mongo_id [String] id of the job reserving the account
  # @return [String, false] the reserved email, or false if none are available
  def self.get_worker_email_by_mongo_id(mongo_id)
    busy = Mobilize::Resque.jobs('working').map { |job| job['email'] || nil }.compact
    idle = workers.sort_by { rand }.find { |entry| !busy.include?(entry['email']) }
    return false unless idle

    Mobilize::Resque.update_job_email(mongo_id, idle['email'])
    idle['email']
  end

  # Logs in to Google Drive.
  #
  # @param email [String, nil] account to use; defaults to the owner account
  # @return the session object returned by GoogleDrive.login
  def self.root(email = nil)
    account = email || owner_email
    GoogleDrive.login(account, password(account))
  end

  # @param email [String, nil] account to log in with (owner when nil)
  # @param params [Hash] query parameters forwarded to the session's #files
  # @return the files visible to that account
  def self.files(email = nil, params = {})
    root(email).files(params)
  end

  # Same as .files, keeping only entries whose class is exactly
  # GoogleDrive::Spreadsheet.
  def self.books(email = nil, params = {})
    files(email, params).select { |file| file.instance_of?(GoogleDrive::Spreadsheet) }
  end
end
68
+
69
# Lookup and ACL helpers for individual Google Drive files.
#
# All lookups go through Gdriver.files and match on the file's title. Every
# helper that needs a file raises "File ... not found" when the lookup
# returns nothing, instead of failing later with a NoMethodError on nil.
class Gfiler
  # @param title [String] exact title to match
  # @param email [String, nil] account used for the lookup (owner when nil)
  # @return the first file whose title equals +title+, or nil
  def self.find_by_title(title, email = nil)
    Gdriver.files(email).find { |file| file.title == title }
  end

  # Finds the file whose title equals the dataset's path.
  #
  # NOTE(review): this reads dst.path, but the Dataset model in this package
  # declares no `path` field (only `name`) - confirm which attribute is meant.
  #
  # @param dst_id [String] Dataset id
  # @param email [String, nil] account used for the lookup
  def self.find_by_dst_id(dst_id, email = nil)
    dst = Dataset.find(dst_id)
    Gfiler.find_by_title(dst.path, email)
  end

  # Calls add_admin_acl on the dataset's file.
  #
  # @return [true]
  # @raise [RuntimeError] when no file matches the dataset
  def self.add_admin_acl_by_dst_id(dst_id)
    file = Gfiler.find_by_dst_id(dst_id)
    raise "File for dataset #{dst_id} not found" unless file

    file.add_admin_acl
    true
  end

  # Calls add_admin_acl on the file with the given title.
  #
  # @return [true]
  # @raise [RuntimeError] when no file has that title
  def self.add_admin_acl_by_title(title)
    file = Gfiler.find_by_title(title)
    raise "File #{title} not found" unless file

    file.add_admin_acl
    true
  end

  # Calls add_worker_acl on the file with the given title.
  #
  # @return [true]
  # @raise [RuntimeError] when no file has that title
  def self.add_worker_acl_by_title(title)
    file = Gfiler.find_by_title(title)
    raise "File #{title} not found" unless file

    file.add_worker_acl
    true
  end

  # Resolves the dataset's path and delegates to .update_acl_by_title.
  def self.update_acl_by_dst_id(dst_id, email, role = "writer", edit_email = nil)
    dst = Dataset.find(dst_id)
    Gfiler.update_acl_by_title(dst.path, email, role, edit_email)
  end

  # Sets +role+ for +email+ on the file with the given title.
  #
  # @param edit_email [String, nil] account used to look the file up
  # @return whatever the file's update_acl returns
  # @raise [RuntimeError] when no file has that title
  def self.update_acl_by_title(title, email, role = "writer", edit_email = nil)
    file = Gfiler.find_by_title(title, edit_email)
    raise "File #{title} not found" unless file

    file.update_acl(email, role)
  end
end
109
+
110
# Helpers for locating or creating Google spreadsheets ("books") by title
# or by Dataset record.
module Gbooker
  # @return the spreadsheets visible to +email+ whose title is exactly +title+
  def self.find_all_by_title(title, email = nil)
    Gdriver.books(email, { "title" => title, "title-exact" => "true" })
  end

  # Returns the single book with this title, creating it when absent.
  #
  # There should only be one book per title. When several exist, the one
  # whose resource key matches the key in the 'gbooker' dataset's url is
  # kept and every other one is deleted. A newly created book is created
  # through Gdriver.root with no email, i.e. the owner account. Afterwards
  # "Sheet1" is removed if the book has other sheets, and admin ACLs are
  # (re)applied.
  #
  # @param title [String] exact book title
  # @param email [String, nil] account used for the lookup
  # @return the spreadsheet
  def self.find_or_create_by_title(title, email)
    candidates = Gbooker.find_all_by_title(title, email)
    book = nil
    if candidates.length > 1
      # duplicates exist: keep only the book recorded on the dataset entry
      dataset_key = Dataset.find_by_handler_and_name('gbooker', title).url.split("key=").last
      candidates.each do |candidate|
        if candidate.resource_id.split(":").last == dataset_key
          book = candidate
        else
          # delete the invalid book
          candidate.delete
          "Deleted duplicate book #{title}".oputs
        end
      end
    else
      book = candidates.first
    end

    if book.nil?
      # add book using owner email (http)
      book = Gdriver.root.create_spreadsheet(title)
      "Created book #{title} at #{Time.now.utc.to_s}".oputs
    end

    # delete Sheet1 if there are other sheets (http)
    sheets = book.worksheets
    if sheets.length > 1
      default_sheet = sheets.find { |sheet| sheet.title == "Sheet1" }
      default_sheet.delete if default_sheet
    end

    # always make sure books have admin acl
    book.add_admin_acl
    book
  end

  # Finds the book behind a Dataset, creating it by title when the stored
  # url is missing or no longer valid, then grants the dataset's requestor
  # access via update_acl.
  #
  # The stored book is validated by calling resource_id, at most 5 times.
  # If the book is nil or the error text contains 'Invalid document id',
  # the book is found/created by the dataset name and the dataset url is
  # updated; any other error just triggers the next attempt.
  #
  # @param dst_id [String] Dataset id
  # @param email [String, nil] account used when falling back to title lookup
  # @return the spreadsheet
  def self.find_or_create_by_dst_id(dst_id, email = nil)
    dst = Dataset.find(dst_id)
    requestor = Requestor.find(dst.requestor_id)
    # http
    book = dst.url ? Gdriver.root.spreadsheet_by_url(dst.url) : nil

    5.times do
      begin
        book.resource_id
        # no error means the book is valid
        break
      rescue => exc
        replaceable = book.nil? || exc.to_s.include?('Invalid document id')
        next unless replaceable

        book = Gbooker.find_or_create_by_title(dst.name, email)
        # invalid doc: point the dataset at the new book and stop retrying
        dst.update_attributes(:url => book.human_url)
        break
      end
    end

    # add requestor write access
    book.update_acl(requestor.email)
    book
  end
end
181
+
182
# Read/write helpers for individual worksheets, addressed by a
# "book_title/sheet_title" name, by Dataset id, or by Job id.
module Gsheeter
  # @return [Integer] cell limit used by callers (400000)
  def self.max_cells
    400_000
  end

  # @param name [String] "book_title/sheet_title"
  # @param email [String, nil] account used for the lookup
  # @return the sheet contents as returned by its to_tsv
  def self.read(name, email = nil)
    Gsheeter.find_or_create_by_name(name, email).to_tsv
  end

  # Writes +tsv+ to the named sheet, creating book/sheet as needed.
  def self.write(name, tsv, email = nil)
    Gsheeter.find_or_create_by_name(name, email).write(tsv)
  end

  # @return [Array] every worksheet titled sheet_title across all books
  #   titled exactly book_title
  def self.find_all_by_name(name, email)
    book_title, sheet_title = name.split("/")
    books = Gdriver.books(email, { "title" => book_title, "title-exact" => "true" })
    books.flat_map { |book| book.worksheets }.select { |sheet| sheet.title == sheet_title }
  end

  # Finds the named worksheet, creating the book and/or sheet when missing.
  #
  # @param name [String] "book_title/sheet_title"
  # @param email [String, nil] account passed to the book lookup
  # @param rows [Integer] row count for a newly added sheet
  # @param cols [Integer] column count for a newly added sheet
  # @return the worksheet
  def self.find_or_create_by_name(name, email = nil, rows = 100, cols = 20)
    book_title, sheet_title = name.split("/")
    book = Gbooker.find_or_create_by_title(book_title, email)
    # http
    sheet = book.worksheets.find { |candidate| candidate.title == sheet_title }
    if sheet.nil?
      # http
      sheet = book.add_worksheet(sheet_title, rows, cols)
      "Created sheet #{name} at #{Time.now.utc.to_s}".oputs
    end
    sheet
  end

  # Makes sure the dataset's book exists for its requestor, then returns the
  # dataset's sheet (creating it when missing).
  #
  # NOTE(review): +email+ is forwarded to the requestor's book lookup only;
  # the sheet lookup itself runs with no email - confirm this is intended.
  #
  # @param dst_id [String] Dataset id
  # @return the worksheet
  def self.find_or_create_by_dst_id(dst_id, email = nil)
    dst = Dataset.find(dst_id)
    requestor = Requestor.find(dst.requestor_id)
    name = dst.name
    book_title = name.split("/").first
    # make sure book exists and is assigned to this user
    requestor.find_or_create_gbook_by_title(book_title, email)
    Gsheeter.find_or_create_by_name(name)
  end

  # Reads the job's source sheet using a reserved worker account.
  #
  # A source without a "/" is taken to be a sheet inside the requestor's
  # jobspec book.
  #
  # @param job_id [String] Job id
  # @return the sheet's to_tsv output, or false when no worker email is free
  def self.read_by_job_id(job_id)
    job = Job.find(job_id)
    requestor = job.requestor
    # reserve email account for read
    email = Gdriver.get_worker_email_by_mongo_id(job_id)
    return false unless email

    source = job.param_source
    _book, sheet = source.split("/")
    # assume jobspec source if none given
    source = [requestor.jobspec_title, source].join("/") if sheet.nil?
    Gsheeter.find_or_create_by_name(source, email).to_tsv
  end

  # @param dst_id [String] Dataset id
  # @return the to_tsv output of the dataset's sheet
  def self.read_by_dst_id(dst_id, email = nil)
    dst = Dataset.find(dst_id)
    Gsheeter.find_or_create_by_name(dst.name, email).to_tsv
  end

  # Replaces the dataset's sheet with new contents: the data is written to a
  # "<name>_temp" sheet, the current sheet is deleted, and the temp sheet is
  # renamed to take its place.
  #
  # NOTE(review): +email+ only gates the call (false when nil); the sheet
  # lookups below run without it - confirm whether it should be forwarded.
  #
  # @param dst_id [String] Dataset id
  # @param tsv [String] data to write
  # @return [Boolean] true on success, false when no email was given
  def self.write_by_dst_id(dst_id, tsv, email = nil)
    dst = Dataset.find(dst_id)
    name = dst.name
    return false unless email

    # create temp tab and write data to it
    tempsheet = Gsheeter.find_or_create_by_name("#{name}_temp")
    tempsheet.write(tsv)
    # delete current sheet, replace it with temp one
    sheet = Gsheeter.find_or_create_by_name(name)
    title = sheet.title
    # http
    sheet.delete
    begin
      tempsheet.rename(title)
    rescue => exc
      # best effort: sometimes it gets confused and tries to rename twice,
      # so a failure here must not abort the write - but do not hide it
      "Rename of temp sheet for #{name} failed: #{exc}".oputs
    end
    # was "#{write_name}", an undefined local that raised NameError here
    "Write successful for #{name}".oputs
    true
  end

  # Writes the output of the job's prior task to the job's destination
  # sheet, via a "<destination>_temp" sheet that is retitled after the
  # current destination sheet has been deleted.
  #
  # A destination without a "/" is taken to be a sheet inside the
  # requestor's jobspec book.
  #
  # @param job_id [String] Job id
  # @return [Boolean] true on success, false if no worker email is available
  def self.write_by_job_id(job_id)
    job = Job.find(job_id)
    requestor = job.requestor
    dest_name = if job.destination.split("/").length == 1
                  [requestor.jobspec_title, job.destination].join("/")
                else
                  job.destination
                end
    sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter', dest_name)
    sheet_dst.update_attributes(:requestor_id => requestor.id.to_s) if sheet_dst.requestor_id.nil?
    email = Gdriver.get_worker_email_by_mongo_id(job_id)
    # return false if there are no emails available
    return false unless email

    # create temp tab and write data to it
    tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter', "#{dest_name}_temp")
    tempsheet_dst.update_attributes(:requestor_id => requestor.id.to_s) if tempsheet_dst.requestor_id.nil?
    tempsheet = Gsheeter.find_or_create_by_dst_id(tempsheet_dst.id.to_s)
    # tsv is the second to last stage's output (the last is the write)
    tsv = Dataset.find(job.tasks[job.prior_task]['output_dst_id']).read
    tempsheet.write(tsv, true, job_id)
    # delete current sheet, replace it with temp one
    sheet = Gsheeter.find_or_create_by_name(dest_name, email)
    title = sheet.title
    # http
    sheet.delete
    tempsheet.title = title
    tempsheet.save
    sheet_dst.update_attributes(:url => tempsheet.spreadsheet.human_url)
    "Write successful for #{dest_name}".oputs
    true
  end
end
309
+ end
@@ -0,0 +1,32 @@
1
+ module Mobilize
2
# Stores and retrieves zlib-compressed strings in Mongo GridFS, keyed by
# filename.
class Mongoer
  # Builds a GridFileSystem from the 'default' Mongoid session, using the
  # first configured host. A new connection is opened on every call.
  #
  # @return [Mongo::GridFileSystem]
  def self.grid
    default_session = ::Mongoid.configure.sessions['default']
    db_name = default_session['database']
    host, port = default_session['hosts'].first.split(":")
    connection = ::Mongo::Connection.new(host, port)
    ::Mongo::GridFileSystem.new(connection.db(db_name))
  end

  # @param filename [String] GridFS filename
  # @return [String, nil] the inflated contents, or nil (after logging) when
  #   the read or the inflate fails for any reason
  def self.read_by_filename(filename)
    deflated = Mongoer.grid.open(filename, 'r').read
    ::Zlib::Inflate.inflate(deflated)
  rescue
    "failed Mongo read for filename #{filename}".oputs
    nil
  end

  # Deflates +string+ and writes it under +filename+, replacing any old copy.
  #
  # @return [true]
  def self.write_by_filename(filename, string)
    deflated = ::Zlib::Deflate.deflate(string)
    Mongoer.grid.open(filename, 'w', :delete_old => true) { |file| file.write(deflated) }
    true
  end

  # Deletes the GridFS file with the given name.
  #
  # @return [true]
  def self.delete_by_filename(filename)
    Mongoer.grid.delete(filename)
    true
  end
end
32
+ end
@@ -0,0 +1,208 @@
1
+ module Mobilize
2
# Long-running Resque job that polls requestors, enqueues the ones that are
# due, and periodically emails failure / long-running-job notifications.
#
# Jobtracker has no persistent database state: its status and the time of
# the last notification live in the arguments of its Resque worker.
module Jobtracker
  # @return [Hash] the 'jobtracker' configuration for the current Base.env
  def self.config
    Base.config('jobtracker')[Base.env]
  end

  # modify this to increase the frequency of request cycles
  def self.cycle_freq
    Jobtracker.config['cycle_freq']
  end

  # frequency of notifications; subtracted from Time.now in .notif_due?
  def self.notification_freq
    Jobtracker.config['notification_freq']
  end

  def self.requestor_refresh_freq
    Jobtracker.config['requestor_refresh_freq']
  end

  # long running tolerance; compared against a Time difference in
  # .max_run_time_workers
  def self.max_run_time
    Jobtracker.config['max_run_time']
  end

  # @return [Array<Hash>] configured admin entries
  def self.admins
    Jobtracker.config['admins']
  end

  # @return [Array<String>] emails of the configured admins
  def self.admin_emails
    Jobtracker.admins.map { |admin| admin['email'] }
  end

  # @return the Resque worker currently running the jobtracker, if any
  def self.worker
    Resque.find_worker_by_mongo_id("jobtracker")
  end

  # @param state [String] worker state filter passed through to Resque
  def self.workers(state = "all")
    Resque.workers(state)
  end

  # @return [String] the status stored in the worker args when present,
  #   otherwise 'queued' if a jobtracker job is on the queue, else 'stopped'
  def self.status
    args = Jobtracker.get_args
    return args['status'] if args

    queued = Resque.jobs.find { |job| job['args'].first == 'jobtracker' }
    queued ? 'queued' : 'stopped'
  end

  # Stores +msg+ as the jobtracker job's status.
  #
  # @return [true]
  def self.update_status(msg)
    Resque.update_job_status("jobtracker", msg)
    true
  end

  # Same as .restart!; kept for existing callers.
  def self.restart
    Jobtracker.restart!
  end

  # @return [true]
  def self.set_args(args)
    Resque.set_worker_args(Jobtracker.worker, args)
    true
  end

  # @return the args of the jobtracker worker (nil-ness is used by callers
  #   to detect that no worker is running)
  def self.get_args
    Resque.get_worker_args(Jobtracker.worker)
  end

  # Kills workers.
  #
  # This single definition replaces two earlier ones (a zero-argument one and
  # this one); only the later, one-argument form was ever in effect.
  #
  # @param count [Integer, nil] forwarded to Resque.kill_workers
  def self.kill_workers(count = nil)
    Resque.kill_workers(count)
  end

  def self.kill_idle_workers
    Resque.kill_idle_workers
  end

  def self.prep_workers
    Resque.prep_workers
  end

  def self.failures
    Resque.failures
  end

  # Queues the jobtracker unless it is already queued or running.
  #
  # @return [true]
  # @raise [RuntimeError] when the status is anything other than 'stopped'
  def self.start
    current = Jobtracker.status
    raise "Jobtracker still #{current}" if current != 'stopped'

    # queue up the jobtracker (starts the perform method)
    Jobtracker.enqueue!
    true
  end

  # Creates the Resque job whose execution runs .perform.
  def self.enqueue!
    ::Resque::Job.create(Resque.queue_name, Jobtracker, 'jobtracker', { 'status' => 'working' })
  end

  # Stops the jobtracker, waits for it to exit, then starts it again.
  #
  # @return [true]
  def self.restart!
    Jobtracker.stop!
    Jobtracker.start
    true
  end

  def self.restart_workers!
    Jobtracker.kill_workers
    sleep 5
    Jobtracker.prep_workers
  end

  # Signals the running jobtracker to stop and blocks, polling every 5
  # seconds, until its status is no longer 'stopping'.
  #
  # @return [true]
  def self.stop!
    # send signal for Jobtracker to check for
    Jobtracker.update_status('stopping')
    sleep 5
    while Jobtracker.status == 'stopping'
      puts "#{Jobtracker.to_s} still on queue, waiting"
      sleep 5
    end
    true
  end

  # @return the stored time of the last notification, or nil when there are
  #   no worker args
  def self.last_notification
    args = Jobtracker.get_args
    args["last_notification"] if args
  end

  def self.last_notification=(time)
    Jobtracker.set_args({ "last_notification" => time })
  end

  # @return [Boolean] true when no notification has been recorded yet or the
  #   last one is older than notification_freq
  def self.notif_due?
    last_duetime = Time.now.utc - Jobtracker.notification_freq
    last_sent = Jobtracker.last_notification
    last_sent.to_s.length == 0 || last_sent.to_datetime < last_duetime
  end

  # @return [Array] working workers whose job has a 'runat' time more than
  #   max_run_time in the past
  def self.max_run_time_workers
    Jobtracker.workers('working').select do |worker|
      started = worker.job['runat']
      started.to_s.length > 0 &&
        (Time.now.utc - Time.parse(started)) > Jobtracker.max_run_time
    end
  end

  def self.start_worker(count = nil)
    Resque.start_workers(count)
  end

  # Points the process at the test environment: test MOBILIZE_ENV, the test
  # redis port, and the mongoid config for that environment.
  def self.set_test_env
    ENV['MOBILIZE_ENV'] = 'test'
    ::Resque.redis = "localhost:9736"
    mongoid_config_path = "#{Mobilize::Base.root}/config/mongoid.yml"
    Mongoid.load!(mongoid_config_path, Mobilize::Base.env)
  end

  # When a notification is due, emails one message summarising failed jobs
  # (if any) and one listing workers past max_run_time (if any), recording
  # the send time after each.
  #
  # Each mail uses its own subject; previously every mail was sent with the
  # subject of whichever notification had been built last.
  #
  # @return [true]
  def self.run_notifications
    return true unless Jobtracker.notif_due?

    notifs = []
    if Jobtracker.failures.length > 0
      report = Resque.failure_report
      failure_total = report.values.map { |counts| counts.values }.flatten.sum
      notifs << {
        'subj' => "#{report.keys.length.to_s} failed jobs, #{failure_total.to_s} failures",
        # one row per exception type, with the job name
        'body' => report.map do |job_name, counts|
          counts.map { |exception, times| [job_name, " : ", exception, ", ", times, " times"].join }
        end.flatten.join("\n\n")
      }
    end

    long_runners = Jobtracker.max_run_time_workers
    if long_runners.length > 0
      notifs << {
        'subj' => "#{long_runners.length.to_s} max run time jobs",
        # NOTE(review): these read w['spec'] etc. from the worker itself,
        # while max_run_time_workers reads w.job['runat'] - confirm the
        # worker object really answers [] with these keys.
        'body' => long_runners.map do |w|
          %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}
        end.join("\n\n")
      }
    end

    notifs.each do |notif|
      Emailer.write(notif['subj'], notif['body']).deliver
      Jobtracker.last_notification = Time.now.utc.to_s
      "Sent notification at #{Jobtracker.last_notification}".oputs
    end
    true
  end

  # Resque entry point. Loops until the status becomes 'stopping': sends due
  # notifications, enqueues every requestor that is due, then sleeps 5s.
  #
  # @param id [String] job id supplied by Resque (unused)
  # @return [true]
  def self.perform(id, *args)
    while Jobtracker.status != 'stopping'
      requestors = Requestor.all
      Jobtracker.run_notifications
      requestors.each do |requestor|
        Jobtracker.update_status("Running requestor #{requestor.name}")
        if requestor.is_due?
          requestor.enqueue!
          Jobtracker.update_status("Enqueued requestor #{requestor.name}")
        end
      end
      sleep 5
    end
    Jobtracker.update_status("told to stop")
    true
  end
end
208
+ end
@@ -0,0 +1,70 @@
1
+ module Mobilize
2
# Mongoid document describing a named piece of data owned by a requestor and
# served by a handler module (the handler name is resolved to a constant via
# humanize.constantize). Contents can be cached through Mongoer, keyed by the
# document id.
class Dataset
  include Mongoid::Document
  include Mongoid::Timestamps
  field :requestor_id, type: String
  field :handler, type: String
  field :name, type: String
  field :url, type: String
  field :size, type: Fixnum
  field :last_cached_at, type: Time
  field :last_read_at, type: Time
  field :cache_expire_at, type: Time

  index({ requestor_id: 1})
  index({ handler: 1})
  index({ name: 1})

  # Remove the cached copy together with the record. This previously named
  # :destroy_cache, a method this class does not define.
  before_destroy :delete_cache

  # Returns the dataset contents: from the cache when one has been written
  # and it has no expiry or has not yet expired, otherwise from the
  # handler's read_by_dst_id.
  def read
    cache_usable = last_cached_at && (cache_expire_at.nil? || cache_expire_at > Time.now.utc)
    return read_cache if cache_usable

    handler.humanize.constantize.read_by_dst_id(id.to_s)
  end

  # @return [Dataset, nil] first dataset with this handler and name
  def Dataset.find_by_handler_and_name(handler,name)
    Dataset.where(handler: handler, name: name).first
  end

  # @return [Dataset] the existing dataset with this handler and name, or a
  #   newly created one
  def Dataset.find_or_create_by_handler_and_name(handler,name)
    Dataset.where(handler: handler, name: name).first ||
      Dataset.create(handler: handler, name: name)
  end

  # @return [Dataset] the existing dataset matching all three attributes, or
  #   a newly created one
  def Dataset.find_or_create_by_requestor_id_and_handler_and_name(requestor_id,handler,name)
    Dataset.where(requestor_id: requestor_id, handler: handler, name: name).first ||
      Dataset.create(requestor_id: requestor_id, handler: handler, name: name)
  end

  # Hands +data+ to the handler's write_by_dst_id, then saves the record.
  #
  # NOTE(review): the handler's return value is ignored, so this returns
  # true even if the handler reported failure - confirm that is intended.
  #
  # @return [true]
  def write(data)
    handler.humanize.constantize.write_by_dst_id(id.to_s,data)
    save!
    true
  end

  # Stamps last_read_at and returns the cached contents.
  #
  # @return [String, nil] as returned by Mongoer.read_by_filename
  def read_cache
    update_attributes(:last_read_at=>Time.now.utc)
    Mongoer.read_by_filename(id.to_s)
  end

  # Stores +string+ in the cache and records cache time, expiry and size.
  #
  # @param string [String] contents to cache
  # @param expire_at [Time, nil] when the cache stops being valid (nil: never)
  # @return [true]
  def write_cache(string,expire_at=nil)
    Mongoer.write_by_filename(id.to_s,string)
    update_attributes(:last_cached_at=>Time.now.utc,:cache_expire_at=>expire_at,:size=>string.length)
    true
  end

  # Deletes the cached copy for this record. This previously referenced an
  # undefined local `dst` and raised NameError.
  #
  # @return [true] as returned by Mongoer.delete_by_filename
  def delete_cache
    Mongoer.delete_by_filename(id.to_s)
  end

end
70
+ end