mobilize-base 1.36 → 1.293

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/README.md +666 -1
  2. data/lib/mobilize-base.rb +1 -12
  3. data/lib/mobilize-base/extensions/array.rb +3 -8
  4. data/lib/mobilize-base/extensions/google_drive/acl.rb +1 -1
  5. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +1 -2
  6. data/lib/mobilize-base/extensions/google_drive/file.rb +37 -11
  7. data/lib/mobilize-base/extensions/string.rb +6 -11
  8. data/lib/mobilize-base/extensions/yaml.rb +7 -10
  9. data/lib/mobilize-base/handlers/gbook.rb +38 -25
  10. data/lib/mobilize-base/handlers/gdrive.rb +4 -20
  11. data/lib/mobilize-base/handlers/gfile.rb +10 -64
  12. data/lib/mobilize-base/handlers/gridfs.rb +24 -19
  13. data/lib/mobilize-base/handlers/gsheet.rb +29 -45
  14. data/lib/mobilize-base/handlers/resque.rb +10 -17
  15. data/lib/mobilize-base/jobtracker.rb +196 -22
  16. data/lib/mobilize-base/models/job.rb +77 -107
  17. data/lib/mobilize-base/models/runner.rb +122 -36
  18. data/lib/mobilize-base/models/stage.rb +37 -18
  19. data/lib/mobilize-base/tasks.rb +13 -50
  20. data/lib/mobilize-base/version.rb +1 -1
  21. data/lib/samples/gdrive.yml +0 -15
  22. data/lib/samples/gridfs.yml +3 -0
  23. data/lib/samples/gsheet.yml +4 -4
  24. data/lib/samples/jobtracker.yml +6 -0
  25. data/mobilize-base.gemspec +3 -3
  26. data/test/base_job_rows.yml +11 -0
  27. data/test/mobilize-base_test.rb +106 -0
  28. data/test/test_base_1.yml +3 -0
  29. data/test/test_helper.rb +0 -155
  30. metadata +24 -36
  31. data/lib/mobilize-base/extensions/time.rb +0 -20
  32. data/lib/mobilize-base/helpers/job_helper.rb +0 -54
  33. data/lib/mobilize-base/helpers/jobtracker_helper.rb +0 -143
  34. data/lib/mobilize-base/helpers/runner_helper.rb +0 -83
  35. data/lib/mobilize-base/helpers/stage_helper.rb +0 -38
  36. data/lib/samples/gfile.yml +0 -9
  37. data/test/fixtures/base1_stage1.in.yml +0 -10
  38. data/test/fixtures/integration_expected.yml +0 -25
  39. data/test/fixtures/integration_jobs.yml +0 -12
  40. data/test/fixtures/is_due.yml +0 -97
  41. data/test/integration/mobilize-base_test.rb +0 -57
  42. data/test/unit/mobilize-base_test.rb +0 -33
@@ -1,38 +1,43 @@
1
- require 'tempfile'
2
1
  module Mobilize
3
2
  module Gridfs
4
3
  def Gridfs.config
5
4
  Base.config('gridfs')
6
5
  end
7
6
 
8
- def Gridfs.read_by_dataset_path(dst_path,*args)
9
- curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
10
- zs = curr_file.data if curr_file
11
- return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
7
+ def Gridfs.grid
8
+ session = ::Mongoid.configure.sessions['default']
9
+ database_name = session['database']
10
+ host,port = session['hosts'].first.split(":")
11
+ return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
12
12
  end
13
13
 
14
- def Gridfs.write_by_dataset_path(dst_path,string,*args)
14
+ def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
15
+ begin
16
+ zs=Gridfs.grid.open(dst_path,'r').read
17
+ return ::Zlib::Inflate.inflate(zs)
18
+ rescue
19
+ return nil
20
+ end
21
+ end
22
+
23
+ def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
15
24
  zs = ::Zlib::Deflate.deflate(string)
16
25
  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
17
- #find and delete existing file
18
- curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
19
- curr_zs = curr_file.data if curr_file
20
- #overwrite when there is a change
26
+ curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
27
+ #write a new version when there is a change
21
28
  if curr_zs != zs
22
- Mongoid::GridFs.delete(curr_file.id) if curr_file
23
- #create temp file w zstring
24
- temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
25
- temp_file.print(zs)
26
- temp_file.close
27
- #put data in file
28
- Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
29
+ Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
29
30
  end
30
31
  return true
31
32
  end
32
33
 
33
34
  def Gridfs.delete(dst_path)
34
- curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
35
- curr_file.delete
35
+ begin
36
+ Gridfs.grid.delete(dst_path)
37
+ return true
38
+ rescue
39
+ return nil
40
+ end
36
41
  end
37
42
  end
38
43
  end
@@ -81,16 +81,15 @@ module Mobilize
81
81
 
82
82
  def Gsheet.write_temp(target_path,gdrive_slot,tsv)
83
83
  #find and delete temp sheet, if any
84
- temp_book_title = target_path.gridsafe
85
- #create book and sheet
86
- temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
87
- rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
88
- temp_sheet = temp_book.add_worksheet("temp",rows,cols)
84
+ temp_path = [target_path.gridsafe,"temp"].join("/")
85
+ temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
86
+ temp_sheet.delete if temp_sheet
87
+ #write data to temp sheet
88
+ temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
89
89
  #this step has a tendency to fail; if it does,
90
90
  #don't fail the stage, mark it as false
91
91
  begin
92
- gdrive_user = gdrive_slot.split("@").first
93
- temp_sheet.write(tsv,gdrive_user)
92
+ temp_sheet.write(tsv,Gdrive.owner_name)
94
93
  rescue
95
94
  return nil
96
95
  end
@@ -109,7 +108,7 @@ module Mobilize
109
108
  #only give the user edit permissions if they're the ones
110
109
  #creating it
111
110
  target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
112
- target_sheet.spreadsheet.update_acl(u.email) unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
111
+ target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
113
112
  target_sheet.delete_sheet1
114
113
  end
115
114
  #pass it crop param to determine whether to shrink target sheet to fit data
@@ -127,43 +126,28 @@ module Mobilize
127
126
  s = Stage.where(:path=>stage_path).first
128
127
  u = s.job.runner.user
129
128
  crop = s.params['crop'] || true
130
- retries = 0
131
- stdout,stderr = []
132
- while stdout.nil? and stderr.nil? and retries < Gdrive.max_file_write_retries
133
- begin
134
- #get tsv to write from stage
135
- source = s.sources(gdrive_slot).first
136
- raise "Need source for gsheet write" unless source
137
- tsv = source.read(u.name,gdrive_slot)
138
- raise "No data source found for #{source.url}" unless tsv
139
- tsv_row_count = tsv.to_s.split("\n").length
140
- tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
141
- tsv_cell_count = tsv_row_count * tsv_col_count
142
- if tsv_cell_count > Gsheet.max_cells
143
- raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
144
- end
145
- stdout = if tsv_row_count == 0
146
- #soft error; no data to write. Stage will complete.
147
- "Write skipped for #{s.target.url}"
148
- else
149
- Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
150
- #update status
151
- "Write successful for #{s.target.url}"
152
- end
153
- Gdrive.unslot_worker_by_path(stage_path)
154
- stderr = nil
155
- s.update_status(stdout)
156
- signal = 0
157
- rescue => exc
158
- if retries < Gdrive.max_file_write_retries
159
- retries +=1
160
- sleep Gdrive.file_write_retry_delay
161
- else
162
- stdout = nil
163
- stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
164
- signal = 500
165
- end
166
- end
129
+ begin
130
+ #get tsv to write from stage
131
+ source = s.sources(gdrive_slot).first
132
+ raise "Need source for gsheet write" unless source
133
+ tsv = source.read(u.name,gdrive_slot)
134
+ raise "No data source found for #{source.url}" unless tsv
135
+ stdout = if tsv.to_s.length == 0
136
+ #soft error; no data to write. Stage will complete.
137
+ "Write skipped for #{s.target.url}"
138
+ else
139
+ Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
140
+ #update status
141
+ "Write successful for #{s.target.url}"
142
+ end
143
+ Gdrive.unslot_worker_by_path(stage_path)
144
+ stderr = nil
145
+ s.update_status(stdout)
146
+ signal = 0
147
+ rescue => exc
148
+ stdout = nil
149
+ stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
150
+ signal = 500
167
151
  end
168
152
  return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
169
153
  end
@@ -19,17 +19,13 @@ module Mobilize
19
19
  def Resque.workers(state="all")
20
20
  workers = ::Resque.workers.select{|w| w.queues.first == Resque.queue_name}
21
21
  return workers if state == 'all'
22
- working_workers = workers.select{|w| w.job['queue'] == Resque.queue_name}
22
+ working_workers = workers.select{|w| w.job['queue']== Resque.queue_name}
23
23
  return working_workers if state == 'working'
24
24
  idle_workers = workers.select{|w| w.job['queue'].nil?}
25
25
  return idle_workers if state == 'idle'
26
26
  stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
27
27
  return stale_workers if state == 'stale'
28
- timeout_workers = workers.select do |w|
29
- w.job['payload'] and
30
- w.job['payload']['class']!='Jobtracker' and
31
- w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)
32
- end
28
+ timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['runat'] < (Time.now.utc - Jobtracker.max_run_time)}
33
29
  return timeout_workers if state == 'timeout'
34
30
  raise "invalid state #{state}"
35
31
  end
@@ -117,16 +113,10 @@ module Mobilize
117
113
  stage_path = f['payload']['args'].first
118
114
  email = begin
119
115
  s = Stage.where(:path=>stage_path).first
120
- if s.params['notify'].to_s=="false"
121
- next
122
- elsif s.params['notify'].to_s.index("@")
123
- s.params['notify']
124
- else
125
- s.job.runner.user.email
126
- end
127
- rescue ScriptError, StandardError
128
- #jobs without stages are sent to admins
129
- [Gdrive.admin_group_name,Gdrive.domain].join("@")
116
+ s.job.runner.user.email
117
+ rescue
118
+ #jobs without stages are sent to first admin
119
+ Jobtracker.admin_emails.first
130
120
  end
131
121
  exc_to_s = f['error']
132
122
  if fjobs[email].nil?
@@ -148,7 +138,10 @@ module Mobilize
148
138
 
149
139
  def Resque.start_workers(count=1)
150
140
  count.times do
151
- "(cd #{Base.root};rake mobilize:work[#{Base.env}]) >> #{Resque.log_path} 2>&1 &".bash
141
+ dir_envs = "MOBILIZE_ENV=#{Base.env} " +
142
+ "MOBILIZE_CONFIG_DIR=#{Base.config_dir} " +
143
+ "MOBILIZE_LOG_DIR=#{Base.log_dir}"
144
+ "(cd #{Base.root};rake #{dir_envs} mobilize_base:work) >> #{Resque.log_path} 2>&1 &".bash
152
145
  end
153
146
  end
154
147
 
@@ -1,13 +1,152 @@
1
1
  module Mobilize
2
2
  module Jobtracker
3
- #adds convenience methods
4
- require "#{File.dirname(__FILE__)}/helpers/jobtracker_helper"
3
+ def Jobtracker.config
4
+ Base.config('jobtracker')
5
+ end
6
+
7
+ #modify this to increase the frequency of request cycles
8
+ def Jobtracker.cycle_freq
9
+ Jobtracker.config['cycle_freq']
10
+ end
11
+
12
+ #frequency of notifications
13
+ def Jobtracker.notification_freq
14
+ Jobtracker.config['notification_freq']
15
+ end
16
+
17
+ def Jobtracker.runner_read_freq
18
+ Jobtracker.config['runner_read_freq']
19
+ end
20
+
21
+ #long running tolerance
22
+ def Jobtracker.max_run_time
23
+ Jobtracker.config['max_run_time']
24
+ end
25
+
26
+ def Jobtracker.admins
27
+ Jobtracker.config['admins']
28
+ end
29
+
30
+ def Jobtracker.admin_emails
31
+ Jobtracker.admins.map{|a| a['email'] }
32
+ end
33
+
34
+ def Jobtracker.worker
35
+ Resque.find_worker_by_path("jobtracker")
36
+ end
37
+
38
+ def Jobtracker.workers(state="all")
39
+ Resque.workers(state)
40
+ end
41
+
42
+ def Jobtracker.status
43
+ args = Jobtracker.get_args
44
+ return args['status'] if args
45
+ job = Resque.jobs.select{|j| j['args'].first=='jobtracker'}.first
46
+ return 'queued' if job
47
+ return 'stopped'
48
+ end
49
+
50
+ def Jobtracker.update_status(msg)
51
+ #Jobtracker has no persistent database state
52
+ Resque.set_worker_args_by_path("jobtracker",{'status'=>msg})
53
+ return true
54
+ end
55
+
56
+ def Jobtracker.restart
57
+ Jobtracker.stop!
58
+ Jobtracker.start
59
+ end
60
+
61
+ def Jobtracker.set_args(args)
62
+ Resque.set_worker_args(Jobtracker.worker,args)
63
+ return true
64
+ end
65
+
66
+ def Jobtracker.get_args
67
+ Resque.get_worker_args(Jobtracker.worker)
68
+ end
69
+
70
+ def Jobtracker.kill_workers
71
+ Resque.kill_workers
72
+ end
73
+
74
+ def Jobtracker.kill_idle_workers
75
+ Resque.kill_idle_workers
76
+ end
77
+
78
+ def Jobtracker.kill_idle_and_stale_workers
79
+ Resque.kill_idle_and_stale_workers
80
+ end
81
+
82
+ def Jobtracker.prep_workers
83
+ Resque.prep_workers
84
+ end
85
+
86
+ def Jobtracker.failures
87
+ Resque.failures
88
+ end
89
+
90
+ def Jobtracker.start
91
+ if Jobtracker.status!='stopped'
92
+ Jobtracker.update_status("Jobtracker still #{Jobtracker.status}")
93
+ else
94
+ #make sure that workers are running and at the right number
95
+ #Resque.prep_workers
96
+ #queue up the jobtracker (starts the perform method)
97
+ Jobtracker.enqueue!
98
+ end
99
+ return true
100
+ end
101
+
102
+ def Jobtracker.enqueue!
103
+ ::Resque::Job.create(Resque.queue_name, Jobtracker, 'jobtracker',{})
104
+ end
105
+
106
+ def Jobtracker.restart!
107
+ Jobtracker.stop!
108
+ Jobtracker.start
109
+ return true
110
+ end
111
+
112
+ def Jobtracker.restart_workers!
113
+ Jobtracker.kill_workers
114
+ sleep 10
115
+ Jobtracker.prep_workers
116
+ Jobtracker.update_status("put workers back on the queue")
117
+ end
118
+
119
+ def Jobtracker.stop!
120
+ #send signal for Jobtracker to check for
121
+ Jobtracker.update_status('stopping')
122
+ sleep 5
123
+ i=0
124
+ while Jobtracker.status=='stopping'
125
+ Jobtracker.update_status("#{Jobtracker.to_s} still on queue, waiting")
126
+ sleep 5
127
+ i+=1
128
+ end
129
+ return true
130
+ end
131
+
132
+ def Jobtracker.last_notification
133
+ return Jobtracker.get_args["last_notification"] if Jobtracker.get_args
134
+ end
135
+
136
+ def Jobtracker.last_notification=(time)
137
+ Jobtracker.set_args({"last_notification"=>time})
138
+ end
139
+
140
+ def Jobtracker.notif_due?
141
+ last_duetime = Time.now.utc - Jobtracker.notification_freq
142
+ return (Jobtracker.last_notification.to_s.length==0 || Jobtracker.last_notification.to_datetime < last_duetime)
143
+ end
5
144
 
6
145
  def Jobtracker.max_run_time_workers
7
146
  #return workers who have been cranking away for 6+ hours
8
147
  workers = Jobtracker.workers('working').select do |w|
9
- w.job['run_at'].to_s.length>0 and
10
- (Time.now.utc - Time.parse(w.job['run_at'])) > Jobtracker.max_run_time
148
+ w.job['runat'].to_s.length>0 and
149
+ (Time.now.utc - Time.parse(w.job['runat'])) > Jobtracker.max_run_time
11
150
  end
12
151
  return workers
13
152
  end
@@ -35,36 +174,24 @@ module Mobilize
35
174
  end
36
175
  end.flatten.join("\n\n")
37
176
  u = User.where(:name=>email.split("@").first).first
38
- if u
39
- runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
40
- n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
41
- end
177
+ runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
178
+ n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
42
179
  n['to'] = email
43
- n['bcc'] = [Gdrive.admin_group_name,Gdrive.domain].join("@")
180
+ n['bcc'] = Jobtracker.admin_emails.join(",")
44
181
  notifs << n
45
182
  end
46
183
  end
47
184
  lws = Jobtracker.max_run_time_workers
48
185
  if lws.length>0
49
- bod = begin
50
- lws.map{|w| w.job['payload']['args']}.first.join("\n")
51
- rescue
52
- "Failed to get job names"
53
- end
54
186
  n = {}
55
187
  n['subject'] = "#{lws.length.to_s} max run time jobs"
56
- n['body'] = bod
57
- n['to'] = [Gdrive.admin_group_name,Gdrive.domain].join("@")
188
+ n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
189
+ n['to'] = Jobtracker.admin_emails.join(",")
58
190
  notifs << n
59
191
  end
60
192
  #deliver each email generated
61
193
  notifs.each do |notif|
62
- begin
63
- Email.write(notif).deliver
64
- rescue
65
- #log email on failure
66
- Jobtracker.update_status("Failed to deliver #{notif.to_s}")
67
- end
194
+ Email.write(notif).deliver
68
195
  end
69
196
  #update notification time so JT knows to wait a while
70
197
  Jobtracker.last_notification = Time.now.utc.to_s
@@ -116,5 +243,52 @@ module Mobilize
116
243
  end.to_s.strip
117
244
  Time.parse(deploy_time)
118
245
  end
246
+
247
+ #test methods
248
+ def Jobtracker.restart_test_redis
249
+ Jobtracker.stop_test_redis
250
+ if !system("which redis-server")
251
+ raise "** can't find `redis-server` in your path, you need redis to run Resque and Mobilize"
252
+ end
253
+ "redis-server #{Base.root}/test/redis-test.conf".bash
254
+ end
255
+
256
+ def Jobtracker.stop_test_redis
257
+ processes = `ps -A -o pid,command | grep [r]edis-test`.split($/)
258
+ pids = processes.map { |process| process.split(" ")[0] }
259
+ puts "Killing test redis server..."
260
+ pids.each { |pid| Process.kill("TERM", pid.to_i) }
261
+ puts "removing redis db dump file"
262
+ sleep 5
263
+ `rm -f #{Base.root}/test/dump.rdb #{Base.root}/test/dump-cluster.rdb`
264
+ end
265
+
266
+ def Jobtracker.set_test_env
267
+ ENV['MOBILIZE_ENV']='test'
268
+ ::Resque.redis="localhost:9736"
269
+ mongoid_config_path = "#{Base.root}/config/mobilize/mongoid.yml"
270
+ Mongoid.load!(mongoid_config_path, Base.env)
271
+ end
272
+
273
+ def Jobtracker.drop_test_db
274
+ Jobtracker.set_test_env
275
+ Mongoid.session(:default).collections.each do |collection|
276
+ unless collection.name =~ /^system\./
277
+ collection.drop
278
+ end
279
+ end
280
+ end
281
+
282
+ def Jobtracker.build_test_runner(user_name)
283
+ Jobtracker.set_test_env
284
+ u = User.where(:name=>user_name).first
285
+ Jobtracker.update_status("delete old books and datasets")
286
+ # delete any old runner from previous test runs
287
+ gdrive_slot = Gdrive.owner_email
288
+ u.runner.gsheet(gdrive_slot).spreadsheet.delete
289
+ Jobtracker.update_status("enqueue jobtracker, wait 45s")
290
+ Mobilize::Jobtracker.start
291
+ sleep 45
292
+ end
119
293
  end
120
294
  end