mobilize-base 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -440,19 +440,19 @@ name>)` and enter values under each header:
440
440
  * write_handler This is where the job writes its data to. For
441
441
  mobilize-base, you should enter "gsheet"
442
442
 
443
- * param_source This is the path to an array of data, as read from a google sheet,
444
- that is relayed to the job.
443
+ * param_sheets This is a comma-delimited list of sheets, relayed to the job,
444
+ which can be used for parameters.
445
445
  The format is `<google docs book>/<google docs sheet>`, so if you
446
- wanted to read from the "output" sheet on the "monthly results" book you
447
- would write in `<monthly results>/<output>`. For a sheet in the Jobspec
446
+ wanted to read from the "output" sheet on the "monthly_results" book you
447
+ would write in `monthly_results/output`. For a sheet in the Jobspec
448
448
  itself you could simply write `output`.
449
449
 
450
- * params This is a hash of data, expressed in a JSON. Not relevant to
451
- mobilize-base
450
+ * params This is a hash of data, expressed as JSON, which can be used
451
+ for parameters.
452
452
 
453
453
  * destination This is the destination for the data, relayed to the job.
454
454
  For a gsheet write_handler, this would be the name of the sheet to be
455
- written to, similar to param_source.
455
+ written to, similar to param_sheets.
456
456
 
457
457
  <a name='section_Start_Run_Test'></a>
458
458
  ### Run Test
@@ -231,17 +231,11 @@ module Mobilize
231
231
 
232
232
  def Gsheeter.read_by_job_id(job_id)
233
233
  j = Job.find(job_id)
234
- r = j.requestor
235
234
  #reserve email account for read
236
235
  email = Gdriver.get_worker_email_by_mongo_id(job_id)
237
236
  return false unless email
238
- source = j.param_source
239
- book,sheet = source.split("/")
240
- #assume jobspec source if none given
241
- source = [r.jobspec_title,source].join("/") if sheet.nil?
242
- tsv = Gsheeter.find_or_create_by_name(source,email).to_tsv
243
- book = nil
244
- return tsv
237
+ #pull tsv from cache
238
+ j.param_sheet_dsts.first.read_cache
245
239
  end
246
240
 
247
241
  def Gsheeter.read_by_dst_id(dst_id,email=nil)
@@ -277,32 +271,32 @@ module Mobilize
277
271
  def Gsheeter.write_by_job_id(job_id)
278
272
  j = Job.find(job_id)
279
273
  r = j.requestor
280
- dest_name = if j.destination.split("/").length==1
274
+ tgt_name = if j.destination.split("/").length==1
281
275
  "#{r.jobspec_title}#{"/"}#{j.destination}"
282
276
  else
283
277
  j.destination
284
278
  end
285
- sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',dest_name)
279
+ sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',tgt_name)
286
280
  sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
287
281
  email = Gdriver.get_worker_email_by_mongo_id(job_id)
288
282
  #return false if there are no emails available
289
283
  return false unless email
290
284
  #create temp tab, write data to it, checksum it against the source
291
- tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',"#{dest_name}_temp")
285
+ tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',"#{tgt_name}_temp")
292
286
  tempsheet_dst.update_attributes(:requestor_id=>r.id.to_s) if tempsheet_dst.requestor_id.nil?
293
287
  tempsheet = Gsheeter.find_or_create_by_dst_id(tempsheet_dst.id.to_s)
294
288
  #tsv is the second to last stage's output (the last is the write)
295
289
  tsv = Dataset.find(j.tasks[j.prior_task]['output_dst_id']).read
296
290
  tempsheet.write(tsv,true,job_id)
297
291
  #delete current sheet, replace it with temp one
298
- sheet = Gsheeter.find_or_create_by_name(dest_name,email)
292
+ sheet = Gsheeter.find_or_create_by_name(tgt_name,email)
299
293
  title = sheet.title
300
294
  #http
301
295
  sheet.delete
302
296
  tempsheet.title = title
303
297
  tempsheet.save
304
298
  sheet_dst.update_attributes(:url=>tempsheet.spreadsheet.human_url)
305
- "Write successful for #{dest_name}".oputs
299
+ "Write successful for #{tgt_name}".oputs
306
300
  return true
307
301
  end
308
302
  end
@@ -193,7 +193,7 @@ module Mobilize
193
193
  requestors = Requestor.all
194
194
  Jobtracker.run_notifications
195
195
  requestors.each do |r|
196
- Jobtracker.update_status("Running requestor #{r.name}")
196
+ Jobtracker.update_status("Checking requestor #{r.name}")
197
197
  if r.is_due?
198
198
  r.enqueue!
199
199
  Jobtracker.update_status("Enqueued requestor #{r.name}")
@@ -14,8 +14,8 @@ module Mobilize
14
14
  field :last_completed_at, type: Time
15
15
  field :read_handler, type: String
16
16
  field :write_handler, type: String
17
- field :param_source, type: String #name of sheet on doc
18
- field :param_hash, type: String #JSON
17
+ field :files, type: String #name of sheet(s) on doc
18
+ field :params, type: String #JSON
19
19
  field :destination, type: String #output destination - could be file, could be sheet
20
20
 
21
21
  index({ requestor_id: 1})
@@ -28,6 +28,19 @@ module Mobilize
28
28
  Mobilize::Resque.find_worker_by_mongo_id(j.id.to_s)
29
29
  end
30
30
 
31
+ def param_sheet_dsts
32
+ j = self
33
+ r = j.requestor
34
+ j.param_sheets.split(",").map do |ps|
35
+ #prepend jobspec title if there is no path separator
36
+ full_ps = ps.index("/") ? ps : [r.jobspec_title,ps].join("/")
37
+ #find or create dataset for this sheet
38
+ dst = Dataset.find_or_create_by_handler_and_name("gsheeter",full_ps)
39
+ dst.update_attributes(:requestor_id=>r.id.to_s) unless dst.requestor_id
40
+ dst
41
+ end
42
+ end
43
+
31
44
  def Job.find_by_name(name)
32
45
  Job.where(:name=>name).first
33
46
  end
@@ -186,11 +199,11 @@ module Mobilize
186
199
 
187
200
  def is_due?
188
201
  j = self
189
- return false if j.is_working? or j.schedule.to_s.starts_with?("after")
202
+ return false if j.is_working? or j.active == false or j.schedule.to_s.starts_with?("after")
190
203
  last_run = j.last_completed_at
191
204
  #check schedule
192
205
  schedule = j.schedule
193
- return true if schedule == 'once' and j.active
206
+ return true if schedule == 'once'
194
207
  #strip the "every" from the front if present
195
208
  schedule = schedule.gsub("every","").gsub("."," ").strip
196
209
  value,unit,operator,job_utctime = schedule.split(" ")
@@ -31,14 +31,17 @@ module Mobilize
31
31
  end
32
32
 
33
33
  def Requestor.jobs_sheet_headers
34
- %w{name active schedule status last_error destination_url read_handler write_handler param_source params destination}
34
+ %w{name active schedule status last_error destination_url read_handler write_handler param_sheets params destination}
35
35
  end
36
36
 
37
37
  def Requestor.perform(id,*args)
38
38
  r = Requestor.find(id.to_s)
39
39
  #reserve email account for read
40
40
  gdrive_email = Gdriver.get_worker_email_by_mongo_id(id)
41
- return false unless gdrive_email
41
+ unless gdrive_email
42
+ "no gdrive_email available for #{r.name}".oputs
43
+ return false
44
+ end
42
45
  jobs_sheet = r.jobs_sheet(gdrive_email)
43
46
  #write headers to sheet
44
47
  Requestor.jobs_sheet_headers.each_with_index do |h,h_i|
@@ -52,7 +55,16 @@ module Mobilize
52
55
  #queue up the jobs that are due and active
53
56
  r.jobs.each do |j|
54
57
  begin
55
- j.enqueue! if j.active and j.is_due?
58
+ if j.active and j.is_due?
59
+ #cache all param_sheets
60
+ j.param_sheet_dsts.each do |psdst|
61
+ #read tsv, write to cache for job to use
62
+ tsv = Gsheeter.find_or_create_by_name(psdst.name,gdrive_email).to_tsv
63
+ r.update_status("caching #{psdst.name}")
64
+ psdst.write_cache(tsv)
65
+ end
66
+ j.enqueue!
67
+ end
56
68
  rescue ScriptError,StandardError => exc
57
69
  #update errors
58
70
  j.update_attributes(:last_error=>exc.to_s,:last_trace=>exc.backtrace.to_s)
@@ -85,7 +97,7 @@ module Mobilize
85
97
  :schedule => rj['schedule'],
86
98
  :read_handler => rj['read_handler'],
87
99
  :write_handler => rj['write_handler'],
88
- :param_source => rj['param_source'],
100
+ :param_sheets => rj['param_sheets'],
89
101
  :params => rj['params'],
90
102
  :destination => rj['destination'])
91
103
  #update laststatus with "Created job for" if job is due
@@ -207,7 +219,7 @@ module Mobilize
207
219
  end
208
220
 
209
221
  def is_due?
210
- r = self
222
+ r = self.reload
211
223
  return false if r.is_working?
212
224
  last_due_time = Time.now.utc - Jobtracker.requestor_refresh_freq
213
225
  return true if r.last_run.nil? or r.last_run < last_due_time
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.0.0"
3
+ VERSION = "1.0.1"
4
4
  end
5
5
  end
@@ -9,6 +9,13 @@ describe "Mobilize" do
9
9
 
10
10
  # enqueues 4 workers on Resque
11
11
  it "runs integration test" do
12
+ puts "clear out test db"
13
+ Mongoid.session(:default).collections.each do |collection|
14
+ unless collection.name =~ /^system\./
15
+ collection.drop
16
+ end
17
+ end
18
+
12
19
  email = Mobilize::Gdriver.owner_email
13
20
 
14
21
  #kill all workers
@@ -19,9 +26,6 @@ describe "Mobilize" do
19
26
  sleep 20
20
27
  assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
21
28
 
22
- #make sure old one is deleted
23
- Mobilize::Requestor.find_or_create_by_email(email).delete
24
-
25
29
  puts "create requestor 'mobilize'"
26
30
  requestor = Mobilize::Requestor.find_or_create_by_email(email)
27
31
  assert requestor.email == email
@@ -31,12 +35,10 @@ describe "Mobilize" do
31
35
  jobspec_title = requestor.jobspec_title
32
36
  books = Mobilize::Gbooker.find_all_by_title(jobspec_title)
33
37
  books.each{|book| book.delete}
34
- #delete old datasets for this specbook
35
- Mobilize::Dataset.all.select{|d| d.name.starts_with?(jobspec_title)}.each{|d| d.delete}
36
38
 
37
- puts "enqueue jobtracker, wait 60s"
39
+ puts "enqueue jobtracker, wait 45s"
38
40
  Mobilize::Jobtracker.start
39
- sleep 60
41
+ sleep 45
40
42
  puts "jobtracker status: #{Mobilize::Jobtracker.status}"
41
43
  puts "status:#{Mobilize::Jobtracker.status}" #!= 'stopped'
42
44
 
@@ -62,10 +64,7 @@ describe "Mobilize" do
62
64
  test_source_tsv = test_source_rows.map{|r| r.join("\t")}.join("\n")
63
65
  test_source_sheet.write(test_source_tsv)
64
66
 
65
- puts "add row to jobs sheet, wait 100s"
66
-
67
- #delete existing Jobs from the db
68
- Mobilize::Job.each{|j| j.delete}
67
+ puts "add row to jobs sheet, wait 60s"
69
68
 
70
69
  jobs_sheet = jobs_sheets.first
71
70
 
@@ -77,7 +76,7 @@ describe "Mobilize" do
77
76
  "destination_url" => "",
78
77
  "read_handler" => "gsheeter",
79
78
  "write_handler" => "gsheeter",
80
- "param_source" => "test_source",
79
+ "param_sheets" => "test_source",
81
80
  "params" => "",
82
81
  "destination" => "test_destination"},
83
82
  #run after the first
@@ -89,7 +88,7 @@ describe "Mobilize" do
89
88
  "destination_url" => "",
90
89
  "read_handler" => "gsheeter",
91
90
  "write_handler" => "gsheeter",
92
- "param_source" => "test_source",
91
+ "param_sheets" => "test_source",
93
92
  "params" => "",
94
93
  "destination" => "test_destination2"}
95
94
  ]
@@ -105,9 +104,9 @@ describe "Mobilize" do
105
104
 
106
105
  puts "job row added, force enqueued requestor"
107
106
  requestor.enqueue!
108
- sleep 100
107
+ sleep 60
109
108
 
110
- puts "jobtracker posted test source data to test destination, and checksum succeeded?"
109
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
111
110
  test_destination_sheet = Mobilize::Gsheeter.find_or_create_by_name("#{jobspec_title}/test_destination",email)
112
111
 
113
112
  assert test_destination_sheet.to_tsv == test_source_sheet.to_tsv
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-24 00:00:00.000000000 Z
12
+ date: 2012-11-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake