mobilize-base 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -440,19 +440,19 @@ name>)` and enter values under each header:
440
440
  * write_handler This is where the job writes its data to. For
441
441
  mobilize-base, you should enter "gsheet"
442
442
 
443
- * param_source This is the path to an array of data, as read from a google sheet,
444
- that is relayed to the job.
443
+ * param_sheets This is a comma-delimited list of sheets, relayed to the job,
444
+ which can be used for parameters.
445
445
  The format is `<google docs book>/<google docs sheet>`, so if you
446
- wanted to read from the "output" sheet on the "monthly results" book you
447
- would write in `<monthly results>/<output>`. For a sheet in the Jobspec
446
+ wanted to read from the "output" sheet on the "monthly_results" book you
447
+ would write in `monthly_results/output`. For a sheet in the Jobspec
448
448
  itself you could write simply `output`.
449
449
 
450
- * params This is a hash of data, expressed in a JSON. Not relevant to
451
- mobilize-base
450
+ * params This is a hash of data, expressed as JSON, which can be used
451
+ for parameters.
452
452
 
453
453
  * destination This is the destination for the data, relayed to the job.
454
454
  For a gsheet write_handler, this would be the name of the sheet to be
455
- written to, similar to param_source.
455
+ written to, similar to param_sheets.
456
456
 
457
457
  <a name='section_Start_Run_Test'></a>
458
458
  ### Run Test
@@ -231,17 +231,11 @@ module Mobilize
231
231
 
232
232
  def Gsheeter.read_by_job_id(job_id)
233
233
  j = Job.find(job_id)
234
- r = j.requestor
235
234
  #reserve email account for read
236
235
  email = Gdriver.get_worker_email_by_mongo_id(job_id)
237
236
  return false unless email
238
- source = j.param_source
239
- book,sheet = source.split("/")
240
- #assume jobspec source if none given
241
- source = [r.jobspec_title,source].join("/") if sheet.nil?
242
- tsv = Gsheeter.find_or_create_by_name(source,email).to_tsv
243
- book = nil
244
- return tsv
237
+ #pull tsv from cache
238
+ j.param_sheet_dsts.first.read_cache
245
239
  end
246
240
 
247
241
  def Gsheeter.read_by_dst_id(dst_id,email=nil)
@@ -277,32 +271,32 @@ module Mobilize
277
271
  def Gsheeter.write_by_job_id(job_id)
278
272
  j = Job.find(job_id)
279
273
  r = j.requestor
280
- dest_name = if j.destination.split("/").length==1
274
+ tgt_name = if j.destination.split("/").length==1
281
275
  "#{r.jobspec_title}#{"/"}#{j.destination}"
282
276
  else
283
277
  j.destination
284
278
  end
285
- sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',dest_name)
279
+ sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',tgt_name)
286
280
  sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
287
281
  email = Gdriver.get_worker_email_by_mongo_id(job_id)
288
282
  #return false if there are no emails available
289
283
  return false unless email
290
284
  #create temp tab, write data to it, checksum it against the source
291
- tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',"#{dest_name}_temp")
285
+ tempsheet_dst = Dataset.find_or_create_by_handler_and_name('gsheeter',"#{tgt_name}_temp")
292
286
  tempsheet_dst.update_attributes(:requestor_id=>r.id.to_s) if tempsheet_dst.requestor_id.nil?
293
287
  tempsheet = Gsheeter.find_or_create_by_dst_id(tempsheet_dst.id.to_s)
294
288
  #tsv is the second to last stage's output (the last is the write)
295
289
  tsv = Dataset.find(j.tasks[j.prior_task]['output_dst_id']).read
296
290
  tempsheet.write(tsv,true,job_id)
297
291
  #delete current sheet, replace it with temp one
298
- sheet = Gsheeter.find_or_create_by_name(dest_name,email)
292
+ sheet = Gsheeter.find_or_create_by_name(tgt_name,email)
299
293
  title = sheet.title
300
294
  #http
301
295
  sheet.delete
302
296
  tempsheet.title = title
303
297
  tempsheet.save
304
298
  sheet_dst.update_attributes(:url=>tempsheet.spreadsheet.human_url)
305
- "Write successful for #{dest_name}".oputs
299
+ "Write successful for #{tgt_name}".oputs
306
300
  return true
307
301
  end
308
302
  end
@@ -193,7 +193,7 @@ module Mobilize
193
193
  requestors = Requestor.all
194
194
  Jobtracker.run_notifications
195
195
  requestors.each do |r|
196
- Jobtracker.update_status("Running requestor #{r.name}")
196
+ Jobtracker.update_status("Checking requestor #{r.name}")
197
197
  if r.is_due?
198
198
  r.enqueue!
199
199
  Jobtracker.update_status("Enqueued requestor #{r.name}")
@@ -14,8 +14,8 @@ module Mobilize
14
14
  field :last_completed_at, type: Time
15
15
  field :read_handler, type: String
16
16
  field :write_handler, type: String
17
- field :param_source, type: String #name of sheet on doc
18
- field :param_hash, type: String #JSON
17
+ field :files, type: String #name of sheet(s) on doc
18
+ field :params, type: String #JSON
19
19
  field :destination, type: String #output destination - could be file, could be sheet
20
20
 
21
21
  index({ requestor_id: 1})
@@ -28,6 +28,19 @@ module Mobilize
28
28
  Mobilize::Resque.find_worker_by_mongo_id(j.id.to_s)
29
29
  end
30
30
 
31
+ def param_sheet_dsts
32
+ j = self
33
+ r = j.requestor
34
+ j.param_sheets.split(",").map do |ps|
35
+ #prepend jobspec title if there is no path separator
36
+ full_ps = ps.index("/") ? ps : [r.jobspec_title,ps].join("/")
37
+ #find or create dataset for this sheet
38
+ dst = Dataset.find_or_create_by_handler_and_name("gsheeter",full_ps)
39
+ dst.update_attributes(:requestor_id=>r.id.to_s) unless dst.requestor_id
40
+ dst
41
+ end
42
+ end
43
+
31
44
  def Job.find_by_name(name)
32
45
  Job.where(:name=>name).first
33
46
  end
@@ -186,11 +199,11 @@ module Mobilize
186
199
 
187
200
  def is_due?
188
201
  j = self
189
- return false if j.is_working? or j.schedule.to_s.starts_with?("after")
202
+ return false if j.is_working? or j.active == false or j.schedule.to_s.starts_with?("after")
190
203
  last_run = j.last_completed_at
191
204
  #check schedule
192
205
  schedule = j.schedule
193
- return true if schedule == 'once' and j.active
206
+ return true if schedule == 'once'
194
207
  #strip the "every" from the front if present
195
208
  schedule = schedule.gsub("every","").gsub("."," ").strip
196
209
  value,unit,operator,job_utctime = schedule.split(" ")
@@ -31,14 +31,17 @@ module Mobilize
31
31
  end
32
32
 
33
33
  def Requestor.jobs_sheet_headers
34
- %w{name active schedule status last_error destination_url read_handler write_handler param_source params destination}
34
+ %w{name active schedule status last_error destination_url read_handler write_handler param_sheets params destination}
35
35
  end
36
36
 
37
37
  def Requestor.perform(id,*args)
38
38
  r = Requestor.find(id.to_s)
39
39
  #reserve email account for read
40
40
  gdrive_email = Gdriver.get_worker_email_by_mongo_id(id)
41
- return false unless gdrive_email
41
+ unless gdrive_email
42
+ "no gdrive_email available for #{r.name}".oputs
43
+ return false
44
+ end
42
45
  jobs_sheet = r.jobs_sheet(gdrive_email)
43
46
  #write headers to sheet
44
47
  Requestor.jobs_sheet_headers.each_with_index do |h,h_i|
@@ -52,7 +55,16 @@ module Mobilize
52
55
  #queue up the jobs that are due and active
53
56
  r.jobs.each do |j|
54
57
  begin
55
- j.enqueue! if j.active and j.is_due?
58
+ if j.active and j.is_due?
59
+ #cache all param_sheets
60
+ j.param_sheet_dsts.each do |psdst|
61
+ #read tsv, write to cache for job to use
62
+ tsv = Gsheeter.find_or_create_by_name(psdst.name,gdrive_email).to_tsv
63
+ r.update_status("caching #{psdst.name}")
64
+ psdst.write_cache(tsv)
65
+ end
66
+ j.enqueue!
67
+ end
56
68
  rescue ScriptError,StandardError => exc
57
69
  #update errors
58
70
  j.update_attributes(:last_error=>exc.to_s,:last_trace=>exc.backtrace.to_s)
@@ -85,7 +97,7 @@ module Mobilize
85
97
  :schedule => rj['schedule'],
86
98
  :read_handler => rj['read_handler'],
87
99
  :write_handler => rj['write_handler'],
88
- :param_source => rj['param_source'],
100
+ :param_sheets => rj['param_sheets'],
89
101
  :params => rj['params'],
90
102
  :destination => rj['destination'])
91
103
  #update laststatus with "Created job for" if job is due
@@ -207,7 +219,7 @@ module Mobilize
207
219
  end
208
220
 
209
221
  def is_due?
210
- r = self
222
+ r = self.reload
211
223
  return false if r.is_working?
212
224
  last_due_time = Time.now.utc - Jobtracker.requestor_refresh_freq
213
225
  return true if r.last_run.nil? or r.last_run < last_due_time
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.0.0"
3
+ VERSION = "1.0.1"
4
4
  end
5
5
  end
@@ -9,6 +9,13 @@ describe "Mobilize" do
9
9
 
10
10
  # enqueues 4 workers on Resque
11
11
  it "runs integration test" do
12
+ puts "clear out test db"
13
+ Mongoid.session(:default).collections.each do |collection|
14
+ unless collection.name =~ /^system\./
15
+ collection.drop
16
+ end
17
+ end
18
+
12
19
  email = Mobilize::Gdriver.owner_email
13
20
 
14
21
  #kill all workers
@@ -19,9 +26,6 @@ describe "Mobilize" do
19
26
  sleep 20
20
27
  assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
21
28
 
22
- #make sure old one is deleted
23
- Mobilize::Requestor.find_or_create_by_email(email).delete
24
-
25
29
  puts "create requestor 'mobilize'"
26
30
  requestor = Mobilize::Requestor.find_or_create_by_email(email)
27
31
  assert requestor.email == email
@@ -31,12 +35,10 @@ describe "Mobilize" do
31
35
  jobspec_title = requestor.jobspec_title
32
36
  books = Mobilize::Gbooker.find_all_by_title(jobspec_title)
33
37
  books.each{|book| book.delete}
34
- #delete old datasets for this specbook
35
- Mobilize::Dataset.all.select{|d| d.name.starts_with?(jobspec_title)}.each{|d| d.delete}
36
38
 
37
- puts "enqueue jobtracker, wait 60s"
39
+ puts "enqueue jobtracker, wait 45s"
38
40
  Mobilize::Jobtracker.start
39
- sleep 60
41
+ sleep 45
40
42
  puts "jobtracker status: #{Mobilize::Jobtracker.status}"
41
43
  puts "status:#{Mobilize::Jobtracker.status}" #!= 'stopped'
42
44
 
@@ -62,10 +64,7 @@ describe "Mobilize" do
62
64
  test_source_tsv = test_source_rows.map{|r| r.join("\t")}.join("\n")
63
65
  test_source_sheet.write(test_source_tsv)
64
66
 
65
- puts "add row to jobs sheet, wait 100s"
66
-
67
- #delete existing Jobs from the db
68
- Mobilize::Job.each{|j| j.delete}
67
+ puts "add row to jobs sheet, wait 60s"
69
68
 
70
69
  jobs_sheet = jobs_sheets.first
71
70
 
@@ -77,7 +76,7 @@ describe "Mobilize" do
77
76
  "destination_url" => "",
78
77
  "read_handler" => "gsheeter",
79
78
  "write_handler" => "gsheeter",
80
- "param_source" => "test_source",
79
+ "param_sheets" => "test_source",
81
80
  "params" => "",
82
81
  "destination" => "test_destination"},
83
82
  #run after the first
@@ -89,7 +88,7 @@ describe "Mobilize" do
89
88
  "destination_url" => "",
90
89
  "read_handler" => "gsheeter",
91
90
  "write_handler" => "gsheeter",
92
- "param_source" => "test_source",
91
+ "param_sheets" => "test_source",
93
92
  "params" => "",
94
93
  "destination" => "test_destination2"}
95
94
  ]
@@ -105,9 +104,9 @@ describe "Mobilize" do
105
104
 
106
105
  puts "job row added, force enqueued requestor"
107
106
  requestor.enqueue!
108
- sleep 100
107
+ sleep 60
109
108
 
110
- puts "jobtracker posted test source data to test destination, and checksum succeeded?"
109
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
111
110
  test_destination_sheet = Mobilize::Gsheeter.find_or_create_by_name("#{jobspec_title}/test_destination",email)
112
111
 
113
112
  assert test_destination_sheet.to_tsv == test_source_sheet.to_tsv
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-24 00:00:00.000000000 Z
12
+ date: 2012-11-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake