mobilize-base 1.2 → 1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -220,9 +220,8 @@ production:
220
220
 
221
221
  gsheet.yml needs:
222
222
  * max_cells, which is the number of cells a sheet is allowed to have
223
- written to it at one time. Default is 400k cells, which is the max per
224
- book. Google Drive will throw its own exception if
225
- you try to write more than that.
223
+ written to it at one time. Default is 50k cells, which is roughly the
224
+ most you can write before Google Drive API errors become likely.
226
225
  * Because Google Docs ties date formatting to the Locale for the
227
226
  spreadsheet, there are 2 date format parameters:
228
227
  * read_date_format, which is the format that should be read FROM google
@@ -356,22 +355,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
356
355
  Mobilize stores cached data in MongoDB Gridfs.
357
356
  It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
358
357
 
359
- * max_versions - the number of __different__ versions of data to keep
360
- for a given cache. Default is 10. This is meant mostly to allow you to
361
- restore Runners from cache if necessary.
362
358
  * max_compressed_write_size - the amount of compressed data Gridfs will
363
359
  allow. If you try to write more than this, an exception will be thrown.
364
360
 
365
361
  ``` yml
366
362
  ---
367
363
  development:
368
- max_versions: 10 #number of versions of cache to keep in gridfs
369
364
  max_compressed_write_size: 1000000000 #~1GB
370
365
  test:
371
- max_versions: 10 #number of versions of cache to keep in gridfs
372
366
  max_compressed_write_size: 1000000000 #~1GB
373
367
  production:
374
- max_versions: 10 #number of versions of cache to keep in gridfs
375
368
  max_compressed_write_size: 1000000000 #~1GB
376
369
  ```
377
370
 
@@ -564,8 +557,14 @@ the Runner itself.
564
557
  and "base1.out" for the second test. The first
565
558
  takes the output from the first stage and the second reads it straight
566
559
  from the referenced sheet.
567
- * All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
568
- giving up.
560
+ * All stages accept retry parameters:
561
+ * retries: an integer specifying the number of times that the system will try it again before giving up.
562
+ * delay: an integer specifying the number of seconds between retries.
563
+ * always_on: if false, turns the job off on stage failures.
564
+ Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
565
+ * notify: by default, the stage owner will be notified on failure.
566
+ * if false, will not notify the stage owner in the event of a failure.
567
+ * If it's an email address, will email the specified person.
569
568
  * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
570
569
  * The tab will be headed "response" and will contain the exception and backtrace for the error.
571
570
  * The test uses "Requestor_mobilize(test)/base1.out" and
@@ -13,7 +13,7 @@ module GoogleDrive
13
13
  f = self
14
14
  #admin includes workers
15
15
  return true if f.has_admin_acl?
16
- accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
16
+ accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
17
17
  accounts.each do |email|
18
18
  f.update_acl(email)
19
19
  end
@@ -21,9 +21,9 @@ module GoogleDrive
21
21
 
22
22
  def has_admin_acl?
23
23
  f = self
24
- curr_emails = f.acls.map{|a| a.scope}.sort
25
- admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
26
- if (curr_emails & admin_emails) == admin_emails
24
+ curr_emails = f.acls.map{|a| a.scope}.compact.sort
25
+ admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
26
+ if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
27
27
  return true
28
28
  else
29
29
  return false
@@ -32,9 +32,9 @@ module GoogleDrive
32
32
 
33
33
  def has_worker_acl?
34
34
  f = self
35
- curr_emails = f.acls.map{|a| a.scope}.sort
35
+ curr_emails = f.acls.map{|a| a.scope}.compact.sort
36
36
  worker_emails = Mobilize::Gdrive.worker_emails.sort
37
- if (curr_emails & worker_emails) == worker_emails
37
+ if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
38
38
  return true
39
39
  else
40
40
  return false
@@ -84,7 +84,7 @@ module GoogleDrive
84
84
  end
85
85
  def acl_entry(email)
86
86
  f = self
87
- f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
87
+ f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
88
88
  end
89
89
  def entry_hash
90
90
  f = self
@@ -1,6 +1,6 @@
1
1
  module GoogleDrive
2
2
  class Worksheet
3
- def to_tsv
3
+ def to_tsv(gsub_line_breaks="\n")
4
4
  sheet = self
5
5
  rows = sheet.rows
6
6
  header = rows.first
@@ -8,7 +8,12 @@ module GoogleDrive
8
8
  #look for blank cols to indicate end of row
9
9
  col_last_i = (header.index("") || header.length)-1
10
10
  #ignore user-entered line breaks for purposes of tsv reads
11
- out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
11
+ out_tsv = rows.map do |r|
12
+ row = r[0..col_last_i].join("\t")
13
+ row.gsub!("\n",gsub_line_breaks)
14
+ row = row + "\n"
15
+ row
16
+ end.join + "\n"
12
17
  out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
13
18
  Mobilize::Gsheet.config['read_date_format'])
14
19
  end
@@ -0,0 +1,59 @@
1
+ <% @subtabs = resque.queues unless partial? || params[:id].nil? %>
2
+
3
+ <% if queue = params[:id] %>
4
+
5
+ <h1>Pending jobs on <span class='hl'><%= queue %></span></h1>
6
+ <form method="POST" action="<%=u "/queues/#{queue}/remove" %>" class='remove-queue'>
7
+ <input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
8
+ </form>
9
+ <p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
10
+ <table class='jobs'>
11
+ <tr>
12
+ <th>Class</th>
13
+ <th>Args</th>
14
+ </tr>
15
+ <% for job in (jobs = resque.peek(queue, start, 20)) %>
16
+ <tr>
17
+ <td class='class'><%= job['class'] %></td>
18
+ <td class='args'><%=h job['args'].inspect %></td>
19
+ </tr>
20
+ <% end %>
21
+ <% if jobs.empty? %>
22
+ <tr>
23
+ <td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
24
+ </tr>
25
+ <% end %>
26
+ </table>
27
+ <%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
28
+ <% else %>
29
+
30
+ <h1 class='wi'>Queues</h1>
31
+ <p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
32
+ <table class='queues'>
33
+ <tr>
34
+ <th>Name</th>
35
+ <th>Jobs</th>
36
+ </tr>
37
+ <!-- only show nonzero length queues-->
38
+ <% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
39
+ <tr>
40
+ <td class='queue'><a class="queue" href="<%= u "queues/#{queue}" %>"><%= queue %></a></td>
41
+ <td class='size'><%= resque.size queue %></td>
42
+ </tr>
43
+ <% end %>
44
+ <% if failed_multiple_queues? %>
45
+ <% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
46
+ <tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
47
+ <td class='queue failed'><a class="queue" href="<%= u "failed/#{queue}" %>"><%= queue %></a></td>
48
+ <td class='size'><%= Resque::Failure.count(queue) %></td>
49
+ </tr>
50
+ <% end %>
51
+ <% else %>
52
+ <tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
53
+ <td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
54
+ <td class='size'><%= Resque::Failure.count %></td>
55
+ </tr>
56
+ <% end %>
57
+ </table>
58
+
59
+ <% end %>
@@ -0,0 +1,85 @@
1
+ <% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
2
+ <h1><%= worker %>'s job</h1>
3
+
4
+ <table>
5
+ <tr>
6
+ <th>&nbsp;</th>
7
+ <th>Where</th>
8
+ <th>Queue</th>
9
+ <th>Started</th>
10
+ <th>Class</th>
11
+ <th>Args</th>
12
+ </tr>
13
+ <tr>
14
+ <td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
15
+ <% host, pid, _ = worker.to_s.split(':') %>
16
+ <td><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
17
+ <% data = worker.job %>
18
+ <% queue = data['queue'] %>
19
+ <td><a class="queue" href="<%=u "/queues/#{queue}" %>"><%= queue %></a></td>
20
+ <td><span class="time"><%= data['run_at'] %></span></td>
21
+ <td>
22
+ <code><%= data['payload']['class'] %></code>
23
+ </td>
24
+ <td><%=h data['payload']['args'].inspect %></td>
25
+ </tr>
26
+ </table>
27
+
28
+ <% else %>
29
+
30
+ <%
31
+ workers = resque.working
32
+ jobs = workers.collect {|w| w.job }
33
+ worker_jobs = workers.zip(jobs)
34
+ worker_jobs = worker_jobs.reject { |w, j| w.idle? }
35
+ %>
36
+
37
+ <h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
38
+ <p class='intro'>The list below contains all workers which are currently running a job.</p>
39
+ <table class='workers'>
40
+ <tr>
41
+ <th>&nbsp;</th>
42
+ <th>Where</th>
43
+ <th>Queue</th>
44
+ <th>Processing</th>
45
+ </tr>
46
+ <% if worker_jobs.empty? %>
47
+ <tr>
48
+ <td colspan="4" class='no-data'>Nothing is happening right now...</td>
49
+ </tr>
50
+ <% end %>
51
+
52
+ <% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
53
+ <tr>
54
+ <td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
55
+ <% host, pid, queues = worker.to_s.split(':') %>
56
+ <td class='where'><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
57
+ <td class='queues queue'>
58
+ <a class="queue-tag" href="<%=u "/queues/#{job['queue']}" %>"><%= job['queue'] %></a>
59
+ </td>
60
+ <td class='process'>
61
+ <% if job['queue']
62
+ job_stats = begin
63
+ j = job
64
+ args_hash = j['payload']['args'][1]
65
+ args_array = args_hash.map{|k,v| "#{k} : #{v}" }.join("</code><br><code>") if args_hash.class==Hash
66
+ args = [args_array].compact.join("")
67
+ path = j['payload']['args'].first
68
+ [path,args].join("</code><br><code>")
69
+ rescue => exc
70
+ [exc.to_s,exc.backtrace.join("<br>")].join("<br>")
71
+ end
72
+ %>
73
+ <%=job_stats%>
74
+ </code>
75
+ <br>
76
+ <small><a class="queue time" href="<%=u "/working/#{worker}" %>"><%= job['run_at'] %></a></small>
77
+ <% else %>
78
+ <span class='waiting'>Waiting for a job...</span>
79
+ <% end %>
80
+ </td>
81
+ </tr>
82
+ <% end %>
83
+ </table>
84
+
85
+ <% end %>
@@ -11,11 +11,19 @@ class String
11
11
  def opp
12
12
  pp self
13
13
  end
14
+ def to_md5
15
+ Digest::MD5.hexdigest(self)
16
+ end
14
17
  def bash(except=true)
15
- pid,stdin,stdout,stderr = Open4.popen4(self)
16
- pid,stdin = [nil,nil]
17
- raise stderr.read if (stderr.read.length>0 and except==true)
18
- return stdout.read
18
+ str = self
19
+ out_str,err_str = []
20
+ status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
21
+ out_str = stdout.read
22
+ err_str = stderr.read
23
+ end
24
+ exit_status = status.exitstatus
25
+ raise err_str if (exit_status !=0 and except==true)
26
+ return out_str
19
27
  end
20
28
  def escape_regex
21
29
  str = self
@@ -1,3 +1,4 @@
1
+ require 'yaml'
1
2
  module YAML
2
3
  def YAML.easy_load(string)
3
4
  begin
@@ -9,13 +10,16 @@ module YAML
9
10
  #make sure urls have their colon spaces fixed
10
11
  result_hash={}
11
12
  easy_hash.each do |k,v|
12
- result_hash[k] = if v.class==String
13
- v.gsub(": //","://")
14
- elsif v.class==Array
15
- v.map{|av| av.to_s.gsub(": //","://")}
16
- else
17
- v
18
- end
13
+ #YAML parsing can leave leading whitespace on keys,
14
+ #so normalize each key by stripping it
15
+ strip_k = k.strip
16
+ result_hash[strip_k] = if v.class==String
17
+ v.gsub(": //","://")
18
+ elsif v.class==Array
19
+ v.map{|av| av.to_s.gsub(": //","://")}
20
+ else
21
+ v
22
+ end
19
23
  end
20
24
  return result_hash
21
25
  end
@@ -14,51 +14,44 @@ module Mobilize
14
14
  dst = Dataset.find_by_handler_and_path('gbook',path)
15
15
  if dst and dst.http_url.to_s.length>0
16
16
  book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
17
- #doesn't count if it's deleted
18
- if book.entry_hash[:deleted]
19
- book = nil
20
- else
17
+ if book
21
18
  return book
19
+ else
20
+ raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
22
21
  end
23
22
  end
23
+ #try to find books by title
24
24
  books = Gbook.find_all_by_path(path,gdrive_slot)
25
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
26
- book = nil
27
- if books.length>1 and dst.http_url.to_s.length>0
28
- #some idiot process or malicious user created a duplicate book.
29
- #Fix by deleting all but the one with dst entry's key
30
- dkey = dst.http_url.split("key=").last
31
- books.each do |b|
32
- bkey = b.resource_id.split(":").last
33
- if bkey == dkey
34
- book = b
35
- dst.update_attributes(:http_url=>book.human_url)
36
- else
37
- #delete the invalid book
38
- b.delete
39
- ("Deleted duplicate book #{path}").oputs
40
- end
41
- end
42
- else
43
- #If it's a new dst or if there are multiple books
44
- #take the first
45
- book = books.first
46
- dst.update_attributes(:http_url=>book.human_url) if book
25
+ #sort by publish date; if entry hash retrieval fails (as it does)
26
+ #assume the book was published now
27
+ book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
28
+ if book
29
+ #we know dataset will have blank url since it wasn't picked up above
30
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
31
+ api_url = book.human_url.split("&").first
32
+ dst.update_attributes(:http_url=>api_url)
47
33
  end
48
34
  return book
49
35
  end
36
+
50
37
  def Gbook.find_or_create_by_path(path,gdrive_slot)
51
38
  book = Gbook.find_by_path(path,gdrive_slot)
52
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
53
39
  if book.nil?
54
40
  #always use owner email to make sure all books are owned by owner account
55
41
  book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
56
42
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
43
+ #check to make sure the dataset has a blank url; if not, error out
44
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
45
+ if dst.http_url.to_s.length>0
46
+ #add acls to book regardless
47
+ book.add_admin_acl
48
+ raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
49
+ else
50
+ api_url = book.human_url.split("&").first
51
+ dst.update_attributes(:http_url=>api_url)
52
+ book.add_admin_acl
53
+ end
57
54
  end
58
- #always make sure book dataset http URL is up to date
59
- #and that book has admin acl
60
- dst.update_attributes(:http_url=>book.human_url)
61
- book.add_admin_acl
62
55
  return book
63
56
  end
64
57
  end
@@ -1,6 +1,6 @@
1
1
  module Mobilize
2
2
  module Gfile
3
- def Gfile.path_to_dst(path,stage_path)
3
+ def Gfile.path_to_dst(path,stage_path,gdrive_slot)
4
4
  #don't need the ://
5
5
  path = path.split("://").last if path.index("://")
6
6
  if Gfile.find_by_path(path)
@@ -38,7 +38,8 @@ module Mobilize
38
38
  end
39
39
  #update http url for file
40
40
  dst = Dataset.find_by_handler_and_path("gfile",dst_path)
41
- dst.update_attributes(:http_url=>file.human_url)
41
+ api_url = file.human_url.split("&").first
42
+ dst.update_attributes(:http_url=>api_url)
42
43
  true
43
44
  end
44
45
 
@@ -86,7 +87,8 @@ module Mobilize
86
87
  #always make sure dataset http URL is up to date
87
88
  #and that it has admin acl
88
89
  if file
89
- dst.update_attributes(:http_url=>file.human_url)
90
+ api_url = file.human_url.split("&").first
91
+ dst.update_attributes(:http_url=>api_url)
90
92
  file.add_admin_acl
91
93
  end
92
94
  return file
@@ -1,43 +1,38 @@
1
+ require 'tempfile'
1
2
  module Mobilize
2
3
  module Gridfs
3
4
  def Gridfs.config
4
5
  Base.config('gridfs')
5
6
  end
6
7
 
7
- def Gridfs.grid
8
- session = ::Mongoid.configure.sessions['default']
9
- database_name = session['database']
10
- host,port = session['hosts'].first.split(":")
11
- return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
8
+ def Gridfs.read_by_dataset_path(dst_path,*args)
9
+ curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
10
+ zs = curr_file.data if curr_file
11
+ return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
12
12
  end
13
13
 
14
- def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
15
- begin
16
- zs=Gridfs.grid.open(dst_path,'r').read
17
- return ::Zlib::Inflate.inflate(zs)
18
- rescue
19
- return nil
20
- end
21
- end
22
-
23
- def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
14
+ def Gridfs.write_by_dataset_path(dst_path,string,*args)
24
15
  zs = ::Zlib::Deflate.deflate(string)
25
16
  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
26
- curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
27
- #write a new version when there is a change
17
+ #find and delete existing file
18
+ curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
19
+ curr_zs = curr_file.data if curr_file
20
+ #overwrite when there is a change
28
21
  if curr_zs != zs
29
- Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
22
+ Mongoid::GridFs.delete(curr_file.id) if curr_file
23
+ #create temp file w zstring
24
+ temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
25
+ temp_file.print(zs)
26
+ temp_file.close
27
+ #put data in file
28
+ Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
30
29
  end
31
30
  return true
32
31
  end
33
32
 
34
33
  def Gridfs.delete(dst_path)
35
- begin
36
- Gridfs.grid.delete(dst_path)
37
- return true
38
- rescue
39
- return nil
40
- end
34
+ curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
35
+ curr_file.delete
41
36
  end
42
37
  end
43
38
  end
@@ -10,12 +10,10 @@ module Mobilize
10
10
  end
11
11
 
12
12
  # converts a source path or target path to a dst in the context of handler and stage
13
- def Gsheet.path_to_dst(path,stage_path)
13
+ def Gsheet.path_to_dst(path,stage_path,gdrive_slot)
14
14
  s = Stage.where(:path=>stage_path).first
15
15
  params = s.params
16
16
  target_path = params['target']
17
- #take random slot if one is not available
18
- gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
19
17
  #if this is the target, it doesn't have to exist already
20
18
  is_target = true if path == target_path
21
19
  #don't need the ://
@@ -46,9 +44,7 @@ module Mobilize
46
44
 
47
45
  def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
48
46
  #expects gdrive slot as first arg, otherwise chooses random
49
- gdrive_slot = args
50
- worker_emails = Gdrive.worker_emails.sort_by{rand}
51
- gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
47
+ gdrive_slot = args.to_a.first
52
48
  sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
53
49
  sheet.read(user_name) if sheet
54
50
  end
@@ -56,8 +52,6 @@ module Mobilize
56
52
  def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
57
53
  #expects gdrive slot as first arg, otherwise chooses random
58
54
  gdrive_slot,crop = args
59
- worker_emails = Gdrive.worker_emails.sort_by{rand}
60
- gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
61
55
  crop ||= true
62
56
  Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
63
57
  end
@@ -87,15 +81,16 @@ module Mobilize
87
81
 
88
82
  def Gsheet.write_temp(target_path,gdrive_slot,tsv)
89
83
  #find and delete temp sheet, if any
90
- temp_path = [target_path.gridsafe,"temp"].join("/")
91
- temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
92
- temp_sheet.delete if temp_sheet
93
- #write data to temp sheet
94
- temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
84
+ temp_book_title = target_path.gridsafe
85
+ #create book and sheet
86
+ temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
87
+ rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
88
+ temp_sheet = temp_book.add_worksheet("temp",rows,cols)
95
89
  #this step has a tendency to fail; if it does,
96
90
  #don't fail the stage, mark it as false
97
91
  begin
98
- temp_sheet.write(tsv,Gdrive.owner_name)
92
+ gdrive_user = gdrive_slot.split("@").first
93
+ temp_sheet.write(tsv,gdrive_user)
99
94
  rescue
100
95
  return nil
101
96
  end
@@ -114,7 +109,7 @@ module Mobilize
114
109
  #only give the user edit permissions if they're the ones
115
110
  #creating it
116
111
  target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
117
- target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
112
+ target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
118
113
  target_sheet.delete_sheet1
119
114
  end
120
115
  #pass it crop param to determine whether to shrink target sheet to fit data
@@ -134,14 +129,24 @@ module Mobilize
134
129
  crop = s.params['crop'] || true
135
130
  begin
136
131
  #get tsv to write from stage
137
- source = s.sources.first
132
+ source = s.sources(gdrive_slot).first
138
133
  raise "Need source for gsheet write" unless source
139
134
  tsv = source.read(u.name,gdrive_slot)
140
- raise "No data found in #{source.url}" unless tsv
141
- Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
135
+ raise "No data source found for #{source.url}" unless tsv
136
+ tsv_row_count = tsv.to_s.split("\n").length
137
+ tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
138
+ tsv_cell_count = tsv_row_count * tsv_col_count
139
+ stdout = if tsv_row_count == 0
140
+ #soft error; no data to write. Stage will complete.
141
+ "Write skipped for #{s.target.url}"
142
+ elsif tsv_cell_count > Gsheet.max_cells
143
+ raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
144
+ else
145
+ Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
146
+ #update status
147
+ "Write successful for #{s.target.url}"
148
+ end
142
149
  Gdrive.unslot_worker_by_path(stage_path)
143
- #update status
144
- stdout = "Write successful for #{s.target.url}"
145
150
  stderr = nil
146
151
  s.update_status(stdout)
147
152
  signal = 0
@@ -25,7 +25,7 @@ module Mobilize
25
25
  return idle_workers if state == 'idle'
26
26
  stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
27
27
  return stale_workers if state == 'stale'
28
- timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['runat'] < (Time.now.utc - Jobtracker.max_run_time)}
28
+ timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
29
29
  return timeout_workers if state == 'timeout'
30
30
  raise "invalid state #{state}"
31
31
  end
@@ -109,16 +109,28 @@ module Mobilize
109
109
  Resque.failures.each_with_index do |f,f_i|
110
110
  #skip if already notified
111
111
  next if f['notified']
112
+ #try to send message to stage owner, where appropriate
112
113
  stage_path = f['payload']['args'].first
113
- s = Stage.where(:path=>stage_path).first
114
- email = s.job.runner.user.email
114
+ email = begin
115
+ s = Stage.where(:path=>stage_path).first
116
+ if s.params['notify'].to_s=="false"
117
+ next
118
+ elsif s.params['notify'].index("@")
119
+ s.params['notify']
120
+ else
121
+ s.job.runner.user.email
122
+ end
123
+ rescue
124
+ #jobs without stages are sent to first admin
125
+ Jobtracker.admin_emails.first
126
+ end
115
127
  exc_to_s = f['error']
116
128
  if fjobs[email].nil?
117
129
  fjobs[email] = {stage_path => {exc_to_s => 1}}
118
130
  elsif fjobs[email][stage_path].nil?
119
131
  fjobs[email][stage_path] = {exc_to_s => 1}
120
132
  elsif fjobs[email][stage_path][exc_to_s].nil?
121
- fjobs[email][stage_path][exc_to_s] = 1
133
+ fjobs[email][stage_path][exc_to_s] = 1
122
134
  else
123
135
  fjobs[email][stage_path][exc_to_s] += 1
124
136
  end