RubyGems - mobilize-base - Versions diffs - 1.2 → 1.3 - Mend

mobilize-base 1.2 → 1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

data/README.md +10 -11
data/lib/mobilize-base/extensions/google_drive/file.rb +7 -7
data/lib/mobilize-base/extensions/google_drive/worksheet.rb +7 -2
data/lib/mobilize-base/extensions/resque-server/views/queues.erb +59 -0
data/lib/mobilize-base/extensions/resque-server/views/working.erb +85 -0
data/lib/mobilize-base/extensions/string.rb +12 -4
data/lib/mobilize-base/extensions/yaml.rb +11 -7
data/lib/mobilize-base/handlers/gbook.rb +24 -31
data/lib/mobilize-base/handlers/gfile.rb +5 -3
data/lib/mobilize-base/handlers/gridfs.rb +19 -24
data/lib/mobilize-base/handlers/gsheet.rb +25 -20
data/lib/mobilize-base/handlers/resque.rb +16 -4
data/lib/mobilize-base/helpers/job_helper.rb +54 -0
data/lib/mobilize-base/helpers/runner_helper.rb +83 -0
data/lib/mobilize-base/helpers/stage_helper.rb +38 -0
data/lib/mobilize-base/jobtracker.rb +13 -5
data/lib/mobilize-base/models/job.rb +36 -48
data/lib/mobilize-base/models/runner.rb +24 -123
data/lib/mobilize-base/models/stage.rb +14 -43
data/lib/mobilize-base/tasks.rb +16 -3
data/lib/mobilize-base/version.rb +1 -1
data/lib/mobilize-base.rb +5 -1
data/lib/samples/gridfs.yml +0 -3
data/lib/samples/gsheet.yml +4 -4
data/mobilize-base.gemspec +4 -5
data/test/mobilize-base_test.rb +1 -0
metadata +21 -32

data/README.md CHANGED Viewed

@@ -220,9 +220,8 @@ production:
 gsheet.yml needs:
 * max_cells, which is the number of cells a sheet is allowed to have
-  written to it at one time. Default is 400k cells, which is the max per
-  book. Google Drive will throw its own exception if
-  you try to write more than that.
+  written to it at one time. Default is 50k cells, which is about how
+much you can write before things start breaking.
 * Because Google Docs ties date formatting to the Locale for the
   spreadsheet, there are 2 date format parameters:
   * read_date_format, which is the format that should be read FROM google
@@ -356,22 +355,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
 Mobilize stores cached data in MongoDB Gridfs.
 It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
-* max_versions - the number of __different__ versions of data to keep
-for a given cache. Default is 10. This is meant mostly to allow you to
-restore Runners from cache if necessary.
 * max_compressed_write_size - the amount of compressed data Gridfs will
 allow. If you try to write more than this, an exception will be thrown.
 ``` yml
 ---
 development:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 test:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 production:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 ```
@@ -564,8 +557,14 @@ the Runner itself.
 and "base1.out" for the second test. The first
 takes the output from the first stage and the second reads it straight
 from the referenced sheet.
-    * All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
-giving up.
+    * All stages accept retry parameters:
+      * retries: an integer specifying the number of times that the system will try it again before giving up.
+      * delay: an integer specifying the number of seconds between retries.
+      * always_on: if false, turns the job off on stage failures.
+Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
+      * notify: by default, the stage owner will be notified on failure.
+          * if false, will not notify the stage owner in the event of a failure.
+          * If it's an email address, will email the specified person.
     * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
       * The tab will be headed "response" and will contain the exception and backtrace for the error.
     * The test uses "Requestor_mobilize(test)/base1.out" and

data/lib/mobilize-base/extensions/google_drive/file.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module GoogleDrive
       f = self
       #admin includes workers
       return true if f.has_admin_acl?
-      accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
+      accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
       accounts.each do |email|
         f.update_acl(email)
       end
@@ -21,9 +21,9 @@ module GoogleDrive
     def has_admin_acl?
       f = self
-      curr_emails = f.acls.map{|a| a.scope}.sort
-      admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
-      if (curr_emails & admin_emails) == admin_emails
+      curr_emails = f.acls.map{|a| a.scope}.compact.sort
+      admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
+      if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
         return true
       else
         return false
@@ -32,9 +32,9 @@ module GoogleDrive
     def has_worker_acl?
       f = self
-      curr_emails = f.acls.map{|a| a.scope}.sort
+      curr_emails = f.acls.map{|a| a.scope}.compact.sort
       worker_emails = Mobilize::Gdrive.worker_emails.sort
-      if (curr_emails & worker_emails) == worker_emails
+      if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
         return true
       else
         return false
@@ -84,7 +84,7 @@ module GoogleDrive
     end
     def acl_entry(email)
       f = self
-      f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
+      f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
     end
     def entry_hash
       f = self

data/lib/mobilize-base/extensions/google_drive/worksheet.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module GoogleDrive
   class Worksheet
-    def to_tsv
+    def to_tsv(gsub_line_breaks="\n")
       sheet = self
       rows = sheet.rows
       header = rows.first
@@ -8,7 +8,12 @@ module GoogleDrive
       #look for blank cols to indicate end of row
       col_last_i = (header.index("") || header.length)-1
       #ignore user-entered line breaks for purposes of tsv reads
-      out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
+      out_tsv = rows.map do |r|
+                             row = r[0..col_last_i].join("\t")
+                             row.gsub!("\n",gsub_line_breaks)
+                             row = row + "\n"
+                             row
+                         end.join + "\n"
       out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
                                 Mobilize::Gsheet.config['read_date_format'])
     end

data/lib/mobilize-base/extensions/resque-server/views/queues.erb ADDED Viewed

@@ -0,0 +1,59 @@
+<% @subtabs = resque.queues unless partial? || params[:id].nil? %>
+<% if queue = params[:id] %>
+  <h1>Pending jobs on <span class='hl'><%= queue %></span></h1>
+  <form method="POST" action="<%=u "/queues/#{queue}/remove" %>" class='remove-queue'>
+    <input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
+  </form>
+  <p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
+  <table class='jobs'>
+    <tr>
+      <th>Class</th>
+      <th>Args</th>
+    </tr>
+    <% for job in (jobs = resque.peek(queue, start, 20)) %>
+    <tr>
+      <td class='class'><%= job['class'] %></td>
+      <td class='args'><%=h job['args'].inspect %></td>
+    </tr>
+    <% end %>
+    <% if jobs.empty? %>
+    <tr>
+      <td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
+    </tr>
+    <% end %>
+  </table>
+  <%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
+<% else %>
+  <h1 class='wi'>Queues</h1>
+  <p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
+  <table class='queues'>
+    <tr>
+      <th>Name</th>
+      <th>Jobs</th>
+    </tr>
+    <!-- only show nonzero length queues-->
+    <% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
+    <tr>
+      <td class='queue'><a class="queue" href="<%= u "queues/#{queue}" %>"><%= queue %></a></td>
+      <td class='size'><%= resque.size queue %></td>
+    </tr>
+    <% end %>
+    <% if failed_multiple_queues? %>
+      <% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
+      <tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
+        <td class='queue failed'><a class="queue" href="<%= u "failed/#{queue}" %>"><%= queue %></a></td>
+        <td class='size'><%= Resque::Failure.count(queue) %></td>
+      </tr>
+      <% end %>
+    <% else %>
+      <tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
+        <td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
+        <td class='size'><%= Resque::Failure.count %></td>
+      </tr>
+    <% end %>
+  </table>
+<% end %>

data/lib/mobilize-base/extensions/resque-server/views/working.erb ADDED Viewed

@@ -0,0 +1,85 @@
+<% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
+  <h1><%= worker %>'s job</h1>
+  <table>
+    <tr>
+      <th>&nbsp;</th>
+      <th>Where</th>
+      <th>Queue</th>
+      <th>Started</th>
+      <th>Class</th>
+      <th>Args</th>
+    </tr>
+      <tr>
+        <td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
+        <% host, pid, _ = worker.to_s.split(':') %>
+        <td><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+        <% data = worker.job %>
+        <% queue = data['queue'] %>
+        <td><a class="queue" href="<%=u "/queues/#{queue}" %>"><%= queue %></a></td>
+        <td><span class="time"><%= data['run_at'] %></span></td>
+        <td>
+          <code><%= data['payload']['class'] %></code>
+        </td>
+        <td><%=h data['payload']['args'].inspect %></td>
+      </tr>
+  </table>
+<% else %>
+  <%
+    workers = resque.working
+    jobs = workers.collect {|w| w.job }
+    worker_jobs = workers.zip(jobs)
+    worker_jobs = worker_jobs.reject { |w, j| w.idle? }
+  %>
+  <h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
+  <p class='intro'>The list below contains all workers which are currently running a job.</p>
+  <table class='workers'>
+    <tr>
+      <th>&nbsp;</th>
+      <th>Where</th>
+      <th>Queue</th>
+      <th>Processing</th>
+    </tr>
+    <% if worker_jobs.empty? %>
+    <tr>
+      <td colspan="4" class='no-data'>Nothing is happening right now...</td>
+    </tr>
+    <% end %>
+    <% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
+      <tr>
+        <td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
+        <% host, pid, queues = worker.to_s.split(':') %>
+        <td class='where'><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+        <td class='queues queue'>
+          <a class="queue-tag" href="<%=u "/queues/#{job['queue']}" %>"><%= job['queue'] %></a>
+        </td>
+        <td class='process'>
+          <% if job['queue']
+               job_stats = begin
+                             j = job
+                             args_hash = j['payload']['args'][1]
+                             args_array = args_hash.map{|k,v| "#{k} : #{v}" }.join("</code><br><code>") if args_hash.class==Hash
+                             args = [args_array].compact.join("")
+                             path = j['payload']['args'].first
+                             [path,args].join("</code><br><code>")
+                           rescue => exc
+                             [exc.to_s,exc.backtrace.join("<br>")].join("<br>")
+                           end
+           %>
+           <%=job_stats%>
+            </code>
+            <br>
+            <small><a class="queue time" href="<%=u "/working/#{worker}" %>"><%= job['run_at'] %></a></small>
+          <% else %>
+            <span class='waiting'>Waiting for a job...</span>
+          <% end %>
+        </td>
+      </tr>
+    <% end %>
+  </table>
+<% end %>

data/lib/mobilize-base/extensions/string.rb CHANGED Viewed

@@ -11,11 +11,19 @@ class String
   def opp
     pp self
   end
+  def to_md5
+    Digest::MD5.hexdigest(self)
+  end
   def bash(except=true)
-    pid,stdin,stdout,stderr = Open4.popen4(self)
-    pid,stdin = [nil,nil]
-    raise stderr.read if (stderr.read.length>0 and except==true)
-    return stdout.read
+    str = self
+    out_str,err_str = []
+    status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
+      out_str = stdout.read
+      err_str = stderr.read
+    end
+    exit_status = status.exitstatus
+    raise err_str if (exit_status !=0 and except==true)
+    return out_str
   end
   def escape_regex
     str = self

data/lib/mobilize-base/extensions/yaml.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'yaml'
 module YAML
   def YAML.easy_load(string)
     begin
@@ -9,13 +10,16 @@ module YAML
       #make sure urls have their colon spaces fixed
       result_hash={}
       easy_hash.each do |k,v|
-        result_hash[k] = if v.class==String
-                           v.gsub(": //","://")
-                         elsif v.class==Array
-                           v.map{|av| av.to_s.gsub(": //","://")}
-                         else
-                           v
-                         end
+        #fucking yaml puts spaces in front of the key
+        #or something
+        strip_k = k.strip
+        result_hash[strip_k] = if v.class==String
+                                 v.gsub(": //","://")
+                               elsif v.class==Array
+                                 v.map{|av| av.to_s.gsub(": //","://")}
+                               else
+                                 v
+                               end
       end
       return result_hash
     end

data/lib/mobilize-base/handlers/gbook.rb CHANGED Viewed

@@ -14,51 +14,44 @@ module Mobilize
       dst = Dataset.find_by_handler_and_path('gbook',path)
       if dst and dst.http_url.to_s.length>0
         book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
-        #doesn't count if it's deleted
-        if book.entry_hash[:deleted]
-          book = nil
-        else
+        if book
           return book
+        else
+          raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
         end
       end
+      #try to find books by title
       books = Gbook.find_all_by_path(path,gdrive_slot)
-      dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
-      book = nil
-      if books.length>1 and dst.http_url.to_s.length>0
-        #some idiot process or malicious user created a duplicate book.
-        #Fix by deleting all but the one with dst entry's key
-        dkey = dst.http_url.split("key=").last
-        books.each do |b|
-          bkey = b.resource_id.split(":").last
-          if bkey == dkey
-            book = b
-            dst.update_attributes(:http_url=>book.human_url)
-          else
-            #delete the invalid book
-            b.delete
-            ("Deleted duplicate book #{path}").oputs
-          end
-        end
-      else
-        #If it's a new dst or if there are multiple books
-        #take the first
-        book = books.first
-        dst.update_attributes(:http_url=>book.human_url) if book
+      #sort by publish date; if entry hash retrieval fails (as it does)
+      #assume the book was published now
+      book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
+      if book
+        #we know dataset will have blank url since it wasn't picked up above
+        dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+        api_url = book.human_url.split("&").first
+        dst.update_attributes(:http_url=>api_url)
       end
       return book
     end
     def Gbook.find_or_create_by_path(path,gdrive_slot)
       book = Gbook.find_by_path(path,gdrive_slot)
-      dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
       if book.nil?
         #always use owner email to make sure all books are owned by owner account
         book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
         ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
+        #check to make sure the dataset has a blank url; if not, error out
+        dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+        if dst.http_url.to_s.length>0
+          #add acls to book regardless
+          book.add_admin_acl
+          raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
+        else
+          api_url = book.human_url.split("&").first
+          dst.update_attributes(:http_url=>api_url)
+          book.add_admin_acl
+        end
       end
-      #always make sure book dataset http URL is up to date
-      #and that book has admin acl
-      dst.update_attributes(:http_url=>book.human_url)
-      book.add_admin_acl
       return book
     end
   end

data/lib/mobilize-base/handlers/gfile.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module Mobilize
   module Gfile
-    def Gfile.path_to_dst(path,stage_path)
+    def Gfile.path_to_dst(path,stage_path,gdrive_slot)
       #don't need the ://
       path = path.split("://").last if path.index("://")
       if Gfile.find_by_path(path)
@@ -38,7 +38,8 @@ module Mobilize
       end
       #update http url for file
       dst = Dataset.find_by_handler_and_path("gfile",dst_path)
-      dst.update_attributes(:http_url=>file.human_url)
+      api_url = file.human_url.split("&").first
+      dst.update_attributes(:http_url=>api_url)
       true
     end
@@ -86,7 +87,8 @@ module Mobilize
       #always make sure dataset http URL is up to date
       #and that it has admin acl
       if file
-        dst.update_attributes(:http_url=>file.human_url)
+        api_url = file.human_url.split("&").first
+        dst.update_attributes(:http_url=>api_url)
         file.add_admin_acl
       end
       return file

data/lib/mobilize-base/handlers/gridfs.rb CHANGED Viewed

@@ -1,43 +1,38 @@
+require 'tempfile'
 module Mobilize
   module Gridfs
     def Gridfs.config
       Base.config('gridfs')
     end
-    def Gridfs.grid
-      session = ::Mongoid.configure.sessions['default']
-      database_name = session['database']
-      host,port = session['hosts'].first.split(":")
-      return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
+    def Gridfs.read_by_dataset_path(dst_path,*args)
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      zs = curr_file.data if curr_file
+      return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
     end
-    def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
-      begin
-        zs=Gridfs.grid.open(dst_path,'r').read
-        return ::Zlib::Inflate.inflate(zs)
-      rescue
-        return nil
-      end
-    end
-    def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
+    def Gridfs.write_by_dataset_path(dst_path,string,*args)
       zs = ::Zlib::Deflate.deflate(string)
       raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-      curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
-      #write a new version when there is a change
+      #find and delete existing file
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      curr_zs =  curr_file.data if curr_file
+      #overwrite when there is a change
       if curr_zs != zs
-        Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
+        Mongoid::GridFs.delete(curr_file.id) if curr_file
+        #create temp file w zstring
+        temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
+        temp_file.print(zs)
+        temp_file.close
+        #put data in file
+        Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
       end
       return true
     end
     def Gridfs.delete(dst_path)
-      begin
-        Gridfs.grid.delete(dst_path)
-        return true
-      rescue
-        return nil
-      end
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      curr_file.delete
     end
   end
 end

data/lib/mobilize-base/handlers/gsheet.rb CHANGED Viewed

@@ -10,12 +10,10 @@ module Mobilize
     end
     # converts a source path or target path to a dst in the context of handler and stage
-    def Gsheet.path_to_dst(path,stage_path)
+    def Gsheet.path_to_dst(path,stage_path,gdrive_slot)
       s = Stage.where(:path=>stage_path).first
       params = s.params
       target_path = params['target']
-      #take random slot if one is not available
-      gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
       #if this is the target, it doesn't have to exist already
       is_target = true if path == target_path
       #don't need the ://
@@ -46,9 +44,7 @@ module Mobilize
     def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
       #expects gdrive slot as first arg, otherwise chooses random
-      gdrive_slot = args
-      worker_emails = Gdrive.worker_emails.sort_by{rand}
-      gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+      gdrive_slot = args.to_a.first
       sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
       sheet.read(user_name) if sheet
     end
@@ -56,8 +52,6 @@ module Mobilize
     def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
       #expects gdrive slot as first arg, otherwise chooses random
       gdrive_slot,crop = args
-      worker_emails = Gdrive.worker_emails.sort_by{rand}
-      gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
       crop ||= true
       Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
     end
@@ -87,15 +81,16 @@ module Mobilize
     def Gsheet.write_temp(target_path,gdrive_slot,tsv)
       #find and delete temp sheet, if any
-      temp_path = [target_path.gridsafe,"temp"].join("/")
-      temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
-      temp_sheet.delete if temp_sheet
-      #write data to temp sheet
-      temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
+      temp_book_title = target_path.gridsafe
+      #create book and sheet
+      temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
+      rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
+      temp_sheet = temp_book.add_worksheet("temp",rows,cols)
       #this step has a tendency to fail; if it does,
       #don't fail the stage, mark it as false
       begin
-        temp_sheet.write(tsv,Gdrive.owner_name)
+        gdrive_user = gdrive_slot.split("@").first
+        temp_sheet.write(tsv,gdrive_user)
       rescue
         return nil
       end
@@ -114,7 +109,7 @@ module Mobilize
         #only give the user edit permissions if they're the ones
         #creating it
         target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
-        target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
+        target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
         target_sheet.delete_sheet1
       end
       #pass it crop param to determine whether to shrink target sheet to fit data
@@ -134,14 +129,24 @@ module Mobilize
       crop = s.params['crop'] || true
       begin
         #get tsv to write from stage
-        source = s.sources.first
+        source = s.sources(gdrive_slot).first
         raise "Need source for gsheet write" unless source
         tsv = source.read(u.name,gdrive_slot)
-        raise "No data found in #{source.url}" unless tsv
-        Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
+        raise "No data source found for #{source.url}" unless tsv
+        tsv_row_count = tsv.to_s.split("\n").length
+        tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
+        tsv_cell_count = tsv_row_count * tsv_col_count
+        stdout = if tsv_row_count == 0
+                   #soft error; no data to write. Stage will complete.
+                   "Write skipped for #{s.target.url}"
+                 elsif tsv_cell_count > Gsheet.max_cells
+                   raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
+                 else
+                   Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
+                   #update status
+                   "Write successful for #{s.target.url}"
+                 end
         Gdrive.unslot_worker_by_path(stage_path)
-        #update status
-        stdout = "Write successful for #{s.target.url}"
         stderr = nil
         s.update_status(stdout)
         signal = 0

data/lib/mobilize-base/handlers/resque.rb CHANGED Viewed

@@ -25,7 +25,7 @@ module Mobilize
       return idle_workers if state == 'idle'
       stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
       return stale_workers if state == 'stale'
-      timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['runat'] < (Time.now.utc - Jobtracker.max_run_time)}
+      timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
       return timeout_workers if state == 'timeout'
       raise "invalid state #{state}"
     end
@@ -109,16 +109,28 @@ module Mobilize
       Resque.failures.each_with_index do |f,f_i|
         #skip if already notified
         next if f['notified']
+        #try to send message to stage owner, where appropriate
         stage_path = f['payload']['args'].first
-        s = Stage.where(:path=>stage_path).first
-        email = s.job.runner.user.email
+        email = begin
+                  s = Stage.where(:path=>stage_path).first
+                  if s.params['notify'].to_s=="false"
+                    next
+                  elsif s.params['notify'].index("@")
+                    s.params['notify']
+                  else
+                    s.job.runner.user.email
+                  end
+                rescue
+                  #jobs without stages are sent to first admin
+                  Jobtracker.admin_emails.first
+                end
         exc_to_s = f['error']
         if fjobs[email].nil?
           fjobs[email] = {stage_path => {exc_to_s => 1}}
         elsif fjobs[email][stage_path].nil?
           fjobs[email][stage_path] = {exc_to_s => 1}
         elsif fjobs[email][stage_path][exc_to_s].nil?
-          fjobs[email][stage_path][exc_to_s] = 1
+          fjobs[email][stage_path][exc_to_s] = 1
         else
           fjobs[email][stage_path][exc_to_s] += 1
         end