mobilize-base 1.1.10 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -9
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +4 -4
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -6
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +22 -14
- data/lib/mobilize-base/extensions/string.rb +1 -0
- data/lib/mobilize-base/handlers/email.rb +5 -6
- data/lib/mobilize-base/handlers/gbook.rb +19 -0
- data/lib/mobilize-base/handlers/gdrive.rb +13 -0
- data/lib/mobilize-base/handlers/gfile.rb +46 -14
- data/lib/mobilize-base/handlers/gridfs.rb +3 -3
- data/lib/mobilize-base/handlers/gsheet.rb +98 -39
- data/lib/mobilize-base/handlers/resque.rb +14 -9
- data/lib/mobilize-base/jobtracker.rb +20 -8
- data/lib/mobilize-base/models/dataset.rb +23 -18
- data/lib/mobilize-base/models/runner.rb +19 -18
- data/lib/mobilize-base/models/stage.rb +137 -65
- data/lib/mobilize-base/version.rb +1 -1
- data/mobilize-base.gemspec +1 -1
- data/test/base_job_rows.yml +1 -2
- data/test/mobilize-base_test.rb +60 -21
- metadata +7 -7
- /data/test/{base1_stage1.yml → test_base_1.yml} +0 -0
data/README.md
CHANGED
@@ -552,18 +552,23 @@ stage. These should be of the for `<key1>: <value1>, <key2>: <value2>`, where
 `<key>` is an unquoted string and `<value>` is a quoted string, an
 integer, an array (delimited by square braces), or a hash (delimited by
 curly braces).
-* For mobilize-base, the following
-* gsheet.
-* The
-`<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
-the Runner itself.
-
-
-
+* For mobilize-base, the following stage is available:
+* gsheet.write `source: <input_path>`, which reads the sheet.
+* The input_path should be of the form:
+* `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
+the Runner itself.
+* `gfile://<gfile_name>` if the target is a file.
+* The file must be owned by the Gdrive owner.
+* The test uses "gfile://test_base_1.tsv".
+* The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
 and "base1.out" for the second test. The first
 takes the output from the first stage and the second reads it straight
 from the referenced sheet.
-
+* All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
+giving up.
+* If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
+* The tab will be headed "response" and will contain the exception and backtrace for the error.
+* The test uses "Requestor_mobilize(test)/base1.out" and
 "Runner_mobilize(test)/base2.out" for target sheets.
 
 <a name='section_Start_Run_Test'></a>
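To make the new stage syntax concrete, here is a hypothetical pair of jobs-sheet cells for this stage. The first string mirrors the one the updated test writes; the second is illustrative only (the actual base_job_rows.yml contents are not shown in this diff):

```
stage1: gsheet.write source:"gfile://test_base_1.tsv", target:base1.out, retries:3
stage2: gsheet.write source:base1.out, target:"Runner_mobilize(test)/base2.out"
```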
data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb
CHANGED
@@ -8,7 +8,7 @@ module GoogleDrive
 attempts = 0
 sleep_time = nil
 #try 5 times to make the call
-while (response.nil? or response.code.
+while (response.nil? or response.code.starts_with?("5")) and attempts < 20
 #instantiate http object, set params
 http = @proxy.new(uri.host, uri.port)
 http.use_ssl = true
@@ -21,10 +21,10 @@ module GoogleDrive
 #timeouts etc.
 nil
 end
-if response.nil?
+if response.nil? or response.code.starts_with?("4")
 attempts +=1
-
-if response.code.
+elsif
+if response.code.starts_with?("5")
 #wait 10 seconds times number of attempts squared in case of error
 sleep_time = 10 * (attempts*attempts)
 attempts += 1
data/lib/mobilize-base/extensions/google_drive/file.rb
CHANGED
@@ -13,15 +13,16 @@ module GoogleDrive
 f = self
 #admin includes workers
 return true if f.has_admin_acl?
-(Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
-
+accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
+accounts.each do |email|
+f.update_acl(email)
 end
 end
 
 def has_admin_acl?
 f = self
 curr_emails = f.acls.map{|a| a.scope}.sort
-admin_emails = Mobilize::Gdrive.admin_emails.
+admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
 if (curr_emails & admin_emails) == admin_emails
 return true
 else
@@ -40,13 +41,13 @@ module GoogleDrive
 end
 end
 
-def read(
+def read(user_name)
 f = self
-entry = f.acl_entry("#{
+entry = f.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
 if entry and ['reader','writer','owner'].include?(entry.role)
 f.download_to_string
 else
-raise "User #{
+raise "User #{user_name} is not allowed to read #{f.title}"
 end
 end
 
data/lib/mobilize-base/extensions/google_drive/worksheet.rb
CHANGED
@@ -6,11 +6,11 @@ module GoogleDrive
 header = rows.first
 return nil unless header and header.first.to_s.length>0
 #look for blank cols to indicate end of row
-
-
+col_last_i = (header.index("") || header.length)-1
+#ignore user-entered line breaks for purposes of tsv reads
+out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
 out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
 Mobilize::Gsheet.config['read_date_format'])
-
 end
 def add_headers(headers)
 headers.each_with_index do |h,h_i|
@@ -47,26 +47,30 @@ module GoogleDrive
 sheet.save
 end
 
-def merge(merge_sheet,
+def merge(merge_sheet,user_name,crop)
 #write the top left of sheet
 #with the contents of merge_sheet
 sheet = self
 sheet.reload
-entry = sheet.spreadsheet.acl_entry("#{
+entry = sheet.spreadsheet.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
 unless entry and ['writer','owner'].include?(entry.role)
-raise "User #{
+raise "User #{user_name} is not allowed to write to #{sheet.spreadsheet.title}"
 end
 merge_sheet.reload
 curr_rows = sheet.num_rows
 curr_cols = sheet.num_cols
 merge_rows = merge_sheet.num_rows
 merge_cols = merge_sheet.num_cols
+raise "zero sized merge sheet" if merge_rows == 0 or merge_cols == 0
 #make sure sheet is at least as big as necessary
-if
+#or as small as necessary if crop is specified
+if merge_rows > curr_rows or
+(merge_rows < curr_rows and crop==true)
 sheet.max_rows = merge_rows
 sheet.save
 end
-if merge_cols > curr_cols
+if merge_cols > curr_cols or
+(merge_cols < curr_cols and crop==true)
 sheet.max_cols = merge_cols
 sheet.save
 end
@@ -94,7 +98,7 @@ module GoogleDrive
 end
 end
 
-def write(tsv,user)
+def write(tsv,user,crop=true)
 sheet = self
 entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
 unless entry and ['writer','owner'].include?(entry.role)
@@ -110,11 +114,14 @@ module GoogleDrive
 curr_rows = sheet.num_rows
 curr_cols = sheet.num_cols
 #make sure sheet is at least as big as necessary
-
+#or small as necessary if crop
+if tsvrows.length > curr_rows or
+(tsvrows.length < curr_rows and crop==true)
 sheet.max_rows = tsvrows.length
 sheet.save
 end
-if headers.length
+if headers.length > curr_cols or
+(tsvrows.length < curr_rows and crop==true)
 sheet.max_cols = headers.length
 sheet.save
 end
@@ -124,13 +131,13 @@ module GoogleDrive
 tsvrows[batch_start..batch_end].each_with_index do |row,row_i|
 rowcols = row.split("\t")
 rowcols.each_with_index do |col_v,col_i|
-sheet[row_i+batch_start+1,col_i+1]= %{#{col_v}}
+sheet[row_i + batch_start + 1, col_i + 1]= %{#{col_v}}
 end
 end
 sheet.save
 batch_start += (batch_length + 1)
-rows_written+=batch_length
-if batch_start>tsvrows.length+1
+rows_written += batch_length
+if batch_start>tsvrows.length + 1
 break
 end
 end
@@ -141,6 +148,7 @@ module GoogleDrive
 sheet.reload
 #loading remote data for checksum
 rem_tsv = sheet.to_tsv
+return true if rem_tsv.to_s.length==0
 rem_table = rem_tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
 loc_table = tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
 re_col_vs = []
data/lib/mobilize-base/handlers/email.rb
CHANGED
@@ -12,13 +12,12 @@ module Mobilize
 :authentication => 'plain',
 :enable_starttls_auto => true }
 
-def write(
-bod="",
-recipient=Jobtracker.admin_emails.join(","))
+def write(params)
 mail(:from=>Gdrive.owner_email,
-:to=>
-:subject=>
-:body=>
+:to=>params['to'],
+:subject=>params['subject'],
+:body=>params['body'],
+:bcc=>params['bcc'])
 end
 end
 end
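A minimal sketch of how the reworked hash-based mailer might be invoked after this change. The hash keys and the `Email.write(...).deliver` call come from this diff; the recipient address and message text are illustrative:

```ruby
# Sketch only: assumes the mobilize-base environment is loaded.
notif = {
  'to'      => "analyst@example.com",                        # illustrative recipient
  'subject' => "1 new failed jobs, 1 failures",
  'body'    => "Runner path and failure details go here",    # illustrative body
  'bcc'     => Mobilize::Jobtracker.admin_emails.join(",")
}
Mobilize::Email.write(notif).deliver
```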
data/lib/mobilize-base/handlers/gbook.rb
CHANGED
@@ -3,7 +3,24 @@ module Mobilize
 def Gbook.find_all_by_path(path,gdrive_slot)
 Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
 end
+
+def Gbook.find_by_http_url(http_url,gdrive_slot)
+key = http_url.split("key=").last.split("#").first
+Gdrive.root(gdrive_slot).spreadsheet_by_key(key)
+end
+
 def Gbook.find_by_path(path,gdrive_slot)
+#first try to find a dataset with the URL
+dst = Dataset.find_by_handler_and_path('gbook',path)
+if dst and dst.http_url.to_s.length>0
+book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
+#doesn't count if it's deleted
+if book.entry_hash[:deleted]
+book = nil
+else
+return book
+end
+end
 books = Gbook.find_all_by_path(path,gdrive_slot)
 dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
 book = nil
@@ -15,6 +32,7 @@ module Mobilize
 bkey = b.resource_id.split(":").last
 if bkey == dkey
 book = b
+dst.update_attributes(:http_url=>book.human_url)
 else
 #delete the invalid book
 b.delete
@@ -25,6 +43,7 @@ module Mobilize
 #If it's a new dst or if there are multiple books
 #take the first
 book = books.first
+dst.update_attributes(:http_url=>book.human_url) if book
 end
 return book
 end
data/lib/mobilize-base/handlers/gdrive.rb
CHANGED
@@ -80,5 +80,18 @@ module Mobilize
 def Gdrive.books(gdrive_slot=nil,params={})
 Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
 end
+
+#email management - used to make sure not too many emails get used at the same time
+def Gdrive.slot_worker_by_path(path)
+working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
+Gdrive.workers.sort_by{rand}.each do |w|
+unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
+Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
+return [w['name'],Gdrive.domain].join("@")
+end
+end
+#return false if none are available
+return false
+end
 end
 end
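A sketch of the reserve/release pattern the handlers in this diff follow around gdrive_slot accounts; the stage path below is illustrative and `unslot_worker_by_path` is the release call used by the gsheet handler later in this diff:

```ruby
# Sketch only: reserve a worker account for a stage, do the Drive work, then release it.
stage_path = "Runner_mobilize(test)/jobs/base1/stage1"   # illustrative path
gdrive_slot = Mobilize::Gdrive.slot_worker_by_path(stage_path)
if gdrive_slot
  # ... read or write Google Drive data using gdrive_slot as the account ...
  Mobilize::Gdrive.unslot_worker_by_path(stage_path)
else
  # no worker account is free; stage methods in this diff return nil and re-enqueue
end
```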
data/lib/mobilize-base/handlers/gfile.rb
CHANGED
@@ -1,5 +1,47 @@
 module Mobilize
 module Gfile
+def Gfile.path_to_dst(path,stage_path)
+#don't need the ://
+path = path.split("://").last if path.index("://")
+if Gfile.find_by_path(path)
+handler = "gfile"
+Dataset.find_or_create_by_url("#{handler}://#{path}")
+else
+raise "unable to find #{path}"
+end
+end
+
+def Gfile.read_by_dataset_path(dst_path,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+file = Gfile.find_by_path(dst_path)
+file.read(user_name) if file
+end
+
+def Gfile.write_by_dataset_path(dst_path,string,user_name,*args)
+#ignores *args as all files must be created and owned by owner
+file = Gfile.find_by_path(dst_path)
+file.delete if file
+owner_root = Gdrive.root(Gdrive.owner_email)
+file = owner_root.upload_from_string(string,
+dst_path,
+:content_type=>"test/plain",
+:convert=>false)
+file.add_admin_acl
+#make sure user is owner or can edit
+u = User.where(:name=>user_name).first
+entry = file.acl_entry(u.email)
+unless entry and ['writer','owner'].include?(entry.role)
+file.update_acl(u.email)
+end
+#update http url for file
+dst = Dataset.find_by_handler_and_path("gfile",dst_path)
+dst.update_attributes(:http_url=>file.human_url)
+true
+end
+
 def Gfile.add_admin_acl_by_path(path)
 file = Gfile.find_by_path(path)
 file.add_admin_acl
@@ -18,18 +60,6 @@ module Mobilize
 file.update_acl(gdrive_slot,role)
 end
 
-def Gfile.read_by_stage_path(stage_path)
-#reserve gdrive_slot account for read
-gdrive_slot = Gdrive.slot_worker_by_path(s.path)
-return false unless gdrive_slot
-s = Stage.where(:path=>stage_path)
-gfile_path = s.params['file']
-out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
-#use Gridfs to cache result
-out_url = "gridfs://#{s.path}/out"
-Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
-end
-
 def Gfile.find_by_path(path)
 #file must be owned by owner
 gdrive_slot = Gdrive.owner_email
@@ -55,8 +85,10 @@ module Mobilize
 end
 #always make sure dataset http URL is up to date
 #and that it has admin acl
-
-
+if file
+dst.update_attributes(:http_url=>file.human_url)
+file.add_admin_acl
+end
 return file
 end
 end
data/lib/mobilize-base/handlers/gridfs.rb
CHANGED
@@ -11,7 +11,7 @@ module Mobilize
 return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
 end
 
-def Gridfs.read_by_dataset_path(dst_path,
+def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
 begin
 zs=Gridfs.grid.open(dst_path,'r').read
 return ::Zlib::Inflate.inflate(zs)
@@ -20,10 +20,10 @@ module Mobilize
 end
 end
 
-def Gridfs.write_by_dataset_path(dst_path,string,
+def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
 zs = ::Zlib::Deflate.deflate(string)
 raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-curr_zs = Gridfs.read_by_dataset_path(dst_path,
+curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
 #write a new version when there is a change
 if curr_zs != zs
 Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
data/lib/mobilize-base/handlers/gsheet.rb
CHANGED
@@ -9,9 +9,62 @@ module Mobilize
 Gsheet.config['max_cells']
 end
 
+# converts a source path or target path to a dst in the context of handler and stage
+def Gsheet.path_to_dst(path,stage_path)
+s = Stage.where(:path=>stage_path).first
+params = s.params
+target_path = params['target']
+#take random slot if one is not available
+gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
+#if this is the target, it doesn't have to exist already
+is_target = true if path == target_path
+#don't need the ://
+path = path.split("://").last if path.index("://")
+if path.split("/").length == 2
+if is_target or Gsheet.find_by_path(path,gdrive_slot)
+#user has specified path to a sheet
+return Dataset.find_or_create_by_url("gsheet://#{path}")
+else
+raise "unable to find #{path}"
+end
+else
+#user has specified a sheet
+runner_title = stage_path.split("/").first
+r = Runner.find_by_title(runner_title)
+if is_target or r.gbook(gdrive_slot).worksheets.map{|w| w.title}.include?(path)
+handler = "gsheet"
+path = "#{runner_title}/#{path}"
+elsif Gfile.find_by_path(path,gdrive_slot)
+handler = "gfile"
+path = "#{path}"
+else
+raise "unable to find #{path}"
+end
+return Dataset.find_or_create_by_url("#{handler}://#{path}")
+end
+end
+
+def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
+sheet.read(user_name) if sheet
+end
+
+def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot,crop = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+crop ||= true
+Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
+end
+
 def Gsheet.write(path,tsv,gdrive_slot)
 sheet = Gsheet.find_or_create_by_path(path,gdrive_slot)
-sheet.write(tsv)
+sheet.write(tsv,Gdrive.owner_name)
 end
 
 def Gsheet.find_by_path(path,gdrive_slot)
@@ -32,32 +85,9 @@ module Mobilize
 return sheet
 end
 
-def Gsheet.
-#reserve gdrive_slot account for read
-gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
-return false unless gdrive_slot
-s = Stage.where(:path=>stage_path).first
-user = s.job.runner.user.name
-source_dst = s.source_dsts(gdrive_slot).first
-out_tsv = source_dst.read(user)
-#use Gridfs to cache result
-out_url = "gridfs://#{s.path}/out"
-Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
-end
-
-def Gsheet.write_by_stage_path(stage_path)
-gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
-#return blank response if there are no slots available
-return nil unless gdrive_slot
-s = Stage.where(:path=>stage_path).first
-user = s.job.runner.user
-target_path = s.params['target']
-target_path = "#{s.job.runner.title}/#{target_path}" unless target_path.index("/")
-source_dst = s.source_dsts(gdrive_slot).first
-tsv = source_dst.read(user.name)
-sheet_name = target_path.split("/").last
-temp_path = [stage_path.gridsafe,sheet_name].join("/")
+def Gsheet.write_temp(target_path,gdrive_slot,tsv)
 #find and delete temp sheet, if any
+temp_path = [target_path.gridsafe,"temp"].join("/")
 temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
 temp_sheet.delete if temp_sheet
 #write data to temp sheet
@@ -70,28 +100,57 @@ module Mobilize
 return nil
 end
 temp_sheet.check_and_fix(tsv)
+temp_sheet
+end
+
+def Gsheet.write_target(target_path,tsv,user_name,gdrive_slot,crop=true)
+#write to temp sheet first, to ensure google compatibility
+#and fix any discrepancies due to spradsheet assumptions
+temp_sheet = Gsheet.write_temp(target_path,gdrive_slot,tsv)
+#try to find target sheet
 target_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
+u = User.where(:name=>user_name).first
 unless target_sheet
 #only give the user edit permissions if they're the ones
 #creating it
 target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
-target_sheet.spreadsheet.update_acl(
+target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
 target_sheet.delete_sheet1
 end
-#
-#
-
-target_sheet.merge(temp_sheet,user.name)
-rescue
-return nil
-end
+#pass it crop param to determine whether to shrink target sheet to fit data
+#default is yes
+target_sheet.merge(temp_sheet,user_name,crop)
 #delete the temp sheet's book
 temp_sheet.spreadsheet.delete
-
-
-
-
-
+target_sheet
+end
+
+def Gsheet.write_by_stage_path(stage_path)
+gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
+#return blank response if there are no slots available
+return nil unless gdrive_slot
+s = Stage.where(:path=>stage_path).first
+u = s.job.runner.user
+crop = s.params['crop'] || true
+begin
+#get tsv to write from stage
+source = s.sources.first
+raise "Need source for gsheet write" unless source
+tsv = source.read(u.name,gdrive_slot)
+raise "No data found in #{source.url}" unless tsv
+Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
+Gdrive.unslot_worker_by_path(stage_path)
+#update status
+stdout = "Write successful for #{s.target.url}"
+stderr = nil
+s.update_status(stdout)
+signal = 0
+rescue => exc
+stdout = nil
+stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
+signal = 500
+end
+return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
 end
 end
 end
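A sketch of how the new path_to_dst resolution lines up with the three source/target forms the README describes; the runner, sheet, and file names are illustrative, and the gfile form is routed to the Gfile handler by the Stage#sources logic shown later in this diff:

```ruby
# Sketch only: assumes a stage at the illustrative path below already exists.
stage_path = "Runner_mobilize(test)/jobs/base1/stage1"
# "<gbook_name>/<gsheet_name>" resolves directly to a gsheet:// dataset in that book
Mobilize::Gsheet.path_to_dst("Requestor_mobilize(test)/base1.out", stage_path)
# a bare "<gsheet_name>" resolves against the stage's own Runner book
Mobilize::Gsheet.path_to_dst("base1.out", stage_path)
# a "gfile://<name>" source is handled by the Gfile handler instead
Mobilize::Gfile.path_to_dst("gfile://test_base_1.tsv", stage_path)
```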
data/lib/mobilize-base/handlers/resque.rb
CHANGED
@@ -103,23 +103,28 @@ module Mobilize
 end
 end
 
-def Resque.
+def Resque.new_failures_by_email
 fjobs = {}
-
+exc_to_s = Hash.new(0)
 Resque.failures.each_with_index do |f,f_i|
 #skip if already notified
 next if f['notified']
-
-
-
-
-
-fjobs[
+stage_path = f['payload']['args'].first
+s = Stage.where(:path=>stage_path).first
+email = s.job.runner.user.email
+exc_to_s = f['error']
+if fjobs[email].nil?
+fjobs[email] = {stage_path => {exc_to_s => 1}}
+elsif fjobs[email][stage_path].nil?
+fjobs[email][stage_path] = {exc_to_s => 1}
+elsif fjobs[email][stage_path][exc_to_s].nil?
+fjobs[email][stage_path][exc_to_s] = 1
 else
-fjobs[
+fjobs[email][stage_path][exc_to_s] += 1
 end
 #add notified flag to redis
 f['notified'] = true
+#tag stage with email
 ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
 end
 return fjobs
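Per the code above, Resque.new_failures_by_email returns a nested hash of the shape {email => {stage_path => {exception_string => count}}}; a sketch with illustrative values (the exception string matches the one asserted in the updated test):

```ruby
# Sketch only: illustrative return value of Mobilize::Resque.new_failures_by_email.
{
  "analyst@example.com" => {
    "Runner_mobilize(test)/jobs/base1/stage1" => {
      "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail" => 1
    }
  }
}
```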
data/lib/mobilize-base/jobtracker.rb
CHANGED
@@ -163,27 +163,39 @@ module Mobilize
 if Jobtracker.notif_due?
 notifs = []
 if Jobtracker.failures.length>0
-
-
+failure_hash = Resque.new_failures_by_email
+failure_hash.each do |email,stage_paths|
 n = {}
-n['
+n['subject'] = "#{stage_paths.keys.length.to_s} new failed jobs, #{stage_paths.values.map{|v| v.values}.flatten.sum.to_s} failures"
 #one row per exception type, with the job name
-n['body'] =
+n['body'] = stage_paths.map do |path,exceptions|
+exceptions.map do |exc_to_s,times|
+[path," : ",exc_to_s,", ",times," times"].join
+end
+end.flatten.join("\n\n")
+u = User.where(:name=>email.split("@").first).first
+runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
+n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
+n['to'] = email
+n['bcc'] = Jobtracker.admin_emails.join(",")
 notifs << n
 end
 end
 lws = Jobtracker.max_run_time_workers
 if lws.length>0
 n = {}
-n['
+n['subject'] = "#{lws.length.to_s} max run time jobs"
 n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
+n['to'] = Jobtracker.admin_emails.join(",")
 notifs << n
 end
+#deliver each email generated
 notifs.each do |notif|
-Email.write(
-Jobtracker.last_notification=Time.now.utc.to_s
-Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
+Email.write(notif).deliver
 end
+#update notification time so JT knows to wait a while
+Jobtracker.last_notification = Time.now.utc.to_s
+Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
 end
 return true
 end
data/lib/mobilize-base/models/dataset.rb
CHANGED
@@ -13,9 +13,23 @@ module Mobilize
 
 index({ handler: 1, path: 1}, { unique: true})
 
-def
+def url
+s = self
+"#{s.handler}://#{s.path}"
+end
+
+def read(user_name,*args)
 dst = self
-
+dst.update_attributes(:last_read_at=>Time.now.utc)
+"Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user_name,*args)
+end
+
+def write(string,user_name,*args)
+dst = self
+"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user_name,*args)
+dst.raw_size = string.length
+dst.save!
+return true
 end
 
 def Dataset.find_by_url(url)
@@ -38,24 +52,15 @@ module Mobilize
 return dst
 end
 
-def Dataset.
-dst = Dataset.
-dst.
-url
+def Dataset.read_by_url(url,user_name,*args)
+dst = Dataset.find_by_url(url)
+dst.read(user_name,*args) if dst
 end
 
-def
-dst =
-dst.
-
-end
-
-def write(string,user)
-dst = self
-"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
-dst.raw_size = string.length
-dst.save!
-return true
+def Dataset.write_by_url(url,string,user_name,*args)
+dst = Dataset.find_or_create_by_url(url)
+dst.write(string,user_name,*args)
+url
 end
 end
 end
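A short sketch of the URL-based Dataset API introduced here, mirroring the calls the updated test makes; the file name, TSV content, and user name are illustrative:

```ruby
# Sketch only: write a dataset by URL, read it back, and inspect its canonical url.
file_url = "gfile://test_base_1.tsv"
Mobilize::Dataset.write_by_url(file_url, "name\tvalue\nfoo\t1\n", "mobilize")
tsv = Mobilize::Dataset.read_by_url(file_url, "mobilize")
Mobilize::Dataset.find_by_url(file_url).url   #=> "gfile://test_base_1.tsv"
```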
data/lib/mobilize-base/models/runner.rb
CHANGED
@@ -15,11 +15,6 @@ module Mobilize
 %w{name active trigger status stage1 stage2 stage3 stage4 stage5}
 end
 
-def cached_at
-r = self
-Dataset.find_or_create_by_path(r.path).cached_at
-end
-
 def title
 r = self
 r.path.split("/").first
@@ -34,6 +29,9 @@ module Mobilize
 Runner.where(:path=>path).first
 end
 
+def Runner.find_by_title(title)
+Runner.where(:path=>"#{title}/jobs").first
+end
 def Runner.perform(id,*args)
 r = Runner.find_by_path(id)
 #get gdrive slot for read
@@ -53,7 +51,9 @@ module Mobilize
 begin
 if j.is_due?
 j.update_attributes(:active=>false) if j.trigger=='once'
-j.stages.first
+s = j.stages.first
+s.update_attributes(:retries_done=>0)
+s.enqueue!
 end
 rescue ScriptError, StandardError => exc
 r.update_status("Failed to enqueue #{j.path} with #{exc.to_s}")
@@ -73,11 +73,6 @@ module Mobilize
 Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
 end
 
-def cache
-r = self
-Dataset.find_or_create_by_url("gridfs://#{r.path}")
-end
-
 def gbook(gdrive_slot)
 r = self
 title = r.path.split("/").first
@@ -86,17 +81,20 @@ module Mobilize
 
 def gsheet(gdrive_slot)
 r = self
+u = r.user
 jobs_sheet = Gsheet.find_by_path(r.path,gdrive_slot)
 #make sure the user has a runner with a jobs sheet and has write privileges on the spreadsheet
-unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(
+unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="writer"})
 #only give the user edit permissions if they're the ones
 #creating it
 jobs_sheet = Gsheet.find_or_create_by_path(r.path,gdrive_slot)
-unless jobs_sheet.spreadsheet.acl_entry(
-jobs_sheet.spreadsheet.update_acl(
+unless jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
+jobs_sheet.spreadsheet.update_acl(u.email,"writer")
 end
 end
 jobs_sheet.add_headers(r.headers)
+#add url to dataset
+Dataset.find_or_create_by_url("gsheet://#{r.path}").update_attributes(:http_url=>jobs_sheet.spreadsheet.human_url)
 begin;jobs_sheet.delete_sheet1;rescue;end #don't care if sheet1 deletion fails
 return jobs_sheet
 end
@@ -104,8 +102,6 @@ module Mobilize
 def read_gsheet(gdrive_slot)
 r = self
 gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
-#cache in DB
-r.cache.write(gsheet_tsv,Gdrive.owner_name)
 #turn it into a hash array
 gsheet_jobs = gsheet_tsv.tsv_to_hash_array
 #go through each job, update relevant job with its params
@@ -122,8 +118,13 @@ module Mobilize
 stage_string = rj["stage#{s_idx.to_s}"]
 s = Stage.find_by_path("#{j.path}/stage#{s_idx.to_s}")
 if stage_string.to_s.length==0
-#delete this stage
-
+#delete this stage and all stages after
+if s
+j = s.job
+j.stages[(s.idx-1)..-1].each{|ps| ps.delete}
+#just in case
+s.delete
+end
 break
 elsif s.nil?
 #create this stage
data/lib/mobilize-base/models/stage.rb
CHANGED
@@ -7,7 +7,8 @@ module Mobilize
 field :call, type: String
 field :param_string, type: Array
 field :status, type: String
-field :
+field :response, type: Hash
+field :retries_done, type: Fixnum
 field :completed_at, type: Time
 field :started_at, type: Time
 field :failed_at, type: Time
@@ -25,7 +26,15 @@ module Mobilize
 #allowing you to determine its size
 #before committing to a read or write
 s = self
-Dataset.find_by_url(s.out_url) if s.out_url
+Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
+end
+
+def err_dst
+#this gives a dataset that points to the output
+#allowing you to determine its size
+#before committing to a read or write
+s = self
+Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
 end
 
 def params
@@ -68,84 +77,91 @@ module Mobilize
 
 def Stage.perform(id,*args)
 s = Stage.where(:path=>id).first
-j = s.job
 s.update_attributes(:started_at=>Time.now.utc)
 s.update_status(%{Starting at #{Time.now.utc}})
-
-
-
-
-
-
-s.enqueue!
-return false
-end
-rescue ScriptError, StandardError => exc
-j.update_attributes(:active=>false)
-s.update_attributes(:failed_at=>Time.now.utc)
-s.update_status("Failed at #{Time.now.utc.to_s}")
-raise exc
+#get response by running method
+response = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
+unless response
+#re-queue self if no response
+s.enqueue!
+return false
 end
-
+if response['signal'] == 0
+s.complete(response)
+elsif s.retries_done.to_i < s.params['retries'].to_i
+#retry
+s.update_attributes(:retries_done => s.retries_done.to_i + 1, :response => response)
+s.update_status(%{Retry #{s.retries_done.to_s} at #{Time.now.utc}})
+s.enqueue!
+else
+#sleep as much as user specifies
+sleep s['delay'].to_i
+s.fail(response)
+end
+return true
+end
+
+def complete(response)
+s = self
+s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
 s.update_status("Completed at #{Time.now.utc.to_s}")
+j = s.job
 if s.idx == j.stages.length
 #check for any dependent jobs, if there are, enqueue them
 r = j.runner
-dep_jobs = r.jobs.select
+dep_jobs = r.jobs.select do |dj|
+dj.active==true and
+dj.trigger.strip.downcase == "after #{j.name}"
+end
 #put begin/rescue so all dependencies run
-dep_jobs.each
+dep_jobs.each do |dj|
+begin
+unless dj.is_working?
+dj.stages.first.update_attributes(:retries_done=>0)
+dj.stages.first.enqueue!
+end
+rescue
+#job won't run if error, log it a failure
+response = {"err_str" => "Unable to enqueue first stage of #{dj.path}"}
+dj.stages.first.fail(response)
+end
+end
 else
 #queue up next stage
+s.next.update_attributes(:retries_done=>0)
 s.next.enqueue!
 end
-
+true
 end
 
-def
-#
-
-#or dataset pointers for other handlers
+def fail(response,gdrive_slot=nil)
+#get random worker if one is not provided
+gdrive_slot ||= Gdrive.worker_emails.sort_by{rand}.first
 s = self
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-runner_sheet = r.gbook(gdrive_slot).worksheet_by_title(source_path)
-out_tsv = if runner_sheet
-runner_sheet.read(user)
-else
-#check for gfile. will fail if there isn't one.
-Gfile.find_by_path(source_path).read(user)
-end
-end
-#use Gridfs to cache gdrive results
-file_name = source_path.split("/").last
-out_url = "gridfs://#{s.path}/#{file_name}"
-Dataset.write_by_url(out_url,out_tsv,user)
-dsts << Dataset.find_by_url(out_url)
-end
-end
-return dsts
+j = s.job
+r = j.runner
+u = r.user
+j.update_attributes(:active=>false)
+s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
+stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
+target_path = (r.path.split("/")[0..-2] + [stage_name]).join("/")
+status_msg = "Failed at #{Time.now.utc.to_s}"
+#read err txt, add err sheet, write to it
+err_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
+err_sheet.delete if err_sheet
+err_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
+err_txt = if response['err_url']
+Dataset.read_by_url(response['err_url'],u.name)
+elsif response['err_str']
+response['err_str']
+end
+err_txt = ["response","\n",err_txt].join
+err_sheet.write(err_txt,u.name)
+#exception will be first row below "response" header
+exc_to_s,backtrace = err_txt.split("\n").ie{|ea| [ea[1], ea[2..-1]]}
+s.update_status(status_msg)
+#raise the exception so it bubbles up to resque
+raise Exception,exc_to_s,backtrace
 end
 
 def enqueue!
@@ -180,5 +196,61 @@ module Mobilize
 s = self
 Mobilize::Resque.active_paths.include?(s.path)
 end
+
+def target
+s = self
+params = s.params
+target_path = params['target']
+handler,path = target_path.split("://")
+#if the user has specified a url for a target
+#that is not this stage's handler, disallow
+if handler and path and handler != s.handler
+raise "incompatible target handler #{handler} for #{s.handler} stage"
+else
+begin
+return "Mobilize::#{s.handler.downcase.capitalize}".constantize.path_to_dst(target_path,s.path)
+rescue => exc
+raise "Could not get #{target_path} with error: #{exc.to_s}"
+end
+end
+end
+
+def sources
+#returns an array of Datasets corresponding to
+#items listed as sources in the stage params
+s = self
+params = s.params
+job = s.job
+runner = job.runner
+source_paths = if params['sources']
+params['sources']
+elsif params['source']
+[params['source']]
+end
+return [] if (source_paths.class!=Array or source_paths.length==0)
+dsts = []
+source_paths.each do |source_path|
+if source_path.index(/^stage[1-5]$/)
+#stage arguments return the stage's output dst url
+source_stage_path = "#{runner.path}/#{job.name}/#{source_path}"
+source_stage = Stage.where(:path=>source_stage_path).first
+source_stage_out_url = source_stage.response['out_url']
+dsts << Dataset.find_by_url(source_stage_out_url)
+else
+handler = if source_path.index("://")
+source_path.split("://").first
+else
+s.handler
+end
+begin
+stage_path = s.path
+dsts << "Mobilize::#{handler.downcase.capitalize}".constantize.path_to_dst(source_path,stage_path)
+rescue => exc
+raise "Could not get #{source_path} with error: #{exc.to_s}"
+end
+end
+end
+return dsts
+end
 end
 end
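For readers writing their own handlers against the reworked Stage model, here is a sketch of the response contract Stage.perform now expects from a handler's `<call>_by_stage_path` method: return nil to re-enqueue, or a hash whose 'signal' of 0 completes the stage; any other signal retries until params['retries'] is exhausted and then fails, writing err_str/err_url to a "<job>_stage<idx>.err" tab. The handler name below is illustrative, not part of the gem:

```ruby
# Sketch only: a hypothetical handler following the contract implied by this diff.
module Mobilize
  module Mynewhandler   # illustrative handler name
    def Mynewhandler.write_by_stage_path(stage_path)
      begin
        # ... do the stage's work for the Stage at stage_path here ...
        stdout, stderr, signal = "Write successful", nil, 0
      rescue => exc
        stdout = nil
        stderr = [exc.to_s, "\n", exc.backtrace.join("\n")].join
        signal = 500
      end
      {'out_str' => stdout, 'err_str' => stderr, 'signal' => signal}
    end
  end
end
```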
data/mobilize-base.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
 s.name = "mobilize-base"
 s.version = Mobilize::Base::VERSION
 s.authors = ["Cassio Paes-Leme"]
-s.email = ["cpaesleme@
+s.email = ["cpaesleme@dena.com"]
 s.homepage = "http://github.com/ngmoco/mobilize-base"
 s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
 s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
data/test/base_job_rows.yml
CHANGED
data/test/mobilize-base_test.rb
CHANGED
@@ -30,38 +30,77 @@ describe "Mobilize" do
 
 puts "Jobtracker created runner with 'jobs' sheet?"
 r = u.runner
-
-
-
-
-
-
-
-
+jobs_sheet_url = "gsheet://#{r.path}"
+jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
+jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
+jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
+assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
+
+#stop Jobtracker, if you're doing this by queueing runners
+#Mobilize::Jobtracker.stop!
+
+puts "add base1 input file"
+test_filename = "test_base_1"
+file_url = "gfile://#{test_filename}.tsv"
+test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
 test_source_tsv = test_source_ha.hash_array_to_tsv
-
+Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
+rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
+assert rem_tsv == test_source_tsv
 
-puts "add row to jobs sheet, wait
+puts "add row to jobs sheet, wait for stages"
 test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
+jobs_sheet.reload
 jobs_sheet.add_or_update_rows(test_job_rows)
-
+#wait for stages to complete
+#r.enqueue!
+wait_for_stages
 
 puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
-
-
+test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
+test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
+test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
 
-
+test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
+test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
 
-
-[test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
+assert test_1_tsv == test_2_tsv
 
-
-
+puts "change first job to fail, wait for stages"
+test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
+Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
+jobs_sheet.add_or_update_rows(test_job_rows)
 
-
-
-assert test_target_sheet_2.read(user_name) == test_source_sheet.read(user_name)
+#wait for stages to complete
+wait_for_stages
 
+test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
+puts "jobtracker posted failing test error to sheet "
+error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
+assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
+Mobilize::Jobtracker.stop!
 end
 
+def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
+time = 0
+time_since_stage = 0
+#check for 10 min
+while time < time_limit and time_since_stage < stage_limit
+sleep wait_length
+job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
+if job_classes.include?("Mobilize::Stage")
+time_since_stage = 0
+puts "saw stage at #{time.to_s} seconds"
+else
+time_since_stage += wait_length
+puts "#{time_since_stage.to_s} seconds since stage seen"
+end
+time += wait_length
+puts "total wait time #{time.to_s} seconds"
+end
+
+if time >= time_limit
+raise "Timed out before stage completion"
+end
+end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-base
 version: !ruby/object:Gem::Version
-version: 1.
+version: '1.2'
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-
+date: 2013-03-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: rake
@@ -177,7 +177,7 @@ description: ! "Manage your organization's workflows entirely through Google Doc
 and -mongodb packages\n to allow seamless transport of TSV and
 JSON data between any two endpoints. "
 email:
-- cpaesleme@
+- cpaesleme@dena.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -220,10 +220,10 @@ files:
 - lib/samples/resque.yml
 - lib/samples/resque_web.rb
 - mobilize-base.gemspec
-- test/base1_stage1.yml
 - test/base_job_rows.yml
 - test/mobilize-base_test.rb
 - test/redis-test.conf
+- test/test_base_1.yml
 - test/test_helper.rb
 homepage: http://github.com/ngmoco/mobilize-base
 licenses: []
@@ -239,7 +239,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
 version: '0'
 segments:
 - 0
-hash:
+hash: -2718067622627955864
 required_rubygems_version: !ruby/object:Gem::Requirement
 none: false
 requirements:
@@ -248,7 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 version: '0'
 segments:
 - 0
-hash:
+hash: -2718067622627955864
 requirements: []
 rubyforge_project: mobilize-base
 rubygems_version: 1.8.24
@@ -257,8 +257,8 @@ specification_version: 3
 summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
 Docs
 test_files:
-- test/base1_stage1.yml
 - test/base_job_rows.yml
 - test/mobilize-base_test.rb
 - test/redis-test.conf
+- test/test_base_1.yml
 - test/test_helper.rb
data/test/{base1_stage1.yml → test_base_1.yml}
RENAMED
File without changes