mobilize-base 1.1.10 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -9
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +4 -4
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -6
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +22 -14
- data/lib/mobilize-base/extensions/string.rb +1 -0
- data/lib/mobilize-base/handlers/email.rb +5 -6
- data/lib/mobilize-base/handlers/gbook.rb +19 -0
- data/lib/mobilize-base/handlers/gdrive.rb +13 -0
- data/lib/mobilize-base/handlers/gfile.rb +46 -14
- data/lib/mobilize-base/handlers/gridfs.rb +3 -3
- data/lib/mobilize-base/handlers/gsheet.rb +98 -39
- data/lib/mobilize-base/handlers/resque.rb +14 -9
- data/lib/mobilize-base/jobtracker.rb +20 -8
- data/lib/mobilize-base/models/dataset.rb +23 -18
- data/lib/mobilize-base/models/runner.rb +19 -18
- data/lib/mobilize-base/models/stage.rb +137 -65
- data/lib/mobilize-base/version.rb +1 -1
- data/mobilize-base.gemspec +1 -1
- data/test/base_job_rows.yml +1 -2
- data/test/mobilize-base_test.rb +60 -21
- metadata +7 -7
- /data/test/{base1_stage1.yml → test_base_1.yml} +0 -0
    
        data/README.md
    CHANGED
    
    | @@ -552,18 +552,23 @@ stage. These should be of the form `<key1>: <value1>, <key2>: <value2>`, where | |
| 552 552 | 
             
            `<key>` is an unquoted string and `<value>` is a quoted string, an
         | 
| 553 553 | 
             
            integer, an array (delimited by square braces), or a hash (delimited by
         | 
| 554 554 | 
             
            curly braces).
         | 
| 555 | 
            -
                * For mobilize-base, the following  | 
| 556 | 
            -
                  * gsheet. | 
| 557 | 
            -
                    * The  | 
| 558 | 
            -
            `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
         | 
| 559 | 
            -
            the Runner itself.  | 
| 560 | 
            -
             | 
| 561 | 
            -
             | 
| 562 | 
            -
             | 
| 555 | 
            +
                * For mobilize-base, the following stage is available:
         | 
| 556 | 
            +
                  * gsheet.write `source: <input_path>`, which reads the sheet. 
         | 
| 557 | 
            +
                    * The input_path should be of the form:
         | 
| 558 | 
            +
                      * `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
         | 
| 559 | 
            +
            the Runner itself. 
         | 
| 560 | 
            +
                      * `gfile://<gfile_name>` if the target is a file.  
         | 
| 561 | 
            +
                        * The file must be owned by the Gdrive owner.
         | 
| 562 | 
            +
                        * The test uses "gfile://test_base_1.tsv".
         | 
| 563 | 
            +
                * The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
         | 
| 563 564 | 
             
            and "base1.out" for the second test. The first
         | 
| 564 565 | 
             
            takes the output from the first stage and the second reads it straight
         | 
| 565 566 | 
             
            from the referenced sheet.
         | 
| 566 | 
            -
             | 
| 567 | 
            +
                * All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will retry the stage before
         | 
| 568 | 
            +
            giving up.
         | 
| 569 | 
            +
                * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension.
         | 
| 570 | 
            +
                  * The tab will be headed "response" and will contain the exception and backtrace for the error.
         | 
| 571 | 
            +
                * The test uses "Requestor_mobilize(test)/base1.out" and
         | 
| 567 572 | 
             
            "Runner_mobilize(test)/base2.out" for target sheets.
         | 
| 568 573 |  | 
| 569 574 | 
             
            <a name='section_Start_Run_Test'></a>
         | 
| @@ -8,7 +8,7 @@ module GoogleDrive | |
| 8 8 | 
             
                  attempts = 0
         | 
| 9 9 | 
             
                  sleep_time = nil
         | 
| 10 10 | 
             
                  #try up to 20 times to make the call
         | 
| 11 | 
            -
                  while (response.nil? or response.code. | 
| 11 | 
            +
                  while (response.nil? or response.code.starts_with?("5")) and attempts < 20
         | 
| 12 12 | 
             
                    #instantiate http object, set params
         | 
| 13 13 | 
             
                    http = @proxy.new(uri.host, uri.port)
         | 
| 14 14 | 
             
                    http.use_ssl = true
         | 
| @@ -21,10 +21,10 @@ module GoogleDrive | |
| 21 21 | 
             
                                 #timeouts etc.
         | 
| 22 22 | 
             
                                 nil
         | 
| 23 23 | 
             
                               end
         | 
| 24 | 
            -
                    if response.nil?
         | 
| 24 | 
            +
                    if response.nil? or response.code.starts_with?("4")
         | 
| 25 25 | 
             
                      attempts +=1
         | 
| 26 | 
            -
                     | 
| 27 | 
            -
                      if response.code. | 
| 26 | 
            +
                    elsif
         | 
| 27 | 
            +
                      if response.code.starts_with?("5")
         | 
| 28 28 | 
             
                        #wait 10 seconds times number of attempts squared in case of error
         | 
| 29 29 | 
             
                        sleep_time = 10 * (attempts*attempts)
         | 
| 30 30 | 
             
                        attempts += 1
         | 
| @@ -13,15 +13,16 @@ module GoogleDrive | |
| 13 13 | 
             
                  f = self
         | 
| 14 14 | 
             
                  #admin includes workers
         | 
| 15 15 | 
             
                  return true if f.has_admin_acl?
         | 
| 16 | 
            -
                  (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails) | 
| 17 | 
            -
             | 
| 16 | 
            +
                  accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
         | 
| 17 | 
            +
                  accounts.each do |email|
         | 
| 18 | 
            +
                    f.update_acl(email)
         | 
| 18 19 | 
             
                  end
         | 
| 19 20 | 
             
                end
         | 
| 20 21 |  | 
| 21 22 | 
             
                def has_admin_acl?
         | 
| 22 23 | 
             
                  f = self
         | 
| 23 24 | 
             
                  curr_emails = f.acls.map{|a| a.scope}.sort
         | 
| 24 | 
            -
                  admin_emails = Mobilize::Gdrive.admin_emails. | 
| 25 | 
            +
                  admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
         | 
| 25 26 | 
             
                  if (curr_emails & admin_emails) == admin_emails
         | 
| 26 27 | 
             
                    return true
         | 
| 27 28 | 
             
                  else
         | 
| @@ -40,13 +41,13 @@ module GoogleDrive | |
| 40 41 | 
             
                  end
         | 
| 41 42 | 
             
                end
         | 
| 42 43 |  | 
| 43 | 
            -
                def read( | 
| 44 | 
            +
                def read(user_name)
         | 
| 44 45 | 
             
                  f = self
         | 
| 45 | 
            -
                  entry = f.acl_entry("#{ | 
| 46 | 
            +
                  entry = f.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
         | 
| 46 47 | 
             
                  if entry and ['reader','writer','owner'].include?(entry.role)
         | 
| 47 48 | 
             
                    f.download_to_string
         | 
| 48 49 | 
             
                  else
         | 
| 49 | 
            -
                    raise "User #{ | 
| 50 | 
            +
                    raise "User #{user_name} is not allowed to read #{f.title}"
         | 
| 50 51 | 
             
                  end
         | 
| 51 52 | 
             
                end
         | 
| 52 53 |  | 
| @@ -6,11 +6,11 @@ module GoogleDrive | |
| 6 6 | 
             
                  header = rows.first
         | 
| 7 7 | 
             
                  return nil unless header and header.first.to_s.length>0
         | 
| 8 8 | 
             
                  #look for blank cols to indicate end of row
         | 
| 9 | 
            -
                   | 
| 10 | 
            -
                   | 
| 9 | 
            +
                  col_last_i = (header.index("") || header.length)-1
         | 
| 10 | 
            +
                  #ignore user-entered line breaks for purposes of tsv reads
         | 
| 11 | 
            +
                  out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
         | 
| 11 12 | 
             
                  out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
         | 
| 12 13 | 
             
                                            Mobilize::Gsheet.config['read_date_format'])
         | 
| 13 | 
            -
             | 
| 14 14 | 
             
                end
         | 
| 15 15 | 
             
                def add_headers(headers)
         | 
| 16 16 | 
             
                  headers.each_with_index do |h,h_i|
         | 
| @@ -47,26 +47,30 @@ module GoogleDrive | |
| 47 47 | 
             
                  sheet.save
         | 
| 48 48 | 
             
                end
         | 
| 49 49 |  | 
| 50 | 
            -
                def merge(merge_sheet, | 
| 50 | 
            +
                def merge(merge_sheet,user_name,crop)
         | 
| 51 51 | 
             
                  #write the top left of sheet
         | 
| 52 52 | 
             
                  #with the contents of merge_sheet
         | 
| 53 53 | 
             
                  sheet = self
         | 
| 54 54 | 
             
                  sheet.reload
         | 
| 55 | 
            -
                  entry = sheet.spreadsheet.acl_entry("#{ | 
| 55 | 
            +
                  entry = sheet.spreadsheet.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
         | 
| 56 56 | 
             
                  unless entry and ['writer','owner'].include?(entry.role)
         | 
| 57 | 
            -
                    raise "User #{ | 
| 57 | 
            +
                    raise "User #{user_name} is not allowed to write to #{sheet.spreadsheet.title}"
         | 
| 58 58 | 
             
                  end
         | 
| 59 59 | 
             
                  merge_sheet.reload
         | 
| 60 60 | 
             
                  curr_rows = sheet.num_rows
         | 
| 61 61 | 
             
                  curr_cols = sheet.num_cols
         | 
| 62 62 | 
             
                  merge_rows = merge_sheet.num_rows
         | 
| 63 63 | 
             
                  merge_cols = merge_sheet.num_cols
         | 
| 64 | 
            +
                  raise "zero sized merge sheet" if merge_rows == 0 or merge_cols == 0
         | 
| 64 65 | 
             
                  #make sure sheet is at least as big as necessary
         | 
| 65 | 
            -
                  if  | 
| 66 | 
            +
                  #or as small as necessary if crop is specified
         | 
| 67 | 
            +
                  if merge_rows > curr_rows or
         | 
| 68 | 
            +
                    (merge_rows < curr_rows and crop==true)
         | 
| 66 69 | 
             
                    sheet.max_rows = merge_rows
         | 
| 67 70 | 
             
                    sheet.save
         | 
| 68 71 | 
             
                  end
         | 
| 69 | 
            -
                  if merge_cols > curr_cols
         | 
| 72 | 
            +
                  if merge_cols > curr_cols or
         | 
| 73 | 
            +
                    (merge_cols < curr_cols and crop==true)
         | 
| 70 74 | 
             
                    sheet.max_cols = merge_cols
         | 
| 71 75 | 
             
                    sheet.save
         | 
| 72 76 | 
             
                  end
         | 
| @@ -94,7 +98,7 @@ module GoogleDrive | |
| 94 98 | 
             
                  end
         | 
| 95 99 | 
             
                end
         | 
| 96 100 |  | 
| 97 | 
            -
                def write(tsv,user)
         | 
| 101 | 
            +
                def write(tsv,user,crop=true)
         | 
| 98 102 | 
             
                  sheet = self
         | 
| 99 103 | 
             
                  entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
         | 
| 100 104 | 
             
                  unless entry and ['writer','owner'].include?(entry.role)
         | 
| @@ -110,11 +114,14 @@ module GoogleDrive | |
| 110 114 | 
             
                  curr_rows = sheet.num_rows
         | 
| 111 115 | 
             
                  curr_cols = sheet.num_cols
         | 
| 112 116 | 
             
                  #make sure sheet is at least as big as necessary
         | 
| 113 | 
            -
                   | 
| 117 | 
            +
                  #or small as necessary if crop
         | 
| 118 | 
            +
                  if tsvrows.length > curr_rows or
         | 
| 119 | 
            +
                    (tsvrows.length < curr_rows and crop==true)
         | 
| 114 120 | 
             
                    sheet.max_rows = tsvrows.length
         | 
| 115 121 | 
             
                    sheet.save
         | 
| 116 122 | 
             
                  end
         | 
| 117 | 
            -
                  if headers.length  | 
| 123 | 
            +
                  if headers.length > curr_cols or
         | 
| 124 | 
            +
                    (tsvrows.length < curr_rows and crop==true)
         | 
| 118 125 | 
             
                    sheet.max_cols = headers.length
         | 
| 119 126 | 
             
                    sheet.save
         | 
| 120 127 | 
             
                  end
         | 
| @@ -124,13 +131,13 @@ module GoogleDrive | |
| 124 131 | 
             
                    tsvrows[batch_start..batch_end].each_with_index do |row,row_i|
         | 
| 125 132 | 
             
                      rowcols = row.split("\t")
         | 
| 126 133 | 
             
                      rowcols.each_with_index do |col_v,col_i|
         | 
| 127 | 
            -
                        sheet[row_i+batch_start+1,col_i+1]= %{#{col_v}}
         | 
| 134 | 
            +
                        sheet[row_i + batch_start + 1, col_i + 1]= %{#{col_v}}
         | 
| 128 135 | 
             
                      end
         | 
| 129 136 | 
             
                    end
         | 
| 130 137 | 
             
                    sheet.save
         | 
| 131 138 | 
             
                    batch_start += (batch_length + 1)
         | 
| 132 | 
            -
                    rows_written+=batch_length
         | 
| 133 | 
            -
                    if batch_start>tsvrows.length+1
         | 
| 139 | 
            +
                    rows_written += batch_length
         | 
| 140 | 
            +
                    if batch_start>tsvrows.length + 1
         | 
| 134 141 | 
             
                     break
         | 
| 135 142 | 
             
                    end
         | 
| 136 143 | 
             
                  end
         | 
| @@ -141,6 +148,7 @@ module GoogleDrive | |
| 141 148 | 
             
                  sheet.reload
         | 
| 142 149 | 
             
                  #loading remote data for checksum
         | 
| 143 150 | 
             
                  rem_tsv = sheet.to_tsv
         | 
| 151 | 
            +
                  return true if rem_tsv.to_s.length==0
         | 
| 144 152 | 
             
                  rem_table = rem_tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
         | 
| 145 153 | 
             
                  loc_table = tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
         | 
| 146 154 | 
             
                  re_col_vs = []
         | 
| @@ -12,13 +12,12 @@ module Mobilize | |
| 12 12 | 
             
                :authentication       => 'plain',
         | 
| 13 13 | 
             
                :enable_starttls_auto => true  }
         | 
| 14 14 |  | 
| 15 | 
            -
                def write( | 
| 16 | 
            -
                                  bod="", 
         | 
| 17 | 
            -
                                  recipient=Jobtracker.admin_emails.join(","))
         | 
| 15 | 
            +
                def write(params)
         | 
| 18 16 | 
             
                  mail(:from=>Gdrive.owner_email,
         | 
| 19 | 
            -
                       :to=> | 
| 20 | 
            -
                       :subject=> | 
| 21 | 
            -
                       :body=> | 
| 17 | 
            +
                       :to=>params['to'], 
         | 
| 18 | 
            +
                       :subject=>params['subject'], 
         | 
| 19 | 
            +
                       :body=>params['body'],
         | 
| 20 | 
            +
                       :bcc=>params['bcc'])
         | 
| 22 21 | 
             
                end
         | 
| 23 22 | 
             
              end
         | 
| 24 23 | 
             
            end
         | 
| @@ -3,7 +3,24 @@ module Mobilize | |
| 3 3 | 
             
                def Gbook.find_all_by_path(path,gdrive_slot)
         | 
| 4 4 | 
             
                  Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
         | 
| 5 5 | 
             
                end
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                def Gbook.find_by_http_url(http_url,gdrive_slot)
         | 
| 8 | 
            +
                  key = http_url.split("key=").last.split("#").first
         | 
| 9 | 
            +
                  Gdrive.root(gdrive_slot).spreadsheet_by_key(key)
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 6 12 | 
             
                def Gbook.find_by_path(path,gdrive_slot)
         | 
| 13 | 
            +
                  #first try to find a dataset with the URL
         | 
| 14 | 
            +
                  dst = Dataset.find_by_handler_and_path('gbook',path)
         | 
| 15 | 
            +
                  if dst and dst.http_url.to_s.length>0
         | 
| 16 | 
            +
                    book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
         | 
| 17 | 
            +
                    #doesn't count if it's deleted
         | 
| 18 | 
            +
                    if book.entry_hash[:deleted]
         | 
| 19 | 
            +
                      book = nil
         | 
| 20 | 
            +
                    else
         | 
| 21 | 
            +
                      return book
         | 
| 22 | 
            +
                    end
         | 
| 23 | 
            +
                  end
         | 
| 7 24 | 
             
                  books = Gbook.find_all_by_path(path,gdrive_slot)
         | 
| 8 25 | 
             
                  dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
         | 
| 9 26 | 
             
                  book = nil
         | 
| @@ -15,6 +32,7 @@ module Mobilize | |
| 15 32 | 
             
                      bkey = b.resource_id.split(":").last
         | 
| 16 33 | 
             
                      if bkey == dkey
         | 
| 17 34 | 
             
                        book = b
         | 
| 35 | 
            +
                        dst.update_attributes(:http_url=>book.human_url)
         | 
| 18 36 | 
             
                      else
         | 
| 19 37 | 
             
                        #delete the invalid book
         | 
| 20 38 | 
             
                        b.delete
         | 
| @@ -25,6 +43,7 @@ module Mobilize | |
| 25 43 | 
             
                    #If it's a new dst or if there are multiple books
         | 
| 26 44 | 
             
                    #take the first
         | 
| 27 45 | 
             
                    book = books.first
         | 
| 46 | 
            +
                    dst.update_attributes(:http_url=>book.human_url) if book
         | 
| 28 47 | 
             
                  end
         | 
| 29 48 | 
             
                  return book
         | 
| 30 49 | 
             
                end
         | 
| @@ -80,5 +80,18 @@ module Mobilize | |
| 80 80 | 
             
                def Gdrive.books(gdrive_slot=nil,params={})
         | 
| 81 81 | 
             
                  Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
         | 
| 82 82 | 
             
                end
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                #email management - used to make sure not too many emails get used at the same time
         | 
| 85 | 
            +
                def Gdrive.slot_worker_by_path(path)
         | 
| 86 | 
            +
                  working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
         | 
| 87 | 
            +
                  Gdrive.workers.sort_by{rand}.each do |w|
         | 
| 88 | 
            +
                    unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
         | 
| 89 | 
            +
                      Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
         | 
| 90 | 
            +
                      return [w['name'],Gdrive.domain].join("@")
         | 
| 91 | 
            +
                    end
         | 
| 92 | 
            +
                  end
         | 
| 93 | 
            +
                  #return false if none are available
         | 
| 94 | 
            +
                  return false
         | 
| 95 | 
            +
                end
         | 
| 83 96 | 
             
              end
         | 
| 84 97 | 
             
            end
         | 
| @@ -1,5 +1,47 @@ | |
| 1 1 | 
             
            module Mobilize
         | 
| 2 2 | 
             
              module Gfile
         | 
| 3 | 
            +
                def Gfile.path_to_dst(path,stage_path)
         | 
| 4 | 
            +
                  #don't need the ://
         | 
| 5 | 
            +
                  path = path.split("://").last if path.index("://")
         | 
| 6 | 
            +
                  if Gfile.find_by_path(path)
         | 
| 7 | 
            +
                    handler = "gfile"
         | 
| 8 | 
            +
                    Dataset.find_or_create_by_url("#{handler}://#{path}")
         | 
| 9 | 
            +
                  else
         | 
| 10 | 
            +
                    raise "unable to find #{path}"
         | 
| 11 | 
            +
                  end
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                def Gfile.read_by_dataset_path(dst_path,user_name,*args)
         | 
| 15 | 
            +
                  #expects gdrive slot as first arg, otherwise chooses random
         | 
| 16 | 
            +
                  gdrive_slot = args
         | 
| 17 | 
            +
                  worker_emails = Gdrive.worker_emails.sort_by{rand}
         | 
| 18 | 
            +
                  gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
         | 
| 19 | 
            +
                  file = Gfile.find_by_path(dst_path)
         | 
| 20 | 
            +
                  file.read(user_name) if file
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                def Gfile.write_by_dataset_path(dst_path,string,user_name,*args)
         | 
| 24 | 
            +
                  #ignores *args as all files must be created and owned by owner
         | 
| 25 | 
            +
                  file = Gfile.find_by_path(dst_path)
         | 
| 26 | 
            +
                  file.delete if file
         | 
| 27 | 
            +
                  owner_root = Gdrive.root(Gdrive.owner_email)
         | 
| 28 | 
            +
                  file = owner_root.upload_from_string(string,
         | 
| 29 | 
            +
                                                dst_path,
         | 
| 30 | 
            +
                                                :content_type=>"test/plain",
         | 
| 31 | 
            +
                                                :convert=>false)
         | 
| 32 | 
            +
                  file.add_admin_acl
         | 
| 33 | 
            +
                  #make sure user is owner or can edit
         | 
| 34 | 
            +
                  u = User.where(:name=>user_name).first
         | 
| 35 | 
            +
                  entry = file.acl_entry(u.email)
         | 
| 36 | 
            +
                  unless entry and ['writer','owner'].include?(entry.role)
         | 
| 37 | 
            +
                    file.update_acl(u.email)
         | 
| 38 | 
            +
                  end
         | 
| 39 | 
            +
                  #update http url for file
         | 
| 40 | 
            +
                  dst = Dataset.find_by_handler_and_path("gfile",dst_path)
         | 
| 41 | 
            +
                  dst.update_attributes(:http_url=>file.human_url)
         | 
| 42 | 
            +
                  true
         | 
| 43 | 
            +
                end
         | 
| 44 | 
            +
             | 
| 3 45 | 
             
                def Gfile.add_admin_acl_by_path(path)
         | 
| 4 46 | 
             
                  file = Gfile.find_by_path(path)
         | 
| 5 47 | 
             
                  file.add_admin_acl
         | 
| @@ -18,18 +60,6 @@ module Mobilize | |
| 18 60 | 
             
                  file.update_acl(gdrive_slot,role)
         | 
| 19 61 | 
             
                end
         | 
| 20 62 |  | 
| 21 | 
            -
                def Gfile.read_by_stage_path(stage_path)
         | 
| 22 | 
            -
                  #reserve gdrive_slot account for read
         | 
| 23 | 
            -
                  gdrive_slot = Gdrive.slot_worker_by_path(s.path)
         | 
| 24 | 
            -
                  return false unless gdrive_slot
         | 
| 25 | 
            -
                  s = Stage.where(:path=>stage_path)
         | 
| 26 | 
            -
                  gfile_path = s.params['file']
         | 
| 27 | 
            -
                  out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
         | 
| 28 | 
            -
                  #use Gridfs to cache result
         | 
| 29 | 
            -
                  out_url = "gridfs://#{s.path}/out"
         | 
| 30 | 
            -
                  Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
         | 
| 31 | 
            -
                end
         | 
| 32 | 
            -
             | 
| 33 63 | 
             
                def Gfile.find_by_path(path)
         | 
| 34 64 | 
             
                  #file must be owned by owner
         | 
| 35 65 | 
             
                  gdrive_slot = Gdrive.owner_email
         | 
| @@ -55,8 +85,10 @@ module Mobilize | |
| 55 85 | 
             
                  end
         | 
| 56 86 | 
             
                  #always make sure dataset http URL is up to date
         | 
| 57 87 | 
             
                  #and that it has admin acl
         | 
| 58 | 
            -
                   | 
| 59 | 
            -
             | 
| 88 | 
            +
                  if file
         | 
| 89 | 
            +
                    dst.update_attributes(:http_url=>file.human_url)
         | 
| 90 | 
            +
                    file.add_admin_acl
         | 
| 91 | 
            +
                  end
         | 
| 60 92 | 
             
                  return file
         | 
| 61 93 | 
             
                end
         | 
| 62 94 | 
             
              end
         | 
| @@ -11,7 +11,7 @@ module Mobilize | |
| 11 11 | 
             
                  return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
         | 
| 12 12 | 
             
                end
         | 
| 13 13 |  | 
| 14 | 
            -
                def Gridfs.read_by_dataset_path(dst_path, | 
| 14 | 
            +
                def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
         | 
| 15 15 | 
             
                  begin
         | 
| 16 16 | 
             
                    zs=Gridfs.grid.open(dst_path,'r').read
         | 
| 17 17 | 
             
                    return ::Zlib::Inflate.inflate(zs)
         | 
| @@ -20,10 +20,10 @@ module Mobilize | |
| 20 20 | 
             
                  end
         | 
| 21 21 | 
             
                end
         | 
| 22 22 |  | 
| 23 | 
            -
                def Gridfs.write_by_dataset_path(dst_path,string, | 
| 23 | 
            +
                def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
         | 
| 24 24 | 
             
                  zs = ::Zlib::Deflate.deflate(string)
         | 
| 25 25 | 
             
                  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
         | 
| 26 | 
            -
                  curr_zs = Gridfs.read_by_dataset_path(dst_path, | 
| 26 | 
            +
                  curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
         | 
| 27 27 | 
             
                  #write a new version when there is a change
         | 
| 28 28 | 
             
                  if curr_zs != zs
         | 
| 29 29 | 
             
                    Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
         | 
| @@ -9,9 +9,62 @@ module Mobilize | |
| 9 9 | 
             
                  Gsheet.config['max_cells']
         | 
| 10 10 | 
             
                end
         | 
| 11 11 |  | 
| 12 | 
            +
                # Resolves a source or target path, as named in a stage's params, to a Dataset.
                #   path       - "<book>/<sheet>", a bare name, or either with a "<handler>://" prefix
                #   stage_path - path of the stage; its first segment is used as the runner title
                # Returns the Dataset found or created for the resolved url.
                # Raises "unable to find <path>" when path is not the stage's target and
                # no matching sheet or file is found.
                def Gsheet.path_to_dst(path,stage_path)
                  stage = Stage.where(:path=>stage_path).first
                  target_path = stage.params['target']
                  # use the slot returned for this stage, else a randomly chosen worker
                  gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
                  # a target does not have to exist yet; compare before the prefix is stripped
                  is_target = (path == target_path)
                  # drop the "<handler>://" prefix, if any
                  path = path.split("://").last if path.index("://")
                  if path.split("/").length == 2
                    # two segments: the caller gave a full path to a sheet
                    unless is_target or Gsheet.find_by_path(path,gdrive_slot)
                      raise "unable to find #{path}"
                    end
                    return Dataset.find_or_create_by_url("gsheet://#{path}")
                  end
                  # otherwise the caller gave a bare name: try a worksheet in the
                  # runner's book first, then fall back to a gfile lookup
                  runner_title = stage_path.split("/").first
                  runner = Runner.find_by_title(runner_title)
                  if is_target or runner.gbook(gdrive_slot).worksheets.map(&:title).include?(path)
                    url = "gsheet://#{runner_title}/#{path}"
                  elsif Gfile.find_by_path(path,gdrive_slot)
                    url = "gfile://#{path}"
                  else
                    raise "unable to find #{path}"
                  end
                  Dataset.find_or_create_by_url(url)
                end
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                # Reads the sheet at dst_path on behalf of user_name.
                #   dst_path  - path handed to Gsheet.find_by_path
                #   user_name - passed through to sheet.read
                #   args      - optional; first element is the gdrive slot (worker email) to use
                # Returns whatever sheet.read returns, or nil when no sheet is found.
                def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
                  #expects gdrive slot as first arg, otherwise chooses random
                  #args is the splat Array, so the slot is its first element; assigning
                  #the Array itself would never match a worker email below
                  gdrive_slot = args.first
                  worker_emails = Gdrive.worker_emails.sort_by{rand}
                  gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
                  sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
                  sheet.read(user_name) if sheet
                end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                # Writes tsv to the sheet at dst_path on behalf of user_name.
                #   args - optional [gdrive_slot, crop]; gdrive_slot falls back to a random
                #          worker email when it is not one of Gdrive.worker_emails, and
                #          crop defaults to true when it is not supplied
                # Returns the result of Gsheet.write_target.
                def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
                  #expects gdrive slot as first arg, otherwise chooses random
                  gdrive_slot,crop = args
                  worker_emails = Gdrive.worker_emails.sort_by{rand}
                  gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
                  #default only when crop is missing; `||=` would also overwrite an explicit false
                  crop = true if crop.nil?
                  Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
                end
         | 
| 64 | 
            +
             | 
| 12 65 | 
             
                # Finds or creates the sheet at path using gdrive_slot and writes tsv
                # to it, passing Gdrive.owner_name as the writing user.
                def Gsheet.write(path,tsv,gdrive_slot)
                  Gsheet.find_or_create_by_path(path,gdrive_slot).write(tsv,Gdrive.owner_name)
                end
         | 
| 16 69 |  | 
| 17 70 | 
             
                def Gsheet.find_by_path(path,gdrive_slot)
         | 
| @@ -32,32 +85,9 @@ module Mobilize | |
| 32 85 | 
             
                  return sheet
         | 
| 33 86 | 
             
                end
         | 
| 34 87 |  | 
| 35 | 
            -
                def Gsheet. | 
| 36 | 
            -
                  #reserve gdrive_slot account for read
         | 
| 37 | 
            -
                  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
         | 
| 38 | 
            -
                  return false unless gdrive_slot
         | 
| 39 | 
            -
                  s = Stage.where(:path=>stage_path).first
         | 
| 40 | 
            -
                  user = s.job.runner.user.name
         | 
| 41 | 
            -
                  source_dst = s.source_dsts(gdrive_slot).first
         | 
| 42 | 
            -
                  out_tsv = source_dst.read(user)
         | 
| 43 | 
            -
                  #use Gridfs to cache result
         | 
| 44 | 
            -
                  out_url = "gridfs://#{s.path}/out"
         | 
| 45 | 
            -
                  Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
         | 
| 46 | 
            -
                end
         | 
| 47 | 
            -
             | 
| 48 | 
            -
                def Gsheet.write_by_stage_path(stage_path)
         | 
| 49 | 
            -
                  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
         | 
| 50 | 
            -
                  #return blank response if there are no slots available
         | 
| 51 | 
            -
                  return nil unless gdrive_slot
         | 
| 52 | 
            -
                  s = Stage.where(:path=>stage_path).first
         | 
| 53 | 
            -
                  user = s.job.runner.user
         | 
| 54 | 
            -
                  target_path = s.params['target']
         | 
| 55 | 
            -
                  target_path = "#{s.job.runner.title}/#{target_path}" unless target_path.index("/")
         | 
| 56 | 
            -
                  source_dst = s.source_dsts(gdrive_slot).first
         | 
| 57 | 
            -
                  tsv = source_dst.read(user.name)
         | 
| 58 | 
            -
                  sheet_name = target_path.split("/").last
         | 
| 59 | 
            -
                  temp_path = [stage_path.gridsafe,sheet_name].join("/")
         | 
| 88 | 
            +
                def Gsheet.write_temp(target_path,gdrive_slot,tsv)
         | 
| 60 89 | 
             
                  #find and delete temp sheet, if any
         | 
| 90 | 
            +
                  temp_path = [target_path.gridsafe,"temp"].join("/")
         | 
| 61 91 | 
             
                  temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
         | 
| 62 92 | 
             
                  temp_sheet.delete if temp_sheet
         | 
| 63 93 | 
             
                  #write data to temp sheet
         | 
| @@ -70,28 +100,57 @@ module Mobilize | |
| 70 100 | 
             
                    return nil
         | 
| 71 101 | 
             
                  end
         | 
| 72 102 | 
             
                  temp_sheet.check_and_fix(tsv)
         | 
| 103 | 
            +
                  temp_sheet
         | 
| 104 | 
            +
                end
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                # Writes tsv to the sheet at target_path by way of a temp sheet.
                #   target_path - path of the sheet to write
                #   tsv         - data to write
                #   user_name   - name used to look up the User and passed to merge
                #   gdrive_slot - worker account used for the Gdrive calls
                #   crop        - passed to merge; whether to shrink the target to fit the data
                # Returns the target sheet.
                # Raises when the temp sheet could not be written.
                def Gsheet.write_target(target_path,tsv,user_name,gdrive_slot,crop=true)
                  #write to temp sheet first, to ensure google compatibility
                  #and fix any discrepancies due to spreadsheet assumptions
                  temp_sheet = Gsheet.write_temp(target_path,gdrive_slot,tsv)
                  #write_temp can return nil; stop here rather than create a target
                  #sheet and then fail on the nil temp sheet further down
                  raise "unable to write temp sheet for #{target_path}" unless temp_sheet
                  #try to find target sheet
                  target_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
                  u = User.where(:name=>user_name).first
                  unless target_sheet
                    #only give the user edit permissions if they're the ones
                    #creating it
                    target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
                    #use u.email here; there is no user_email local in this method
                    target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
                    target_sheet.delete_sheet1
                  end
                  #pass it crop param to determine whether to shrink target sheet to fit data
                  #default is yes
                  target_sheet.merge(temp_sheet,user_name,crop)
                  #delete the temp sheet's book
                  temp_sheet.spreadsheet.delete
                  target_sheet
                end
         | 
| 127 | 
            +
             | 
| 128 | 
            +
                # Runs a gsheet write for the stage at stage_path: reads the stage's first
                # source and writes it to the stage's target url.
                # Returns nil when no gdrive slot is available, otherwise a Hash with
                # 'out_str', 'err_str' and 'signal' (0 on success, 500 on any error).
                def Gsheet.write_by_stage_path(stage_path)
                  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
                  #return blank response if there are no slots available
                  return nil unless gdrive_slot
                  s = Stage.where(:path=>stage_path).first
                  u = s.job.runner.user
                  #crop defaults to true only when the param is absent;
                  #`|| true` would also discard an explicit false
                  crop = s.params['crop']
                  crop = true if crop.nil?
                  begin
                    #get tsv to write from stage
                    source = s.sources.first
                    raise "Need source for gsheet write" unless source
                    tsv = source.read(u.name,gdrive_slot)
                    raise "No data found in #{source.url}" unless tsv
                    Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
                    Gdrive.unslot_worker_by_path(stage_path)
                    #update status
                    stdout = "Write successful for #{s.target.url}"
                    stderr = nil
                    s.update_status(stdout)
                    signal = 0
                  rescue => exc
                    #report the failure to the caller instead of raising
                    stdout = nil
                    stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
                    signal = 500
                  end
                  return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
                end
         | 
| 96 155 | 
             
              end
         | 
| 97 156 | 
             
            end
         | 
| @@ -103,23 +103,28 @@ module Mobilize | |
| 103 103 | 
             
                  end
         | 
| 104 104 | 
             
                end
         | 
| 105 105 |  | 
| 106 | 
            -
                def Resque. | 
| 106 | 
            +
                def Resque.new_failures_by_email
         | 
| 107 107 | 
             
                  fjobs = {}
         | 
| 108 | 
            -
                   | 
| 108 | 
            +
                  exc_to_s = Hash.new(0)
         | 
| 109 109 | 
             
                  Resque.failures.each_with_index do |f,f_i|
         | 
| 110 110 | 
             
                    #skip if already notified
         | 
| 111 111 | 
             
                    next if f['notified']
         | 
| 112 | 
            -
                     | 
| 113 | 
            -
                     | 
| 114 | 
            -
                     | 
| 115 | 
            -
             | 
| 116 | 
            -
                     | 
| 117 | 
            -
                      fjobs[ | 
| 112 | 
            +
                    stage_path = f['payload']['args'].first
         | 
| 113 | 
            +
                    s = Stage.where(:path=>stage_path).first
         | 
| 114 | 
            +
                    email = s.job.runner.user.email
         | 
| 115 | 
            +
                    exc_to_s = f['error']
         | 
| 116 | 
            +
                    if fjobs[email].nil?
         | 
| 117 | 
            +
                      fjobs[email] = {stage_path => {exc_to_s => 1}}
         | 
| 118 | 
            +
                    elsif fjobs[email][stage_path].nil?
         | 
| 119 | 
            +
                      fjobs[email][stage_path] = {exc_to_s => 1}
         | 
| 120 | 
            +
                    elsif fjobs[email][stage_path][exc_to_s].nil?
         | 
| 121 | 
            +
                      fjobs[email][stage_path][exc_to_s] = 1        
         | 
| 118 122 | 
             
                    else
         | 
| 119 | 
            -
                      fjobs[ | 
| 123 | 
            +
                      fjobs[email][stage_path][exc_to_s] += 1
         | 
| 120 124 | 
             
                    end
         | 
| 121 125 | 
             
                    #add notified flag to redis
         | 
| 122 126 | 
             
                    f['notified'] = true
         | 
| 127 | 
            +
                    #tag stage with email
         | 
| 123 128 | 
             
                    ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
         | 
| 124 129 | 
             
                  end
         | 
| 125 130 | 
             
                  return fjobs
         | 
| @@ -163,27 +163,39 @@ module Mobilize | |
| 163 163 | 
             
                  if Jobtracker.notif_due?
         | 
| 164 164 | 
             
                    notifs = []
         | 
| 165 165 | 
             
                    if Jobtracker.failures.length>0
         | 
| 166 | 
            -
                       | 
| 167 | 
            -
                       | 
| 166 | 
            +
                      failure_hash = Resque.new_failures_by_email
         | 
| 167 | 
            +
                      failure_hash.each do |email,stage_paths|
         | 
| 168 168 | 
             
                        n = {}
         | 
| 169 | 
            -
                        n[' | 
| 169 | 
            +
                        n['subject'] = "#{stage_paths.keys.length.to_s} new failed jobs, #{stage_paths.values.map{|v| v.values}.flatten.sum.to_s} failures"
         | 
| 170 170 | 
             
                        #one row per exception type, with the job name
         | 
| 171 | 
            -
                        n['body'] =  | 
| 171 | 
            +
                        n['body'] = stage_paths.map do |path,exceptions| 
         | 
| 172 | 
            +
                                                      exceptions.map do |exc_to_s,times| 
         | 
| 173 | 
            +
                                                        [path," : ",exc_to_s,", ",times," times"].join
         | 
| 174 | 
            +
                                                      end
         | 
| 175 | 
            +
                                                    end.flatten.join("\n\n")
         | 
| 176 | 
            +
                        u = User.where(:name=>email.split("@").first).first
         | 
| 177 | 
            +
                        runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
         | 
| 178 | 
            +
                        n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
         | 
| 179 | 
            +
                        n['to'] = email
         | 
| 180 | 
            +
                        n['bcc'] = Jobtracker.admin_emails.join(",")
         | 
| 172 181 | 
             
                        notifs << n
         | 
| 173 182 | 
             
                      end
         | 
| 174 183 | 
             
                    end
         | 
| 175 184 | 
             
                    lws = Jobtracker.max_run_time_workers
         | 
| 176 185 | 
             
                    if lws.length>0
         | 
| 177 186 | 
             
                      n = {}
         | 
| 178 | 
            -
                      n[' | 
| 187 | 
            +
                      n['subject'] = "#{lws.length.to_s} max run time jobs"
         | 
| 179 188 | 
             
                      n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
         | 
| 189 | 
            +
                      n['to'] = Jobtracker.admin_emails.join(",")
         | 
| 180 190 | 
             
                      notifs << n
         | 
| 181 191 | 
             
                    end
         | 
| 192 | 
            +
                    #deliver each email generated
         | 
| 182 193 | 
             
                    notifs.each do |notif|
         | 
| 183 | 
            -
                      Email.write( | 
| 184 | 
            -
                      Jobtracker.last_notification=Time.now.utc.to_s
         | 
| 185 | 
            -
                      Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
         | 
| 194 | 
            +
                      Email.write(notif).deliver
         | 
| 186 195 | 
             
                    end
         | 
| 196 | 
            +
                    #update notification time so JT knows to wait a while
         | 
| 197 | 
            +
                    Jobtracker.last_notification = Time.now.utc.to_s
         | 
| 198 | 
            +
                    Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
         | 
| 187 199 | 
             
                  end
         | 
| 188 200 | 
             
                  return true
         | 
| 189 201 | 
             
                end
         | 
| @@ -13,9 +13,23 @@ module Mobilize | |
| 13 13 |  | 
| 14 14 | 
             
                index({ handler: 1, path: 1}, { unique: true})
         | 
| 15 15 |  | 
| 16 | 
            -
                def  | 
| 16 | 
            +
                # Returns this dataset's url in the form "<handler>://<path>".
                def url
                  handler.to_s + "://" + path.to_s
                end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                # Reads this dataset through its handler module.
                # Stamps last_read_at first, then delegates to
                # Mobilize::<Handler>.read_by_dataset_path with this dataset's path,
                # user_name and any extra args. Returns the handler's result.
                def read(user_name,*args)
                  dataset = self
                  dataset.update_attributes(:last_read_at=>Time.now.utc)
                  handler_module = "Mobilize::#{dataset.handler.humanize}".constantize
                  handler_module.read_by_dataset_path(dataset.path,user_name,*args)
                end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                # Writes string to this dataset through its handler module
                # (Mobilize::<Handler>.write_by_dataset_path), then stores
                # string.length in raw_size and saves the record. Returns true.
                def write(string,user_name,*args)
                  dataset = self
                  handler_module = "Mobilize::#{dataset.handler.humanize}".constantize
                  handler_module.write_by_dataset_path(dataset.path,string,user_name,*args)
                  dataset.raw_size = string.length
                  dataset.save!
                  true
                end
         | 
| 20 34 |  | 
| 21 35 | 
             
                def Dataset.find_by_url(url)
         | 
| @@ -38,24 +52,15 @@ module Mobilize | |
| 38 52 | 
             
                  return dst
         | 
| 39 53 | 
             
                end
         | 
| 40 54 |  | 
| 41 | 
            -
                def Dataset. | 
| 42 | 
            -
                  dst = Dataset. | 
| 43 | 
            -
                  dst. | 
| 44 | 
            -
                  url
         | 
| 55 | 
            +
                # Looks up the dataset for url and reads it for user_name,
                # forwarding any extra args. Returns nil when no dataset is found.
                def Dataset.read_by_url(url,user_name,*args)
                  found = Dataset.find_by_url(url)
                  return nil unless found
                  found.read(user_name,*args)
                end
         | 
| 46 59 |  | 
| 47 | 
            -
                def  | 
| 48 | 
            -
                  dst =  | 
| 49 | 
            -
                  dst. | 
| 50 | 
            -
                   | 
| 51 | 
            -
                end
         | 
| 52 | 
            -
             | 
| 53 | 
            -
                def write(string,user)
         | 
| 54 | 
            -
                  dst = self
         | 
| 55 | 
            -
                  "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
         | 
| 56 | 
            -
                  dst.raw_size = string.length
         | 
| 57 | 
            -
                  dst.save!
         | 
| 58 | 
            -
                  return true
         | 
| 60 | 
            +
                # Finds or creates the dataset for url and writes string to it for
                # user_name, forwarding any extra args. Returns url.
                def Dataset.write_by_url(url,string,user_name,*args)
                  Dataset.find_or_create_by_url(url).write(string,user_name,*args)
                  return url
                end
         | 
| 60 65 | 
             
              end
         | 
| 61 66 | 
             
            end
         | 
| @@ -15,11 +15,6 @@ module Mobilize | |
| 15 15 | 
             
                  %w{name active trigger status stage1 stage2 stage3 stage4 stage5}
         | 
| 16 16 | 
             
                end
         | 
| 17 17 |  | 
| 18 | 
            -
                def cached_at
         | 
| 19 | 
            -
                  r = self
         | 
| 20 | 
            -
                  Dataset.find_or_create_by_path(r.path).cached_at
         | 
| 21 | 
            -
                end
         | 
| 22 | 
            -
             | 
| 23 18 | 
             
                def title
         | 
| 24 19 | 
             
                  r = self
         | 
| 25 20 | 
             
                  r.path.split("/").first
         | 
| @@ -34,6 +29,9 @@ module Mobilize | |
| 34 29 | 
             
                  Runner.where(:path=>path).first
         | 
| 35 30 | 
             
                end
         | 
| 36 31 |  | 
| 32 | 
            +
                # Returns the first Runner whose path is "<title>/jobs", or nil.
                def Runner.find_by_title(title)
                  jobs_path = "#{title}/jobs"
                  Runner.where(:path=>jobs_path).first
                end
         | 
| 37 35 | 
             
                def Runner.perform(id,*args)
         | 
| 38 36 | 
             
                  r = Runner.find_by_path(id)
         | 
| 39 37 | 
             
                  #get gdrive slot for read
         | 
| @@ -53,7 +51,9 @@ module Mobilize | |
| 53 51 | 
             
                    begin
         | 
| 54 52 | 
             
                      if j.is_due?
         | 
| 55 53 | 
             
                        j.update_attributes(:active=>false) if j.trigger=='once'
         | 
| 56 | 
            -
                        j.stages.first | 
| 54 | 
            +
                        s = j.stages.first
         | 
| 55 | 
            +
                        s.update_attributes(:retries_done=>0)
         | 
| 56 | 
            +
                        s.enqueue!
         | 
| 57 57 | 
             
                      end
         | 
| 58 58 | 
             
                    rescue ScriptError, StandardError => exc
         | 
| 59 59 | 
             
                      r.update_status("Failed to enqueue #{j.path} with #{exc.to_s}")
         | 
| @@ -73,11 +73,6 @@ module Mobilize | |
| 73 73 | 
             
                  Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
         | 
| 74 74 | 
             
                end
         | 
| 75 75 |  | 
| 76 | 
            -
                def cache
         | 
| 77 | 
            -
                  r = self
         | 
| 78 | 
            -
                  Dataset.find_or_create_by_url("gridfs://#{r.path}")
         | 
| 79 | 
            -
                end
         | 
| 80 | 
            -
             | 
| 81 76 | 
             
                def gbook(gdrive_slot)
         | 
| 82 77 | 
             
                  r = self
         | 
| 83 78 | 
             
                  title = r.path.split("/").first
         | 
| @@ -86,17 +81,20 @@ module Mobilize | |
| 86 81 |  | 
| 87 82 | 
             
                # Returns this runner's jobs sheet, creating it when needed.
                # Ensures the runner's user can write to the spreadsheet, adds the header
                # row, records the spreadsheet's human url on the runner's gsheet Dataset,
                # and makes a best-effort attempt to delete sheet1.
                def gsheet(gdrive_slot)
                  runner = self
                  usr = runner.user
                  sheet = Gsheet.find_by_path(runner.path,gdrive_slot)
                  # the sheet must exist and list the user as a writer
                  has_writer = (sheet and sheet.spreadsheet.acl_entry(usr.email).ie{|e| e and e.role=="writer"})
                  unless has_writer
                    # grant edit rights unless the user already owns the spreadsheet
                    sheet = Gsheet.find_or_create_by_path(runner.path,gdrive_slot)
                    is_owner = sheet.spreadsheet.acl_entry(usr.email).ie{|e| e and e.role=="owner"}
                    sheet.spreadsheet.update_acl(usr.email,"writer") unless is_owner
                  end
                  sheet.add_headers(runner.headers)
                  # store the browser url on the dataset
                  Dataset.find_or_create_by_url("gsheet://#{runner.path}").update_attributes(:http_url=>sheet.spreadsheet.human_url)
                  # a failed sheet1 deletion is deliberately ignored
                  begin
                    sheet.delete_sheet1
                  rescue
                  end
                  sheet
                end
         | 
| @@ -104,8 +102,6 @@ module Mobilize | |
| 104 102 | 
             
                def read_gsheet(gdrive_slot)
         | 
| 105 103 | 
             
                  r = self
         | 
| 106 104 | 
             
                  gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
         | 
| 107 | 
            -
                  #cache in DB
         | 
| 108 | 
            -
                  r.cache.write(gsheet_tsv,Gdrive.owner_name)
         | 
| 109 105 | 
             
                  #turn it into a hash array
         | 
| 110 106 | 
             
                  gsheet_jobs = gsheet_tsv.tsv_to_hash_array
         | 
| 111 107 | 
             
                  #go through each job, update relevant job with its params
         | 
| @@ -122,8 +118,13 @@ module Mobilize | |
| 122 118 | 
             
                      stage_string = rj["stage#{s_idx.to_s}"]
         | 
| 123 119 | 
             
                      s = Stage.find_by_path("#{j.path}/stage#{s_idx.to_s}")
         | 
| 124 120 | 
             
                      if stage_string.to_s.length==0
         | 
| 125 | 
            -
                        #delete this stage | 
| 126 | 
            -
                         | 
| 121 | 
            +
                        #delete this stage and all stages after
         | 
| 122 | 
            +
                        if s
         | 
| 123 | 
            +
                          j = s.job
         | 
| 124 | 
            +
                          j.stages[(s.idx-1)..-1].each{|ps| ps.delete}
         | 
| 125 | 
            +
                          #just in case
         | 
| 126 | 
            +
                          s.delete
         | 
| 127 | 
            +
                        end
         | 
| 127 128 | 
             
                        break
         | 
| 128 129 | 
             
                      elsif s.nil?
         | 
| 129 130 | 
             
                        #create this stage
         | 
| @@ -7,7 +7,8 @@ module Mobilize | |
| 7 7 | 
             
                field :call, type: String
         | 
| 8 8 | 
             
                field :param_string, type: Array
         | 
| 9 9 | 
             
                field :status, type: String
         | 
| 10 | 
            -
                field : | 
| 10 | 
            +
                field :response, type: Hash
         | 
| 11 | 
            +
                field :retries_done, type: Fixnum
         | 
| 11 12 | 
             
                field :completed_at, type: Time
         | 
| 12 13 | 
             
                field :started_at, type: Time
         | 
| 13 14 | 
             
                field :failed_at, type: Time
         | 
| @@ -25,7 +26,15 @@ module Mobilize | |
| 25 26 | 
             
                  #allowing you to determine its size
         | 
| 26 27 | 
             
                  #before committing to a read or write
         | 
| 27 28 | 
             
                  s = self
         | 
| 28 | 
            -
                  Dataset.find_by_url(s.out_url) if s.out_url
         | 
| 29 | 
            +
                  Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                def err_dst
         | 
| 33 | 
            +
                  #this gives a dataset that points to the output
         | 
| 34 | 
            +
                  #allowing you to determine its size
         | 
| 35 | 
            +
                  #before committing to a read or write
         | 
| 36 | 
            +
                  s = self
         | 
| 37 | 
            +
                  Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
         | 
| 29 38 | 
             
                end
         | 
| 30 39 |  | 
| 31 40 | 
             
                def params
         | 
| @@ -68,84 +77,91 @@ module Mobilize | |
| 68 77 |  | 
| 69 78 | 
             
                def Stage.perform(id,*args)
         | 
| 70 79 | 
             
                  s = Stage.where(:path=>id).first
         | 
| 71 | 
            -
                  j = s.job
         | 
| 72 80 | 
             
                  s.update_attributes(:started_at=>Time.now.utc)
         | 
| 73 81 | 
             
                  s.update_status(%{Starting at #{Time.now.utc}})
         | 
| 74 | 
            -
                   | 
| 75 | 
            -
             | 
| 76 | 
            -
             | 
| 77 | 
            -
                     | 
| 78 | 
            -
                     | 
| 79 | 
            -
             | 
| 80 | 
            -
                      s.enqueue!
         | 
| 81 | 
            -
                      return false
         | 
| 82 | 
            -
                    end
         | 
| 83 | 
            -
                  rescue ScriptError, StandardError => exc
         | 
| 84 | 
            -
                    j.update_attributes(:active=>false)
         | 
| 85 | 
            -
                    s.update_attributes(:failed_at=>Time.now.utc)
         | 
| 86 | 
            -
                    s.update_status("Failed at #{Time.now.utc.to_s}")
         | 
| 87 | 
            -
                    raise exc
         | 
| 82 | 
            +
                  #get response by running method
         | 
| 83 | 
            +
                  response = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
         | 
| 84 | 
            +
                  unless response
         | 
| 85 | 
            +
                    #re-queue self if no response
         | 
| 86 | 
            +
                    s.enqueue!
         | 
| 87 | 
            +
                    return false
         | 
| 88 88 | 
             
                  end
         | 
| 89 | 
            -
                   | 
| 89 | 
            +
                  if response['signal'] == 0
         | 
| 90 | 
            +
                    s.complete(response)
         | 
| 91 | 
            +
                  elsif s.retries_done.to_i < s.params['retries'].to_i
         | 
| 92 | 
            +
                    #retry
         | 
| 93 | 
            +
                    s.update_attributes(:retries_done => s.retries_done.to_i + 1, :response => response)
         | 
| 94 | 
            +
                    s.update_status(%{Retry #{s.retries_done.to_s} at #{Time.now.utc}})
         | 
| 95 | 
            +
                    s.enqueue!
         | 
| 96 | 
            +
                  else
         | 
| 97 | 
            +
                    #sleep as much as user specifies
         | 
| 98 | 
            +
                    sleep s['delay'].to_i
         | 
| 99 | 
            +
                    s.fail(response)
         | 
| 100 | 
            +
                  end
         | 
| 101 | 
            +
                  return true
         | 
| 102 | 
            +
                end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                def complete(response)
         | 
| 105 | 
            +
                  s = self
         | 
| 106 | 
            +
                  s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
         | 
| 90 107 | 
             
                  s.update_status("Completed at #{Time.now.utc.to_s}")
         | 
| 108 | 
            +
                  j = s.job
         | 
| 91 109 | 
             
                  if s.idx == j.stages.length
         | 
| 92 110 | 
             
                    #check for any dependent jobs, if there are, enqueue them
         | 
| 93 111 | 
             
                    r = j.runner
         | 
| 94 | 
            -
                    dep_jobs = r.jobs.select | 
| 112 | 
            +
                    dep_jobs = r.jobs.select do |dj|
         | 
| 113 | 
            +
                                               dj.active==true and
         | 
| 114 | 
            +
                                                 dj.trigger.strip.downcase == "after #{j.name}"
         | 
| 115 | 
            +
                                             end
         | 
| 95 116 | 
             
                    #put begin/rescue so all dependencies run
         | 
| 96 | 
            -
                    dep_jobs.each | 
| 117 | 
            +
                    dep_jobs.each do |dj|
         | 
| 118 | 
            +
                                    begin
         | 
| 119 | 
            +
                                      unless dj.is_working?
         | 
| 120 | 
            +
                                        dj.stages.first.update_attributes(:retries_done=>0)
         | 
| 121 | 
            +
                                        dj.stages.first.enqueue!
         | 
| 122 | 
            +
                                      end
         | 
| 123 | 
            +
                                    rescue
         | 
| 124 | 
            +
                                      #job won't run if error, log it a failure
         | 
| 125 | 
            +
                                      response = {"err_str" => "Unable to enqueue first stage of #{dj.path}"}
         | 
| 126 | 
            +
                                      dj.stages.first.fail(response)
         | 
| 127 | 
            +
                                    end
         | 
| 128 | 
            +
                                  end
         | 
| 97 129 | 
             
                  else
         | 
| 98 130 | 
             
                    #queue up next stage
         | 
| 131 | 
            +
                    s.next.update_attributes(:retries_done=>0)
         | 
| 99 132 | 
             
                    s.next.enqueue!
         | 
| 100 133 | 
             
                  end
         | 
| 101 | 
            -
                   | 
| 134 | 
            +
                  true
         | 
| 102 135 | 
             
                end
         | 
| 103 136 |  | 
| 104 | 
            -
                def  | 
| 105 | 
            -
                  # | 
| 106 | 
            -
                   | 
| 107 | 
            -
                  #or dataset pointers for other handlers
         | 
| 137 | 
            +
                def fail(response,gdrive_slot=nil)
         | 
| 138 | 
            +
                  #get random worker if one is not provided
         | 
| 139 | 
            +
                  gdrive_slot ||= Gdrive.worker_emails.sort_by{rand}.first
         | 
| 108 140 | 
             
                  s = self
         | 
| 109 | 
            -
                   | 
| 110 | 
            -
                   | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
                   | 
| 116 | 
            -
                   | 
| 117 | 
            -
                   | 
| 118 | 
            -
                   | 
| 119 | 
            -
             | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
             | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
                        runner_sheet = r.gbook(gdrive_slot).worksheet_by_title(source_path)
         | 
| 134 | 
            -
                        out_tsv = if runner_sheet
         | 
| 135 | 
            -
                                    runner_sheet.read(user)
         | 
| 136 | 
            -
                                  else
         | 
| 137 | 
            -
                                    #check for gfile. will fail if there isn't one.
         | 
| 138 | 
            -
                                    Gfile.find_by_path(source_path).read(user)
         | 
| 139 | 
            -
                                  end
         | 
| 140 | 
            -
                      end
         | 
| 141 | 
            -
                      #use Gridfs to cache gdrive results
         | 
| 142 | 
            -
                      file_name = source_path.split("/").last
         | 
| 143 | 
            -
                      out_url = "gridfs://#{s.path}/#{file_name}"
         | 
| 144 | 
            -
                      Dataset.write_by_url(out_url,out_tsv,user)
         | 
| 145 | 
            -
                      dsts << Dataset.find_by_url(out_url)
         | 
| 146 | 
            -
                    end
         | 
| 147 | 
            -
                  end 
         | 
| 148 | 
            -
                  return dsts
         | 
| 141 | 
            +
                  j = s.job
         | 
| 142 | 
            +
                  r = j.runner
         | 
| 143 | 
            +
                  u = r.user
         | 
| 144 | 
            +
                  j.update_attributes(:active=>false)
         | 
| 145 | 
            +
                  s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
         | 
| 146 | 
            +
                  stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
         | 
| 147 | 
            +
                  target_path =  (r.path.split("/")[0..-2] + [stage_name]).join("/")
         | 
| 148 | 
            +
                  status_msg = "Failed at #{Time.now.utc.to_s}"
         | 
| 149 | 
            +
                  #read err txt, add err sheet, write to it
         | 
| 150 | 
            +
                  err_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
         | 
| 151 | 
            +
                  err_sheet.delete if err_sheet
         | 
| 152 | 
            +
                  err_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
         | 
| 153 | 
            +
                  err_txt = if response['err_url']
         | 
| 154 | 
            +
                              Dataset.read_by_url(response['err_url'],u.name)
         | 
| 155 | 
            +
                            elsif response['err_str']
         | 
| 156 | 
            +
                              response['err_str']
         | 
| 157 | 
            +
                            end
         | 
| 158 | 
            +
                  err_txt = ["response","\n",err_txt].join
         | 
| 159 | 
            +
                  err_sheet.write(err_txt,u.name)
         | 
| 160 | 
            +
                  #exception will be first row below "response" header
         | 
| 161 | 
            +
                  exc_to_s,backtrace = err_txt.split("\n").ie{|ea| [ea[1], ea[2..-1]]}
         | 
| 162 | 
            +
                  s.update_status(status_msg)
         | 
| 163 | 
            +
                  #raise the exception so it bubbles up to resque
         | 
| 164 | 
            +
                  raise Exception,exc_to_s,backtrace
         | 
| 149 165 | 
             
                end
         | 
| 150 166 |  | 
| 151 167 | 
             
                def enqueue!
         | 
| @@ -180,5 +196,61 @@ module Mobilize | |
| 180 196 | 
             
                  s = self
         | 
| 181 197 | 
             
                  Mobilize::Resque.active_paths.include?(s.path)
         | 
| 182 198 | 
             
                end
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                def target
         | 
| 201 | 
            +
                  s = self
         | 
| 202 | 
            +
                  params = s.params
         | 
| 203 | 
            +
                  target_path = params['target']
         | 
| 204 | 
            +
                  handler,path = target_path.split("://")
         | 
| 205 | 
            +
                  #if the user has specified a url for a target
         | 
| 206 | 
            +
                  #that is not this stage's handler, disallow
         | 
| 207 | 
            +
                  if handler and path and handler != s.handler
         | 
| 208 | 
            +
                    raise "incompatible target handler #{handler} for #{s.handler} stage"
         | 
| 209 | 
            +
                  else
         | 
| 210 | 
            +
                    begin
         | 
| 211 | 
            +
                      return "Mobilize::#{s.handler.downcase.capitalize}".constantize.path_to_dst(target_path,s.path)
         | 
| 212 | 
            +
                    rescue => exc
         | 
| 213 | 
            +
                      raise "Could not get #{target_path} with error: #{exc.to_s}"
         | 
| 214 | 
            +
                    end
         | 
| 215 | 
            +
                  end
         | 
| 216 | 
            +
                end
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                def sources
         | 
| 219 | 
            +
                  #returns an array of Datasets corresponding to
         | 
| 220 | 
            +
                  #items listed as sources in the stage params
         | 
| 221 | 
            +
                  s = self
         | 
| 222 | 
            +
                  params = s.params
         | 
| 223 | 
            +
                  job = s.job
         | 
| 224 | 
            +
                  runner = job.runner
         | 
| 225 | 
            +
                  source_paths = if params['sources']
         | 
| 226 | 
            +
                                   params['sources']
         | 
| 227 | 
            +
                                 elsif params['source']
         | 
| 228 | 
            +
                                   [params['source']]
         | 
| 229 | 
            +
                                 end
         | 
| 230 | 
            +
                  return [] if (source_paths.class!=Array or source_paths.length==0)
         | 
| 231 | 
            +
                  dsts = []
         | 
| 232 | 
            +
                  source_paths.each do |source_path|
         | 
| 233 | 
            +
                    if source_path.index(/^stage[1-5]$/)
         | 
| 234 | 
            +
                      #stage arguments return the stage's output dst url
         | 
| 235 | 
            +
                      source_stage_path = "#{runner.path}/#{job.name}/#{source_path}"
         | 
| 236 | 
            +
                      source_stage = Stage.where(:path=>source_stage_path).first
         | 
| 237 | 
            +
                      source_stage_out_url = source_stage.response['out_url']
         | 
| 238 | 
            +
                      dsts << Dataset.find_by_url(source_stage_out_url)
         | 
| 239 | 
            +
                    else
         | 
| 240 | 
            +
                      handler = if source_path.index("://")
         | 
| 241 | 
            +
                                  source_path.split("://").first
         | 
| 242 | 
            +
                                else
         | 
| 243 | 
            +
                                  s.handler
         | 
| 244 | 
            +
                                end
         | 
| 245 | 
            +
                      begin
         | 
| 246 | 
            +
                        stage_path = s.path
         | 
| 247 | 
            +
                        dsts << "Mobilize::#{handler.downcase.capitalize}".constantize.path_to_dst(source_path,stage_path)
         | 
| 248 | 
            +
                      rescue => exc
         | 
| 249 | 
            +
                        raise "Could not get #{source_path} with error: #{exc.to_s}"
         | 
| 250 | 
            +
                      end
         | 
| 251 | 
            +
                    end
         | 
| 252 | 
            +
                  end
         | 
| 253 | 
            +
                  return dsts
         | 
| 254 | 
            +
                end
         | 
| 183 255 | 
             
              end
         | 
| 184 256 | 
             
            end
         | 
    
        data/mobilize-base.gemspec
    CHANGED
    
    | @@ -6,7 +6,7 @@ Gem::Specification.new do |s| | |
| 6 6 | 
             
              s.name        = "mobilize-base"
         | 
| 7 7 | 
             
              s.version     = Mobilize::Base::VERSION
         | 
| 8 8 | 
             
              s.authors     = ["Cassio Paes-Leme"]
         | 
| 9 | 
            -
              s.email       = ["cpaesleme@ | 
| 9 | 
            +
              s.email       = ["cpaesleme@dena.com"]
         | 
| 10 10 | 
             
              s.homepage    = "http://github.com/ngmoco/mobilize-base"
         | 
| 11 11 | 
             
              s.summary     = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
         | 
| 12 12 | 
             
              s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
         | 
    
        data/test/base_job_rows.yml
    CHANGED
    
    
    
        data/test/mobilize-base_test.rb
    CHANGED
    
    | @@ -30,38 +30,77 @@ describe "Mobilize" do | |
| 30 30 |  | 
| 31 31 | 
             
                puts "Jobtracker created runner with 'jobs' sheet?"
         | 
| 32 32 | 
             
                r = u.runner
         | 
| 33 | 
            -
                 | 
| 34 | 
            -
                 | 
| 35 | 
            -
                 | 
| 36 | 
            -
             | 
| 37 | 
            -
                 | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
                 | 
| 33 | 
            +
                jobs_sheet_url = "gsheet://#{r.path}"
         | 
| 34 | 
            +
                jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
         | 
| 35 | 
            +
                jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
         | 
| 36 | 
            +
                jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
         | 
| 37 | 
            +
                assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                #stop Jobtracker, if you're doing this by queueing runners
         | 
| 40 | 
            +
                #Mobilize::Jobtracker.stop!
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                puts "add base1 input file"
         | 
| 43 | 
            +
                test_filename = "test_base_1"
         | 
| 44 | 
            +
                file_url = "gfile://#{test_filename}.tsv"
         | 
| 45 | 
            +
                test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
         | 
| 41 46 | 
             
                test_source_tsv = test_source_ha.hash_array_to_tsv
         | 
| 42 | 
            -
                 | 
| 47 | 
            +
                Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
         | 
| 48 | 
            +
                rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
         | 
| 49 | 
            +
                assert rem_tsv == test_source_tsv
         | 
| 43 50 |  | 
| 44 | 
            -
                puts "add row to jobs sheet, wait  | 
| 51 | 
            +
                puts "add row to jobs sheet, wait for stages"
         | 
| 45 52 | 
             
                test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
         | 
| 53 | 
            +
                jobs_sheet.reload
         | 
| 46 54 | 
             
                jobs_sheet.add_or_update_rows(test_job_rows)
         | 
| 47 | 
            -
                 | 
| 55 | 
            +
                #wait for stages to complete
         | 
| 56 | 
            +
                #r.enqueue!
         | 
| 57 | 
            +
                wait_for_stages
         | 
| 48 58 |  | 
| 49 59 | 
             
                puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
         | 
| 50 | 
            -
                 | 
| 51 | 
            -
                 | 
| 60 | 
            +
                test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
         | 
| 61 | 
            +
                test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
         | 
| 62 | 
            +
                test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
         | 
| 52 63 |  | 
| 53 | 
            -
                 | 
| 64 | 
            +
                test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
         | 
| 65 | 
            +
                test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
         | 
| 54 66 |  | 
| 55 | 
            -
                 | 
| 56 | 
            -
                [test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
         | 
| 67 | 
            +
                assert test_1_tsv == test_2_tsv
         | 
| 57 68 |  | 
| 58 | 
            -
                 | 
| 59 | 
            -
                 | 
| 69 | 
            +
                puts "change first job to fail, wait for stages"
         | 
| 70 | 
            +
                test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
         | 
| 71 | 
            +
                Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
         | 
| 72 | 
            +
                jobs_sheet.add_or_update_rows(test_job_rows)
         | 
| 60 73 |  | 
| 61 | 
            -
                 | 
| 62 | 
            -
                 | 
| 63 | 
            -
                assert test_target_sheet_2.read(user_name)  == test_source_sheet.read(user_name)
         | 
| 74 | 
            +
                #wait for stages to complete
         | 
| 75 | 
            +
                wait_for_stages
         | 
| 64 76 |  | 
| 77 | 
            +
                test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
         | 
| 78 | 
            +
                puts "jobtracker posted failing test error to sheet "
         | 
| 79 | 
            +
                error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
         | 
| 80 | 
            +
                assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
         | 
| 81 | 
            +
                Mobilize::Jobtracker.stop!
         | 
| 65 82 | 
             
              end
         | 
| 66 83 |  | 
| 84 | 
            +
              def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
         | 
| 85 | 
            +
                time = 0
         | 
| 86 | 
            +
                time_since_stage = 0
         | 
| 87 | 
            +
                #check for 10 min
         | 
| 88 | 
            +
                while time < time_limit and time_since_stage < stage_limit
         | 
| 89 | 
            +
                  sleep wait_length
         | 
| 90 | 
            +
                  job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
         | 
| 91 | 
            +
                  if job_classes.include?("Mobilize::Stage")
         | 
| 92 | 
            +
                    time_since_stage = 0
         | 
| 93 | 
            +
                    puts "saw stage at #{time.to_s} seconds"
         | 
| 94 | 
            +
                  else
         | 
| 95 | 
            +
                    time_since_stage += wait_length
         | 
| 96 | 
            +
                    puts "#{time_since_stage.to_s} seconds since stage seen"
         | 
| 97 | 
            +
                  end
         | 
| 98 | 
            +
                  time += wait_length
         | 
| 99 | 
            +
                  puts "total wait time #{time.to_s} seconds"
         | 
| 100 | 
            +
                end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                if time >= time_limit
         | 
| 103 | 
            +
                  raise "Timed out before stage completion"
         | 
| 104 | 
            +
                end
         | 
| 105 | 
            +
              end
         | 
| 67 106 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: mobilize-base
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1. | 
| 4 | 
            +
              version: '1.2'
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2013-03- | 
| 12 | 
            +
            date: 2013-03-21 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rake
         | 
| @@ -177,7 +177,7 @@ description: ! "Manage your organization's workflows entirely through Google Doc | |
| 177 177 | 
             
              and -mongodb packages\n                     to allow seamless transport of TSV and
         | 
| 178 178 | 
             
              JSON data between any two endpoints. "
         | 
| 179 179 | 
             
            email:
         | 
| 180 | 
            -
            - cpaesleme@ | 
| 180 | 
            +
            - cpaesleme@dena.com
         | 
| 181 181 | 
             
            executables: []
         | 
| 182 182 | 
             
            extensions: []
         | 
| 183 183 | 
             
            extra_rdoc_files: []
         | 
| @@ -220,10 +220,10 @@ files: | |
| 220 220 | 
             
            - lib/samples/resque.yml
         | 
| 221 221 | 
             
            - lib/samples/resque_web.rb
         | 
| 222 222 | 
             
            - mobilize-base.gemspec
         | 
| 223 | 
            -
            - test/base1_stage1.yml
         | 
| 224 223 | 
             
            - test/base_job_rows.yml
         | 
| 225 224 | 
             
            - test/mobilize-base_test.rb
         | 
| 226 225 | 
             
            - test/redis-test.conf
         | 
| 226 | 
            +
            - test/test_base_1.yml
         | 
| 227 227 | 
             
            - test/test_helper.rb
         | 
| 228 228 | 
             
            homepage: http://github.com/ngmoco/mobilize-base
         | 
| 229 229 | 
             
            licenses: []
         | 
| @@ -239,7 +239,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 239 239 | 
             
                  version: '0'
         | 
| 240 240 | 
             
                  segments:
         | 
| 241 241 | 
             
                  - 0
         | 
| 242 | 
            -
                  hash:  | 
| 242 | 
            +
                  hash: -2718067622627955864
         | 
| 243 243 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 244 244 | 
             
              none: false
         | 
| 245 245 | 
             
              requirements:
         | 
| @@ -248,7 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 248 248 | 
             
                  version: '0'
         | 
| 249 249 | 
             
                  segments:
         | 
| 250 250 | 
             
                  - 0
         | 
| 251 | 
            -
                  hash:  | 
| 251 | 
            +
                  hash: -2718067622627955864
         | 
| 252 252 | 
             
            requirements: []
         | 
| 253 253 | 
             
            rubyforge_project: mobilize-base
         | 
| 254 254 | 
             
            rubygems_version: 1.8.24
         | 
| @@ -257,8 +257,8 @@ specification_version: 3 | |
| 257 257 | 
             
            summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
         | 
| 258 258 | 
             
              Docs
         | 
| 259 259 | 
             
            test_files:
         | 
| 260 | 
            -
            - test/base1_stage1.yml
         | 
| 261 260 | 
             
            - test/base_job_rows.yml
         | 
| 262 261 | 
             
            - test/mobilize-base_test.rb
         | 
| 263 262 | 
             
            - test/redis-test.conf
         | 
| 263 | 
            +
            - test/test_base_1.yml
         | 
| 264 264 | 
             
            - test/test_helper.rb
         | 
| 
            File without changes
         |