mobilize-base 1.1.10 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -552,18 +552,23 @@ stage. These should be of the form `<key1>: <value1>, <key2>: <value2>`, where
552
552
  `<key>` is an unquoted string and `<value>` is a quoted string, an
553
553
  integer, an array (delimited by square braces), or a hash (delimited by
554
554
  curly braces).
555
- * For mobilize-base, the following stages are available:
556
- * gsheet.read `source: <input_gsheet_path>`, which reads the sheet.
557
- * The gsheet_path should be of the form
558
- `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
559
- the Runner itself. The test uses "base1_stage1.in".
560
- * gsheet.write `source: <stage_name>`,`target: <target_gsheet_path>`,
561
- which writes the specified stage output to the target_gsheet.
562
- * The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
555
+ * For mobilize-base, the following stage is available:
556
+ * gsheet.write `source: <input_path>`, `target: <target_path>`, which reads the source and writes it to the target sheet.
557
+ * The input_path should be of the form:
558
+ * `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
559
+ the Runner itself.
560
+ * `gfile://<gfile_name>` if the target is a file.
561
+ * The file must be owned by the Gdrive owner.
562
+ * The test uses "gfile://test_base_1.tsv".
563
+ * The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
563
564
  and "base1.out" for the second test. The first
564
565
  takes the output from the first stage and the second reads it straight
565
566
  from the referenced sheet.
566
- * The test uses "Requestor_mobilize(test)/base1.out" and
567
+ * All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
568
+ giving up.
569
+ * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension.
570
+ * The tab will be headed "response" and will contain the exception and backtrace for the error.
571
+ * The test uses "Requestor_mobilize(test)/base1.out" and
567
572
  "Runner_mobilize(test)/base2.out" for target sheets.
568
573
 
569
574
  <a name='section_Start_Run_Test'></a>
@@ -8,7 +8,7 @@ module GoogleDrive
8
8
  attempts = 0
9
9
  sleep_time = nil
10
10
  #try up to 20 times to make the call
11
- while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 20
11
+ while (response.nil? or response.code.starts_with?("5")) and attempts < 20
12
12
  #instantiate http object, set params
13
13
  http = @proxy.new(uri.host, uri.port)
14
14
  http.use_ssl = true
@@ -21,10 +21,10 @@ module GoogleDrive
21
21
  #timeouts etc.
22
22
  nil
23
23
  end
24
- if response.nil?
24
+ if response.nil? or response.code.starts_with?("4")
25
25
  attempts +=1
26
- else
27
- if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
26
+ elsif
27
+ if response.code.starts_with?("5")
28
28
  #wait 10 seconds times number of attempts squared in case of error
29
29
  sleep_time = 10 * (attempts*attempts)
30
30
  attempts += 1
@@ -13,15 +13,16 @@ module GoogleDrive
13
13
  f = self
14
14
  #admin includes workers
15
15
  return true if f.has_admin_acl?
16
- (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).each do |a|
17
- f.update_acl(a)
16
+ accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
17
+ accounts.each do |email|
18
+ f.update_acl(email)
18
19
  end
19
20
  end
20
21
 
21
22
  def has_admin_acl?
22
23
  f = self
23
24
  curr_emails = f.acls.map{|a| a.scope}.sort
24
- admin_emails = Mobilize::Gdrive.admin_emails.sort
25
+ admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
25
26
  if (curr_emails & admin_emails) == admin_emails
26
27
  return true
27
28
  else
@@ -40,13 +41,13 @@ module GoogleDrive
40
41
  end
41
42
  end
42
43
 
43
- def read(user)
44
+ def read(user_name)
44
45
  f = self
45
- entry = f.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
46
+ entry = f.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
46
47
  if entry and ['reader','writer','owner'].include?(entry.role)
47
48
  f.download_to_string
48
49
  else
49
- raise "User #{user} is not allowed to read #{f.title}"
50
+ raise "User #{user_name} is not allowed to read #{f.title}"
50
51
  end
51
52
  end
52
53
 
@@ -6,11 +6,11 @@ module GoogleDrive
6
6
  header = rows.first
7
7
  return nil unless header and header.first.to_s.length>0
8
8
  #look for blank cols to indicate end of row
9
- row_last_i = (header.index("") || header.length)-1
10
- out_tsv = rows.map{|r| r[0..row_last_i]}.map{|r| r.join("\t")}.join("\n")
9
+ col_last_i = (header.index("") || header.length)-1
10
+ #ignore user-entered line breaks for purposes of tsv reads
11
+ out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
11
12
  out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
12
13
  Mobilize::Gsheet.config['read_date_format'])
13
-
14
14
  end
15
15
  def add_headers(headers)
16
16
  headers.each_with_index do |h,h_i|
@@ -47,26 +47,30 @@ module GoogleDrive
47
47
  sheet.save
48
48
  end
49
49
 
50
- def merge(merge_sheet,user)
50
+ def merge(merge_sheet,user_name,crop)
51
51
  #write the top left of sheet
52
52
  #with the contents of merge_sheet
53
53
  sheet = self
54
54
  sheet.reload
55
- entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
55
+ entry = sheet.spreadsheet.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
56
56
  unless entry and ['writer','owner'].include?(entry.role)
57
- raise "User #{user} is not allowed to write to #{sheet.spreadsheet.title}"
57
+ raise "User #{user_name} is not allowed to write to #{sheet.spreadsheet.title}"
58
58
  end
59
59
  merge_sheet.reload
60
60
  curr_rows = sheet.num_rows
61
61
  curr_cols = sheet.num_cols
62
62
  merge_rows = merge_sheet.num_rows
63
63
  merge_cols = merge_sheet.num_cols
64
+ raise "zero sized merge sheet" if merge_rows == 0 or merge_cols == 0
64
65
  #make sure sheet is at least as big as necessary
65
- if merge_rows > curr_rows
66
+ #or as small as necessary if crop is specified
67
+ if merge_rows > curr_rows or
68
+ (merge_rows < curr_rows and crop==true)
66
69
  sheet.max_rows = merge_rows
67
70
  sheet.save
68
71
  end
69
- if merge_cols > curr_cols
72
+ if merge_cols > curr_cols or
73
+ (merge_cols < curr_cols and crop==true)
70
74
  sheet.max_cols = merge_cols
71
75
  sheet.save
72
76
  end
@@ -94,7 +98,7 @@ module GoogleDrive
94
98
  end
95
99
  end
96
100
 
97
- def write(tsv,user)
101
+ def write(tsv,user,crop=true)
98
102
  sheet = self
99
103
  entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
100
104
  unless entry and ['writer','owner'].include?(entry.role)
@@ -110,11 +114,14 @@ module GoogleDrive
110
114
  curr_rows = sheet.num_rows
111
115
  curr_cols = sheet.num_cols
112
116
  #make sure sheet is at least as big as necessary
113
- if tsvrows.length != curr_rows
117
+ #or small as necessary if crop
118
+ if tsvrows.length > curr_rows or
119
+ (tsvrows.length < curr_rows and crop==true)
114
120
  sheet.max_rows = tsvrows.length
115
121
  sheet.save
116
122
  end
117
- if headers.length != curr_cols
123
+ if headers.length > curr_cols or
124
+ (tsvrows.length < curr_rows and crop==true)
118
125
  sheet.max_cols = headers.length
119
126
  sheet.save
120
127
  end
@@ -124,13 +131,13 @@ module GoogleDrive
124
131
  tsvrows[batch_start..batch_end].each_with_index do |row,row_i|
125
132
  rowcols = row.split("\t")
126
133
  rowcols.each_with_index do |col_v,col_i|
127
- sheet[row_i+batch_start+1,col_i+1]= %{#{col_v}}
134
+ sheet[row_i + batch_start + 1, col_i + 1]= %{#{col_v}}
128
135
  end
129
136
  end
130
137
  sheet.save
131
138
  batch_start += (batch_length + 1)
132
- rows_written+=batch_length
133
- if batch_start>tsvrows.length+1
139
+ rows_written += batch_length
140
+ if batch_start>tsvrows.length + 1
134
141
  break
135
142
  end
136
143
  end
@@ -141,6 +148,7 @@ module GoogleDrive
141
148
  sheet.reload
142
149
  #loading remote data for checksum
143
150
  rem_tsv = sheet.to_tsv
151
+ return true if rem_tsv.to_s.length==0
144
152
  rem_table = rem_tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
145
153
  loc_table = tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
146
154
  re_col_vs = []
@@ -32,6 +32,7 @@ class String
32
32
  def googlesafe
33
33
  v=self
34
34
  return "" if v.to_s==""
35
+ return v if v.to_s.strip==""
35
36
  #normalize numbers by removing '$', '%', ',', ' '
36
37
  vnorm = v.to_s.norm_num
37
38
  vdigits = vnorm.split(".").last.length
@@ -12,13 +12,12 @@ module Mobilize
12
12
  :authentication => 'plain',
13
13
  :enable_starttls_auto => true }
14
14
 
15
- def write(subj,
16
- bod="",
17
- recipient=Jobtracker.admin_emails.join(","))
15
+ def write(params)
18
16
  mail(:from=>Gdrive.owner_email,
19
- :to=>recipient,
20
- :subject=>subj,
21
- :body=>bod)
17
+ :to=>params['to'],
18
+ :subject=>params['subject'],
19
+ :body=>params['body'],
20
+ :bcc=>params['bcc'])
22
21
  end
23
22
  end
24
23
  end
@@ -3,7 +3,24 @@ module Mobilize
3
3
  def Gbook.find_all_by_path(path,gdrive_slot)
4
4
  Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
5
5
  end
6
+
7
+ def Gbook.find_by_http_url(http_url,gdrive_slot)
8
+ key = http_url.split("key=").last.split("#").first
9
+ Gdrive.root(gdrive_slot).spreadsheet_by_key(key)
10
+ end
11
+
6
12
  def Gbook.find_by_path(path,gdrive_slot)
13
+ #first try to find a dataset with the URL
14
+ dst = Dataset.find_by_handler_and_path('gbook',path)
15
+ if dst and dst.http_url.to_s.length>0
16
+ book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
17
+ #doesn't count if it's deleted
18
+ if book.entry_hash[:deleted]
19
+ book = nil
20
+ else
21
+ return book
22
+ end
23
+ end
7
24
  books = Gbook.find_all_by_path(path,gdrive_slot)
8
25
  dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
9
26
  book = nil
@@ -15,6 +32,7 @@ module Mobilize
15
32
  bkey = b.resource_id.split(":").last
16
33
  if bkey == dkey
17
34
  book = b
35
+ dst.update_attributes(:http_url=>book.human_url)
18
36
  else
19
37
  #delete the invalid book
20
38
  b.delete
@@ -25,6 +43,7 @@ module Mobilize
25
43
  #If it's a new dst or if there are multiple books
26
44
  #take the first
27
45
  book = books.first
46
+ dst.update_attributes(:http_url=>book.human_url) if book
28
47
  end
29
48
  return book
30
49
  end
@@ -80,5 +80,18 @@ module Mobilize
80
80
  def Gdrive.books(gdrive_slot=nil,params={})
81
81
  Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
82
82
  end
83
+
84
+ #email management - used to make sure not too many emails get used at the same time
85
+ def Gdrive.slot_worker_by_path(path)
86
+ working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
87
+ Gdrive.workers.sort_by{rand}.each do |w|
88
+ unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
89
+ Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
90
+ return [w['name'],Gdrive.domain].join("@")
91
+ end
92
+ end
93
+ #return false if none are available
94
+ return false
95
+ end
83
96
  end
84
97
  end
@@ -1,5 +1,47 @@
1
1
  module Mobilize
2
2
  module Gfile
3
+ def Gfile.path_to_dst(path,stage_path)
4
+ #don't need the ://
5
+ path = path.split("://").last if path.index("://")
6
+ if Gfile.find_by_path(path)
7
+ handler = "gfile"
8
+ Dataset.find_or_create_by_url("#{handler}://#{path}")
9
+ else
10
+ raise "unable to find #{path}"
11
+ end
12
+ end
13
+
14
+ def Gfile.read_by_dataset_path(dst_path,user_name,*args)
15
+ #expects gdrive slot as first arg, otherwise chooses random
16
+ gdrive_slot = args
17
+ worker_emails = Gdrive.worker_emails.sort_by{rand}
18
+ gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
19
+ file = Gfile.find_by_path(dst_path)
20
+ file.read(user_name) if file
21
+ end
22
+
23
+ def Gfile.write_by_dataset_path(dst_path,string,user_name,*args)
24
+ #ignores *args as all files must be created and owned by owner
25
+ file = Gfile.find_by_path(dst_path)
26
+ file.delete if file
27
+ owner_root = Gdrive.root(Gdrive.owner_email)
28
+ file = owner_root.upload_from_string(string,
29
+ dst_path,
30
+ :content_type=>"test/plain",
31
+ :convert=>false)
32
+ file.add_admin_acl
33
+ #make sure user is owner or can edit
34
+ u = User.where(:name=>user_name).first
35
+ entry = file.acl_entry(u.email)
36
+ unless entry and ['writer','owner'].include?(entry.role)
37
+ file.update_acl(u.email)
38
+ end
39
+ #update http url for file
40
+ dst = Dataset.find_by_handler_and_path("gfile",dst_path)
41
+ dst.update_attributes(:http_url=>file.human_url)
42
+ true
43
+ end
44
+
3
45
  def Gfile.add_admin_acl_by_path(path)
4
46
  file = Gfile.find_by_path(path)
5
47
  file.add_admin_acl
@@ -18,18 +60,6 @@ module Mobilize
18
60
  file.update_acl(gdrive_slot,role)
19
61
  end
20
62
 
21
- def Gfile.read_by_stage_path(stage_path)
22
- #reserve gdrive_slot account for read
23
- gdrive_slot = Gdrive.slot_worker_by_path(s.path)
24
- return false unless gdrive_slot
25
- s = Stage.where(:path=>stage_path)
26
- gfile_path = s.params['file']
27
- out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
28
- #use Gridfs to cache result
29
- out_url = "gridfs://#{s.path}/out"
30
- Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
31
- end
32
-
33
63
  def Gfile.find_by_path(path)
34
64
  #file must be owned by owner
35
65
  gdrive_slot = Gdrive.owner_email
@@ -55,8 +85,10 @@ module Mobilize
55
85
  end
56
86
  #always make sure dataset http URL is up to date
57
87
  #and that it has admin acl
58
- dst.update_attributes(:http_url=>file.human_url)
59
- file.add_admin_acl
88
+ if file
89
+ dst.update_attributes(:http_url=>file.human_url)
90
+ file.add_admin_acl
91
+ end
60
92
  return file
61
93
  end
62
94
  end
@@ -11,7 +11,7 @@ module Mobilize
11
11
  return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
12
12
  end
13
13
 
14
- def Gridfs.read_by_dataset_path(dst_path,user)
14
+ def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
15
15
  begin
16
16
  zs=Gridfs.grid.open(dst_path,'r').read
17
17
  return ::Zlib::Inflate.inflate(zs)
@@ -20,10 +20,10 @@ module Mobilize
20
20
  end
21
21
  end
22
22
 
23
- def Gridfs.write_by_dataset_path(dst_path,string,user)
23
+ def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
24
24
  zs = ::Zlib::Deflate.deflate(string)
25
25
  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
26
- curr_zs = Gridfs.read_by_dataset_path(dst_path,user).to_s
26
+ curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
27
27
  #write a new version when there is a change
28
28
  if curr_zs != zs
29
29
  Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
@@ -9,9 +9,62 @@ module Mobilize
9
9
  Gsheet.config['max_cells']
10
10
  end
11
11
 
12
+ # converts a source path or target path to a dst in the context of handler and stage
13
+ def Gsheet.path_to_dst(path,stage_path)
14
+ s = Stage.where(:path=>stage_path).first
15
+ params = s.params
16
+ target_path = params['target']
17
+ #take random slot if one is not available
18
+ gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
19
+ #if this is the target, it doesn't have to exist already
20
+ is_target = true if path == target_path
21
+ #don't need the ://
22
+ path = path.split("://").last if path.index("://")
23
+ if path.split("/").length == 2
24
+ if is_target or Gsheet.find_by_path(path,gdrive_slot)
25
+ #user has specified path to a sheet
26
+ return Dataset.find_or_create_by_url("gsheet://#{path}")
27
+ else
28
+ raise "unable to find #{path}"
29
+ end
30
+ else
31
+ #user has specified a sheet
32
+ runner_title = stage_path.split("/").first
33
+ r = Runner.find_by_title(runner_title)
34
+ if is_target or r.gbook(gdrive_slot).worksheets.map{|w| w.title}.include?(path)
35
+ handler = "gsheet"
36
+ path = "#{runner_title}/#{path}"
37
+ elsif Gfile.find_by_path(path,gdrive_slot)
38
+ handler = "gfile"
39
+ path = "#{path}"
40
+ else
41
+ raise "unable to find #{path}"
42
+ end
43
+ return Dataset.find_or_create_by_url("#{handler}://#{path}")
44
+ end
45
+ end
46
+
47
+ def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
48
+ #expects gdrive slot as first arg, otherwise chooses random
49
+ gdrive_slot = args
50
+ worker_emails = Gdrive.worker_emails.sort_by{rand}
51
+ gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
52
+ sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
53
+ sheet.read(user_name) if sheet
54
+ end
55
+
56
+ def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
57
+ #expects gdrive slot as first arg, otherwise chooses random
58
+ gdrive_slot,crop = args
59
+ worker_emails = Gdrive.worker_emails.sort_by{rand}
60
+ gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
61
+ crop ||= true
62
+ Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
63
+ end
64
+
12
65
  def Gsheet.write(path,tsv,gdrive_slot)
13
66
  sheet = Gsheet.find_or_create_by_path(path,gdrive_slot)
14
- sheet.write(tsv)
67
+ sheet.write(tsv,Gdrive.owner_name)
15
68
  end
16
69
 
17
70
  def Gsheet.find_by_path(path,gdrive_slot)
@@ -32,32 +85,9 @@ module Mobilize
32
85
  return sheet
33
86
  end
34
87
 
35
- def Gsheet.read_by_stage_path(stage_path)
36
- #reserve gdrive_slot account for read
37
- gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
38
- return false unless gdrive_slot
39
- s = Stage.where(:path=>stage_path).first
40
- user = s.job.runner.user.name
41
- source_dst = s.source_dsts(gdrive_slot).first
42
- out_tsv = source_dst.read(user)
43
- #use Gridfs to cache result
44
- out_url = "gridfs://#{s.path}/out"
45
- Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
46
- end
47
-
48
- def Gsheet.write_by_stage_path(stage_path)
49
- gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
50
- #return blank response if there are no slots available
51
- return nil unless gdrive_slot
52
- s = Stage.where(:path=>stage_path).first
53
- user = s.job.runner.user
54
- target_path = s.params['target']
55
- target_path = "#{s.job.runner.title}/#{target_path}" unless target_path.index("/")
56
- source_dst = s.source_dsts(gdrive_slot).first
57
- tsv = source_dst.read(user.name)
58
- sheet_name = target_path.split("/").last
59
- temp_path = [stage_path.gridsafe,sheet_name].join("/")
88
+ def Gsheet.write_temp(target_path,gdrive_slot,tsv)
60
89
  #find and delete temp sheet, if any
90
+ temp_path = [target_path.gridsafe,"temp"].join("/")
61
91
  temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
62
92
  temp_sheet.delete if temp_sheet
63
93
  #write data to temp sheet
@@ -70,28 +100,57 @@ module Mobilize
70
100
  return nil
71
101
  end
72
102
  temp_sheet.check_and_fix(tsv)
103
+ temp_sheet
104
+ end
105
+
106
+ def Gsheet.write_target(target_path,tsv,user_name,gdrive_slot,crop=true)
107
+ #write to temp sheet first, to ensure google compatibility
108
+ #and fix any discrepancies due to spreadsheet assumptions
109
+ temp_sheet = Gsheet.write_temp(target_path,gdrive_slot,tsv)
110
+ #try to find target sheet
73
111
  target_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
112
+ u = User.where(:name=>user_name).first
74
113
  unless target_sheet
75
114
  #only give the user edit permissions if they're the ones
76
115
  #creating it
77
116
  target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
78
- target_sheet.spreadsheet.update_acl(user.email,"writer") unless target_sheet.spreadsheet.acl_entry(user.email).ie{|e| e and e.role=="owner"}
117
+ target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
79
118
  target_sheet.delete_sheet1
80
119
  end
81
- #this step has a tendency to fail; if it does,
82
- #don't fail the stage, mark it as false
83
- begin
84
- target_sheet.merge(temp_sheet,user.name)
85
- rescue
86
- return nil
87
- end
120
+ #pass it crop param to determine whether to shrink target sheet to fit data
121
+ #default is yes
122
+ target_sheet.merge(temp_sheet,user_name,crop)
88
123
  #delete the temp sheet's book
89
124
  temp_sheet.spreadsheet.delete
90
- status = "Write successful for #{target_path}"
91
- s.update_status(status)
92
- #use Gridfs to cache result
93
- out_url = "gridfs://#{s.path}/out"
94
- Dataset.write_by_url(out_url,status,Gdrive.owner_name)
125
+ target_sheet
126
+ end
127
+
128
+ def Gsheet.write_by_stage_path(stage_path)
129
+ gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
130
+ #return blank response if there are no slots available
131
+ return nil unless gdrive_slot
132
+ s = Stage.where(:path=>stage_path).first
133
+ u = s.job.runner.user
134
+ crop = s.params['crop'] || true
135
+ begin
136
+ #get tsv to write from stage
137
+ source = s.sources.first
138
+ raise "Need source for gsheet write" unless source
139
+ tsv = source.read(u.name,gdrive_slot)
140
+ raise "No data found in #{source.url}" unless tsv
141
+ Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
142
+ Gdrive.unslot_worker_by_path(stage_path)
143
+ #update status
144
+ stdout = "Write successful for #{s.target.url}"
145
+ stderr = nil
146
+ s.update_status(stdout)
147
+ signal = 0
148
+ rescue => exc
149
+ stdout = nil
150
+ stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
151
+ signal = 500
152
+ end
153
+ return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
95
154
  end
96
155
  end
97
156
  end
@@ -103,23 +103,28 @@ module Mobilize
103
103
  end
104
104
  end
105
105
 
106
- def Resque.failure_report
106
+ def Resque.new_failures_by_email
107
107
  fjobs = {}
108
- excs = Hash.new(0)
108
+ exc_to_s = Hash.new(0)
109
109
  Resque.failures.each_with_index do |f,f_i|
110
110
  #skip if already notified
111
111
  next if f['notified']
112
- sname = f['payload']['args'].first
113
- excs = f['error']
114
- if fjobs[sname].nil?
115
- fjobs[sname] = {excs => 1}
116
- elsif fjobs[sname][excs].nil?
117
- fjobs[sname][excs] = 1
112
+ stage_path = f['payload']['args'].first
113
+ s = Stage.where(:path=>stage_path).first
114
+ email = s.job.runner.user.email
115
+ exc_to_s = f['error']
116
+ if fjobs[email].nil?
117
+ fjobs[email] = {stage_path => {exc_to_s => 1}}
118
+ elsif fjobs[email][stage_path].nil?
119
+ fjobs[email][stage_path] = {exc_to_s => 1}
120
+ elsif fjobs[email][stage_path][exc_to_s].nil?
121
+ fjobs[email][stage_path][exc_to_s] = 1
118
122
  else
119
- fjobs[sname][excs] += 1
123
+ fjobs[email][stage_path][exc_to_s] += 1
120
124
  end
121
125
  #add notified flag to redis
122
126
  f['notified'] = true
127
+ #tag stage with email
123
128
  ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
124
129
  end
125
130
  return fjobs
@@ -163,27 +163,39 @@ module Mobilize
163
163
  if Jobtracker.notif_due?
164
164
  notifs = []
165
165
  if Jobtracker.failures.length>0
166
- jfcs = Resque.failure_report
167
- unless jfcs=={} #no new failures
166
+ failure_hash = Resque.new_failures_by_email
167
+ failure_hash.each do |email,stage_paths|
168
168
  n = {}
169
- n['subj'] = "#{jfcs.keys.length.to_s} new failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
169
+ n['subject'] = "#{stage_paths.keys.length.to_s} new failed jobs, #{stage_paths.values.map{|v| v.values}.flatten.sum.to_s} failures"
170
170
  #one row per exception type, with the job name
171
- n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
171
+ n['body'] = stage_paths.map do |path,exceptions|
172
+ exceptions.map do |exc_to_s,times|
173
+ [path," : ",exc_to_s,", ",times," times"].join
174
+ end
175
+ end.flatten.join("\n\n")
176
+ u = User.where(:name=>email.split("@").first).first
177
+ runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
178
+ n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
179
+ n['to'] = email
180
+ n['bcc'] = Jobtracker.admin_emails.join(",")
172
181
  notifs << n
173
182
  end
174
183
  end
175
184
  lws = Jobtracker.max_run_time_workers
176
185
  if lws.length>0
177
186
  n = {}
178
- n['subj'] = "#{lws.length.to_s} max run time jobs"
187
+ n['subject'] = "#{lws.length.to_s} max run time jobs"
179
188
  n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
189
+ n['to'] = Jobtracker.admin_emails.join(",")
180
190
  notifs << n
181
191
  end
192
+ #deliver each email generated
182
193
  notifs.each do |notif|
183
- Email.write(n['subj'],notif['body']).deliver
184
- Jobtracker.last_notification=Time.now.utc.to_s
185
- Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
194
+ Email.write(notif).deliver
186
195
  end
196
+ #update notification time so JT knows to wait a while
197
+ Jobtracker.last_notification = Time.now.utc.to_s
198
+ Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
187
199
  end
188
200
  return true
189
201
  end
@@ -13,9 +13,23 @@ module Mobilize
13
13
 
14
14
  index({ handler: 1, path: 1}, { unique: true})
15
15
 
16
- def read
16
+ def url
17
+ s = self
18
+ "#{s.handler}://#{s.path}"
19
+ end
20
+
21
+ def read(user_name,*args)
17
22
  dst = self
18
- return "Mobilize::#{dst.handler.humanize}".constantize.read_by_path(dst.path)
23
+ dst.update_attributes(:last_read_at=>Time.now.utc)
24
+ "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user_name,*args)
25
+ end
26
+
27
+ def write(string,user_name,*args)
28
+ dst = self
29
+ "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user_name,*args)
30
+ dst.raw_size = string.length
31
+ dst.save!
32
+ return true
19
33
  end
20
34
 
21
35
  def Dataset.find_by_url(url)
@@ -38,24 +52,15 @@ module Mobilize
38
52
  return dst
39
53
  end
40
54
 
41
- def Dataset.write_by_url(url,string,user)
42
- dst = Dataset.find_or_create_by_url(url)
43
- dst.write(string,user)
44
- url
55
+ def Dataset.read_by_url(url,user_name,*args)
56
+ dst = Dataset.find_by_url(url)
57
+ dst.read(user_name,*args) if dst
45
58
  end
46
59
 
47
- def read(user)
48
- dst = self
49
- dst.update_attributes(:last_read_at=>Time.now.utc)
50
- "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user)
51
- end
52
-
53
- def write(string,user)
54
- dst = self
55
- "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
56
- dst.raw_size = string.length
57
- dst.save!
58
- return true
60
+ def Dataset.write_by_url(url,string,user_name,*args)
61
+ dst = Dataset.find_or_create_by_url(url)
62
+ dst.write(string,user_name,*args)
63
+ url
59
64
  end
60
65
  end
61
66
  end
@@ -15,11 +15,6 @@ module Mobilize
15
15
  %w{name active trigger status stage1 stage2 stage3 stage4 stage5}
16
16
  end
17
17
 
18
- def cached_at
19
- r = self
20
- Dataset.find_or_create_by_path(r.path).cached_at
21
- end
22
-
23
18
  def title
24
19
  r = self
25
20
  r.path.split("/").first
@@ -34,6 +29,9 @@ module Mobilize
34
29
  Runner.where(:path=>path).first
35
30
  end
36
31
 
32
+ def Runner.find_by_title(title)
33
+ Runner.where(:path=>"#{title}/jobs").first
34
+ end
37
35
  def Runner.perform(id,*args)
38
36
  r = Runner.find_by_path(id)
39
37
  #get gdrive slot for read
@@ -53,7 +51,9 @@ module Mobilize
53
51
  begin
54
52
  if j.is_due?
55
53
  j.update_attributes(:active=>false) if j.trigger=='once'
56
- j.stages.first.enqueue!
54
+ s = j.stages.first
55
+ s.update_attributes(:retries_done=>0)
56
+ s.enqueue!
57
57
  end
58
58
  rescue ScriptError, StandardError => exc
59
59
  r.update_status("Failed to enqueue #{j.path} with #{exc.to_s}")
@@ -73,11 +73,6 @@ module Mobilize
73
73
  Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
74
74
  end
75
75
 
76
- def cache
77
- r = self
78
- Dataset.find_or_create_by_url("gridfs://#{r.path}")
79
- end
80
-
81
76
  def gbook(gdrive_slot)
82
77
  r = self
83
78
  title = r.path.split("/").first
@@ -86,17 +81,20 @@ module Mobilize
86
81
 
87
82
  def gsheet(gdrive_slot)
88
83
  r = self
84
+ u = r.user
89
85
  jobs_sheet = Gsheet.find_by_path(r.path,gdrive_slot)
90
86
  #make sure the user has a runner with a jobs sheet and has write privileges on the spreadsheet
91
- unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(r.user.email).ie{|e| e and e.role=="writer"})
87
+ unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="writer"})
92
88
  #only give the user edit permissions if they're the ones
93
89
  #creating it
94
90
  jobs_sheet = Gsheet.find_or_create_by_path(r.path,gdrive_slot)
95
- unless jobs_sheet.spreadsheet.acl_entry(r.user.email).ie{|e| e and e.role=="owner"}
96
- jobs_sheet.spreadsheet.update_acl(r.user.email,"writer")
91
+ unless jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
92
+ jobs_sheet.spreadsheet.update_acl(u.email,"writer")
97
93
  end
98
94
  end
99
95
  jobs_sheet.add_headers(r.headers)
96
+ #add url to dataset
97
+ Dataset.find_or_create_by_url("gsheet://#{r.path}").update_attributes(:http_url=>jobs_sheet.spreadsheet.human_url)
100
98
  begin;jobs_sheet.delete_sheet1;rescue;end #don't care if sheet1 deletion fails
101
99
  return jobs_sheet
102
100
  end
@@ -104,8 +102,6 @@ module Mobilize
104
102
  def read_gsheet(gdrive_slot)
105
103
  r = self
106
104
  gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
107
- #cache in DB
108
- r.cache.write(gsheet_tsv,Gdrive.owner_name)
109
105
  #turn it into a hash array
110
106
  gsheet_jobs = gsheet_tsv.tsv_to_hash_array
111
107
  #go through each job, update relevant job with its params
@@ -122,8 +118,13 @@ module Mobilize
122
118
  stage_string = rj["stage#{s_idx.to_s}"]
123
119
  s = Stage.find_by_path("#{j.path}/stage#{s_idx.to_s}")
124
120
  if stage_string.to_s.length==0
125
- #delete this stage; user has blanked it
126
- s.delete if s
121
+ #delete this stage and all stages after
122
+ if s
123
+ j = s.job
124
+ j.stages[(s.idx-1)..-1].each{|ps| ps.delete}
125
+ #just in case
126
+ s.delete
127
+ end
127
128
  break
128
129
  elsif s.nil?
129
130
  #create this stage
@@ -7,7 +7,8 @@ module Mobilize
7
7
  field :call, type: String
8
8
  field :param_string, type: Array
9
9
  field :status, type: String
10
- field :out_url, type: String
10
+ field :response, type: Hash
11
+ field :retries_done, type: Fixnum
11
12
  field :completed_at, type: Time
12
13
  field :started_at, type: Time
13
14
  field :failed_at, type: Time
@@ -25,7 +26,15 @@ module Mobilize
25
26
  #allowing you to determine its size
26
27
  #before committing to a read or write
27
28
  s = self
28
- Dataset.find_by_url(s.out_url) if s.out_url
29
+ Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
30
+ end
31
+
32
+ def err_dst
33
+ #this gives a dataset that points to the error output
34
+ #allowing you to determine its size
35
+ #before committing to a read or write
36
+ s = self
37
+ Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
29
38
  end
30
39
 
31
40
  def params
@@ -68,84 +77,91 @@ module Mobilize
68
77
 
69
78
  def Stage.perform(id,*args)
70
79
  s = Stage.where(:path=>id).first
71
- j = s.job
72
80
  s.update_attributes(:started_at=>Time.now.utc)
73
81
  s.update_status(%{Starting at #{Time.now.utc}})
74
- begin
75
- #get response by running method
76
- s.out_url = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
77
- s.save!
78
- unless s.out_url
79
- #re-queue self if no response
80
- s.enqueue!
81
- return false
82
- end
83
- rescue ScriptError, StandardError => exc
84
- j.update_attributes(:active=>false)
85
- s.update_attributes(:failed_at=>Time.now.utc)
86
- s.update_status("Failed at #{Time.now.utc.to_s}")
87
- raise exc
82
+ #get response by running method
83
+ response = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
84
+ unless response
85
+ #re-queue self if no response
86
+ s.enqueue!
87
+ return false
88
88
  end
89
- s.update_attributes(:completed_at=>Time.now.utc)
89
+ if response['signal'] == 0
90
+ s.complete(response)
91
+ elsif s.retries_done.to_i < s.params['retries'].to_i
92
+ #retry
93
+ s.update_attributes(:retries_done => s.retries_done.to_i + 1, :response => response)
94
+ s.update_status(%{Retry #{s.retries_done.to_s} at #{Time.now.utc}})
95
+ s.enqueue!
96
+ else
97
+ #sleep as much as user specifies
98
+ sleep s.params['delay'].to_i
99
+ s.fail(response)
100
+ end
101
+ return true
102
+ end
103
+
104
+ def complete(response)
105
+ s = self
106
+ s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
90
107
  s.update_status("Completed at #{Time.now.utc.to_s}")
108
+ j = s.job
91
109
  if s.idx == j.stages.length
92
110
  #check for any dependent jobs, if there are, enqueue them
93
111
  r = j.runner
94
- dep_jobs = r.jobs.select{|dj| dj.active==true and dj.trigger.strip.downcase == "after #{j.name}"}
112
+ dep_jobs = r.jobs.select do |dj|
113
+ dj.active==true and
114
+ dj.trigger.strip.downcase == "after #{j.name}"
115
+ end
95
116
  #put begin/rescue so all dependencies run
96
- dep_jobs.each{|dj| begin;dj.stages.first.enqueue! unless dj.is_working?;rescue;end}
117
+ dep_jobs.each do |dj|
118
+ begin
119
+ unless dj.is_working?
120
+ dj.stages.first.update_attributes(:retries_done=>0)
121
+ dj.stages.first.enqueue!
122
+ end
123
+ rescue
124
+ #job won't run if there is an error; log it as a failure
125
+ response = {"err_str" => "Unable to enqueue first stage of #{dj.path}"}
126
+ dj.stages.first.fail(response)
127
+ end
128
+ end
97
129
  else
98
130
  #queue up next stage
131
+ s.next.update_attributes(:retries_done=>0)
99
132
  s.next.enqueue!
100
133
  end
101
- return true
134
+ true
102
135
  end
103
136
 
104
- def source_dsts(gdrive_slot)
105
- #returns an array of Datasets corresponding to
106
- #gridfs caches for stage outputs, gsheets and gfiles
107
- #or dataset pointers for other handlers
137
+ def fail(response,gdrive_slot=nil)
138
+ #get random worker if one is not provided
139
+ gdrive_slot ||= Gdrive.worker_emails.sort_by{rand}.first
108
140
  s = self
109
- params = s.params
110
- source_paths = if params['sources']
111
- params['sources']
112
- elsif params['source']
113
- [params['source']]
114
- end
115
- user = s.job.runner.user.name
116
- return [] if (source_paths.class!=Array or source_paths.length==0)
117
- dsts = []
118
- source_paths.each do |source_path|
119
- if source_path.index(/^stage[1-5]$/)
120
- source_stage_path = "#{s.job.runner.path}/#{s.job.name}/#{source_path}"
121
- source_stage = Stage.where(:path=>source_stage_path).first
122
- dsts << source_stage.out_dst
123
- elsif source_path.index("://")
124
- #find or create by url
125
- dsts << Dataset.find_or_create_by_url(source_path)
126
- else
127
- if source_path.index("/")
128
- #slashes mean sheets
129
- out_tsv = Gsheet.find_by_path(source_path,gdrive_slot).read(user)
130
- else
131
- #check sheets in runner
132
- r = s.job.runner
133
- runner_sheet = r.gbook(gdrive_slot).worksheet_by_title(source_path)
134
- out_tsv = if runner_sheet
135
- runner_sheet.read(user)
136
- else
137
- #check for gfile. will fail if there isn't one.
138
- Gfile.find_by_path(source_path).read(user)
139
- end
140
- end
141
- #use Gridfs to cache gdrive results
142
- file_name = source_path.split("/").last
143
- out_url = "gridfs://#{s.path}/#{file_name}"
144
- Dataset.write_by_url(out_url,out_tsv,user)
145
- dsts << Dataset.find_by_url(out_url)
146
- end
147
- end
148
- return dsts
141
+ j = s.job
142
+ r = j.runner
143
+ u = r.user
144
+ j.update_attributes(:active=>false)
145
+ s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
146
+ stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
147
+ target_path = (r.path.split("/")[0..-2] + [stage_name]).join("/")
148
+ status_msg = "Failed at #{Time.now.utc.to_s}"
149
+ #read err txt, add err sheet, write to it
150
+ err_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
151
+ err_sheet.delete if err_sheet
152
+ err_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
153
+ err_txt = if response['err_url']
154
+ Dataset.read_by_url(response['err_url'],u.name)
155
+ elsif response['err_str']
156
+ response['err_str']
157
+ end
158
+ err_txt = ["response","\n",err_txt].join
159
+ err_sheet.write(err_txt,u.name)
160
+ #exception will be first row below "response" header
161
+ exc_to_s,backtrace = err_txt.split("\n").ie{|ea| [ea[1], ea[2..-1]]}
162
+ s.update_status(status_msg)
163
+ #raise the exception so it bubbles up to resque
164
+ raise Exception,exc_to_s,backtrace
149
165
  end
150
166
 
151
167
  def enqueue!
@@ -180,5 +196,61 @@ module Mobilize
180
196
  s = self
181
197
  Mobilize::Resque.active_paths.include?(s.path)
182
198
  end
199
+
200
+ def target
201
+ s = self
202
+ params = s.params
203
+ target_path = params['target']
204
+ handler,path = target_path.split("://")
205
+ #if the user has specified a url for a target
206
+ #that is not this stage's handler, disallow
207
+ if handler and path and handler != s.handler
208
+ raise "incompatible target handler #{handler} for #{s.handler} stage"
209
+ else
210
+ begin
211
+ return "Mobilize::#{s.handler.downcase.capitalize}".constantize.path_to_dst(target_path,s.path)
212
+ rescue => exc
213
+ raise "Could not get #{target_path} with error: #{exc.to_s}"
214
+ end
215
+ end
216
+ end
217
+
218
+ def sources
219
+ #returns an array of Datasets corresponding to
220
+ #items listed as sources in the stage params
221
+ s = self
222
+ params = s.params
223
+ job = s.job
224
+ runner = job.runner
225
+ source_paths = if params['sources']
226
+ params['sources']
227
+ elsif params['source']
228
+ [params['source']]
229
+ end
230
+ return [] if (source_paths.class!=Array or source_paths.length==0)
231
+ dsts = []
232
+ source_paths.each do |source_path|
233
+ if source_path.index(/^stage[1-5]$/)
234
+ #stage arguments return the stage's output dst url
235
+ source_stage_path = "#{runner.path}/#{job.name}/#{source_path}"
236
+ source_stage = Stage.where(:path=>source_stage_path).first
237
+ source_stage_out_url = source_stage.response['out_url']
238
+ dsts << Dataset.find_by_url(source_stage_out_url)
239
+ else
240
+ handler = if source_path.index("://")
241
+ source_path.split("://").first
242
+ else
243
+ s.handler
244
+ end
245
+ begin
246
+ stage_path = s.path
247
+ dsts << "Mobilize::#{handler.downcase.capitalize}".constantize.path_to_dst(source_path,stage_path)
248
+ rescue => exc
249
+ raise "Could not get #{source_path} with error: #{exc.to_s}"
250
+ end
251
+ end
252
+ end
253
+ return dsts
254
+ end
183
255
  end
184
256
  end
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.1.10"
3
+ VERSION = "1.2"
4
4
  end
5
5
  end
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
6
6
  s.name = "mobilize-base"
7
7
  s.version = Mobilize::Base::VERSION
8
8
  s.authors = ["Cassio Paes-Leme"]
9
- s.email = ["cpaesleme@ngmoco.com"]
9
+ s.email = ["cpaesleme@dena.com"]
10
10
  s.homepage = "http://github.com/ngmoco/mobilize-base"
11
11
  s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
12
12
  s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
@@ -2,8 +2,7 @@
2
2
  active: true
3
3
  trigger: once
4
4
  status: ""
5
- stage1: gsheet.read source:base1_stage1.in
6
- stage2: gsheet.write source:stage1, target:base1.out
5
+ stage1: gsheet.write source:"gfile://test_base_1.tsv", target:base1.out
7
6
 
8
7
  - name: base2
9
8
  active: true
@@ -30,38 +30,77 @@ describe "Mobilize" do
30
30
 
31
31
  puts "Jobtracker created runner with 'jobs' sheet?"
32
32
  r = u.runner
33
- jobs_sheet = r.gsheet(gdrive_slot)
34
- tsv = jobs_sheet.read(user_name)
35
- assert tsv.tsv_header_array.join.length == 53 #total header length
36
-
37
- puts "add base1_stage1 input sheet"
38
- test_source_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.in",gdrive_slot)
39
-
40
- test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/base1_stage1.yml")*40
33
+ jobs_sheet_url = "gsheet://#{r.path}"
34
+ jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
35
+ jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
36
+ jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
37
+ assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
38
+
39
+ #stop Jobtracker, if you're doing this by queueing runners
40
+ #Mobilize::Jobtracker.stop!
41
+
42
+ puts "add base1 input file"
43
+ test_filename = "test_base_1"
44
+ file_url = "gfile://#{test_filename}.tsv"
45
+ test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
41
46
  test_source_tsv = test_source_ha.hash_array_to_tsv
42
- test_source_sheet.write(test_source_tsv,user_name)
47
+ Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
48
+ rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
49
+ assert rem_tsv == test_source_tsv
43
50
 
44
- puts "add row to jobs sheet, wait 300s"
51
+ puts "add row to jobs sheet, wait for stages"
45
52
  test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
53
+ jobs_sheet.reload
46
54
  jobs_sheet.add_or_update_rows(test_job_rows)
47
- sleep 300
55
+ #wait for stages to complete
56
+ #r.enqueue!
57
+ wait_for_stages
48
58
 
49
59
  puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
50
- test_target_sheet_1 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
51
- test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base2.out",gdrive_slot)
60
+ test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
61
+ test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
62
+ test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
52
63
 
53
- assert test_target_sheet_1.read(user_name) == test_source_sheet.read(user_name)
64
+ test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
65
+ test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_2_url,user_name,gdrive_slot)
54
66
 
55
- puts "delete both output sheets, set first job to active=true, wait 300s"
56
- [test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
67
+ assert test_1_tsv == test_2_tsv
57
68
 
58
- jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
59
- sleep 300
69
+ puts "change first job to fail, wait for stages"
70
+ test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
71
+ Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
72
+ jobs_sheet.add_or_update_rows(test_job_rows)
60
73
 
61
- test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base2.out",gdrive_slot)
62
- puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
63
- assert test_target_sheet_2.read(user_name) == test_source_sheet.read(user_name)
74
+ #wait for stages to complete
75
+ wait_for_stages
64
76
 
77
+ test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
78
+ puts "jobtracker posted failing test error to sheet "
79
+ error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
80
+ assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
81
+ Mobilize::Jobtracker.stop!
65
82
  end
66
83
 
84
+ def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
85
+ time = 0
86
+ time_since_stage = 0
87
+ #poll until time_limit is reached (default 600s) or no stage has been seen for stage_limit seconds
88
+ while time < time_limit and time_since_stage < stage_limit
89
+ sleep wait_length
90
+ job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
91
+ if job_classes.include?("Mobilize::Stage")
92
+ time_since_stage = 0
93
+ puts "saw stage at #{time.to_s} seconds"
94
+ else
95
+ time_since_stage += wait_length
96
+ puts "#{time_since_stage.to_s} seconds since stage seen"
97
+ end
98
+ time += wait_length
99
+ puts "total wait time #{time.to_s} seconds"
100
+ end
101
+
102
+ if time >= time_limit
103
+ raise "Timed out before stage completion"
104
+ end
105
+ end
67
106
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.10
4
+ version: '1.2'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-05 00:00:00.000000000 Z
12
+ date: 2013-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -177,7 +177,7 @@ description: ! "Manage your organization's workflows entirely through Google Doc
177
177
  and -mongodb packages\n to allow seamless transport of TSV and
178
178
  JSON data between any two endpoints. "
179
179
  email:
180
- - cpaesleme@ngmoco.com
180
+ - cpaesleme@dena.com
181
181
  executables: []
182
182
  extensions: []
183
183
  extra_rdoc_files: []
@@ -220,10 +220,10 @@ files:
220
220
  - lib/samples/resque.yml
221
221
  - lib/samples/resque_web.rb
222
222
  - mobilize-base.gemspec
223
- - test/base1_stage1.yml
224
223
  - test/base_job_rows.yml
225
224
  - test/mobilize-base_test.rb
226
225
  - test/redis-test.conf
226
+ - test/test_base_1.yml
227
227
  - test/test_helper.rb
228
228
  homepage: http://github.com/ngmoco/mobilize-base
229
229
  licenses: []
@@ -239,7 +239,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
239
239
  version: '0'
240
240
  segments:
241
241
  - 0
242
- hash: 944227401708125254
242
+ hash: -2718067622627955864
243
243
  required_rubygems_version: !ruby/object:Gem::Requirement
244
244
  none: false
245
245
  requirements:
@@ -248,7 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
248
248
  version: '0'
249
249
  segments:
250
250
  - 0
251
- hash: 944227401708125254
251
+ hash: -2718067622627955864
252
252
  requirements: []
253
253
  rubyforge_project: mobilize-base
254
254
  rubygems_version: 1.8.24
@@ -257,8 +257,8 @@ specification_version: 3
257
257
  summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
258
258
  Docs
259
259
  test_files:
260
- - test/base1_stage1.yml
261
260
  - test/base_job_rows.yml
262
261
  - test/mobilize-base_test.rb
263
262
  - test/redis-test.conf
263
+ - test/test_base_1.yml
264
264
  - test/test_helper.rb
File without changes