mobilize-base 1.1.10 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +14 -9
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +4 -4
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -6
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +22 -14
- data/lib/mobilize-base/extensions/string.rb +1 -0
- data/lib/mobilize-base/handlers/email.rb +5 -6
- data/lib/mobilize-base/handlers/gbook.rb +19 -0
- data/lib/mobilize-base/handlers/gdrive.rb +13 -0
- data/lib/mobilize-base/handlers/gfile.rb +46 -14
- data/lib/mobilize-base/handlers/gridfs.rb +3 -3
- data/lib/mobilize-base/handlers/gsheet.rb +98 -39
- data/lib/mobilize-base/handlers/resque.rb +14 -9
- data/lib/mobilize-base/jobtracker.rb +20 -8
- data/lib/mobilize-base/models/dataset.rb +23 -18
- data/lib/mobilize-base/models/runner.rb +19 -18
- data/lib/mobilize-base/models/stage.rb +137 -65
- data/lib/mobilize-base/version.rb +1 -1
- data/mobilize-base.gemspec +1 -1
- data/test/base_job_rows.yml +1 -2
- data/test/mobilize-base_test.rb +60 -21
- metadata +7 -7
- /data/test/{base1_stage1.yml → test_base_1.yml} +0 -0
data/README.md
CHANGED
@@ -552,18 +552,23 @@ stage. These should be of the form `<key1>: <value1>, <key2>: <value2>`, where
|
|
552
552
|
`<key>` is an unquoted string and `<value>` is a quoted string, an
|
553
553
|
integer, an array (delimited by square braces), or a hash (delimited by
|
554
554
|
curly braces).
|
555
|
-
* For mobilize-base, the following
|
556
|
-
* gsheet.
|
557
|
-
* The
|
558
|
-
`<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
|
559
|
-
the Runner itself.
|
560
|
-
|
561
|
-
|
562
|
-
|
555
|
+
* For mobilize-base, the following stage is available:
|
556
|
+
* gsheet.write `source: <input_path>`, which reads the sheet.
|
557
|
+
* The input_path should be of the form:
|
558
|
+
* `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
|
559
|
+
the Runner itself.
|
560
|
+
* `gfile://<gfile_name>` if the target is a file.
|
561
|
+
* The file must be owned by the Gdrive owner.
|
562
|
+
* The test uses "gfile://test_base_1.tsv".
|
563
|
+
* The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
|
563
564
|
and "base1.out" for the second test. The first
|
564
565
|
takes the output from the first stage and the second reads it straight
|
565
566
|
from the referenced sheet.
|
566
|
-
|
567
|
+
* All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
|
568
|
+
giving up.
|
569
|
+
* If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension.
|
570
|
+
* The tab will be headed "response" and will contain the exception and backtrace for the error.
|
571
|
+
* The test uses "Requestor_mobilize(test)/base1.out" and
|
567
572
|
"Runner_mobilize(test)/base2.out" for target sheets.
|
568
573
|
|
569
574
|
<a name='section_Start_Run_Test'></a>
|
@@ -8,7 +8,7 @@ module GoogleDrive
|
|
8
8
|
attempts = 0
|
9
9
|
sleep_time = nil
|
10
10
|
#try up to 20 times to make the call
|
11
|
-
while (response.nil? or response.code.
|
11
|
+
while (response.nil? or response.code.starts_with?("5")) and attempts < 20
|
12
12
|
#instantiate http object, set params
|
13
13
|
http = @proxy.new(uri.host, uri.port)
|
14
14
|
http.use_ssl = true
|
@@ -21,10 +21,10 @@ module GoogleDrive
|
|
21
21
|
#timeouts etc.
|
22
22
|
nil
|
23
23
|
end
|
24
|
-
if response.nil?
|
24
|
+
if response.nil? or response.code.starts_with?("4")
|
25
25
|
attempts +=1
|
26
|
-
|
27
|
-
if response.code.
|
26
|
+
elsif
|
27
|
+
if response.code.starts_with?("5")
|
28
28
|
#wait 10 seconds times number of attempts squared in case of error
|
29
29
|
sleep_time = 10 * (attempts*attempts)
|
30
30
|
attempts += 1
|
@@ -13,15 +13,16 @@ module GoogleDrive
|
|
13
13
|
f = self
|
14
14
|
#admin includes workers
|
15
15
|
return true if f.has_admin_acl?
|
16
|
-
(Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
17
|
-
|
16
|
+
accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
17
|
+
accounts.each do |email|
|
18
|
+
f.update_acl(email)
|
18
19
|
end
|
19
20
|
end
|
20
21
|
|
21
22
|
def has_admin_acl?
|
22
23
|
f = self
|
23
24
|
curr_emails = f.acls.map{|a| a.scope}.sort
|
24
|
-
admin_emails = Mobilize::Gdrive.admin_emails.
|
25
|
+
admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
25
26
|
if (curr_emails & admin_emails) == admin_emails
|
26
27
|
return true
|
27
28
|
else
|
@@ -40,13 +41,13 @@ module GoogleDrive
|
|
40
41
|
end
|
41
42
|
end
|
42
43
|
|
43
|
-
def read(
|
44
|
+
def read(user_name)
|
44
45
|
f = self
|
45
|
-
entry = f.acl_entry("#{
|
46
|
+
entry = f.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
|
46
47
|
if entry and ['reader','writer','owner'].include?(entry.role)
|
47
48
|
f.download_to_string
|
48
49
|
else
|
49
|
-
raise "User #{
|
50
|
+
raise "User #{user_name} is not allowed to read #{f.title}"
|
50
51
|
end
|
51
52
|
end
|
52
53
|
|
@@ -6,11 +6,11 @@ module GoogleDrive
|
|
6
6
|
header = rows.first
|
7
7
|
return nil unless header and header.first.to_s.length>0
|
8
8
|
#look for blank cols to indicate end of row
|
9
|
-
|
10
|
-
|
9
|
+
col_last_i = (header.index("") || header.length)-1
|
10
|
+
#ignore user-entered line breaks for purposes of tsv reads
|
11
|
+
out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
|
11
12
|
out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
|
12
13
|
Mobilize::Gsheet.config['read_date_format'])
|
13
|
-
|
14
14
|
end
|
15
15
|
def add_headers(headers)
|
16
16
|
headers.each_with_index do |h,h_i|
|
@@ -47,26 +47,30 @@ module GoogleDrive
|
|
47
47
|
sheet.save
|
48
48
|
end
|
49
49
|
|
50
|
-
def merge(merge_sheet,
|
50
|
+
def merge(merge_sheet,user_name,crop)
|
51
51
|
#write the top left of sheet
|
52
52
|
#with the contents of merge_sheet
|
53
53
|
sheet = self
|
54
54
|
sheet.reload
|
55
|
-
entry = sheet.spreadsheet.acl_entry("#{
|
55
|
+
entry = sheet.spreadsheet.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
|
56
56
|
unless entry and ['writer','owner'].include?(entry.role)
|
57
|
-
raise "User #{
|
57
|
+
raise "User #{user_name} is not allowed to write to #{sheet.spreadsheet.title}"
|
58
58
|
end
|
59
59
|
merge_sheet.reload
|
60
60
|
curr_rows = sheet.num_rows
|
61
61
|
curr_cols = sheet.num_cols
|
62
62
|
merge_rows = merge_sheet.num_rows
|
63
63
|
merge_cols = merge_sheet.num_cols
|
64
|
+
raise "zero sized merge sheet" if merge_rows == 0 or merge_cols == 0
|
64
65
|
#make sure sheet is at least as big as necessary
|
65
|
-
if
|
66
|
+
#or as small as necessary if crop is specified
|
67
|
+
if merge_rows > curr_rows or
|
68
|
+
(merge_rows < curr_rows and crop==true)
|
66
69
|
sheet.max_rows = merge_rows
|
67
70
|
sheet.save
|
68
71
|
end
|
69
|
-
if merge_cols > curr_cols
|
72
|
+
if merge_cols > curr_cols or
|
73
|
+
(merge_cols < curr_cols and crop==true)
|
70
74
|
sheet.max_cols = merge_cols
|
71
75
|
sheet.save
|
72
76
|
end
|
@@ -94,7 +98,7 @@ module GoogleDrive
|
|
94
98
|
end
|
95
99
|
end
|
96
100
|
|
97
|
-
def write(tsv,user)
|
101
|
+
def write(tsv,user,crop=true)
|
98
102
|
sheet = self
|
99
103
|
entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
|
100
104
|
unless entry and ['writer','owner'].include?(entry.role)
|
@@ -110,11 +114,14 @@ module GoogleDrive
|
|
110
114
|
curr_rows = sheet.num_rows
|
111
115
|
curr_cols = sheet.num_cols
|
112
116
|
#make sure sheet is at least as big as necessary
|
113
|
-
|
117
|
+
#or as small as necessary if crop
|
118
|
+
if tsvrows.length > curr_rows or
|
119
|
+
(tsvrows.length < curr_rows and crop==true)
|
114
120
|
sheet.max_rows = tsvrows.length
|
115
121
|
sheet.save
|
116
122
|
end
|
117
|
-
if headers.length
|
123
|
+
if headers.length > curr_cols or
|
124
|
+
(tsvrows.length < curr_rows and crop==true)
|
118
125
|
sheet.max_cols = headers.length
|
119
126
|
sheet.save
|
120
127
|
end
|
@@ -124,13 +131,13 @@ module GoogleDrive
|
|
124
131
|
tsvrows[batch_start..batch_end].each_with_index do |row,row_i|
|
125
132
|
rowcols = row.split("\t")
|
126
133
|
rowcols.each_with_index do |col_v,col_i|
|
127
|
-
sheet[row_i+batch_start+1,col_i+1]= %{#{col_v}}
|
134
|
+
sheet[row_i + batch_start + 1, col_i + 1]= %{#{col_v}}
|
128
135
|
end
|
129
136
|
end
|
130
137
|
sheet.save
|
131
138
|
batch_start += (batch_length + 1)
|
132
|
-
rows_written+=batch_length
|
133
|
-
if batch_start>tsvrows.length+1
|
139
|
+
rows_written += batch_length
|
140
|
+
if batch_start>tsvrows.length + 1
|
134
141
|
break
|
135
142
|
end
|
136
143
|
end
|
@@ -141,6 +148,7 @@ module GoogleDrive
|
|
141
148
|
sheet.reload
|
142
149
|
#loading remote data for checksum
|
143
150
|
rem_tsv = sheet.to_tsv
|
151
|
+
return true if rem_tsv.to_s.length==0
|
144
152
|
rem_table = rem_tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
|
145
153
|
loc_table = tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
|
146
154
|
re_col_vs = []
|
@@ -12,13 +12,12 @@ module Mobilize
|
|
12
12
|
:authentication => 'plain',
|
13
13
|
:enable_starttls_auto => true }
|
14
14
|
|
15
|
-
def write(
|
16
|
-
bod="",
|
17
|
-
recipient=Jobtracker.admin_emails.join(","))
|
15
|
+
def write(params)
|
18
16
|
mail(:from=>Gdrive.owner_email,
|
19
|
-
:to=>
|
20
|
-
:subject=>
|
21
|
-
:body=>
|
17
|
+
:to=>params['to'],
|
18
|
+
:subject=>params['subject'],
|
19
|
+
:body=>params['body'],
|
20
|
+
:bcc=>params['bcc'])
|
22
21
|
end
|
23
22
|
end
|
24
23
|
end
|
@@ -3,7 +3,24 @@ module Mobilize
|
|
3
3
|
def Gbook.find_all_by_path(path,gdrive_slot)
|
4
4
|
Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
|
5
5
|
end
|
6
|
+
|
7
|
+
def Gbook.find_by_http_url(http_url,gdrive_slot)
|
8
|
+
key = http_url.split("key=").last.split("#").first
|
9
|
+
Gdrive.root(gdrive_slot).spreadsheet_by_key(key)
|
10
|
+
end
|
11
|
+
|
6
12
|
def Gbook.find_by_path(path,gdrive_slot)
|
13
|
+
#first try to find a dataset with the URL
|
14
|
+
dst = Dataset.find_by_handler_and_path('gbook',path)
|
15
|
+
if dst and dst.http_url.to_s.length>0
|
16
|
+
book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
|
17
|
+
#doesn't count if it's deleted
|
18
|
+
if book.entry_hash[:deleted]
|
19
|
+
book = nil
|
20
|
+
else
|
21
|
+
return book
|
22
|
+
end
|
23
|
+
end
|
7
24
|
books = Gbook.find_all_by_path(path,gdrive_slot)
|
8
25
|
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
9
26
|
book = nil
|
@@ -15,6 +32,7 @@ module Mobilize
|
|
15
32
|
bkey = b.resource_id.split(":").last
|
16
33
|
if bkey == dkey
|
17
34
|
book = b
|
35
|
+
dst.update_attributes(:http_url=>book.human_url)
|
18
36
|
else
|
19
37
|
#delete the invalid book
|
20
38
|
b.delete
|
@@ -25,6 +43,7 @@ module Mobilize
|
|
25
43
|
#If it's a new dst or if there are multiple books
|
26
44
|
#take the first
|
27
45
|
book = books.first
|
46
|
+
dst.update_attributes(:http_url=>book.human_url) if book
|
28
47
|
end
|
29
48
|
return book
|
30
49
|
end
|
@@ -80,5 +80,18 @@ module Mobilize
|
|
80
80
|
def Gdrive.books(gdrive_slot=nil,params={})
|
81
81
|
Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
|
82
82
|
end
|
83
|
+
|
84
|
+
#email management - used to make sure not too many emails get used at the same time
|
85
|
+
def Gdrive.slot_worker_by_path(path)
|
86
|
+
working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
|
87
|
+
Gdrive.workers.sort_by{rand}.each do |w|
|
88
|
+
unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
|
89
|
+
Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
|
90
|
+
return [w['name'],Gdrive.domain].join("@")
|
91
|
+
end
|
92
|
+
end
|
93
|
+
#return false if none are available
|
94
|
+
return false
|
95
|
+
end
|
83
96
|
end
|
84
97
|
end
|
@@ -1,5 +1,47 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gfile
|
3
|
+
def Gfile.path_to_dst(path,stage_path)
|
4
|
+
#don't need the ://
|
5
|
+
path = path.split("://").last if path.index("://")
|
6
|
+
if Gfile.find_by_path(path)
|
7
|
+
handler = "gfile"
|
8
|
+
Dataset.find_or_create_by_url("#{handler}://#{path}")
|
9
|
+
else
|
10
|
+
raise "unable to find #{path}"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def Gfile.read_by_dataset_path(dst_path,user_name,*args)
|
15
|
+
#expects gdrive slot as first arg, otherwise chooses random
|
16
|
+
gdrive_slot = args
|
17
|
+
worker_emails = Gdrive.worker_emails.sort_by{rand}
|
18
|
+
gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
|
19
|
+
file = Gfile.find_by_path(dst_path)
|
20
|
+
file.read(user_name) if file
|
21
|
+
end
|
22
|
+
|
23
|
+
def Gfile.write_by_dataset_path(dst_path,string,user_name,*args)
|
24
|
+
#ignores *args as all files must be created and owned by owner
|
25
|
+
file = Gfile.find_by_path(dst_path)
|
26
|
+
file.delete if file
|
27
|
+
owner_root = Gdrive.root(Gdrive.owner_email)
|
28
|
+
file = owner_root.upload_from_string(string,
|
29
|
+
dst_path,
|
30
|
+
:content_type=>"test/plain",
|
31
|
+
:convert=>false)
|
32
|
+
file.add_admin_acl
|
33
|
+
#make sure user is owner or can edit
|
34
|
+
u = User.where(:name=>user_name).first
|
35
|
+
entry = file.acl_entry(u.email)
|
36
|
+
unless entry and ['writer','owner'].include?(entry.role)
|
37
|
+
file.update_acl(u.email)
|
38
|
+
end
|
39
|
+
#update http url for file
|
40
|
+
dst = Dataset.find_by_handler_and_path("gfile",dst_path)
|
41
|
+
dst.update_attributes(:http_url=>file.human_url)
|
42
|
+
true
|
43
|
+
end
|
44
|
+
|
3
45
|
def Gfile.add_admin_acl_by_path(path)
|
4
46
|
file = Gfile.find_by_path(path)
|
5
47
|
file.add_admin_acl
|
@@ -18,18 +60,6 @@ module Mobilize
|
|
18
60
|
file.update_acl(gdrive_slot,role)
|
19
61
|
end
|
20
62
|
|
21
|
-
def Gfile.read_by_stage_path(stage_path)
|
22
|
-
#reserve gdrive_slot account for read
|
23
|
-
gdrive_slot = Gdrive.slot_worker_by_path(s.path)
|
24
|
-
return false unless gdrive_slot
|
25
|
-
s = Stage.where(:path=>stage_path)
|
26
|
-
gfile_path = s.params['file']
|
27
|
-
out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
|
28
|
-
#use Gridfs to cache result
|
29
|
-
out_url = "gridfs://#{s.path}/out"
|
30
|
-
Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
|
31
|
-
end
|
32
|
-
|
33
63
|
def Gfile.find_by_path(path)
|
34
64
|
#file must be owned by owner
|
35
65
|
gdrive_slot = Gdrive.owner_email
|
@@ -55,8 +85,10 @@ module Mobilize
|
|
55
85
|
end
|
56
86
|
#always make sure dataset http URL is up to date
|
57
87
|
#and that it has admin acl
|
58
|
-
|
59
|
-
|
88
|
+
if file
|
89
|
+
dst.update_attributes(:http_url=>file.human_url)
|
90
|
+
file.add_admin_acl
|
91
|
+
end
|
60
92
|
return file
|
61
93
|
end
|
62
94
|
end
|
@@ -11,7 +11,7 @@ module Mobilize
|
|
11
11
|
return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
|
12
12
|
end
|
13
13
|
|
14
|
-
def Gridfs.read_by_dataset_path(dst_path,
|
14
|
+
def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
|
15
15
|
begin
|
16
16
|
zs=Gridfs.grid.open(dst_path,'r').read
|
17
17
|
return ::Zlib::Inflate.inflate(zs)
|
@@ -20,10 +20,10 @@ module Mobilize
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def Gridfs.write_by_dataset_path(dst_path,string,
|
23
|
+
def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
|
24
24
|
zs = ::Zlib::Deflate.deflate(string)
|
25
25
|
raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
|
26
|
-
curr_zs = Gridfs.read_by_dataset_path(dst_path,
|
26
|
+
curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
|
27
27
|
#write a new version when there is a change
|
28
28
|
if curr_zs != zs
|
29
29
|
Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
|
@@ -9,9 +9,62 @@ module Mobilize
|
|
9
9
|
Gsheet.config['max_cells']
|
10
10
|
end
|
11
11
|
|
12
|
+
# converts a source path or target path to a dst in the context of handler and stage
|
13
|
+
def Gsheet.path_to_dst(path,stage_path)
|
14
|
+
s = Stage.where(:path=>stage_path).first
|
15
|
+
params = s.params
|
16
|
+
target_path = params['target']
|
17
|
+
#take random slot if one is not available
|
18
|
+
gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
|
19
|
+
#if this is the target, it doesn't have to exist already
|
20
|
+
is_target = true if path == target_path
|
21
|
+
#don't need the ://
|
22
|
+
path = path.split("://").last if path.index("://")
|
23
|
+
if path.split("/").length == 2
|
24
|
+
if is_target or Gsheet.find_by_path(path,gdrive_slot)
|
25
|
+
#user has specified path to a sheet
|
26
|
+
return Dataset.find_or_create_by_url("gsheet://#{path}")
|
27
|
+
else
|
28
|
+
raise "unable to find #{path}"
|
29
|
+
end
|
30
|
+
else
|
31
|
+
#user has specified a sheet
|
32
|
+
runner_title = stage_path.split("/").first
|
33
|
+
r = Runner.find_by_title(runner_title)
|
34
|
+
if is_target or r.gbook(gdrive_slot).worksheets.map{|w| w.title}.include?(path)
|
35
|
+
handler = "gsheet"
|
36
|
+
path = "#{runner_title}/#{path}"
|
37
|
+
elsif Gfile.find_by_path(path,gdrive_slot)
|
38
|
+
handler = "gfile"
|
39
|
+
path = "#{path}"
|
40
|
+
else
|
41
|
+
raise "unable to find #{path}"
|
42
|
+
end
|
43
|
+
return Dataset.find_or_create_by_url("#{handler}://#{path}")
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
|
48
|
+
#expects gdrive slot as first arg, otherwise chooses random
|
49
|
+
gdrive_slot = args
|
50
|
+
worker_emails = Gdrive.worker_emails.sort_by{rand}
|
51
|
+
gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
|
52
|
+
sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
|
53
|
+
sheet.read(user_name) if sheet
|
54
|
+
end
|
55
|
+
|
56
|
+
def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
|
57
|
+
#expects gdrive slot as first arg, otherwise chooses random
|
58
|
+
gdrive_slot,crop = args
|
59
|
+
worker_emails = Gdrive.worker_emails.sort_by{rand}
|
60
|
+
gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
|
61
|
+
crop ||= true
|
62
|
+
Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
|
63
|
+
end
|
64
|
+
|
12
65
|
def Gsheet.write(path,tsv,gdrive_slot)
|
13
66
|
sheet = Gsheet.find_or_create_by_path(path,gdrive_slot)
|
14
|
-
sheet.write(tsv)
|
67
|
+
sheet.write(tsv,Gdrive.owner_name)
|
15
68
|
end
|
16
69
|
|
17
70
|
def Gsheet.find_by_path(path,gdrive_slot)
|
@@ -32,32 +85,9 @@ module Mobilize
|
|
32
85
|
return sheet
|
33
86
|
end
|
34
87
|
|
35
|
-
def Gsheet.
|
36
|
-
#reserve gdrive_slot account for read
|
37
|
-
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
38
|
-
return false unless gdrive_slot
|
39
|
-
s = Stage.where(:path=>stage_path).first
|
40
|
-
user = s.job.runner.user.name
|
41
|
-
source_dst = s.source_dsts(gdrive_slot).first
|
42
|
-
out_tsv = source_dst.read(user)
|
43
|
-
#use Gridfs to cache result
|
44
|
-
out_url = "gridfs://#{s.path}/out"
|
45
|
-
Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
|
46
|
-
end
|
47
|
-
|
48
|
-
def Gsheet.write_by_stage_path(stage_path)
|
49
|
-
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
50
|
-
#return blank response if there are no slots available
|
51
|
-
return nil unless gdrive_slot
|
52
|
-
s = Stage.where(:path=>stage_path).first
|
53
|
-
user = s.job.runner.user
|
54
|
-
target_path = s.params['target']
|
55
|
-
target_path = "#{s.job.runner.title}/#{target_path}" unless target_path.index("/")
|
56
|
-
source_dst = s.source_dsts(gdrive_slot).first
|
57
|
-
tsv = source_dst.read(user.name)
|
58
|
-
sheet_name = target_path.split("/").last
|
59
|
-
temp_path = [stage_path.gridsafe,sheet_name].join("/")
|
88
|
+
def Gsheet.write_temp(target_path,gdrive_slot,tsv)
|
60
89
|
#find and delete temp sheet, if any
|
90
|
+
temp_path = [target_path.gridsafe,"temp"].join("/")
|
61
91
|
temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
|
62
92
|
temp_sheet.delete if temp_sheet
|
63
93
|
#write data to temp sheet
|
@@ -70,28 +100,57 @@ module Mobilize
|
|
70
100
|
return nil
|
71
101
|
end
|
72
102
|
temp_sheet.check_and_fix(tsv)
|
103
|
+
temp_sheet
|
104
|
+
end
|
105
|
+
|
106
|
+
def Gsheet.write_target(target_path,tsv,user_name,gdrive_slot,crop=true)
|
107
|
+
#write to temp sheet first, to ensure google compatibility
|
108
|
+
#and fix any discrepancies due to spreadsheet assumptions
|
109
|
+
temp_sheet = Gsheet.write_temp(target_path,gdrive_slot,tsv)
|
110
|
+
#try to find target sheet
|
73
111
|
target_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
|
112
|
+
u = User.where(:name=>user_name).first
|
74
113
|
unless target_sheet
|
75
114
|
#only give the user edit permissions if they're the ones
|
76
115
|
#creating it
|
77
116
|
target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
|
78
|
-
target_sheet.spreadsheet.update_acl(
|
117
|
+
target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
|
79
118
|
target_sheet.delete_sheet1
|
80
119
|
end
|
81
|
-
#
|
82
|
-
#
|
83
|
-
|
84
|
-
target_sheet.merge(temp_sheet,user.name)
|
85
|
-
rescue
|
86
|
-
return nil
|
87
|
-
end
|
120
|
+
#pass it crop param to determine whether to shrink target sheet to fit data
|
121
|
+
#default is yes
|
122
|
+
target_sheet.merge(temp_sheet,user_name,crop)
|
88
123
|
#delete the temp sheet's book
|
89
124
|
temp_sheet.spreadsheet.delete
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
125
|
+
target_sheet
|
126
|
+
end
|
127
|
+
|
128
|
+
def Gsheet.write_by_stage_path(stage_path)
|
129
|
+
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
130
|
+
#return blank response if there are no slots available
|
131
|
+
return nil unless gdrive_slot
|
132
|
+
s = Stage.where(:path=>stage_path).first
|
133
|
+
u = s.job.runner.user
|
134
|
+
crop = s.params['crop'] || true
|
135
|
+
begin
|
136
|
+
#get tsv to write from stage
|
137
|
+
source = s.sources.first
|
138
|
+
raise "Need source for gsheet write" unless source
|
139
|
+
tsv = source.read(u.name,gdrive_slot)
|
140
|
+
raise "No data found in #{source.url}" unless tsv
|
141
|
+
Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
|
142
|
+
Gdrive.unslot_worker_by_path(stage_path)
|
143
|
+
#update status
|
144
|
+
stdout = "Write successful for #{s.target.url}"
|
145
|
+
stderr = nil
|
146
|
+
s.update_status(stdout)
|
147
|
+
signal = 0
|
148
|
+
rescue => exc
|
149
|
+
stdout = nil
|
150
|
+
stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
|
151
|
+
signal = 500
|
152
|
+
end
|
153
|
+
return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
|
95
154
|
end
|
96
155
|
end
|
97
156
|
end
|
@@ -103,23 +103,28 @@ module Mobilize
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
-
def Resque.
|
106
|
+
def Resque.new_failures_by_email
|
107
107
|
fjobs = {}
|
108
|
-
|
108
|
+
exc_to_s = Hash.new(0)
|
109
109
|
Resque.failures.each_with_index do |f,f_i|
|
110
110
|
#skip if already notified
|
111
111
|
next if f['notified']
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
fjobs[
|
112
|
+
stage_path = f['payload']['args'].first
|
113
|
+
s = Stage.where(:path=>stage_path).first
|
114
|
+
email = s.job.runner.user.email
|
115
|
+
exc_to_s = f['error']
|
116
|
+
if fjobs[email].nil?
|
117
|
+
fjobs[email] = {stage_path => {exc_to_s => 1}}
|
118
|
+
elsif fjobs[email][stage_path].nil?
|
119
|
+
fjobs[email][stage_path] = {exc_to_s => 1}
|
120
|
+
elsif fjobs[email][stage_path][exc_to_s].nil?
|
121
|
+
fjobs[email][stage_path][exc_to_s] = 1
|
118
122
|
else
|
119
|
-
fjobs[
|
123
|
+
fjobs[email][stage_path][exc_to_s] += 1
|
120
124
|
end
|
121
125
|
#add notified flag to redis
|
122
126
|
f['notified'] = true
|
127
|
+
#tag stage with email
|
123
128
|
::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
|
124
129
|
end
|
125
130
|
return fjobs
|
@@ -163,27 +163,39 @@ module Mobilize
|
|
163
163
|
if Jobtracker.notif_due?
|
164
164
|
notifs = []
|
165
165
|
if Jobtracker.failures.length>0
|
166
|
-
|
167
|
-
|
166
|
+
failure_hash = Resque.new_failures_by_email
|
167
|
+
failure_hash.each do |email,stage_paths|
|
168
168
|
n = {}
|
169
|
-
n['
|
169
|
+
n['subject'] = "#{stage_paths.keys.length.to_s} new failed jobs, #{stage_paths.values.map{|v| v.values}.flatten.sum.to_s} failures"
|
170
170
|
#one row per exception type, with the job name
|
171
|
-
n['body'] =
|
171
|
+
n['body'] = stage_paths.map do |path,exceptions|
|
172
|
+
exceptions.map do |exc_to_s,times|
|
173
|
+
[path," : ",exc_to_s,", ",times," times"].join
|
174
|
+
end
|
175
|
+
end.flatten.join("\n\n")
|
176
|
+
u = User.where(:name=>email.split("@").first).first
|
177
|
+
runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
|
178
|
+
n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
|
179
|
+
n['to'] = email
|
180
|
+
n['bcc'] = Jobtracker.admin_emails.join(",")
|
172
181
|
notifs << n
|
173
182
|
end
|
174
183
|
end
|
175
184
|
lws = Jobtracker.max_run_time_workers
|
176
185
|
if lws.length>0
|
177
186
|
n = {}
|
178
|
-
n['
|
187
|
+
n['subject'] = "#{lws.length.to_s} max run time jobs"
|
179
188
|
n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
|
189
|
+
n['to'] = Jobtracker.admin_emails.join(",")
|
180
190
|
notifs << n
|
181
191
|
end
|
192
|
+
#deliver each email generated
|
182
193
|
notifs.each do |notif|
|
183
|
-
Email.write(
|
184
|
-
Jobtracker.last_notification=Time.now.utc.to_s
|
185
|
-
Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
|
194
|
+
Email.write(notif).deliver
|
186
195
|
end
|
196
|
+
#update notification time so JT knows to wait a while
|
197
|
+
Jobtracker.last_notification = Time.now.utc.to_s
|
198
|
+
Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
|
187
199
|
end
|
188
200
|
return true
|
189
201
|
end
|
@@ -13,9 +13,23 @@ module Mobilize
|
|
13
13
|
|
14
14
|
index({ handler: 1, path: 1}, { unique: true})
|
15
15
|
|
16
|
-
def
|
16
|
+
def url
|
17
|
+
s = self
|
18
|
+
"#{s.handler}://#{s.path}"
|
19
|
+
end
|
20
|
+
|
21
|
+
def read(user_name,*args)
|
17
22
|
dst = self
|
18
|
-
|
23
|
+
dst.update_attributes(:last_read_at=>Time.now.utc)
|
24
|
+
"Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user_name,*args)
|
25
|
+
end
|
26
|
+
|
27
|
+
def write(string,user_name,*args)
|
28
|
+
dst = self
|
29
|
+
"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user_name,*args)
|
30
|
+
dst.raw_size = string.length
|
31
|
+
dst.save!
|
32
|
+
return true
|
19
33
|
end
|
20
34
|
|
21
35
|
def Dataset.find_by_url(url)
|
@@ -38,24 +52,15 @@ module Mobilize
|
|
38
52
|
return dst
|
39
53
|
end
|
40
54
|
|
41
|
-
def Dataset.
|
42
|
-
dst = Dataset.
|
43
|
-
dst.
|
44
|
-
url
|
55
|
+
def Dataset.read_by_url(url,user_name,*args)
|
56
|
+
dst = Dataset.find_by_url(url)
|
57
|
+
dst.read(user_name,*args) if dst
|
45
58
|
end
|
46
59
|
|
47
|
-
def
|
48
|
-
dst =
|
49
|
-
dst.
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def write(string,user)
|
54
|
-
dst = self
|
55
|
-
"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
|
56
|
-
dst.raw_size = string.length
|
57
|
-
dst.save!
|
58
|
-
return true
|
60
|
+
def Dataset.write_by_url(url,string,user_name,*args)
|
61
|
+
dst = Dataset.find_or_create_by_url(url)
|
62
|
+
dst.write(string,user_name,*args)
|
63
|
+
url
|
59
64
|
end
|
60
65
|
end
|
61
66
|
end
|
@@ -15,11 +15,6 @@ module Mobilize
|
|
15
15
|
%w{name active trigger status stage1 stage2 stage3 stage4 stage5}
|
16
16
|
end
|
17
17
|
|
18
|
-
def cached_at
|
19
|
-
r = self
|
20
|
-
Dataset.find_or_create_by_path(r.path).cached_at
|
21
|
-
end
|
22
|
-
|
23
18
|
def title
|
24
19
|
r = self
|
25
20
|
r.path.split("/").first
|
@@ -34,6 +29,9 @@ module Mobilize
|
|
34
29
|
Runner.where(:path=>path).first
|
35
30
|
end
|
36
31
|
|
32
|
+
def Runner.find_by_title(title)
|
33
|
+
Runner.where(:path=>"#{title}/jobs").first
|
34
|
+
end
|
37
35
|
def Runner.perform(id,*args)
|
38
36
|
r = Runner.find_by_path(id)
|
39
37
|
#get gdrive slot for read
|
@@ -53,7 +51,9 @@ module Mobilize
|
|
53
51
|
begin
|
54
52
|
if j.is_due?
|
55
53
|
j.update_attributes(:active=>false) if j.trigger=='once'
|
56
|
-
j.stages.first
|
54
|
+
s = j.stages.first
|
55
|
+
s.update_attributes(:retries_done=>0)
|
56
|
+
s.enqueue!
|
57
57
|
end
|
58
58
|
rescue ScriptError, StandardError => exc
|
59
59
|
r.update_status("Failed to enqueue #{j.path} with #{exc.to_s}")
|
@@ -73,11 +73,6 @@ module Mobilize
|
|
73
73
|
Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
|
74
74
|
end
|
75
75
|
|
76
|
-
def cache
|
77
|
-
r = self
|
78
|
-
Dataset.find_or_create_by_url("gridfs://#{r.path}")
|
79
|
-
end
|
80
|
-
|
81
76
|
def gbook(gdrive_slot)
|
82
77
|
r = self
|
83
78
|
title = r.path.split("/").first
|
@@ -86,17 +81,20 @@ module Mobilize
|
|
86
81
|
|
87
82
|
def gsheet(gdrive_slot)
|
88
83
|
r = self
|
84
|
+
u = r.user
|
89
85
|
jobs_sheet = Gsheet.find_by_path(r.path,gdrive_slot)
|
90
86
|
#make sure the user has a runner with a jobs sheet and has write privileges on the spreadsheet
|
91
|
-
unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(
|
87
|
+
unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="writer"})
|
92
88
|
#only give the user edit permissions if they're the ones
|
93
89
|
#creating it
|
94
90
|
jobs_sheet = Gsheet.find_or_create_by_path(r.path,gdrive_slot)
|
95
|
-
unless jobs_sheet.spreadsheet.acl_entry(
|
96
|
-
jobs_sheet.spreadsheet.update_acl(
|
91
|
+
unless jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
|
92
|
+
jobs_sheet.spreadsheet.update_acl(u.email,"writer")
|
97
93
|
end
|
98
94
|
end
|
99
95
|
jobs_sheet.add_headers(r.headers)
|
96
|
+
#add url to dataset
|
97
|
+
Dataset.find_or_create_by_url("gsheet://#{r.path}").update_attributes(:http_url=>jobs_sheet.spreadsheet.human_url)
|
100
98
|
begin;jobs_sheet.delete_sheet1;rescue;end #don't care if sheet1 deletion fails
|
101
99
|
return jobs_sheet
|
102
100
|
end
|
@@ -104,8 +102,6 @@ module Mobilize
|
|
104
102
|
def read_gsheet(gdrive_slot)
|
105
103
|
r = self
|
106
104
|
gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
|
107
|
-
#cache in DB
|
108
|
-
r.cache.write(gsheet_tsv,Gdrive.owner_name)
|
109
105
|
#turn it into a hash array
|
110
106
|
gsheet_jobs = gsheet_tsv.tsv_to_hash_array
|
111
107
|
#go through each job, update relevant job with its params
|
@@ -122,8 +118,13 @@ module Mobilize
|
|
122
118
|
stage_string = rj["stage#{s_idx.to_s}"]
|
123
119
|
s = Stage.find_by_path("#{j.path}/stage#{s_idx.to_s}")
|
124
120
|
if stage_string.to_s.length==0
|
125
|
-
#delete this stage
|
126
|
-
|
121
|
+
#delete this stage and all stages after
|
122
|
+
if s
|
123
|
+
j = s.job
|
124
|
+
j.stages[(s.idx-1)..-1].each{|ps| ps.delete}
|
125
|
+
#just in case
|
126
|
+
s.delete
|
127
|
+
end
|
127
128
|
break
|
128
129
|
elsif s.nil?
|
129
130
|
#create this stage
|
@@ -7,7 +7,8 @@ module Mobilize
|
|
7
7
|
field :call, type: String
|
8
8
|
field :param_string, type: Array
|
9
9
|
field :status, type: String
|
10
|
-
field :
|
10
|
+
field :response, type: Hash
|
11
|
+
field :retries_done, type: Fixnum
|
11
12
|
field :completed_at, type: Time
|
12
13
|
field :started_at, type: Time
|
13
14
|
field :failed_at, type: Time
|
@@ -25,7 +26,15 @@ module Mobilize
|
|
25
26
|
#allowing you to determine its size
|
26
27
|
#before committing to a read or write
|
27
28
|
s = self
|
28
|
-
Dataset.find_by_url(s.out_url) if s.out_url
|
29
|
+
Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
|
30
|
+
end
|
31
|
+
|
32
|
+
def err_dst
  #returns the Dataset registered under this stage's error url
  #(response['err_url']), so its size can be checked
  #before committing to a read or write.
  #returns nil when there is no response or no err_url in it
  err_url = self.response && self.response['err_url']
  return unless err_url
  Dataset.find_by_url(err_url)
end
|
30
39
|
|
31
40
|
def params
|
@@ -68,84 +77,91 @@ module Mobilize
|
|
68
77
|
|
69
78
|
def Stage.perform(id,*args)
  #runs the stage stored under path `id`; extra args are accepted but unused.
  #the handler method Mobilize::<Handler>.<call>_by_stage_path(path) is invoked
  #and its response decides what happens next:
  # - no response:              stage is re-enqueued, returns false
  # - response['signal'] == 0:  stage is completed
  # - retries remaining:        retries_done is bumped and the stage re-enqueued
  # - otherwise:                stage is failed (Stage#fail raises, so nothing returns)
  #returns true once a response has been handled
  s = Stage.where(:path=>id).first
  s.update_attributes(:started_at=>Time.now.utc)
  s.update_status(%{Starting at #{Time.now.utc}})
  #get response by running method
  response = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
  unless response
    #re-queue self if no response
    s.enqueue!
    return false
  end
  if response['signal'] == 0
    s.complete(response)
  elsif s.retries_done.to_i < s.params['retries'].to_i
    #retry; the response of the failed attempt is kept on the stage
    s.update_attributes(:retries_done => s.retries_done.to_i + 1, :response => response)
    s.update_status(%{Retry #{s.retries_done.to_s} at #{Time.now.utc}})
    s.enqueue!
  else
    #sleep as much as user specifies.
    #delay is a stage param like retries, so read it from params;
    #s['delay'] looked up a document attribute that is never set
    sleep s.params['delay'].to_i
    s.fail(response)
  end
  return true
end
|
103
|
+
|
104
|
+
def complete(response)
  #marks this stage completed, stores its response, then moves the job along:
  # - last stage of the job: enqueue the first stage of every active job
  #   in the same runner whose trigger is "after <this job's name>"
  # - otherwise: reset the retry counter of the next stage and enqueue it
  #returns true
  s = self
  s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
  s.update_status("Completed at #{Time.now.utc.to_s}")
  j = s.job
  if s.idx == j.stages.length
    #check for any dependent jobs, if there are, enqueue them
    r = j.runner
    #the trigger is downcased before comparing, so the expected value must be
    #downcased too, otherwise jobs with capitals in their name never match
    after_trigger = "after #{j.name}".downcase
    dep_jobs = r.jobs.select do |dj|
      #to_s keeps jobs with a blank trigger from blowing up the select
      dj.active==true and
      dj.trigger.to_s.strip.downcase == after_trigger
    end
    #put begin/rescue so all dependencies run
    dep_jobs.each do |dj|
      begin
        unless dj.is_working?
          first_stage = dj.stages.first
          first_stage.update_attributes(:retries_done=>0)
          first_stage.enqueue!
        end
      rescue
        #job won't run if error, log it a failure
        #(separate local so this stage's own response is not clobbered)
        err_response = {"err_str" => "Unable to enqueue first stage of #{dj.path}"}
        dj.stages.first.fail(err_response)
      end
    end
  else
    #queue up next stage
    next_stage = s.next
    next_stage.update_attributes(:retries_done=>0)
    next_stage.enqueue!
  end
  true
end
|
103
136
|
|
104
|
-
def
|
105
|
-
#
|
106
|
-
|
107
|
-
#or dataset pointers for other handlers
|
137
|
+
def fail(response,gdrive_slot=nil)
  #records a failure for this stage and re-raises it so it bubbles up to resque:
  # 1. deactivates the job and stores failed_at/response on the stage
  # 2. replaces the "<job>_stage<idx>.err" sheet next to the runner sheet
  #    with the error text (read from response['err_url'], else response['err_str'])
  # 3. updates the stage status and raises Exception built from the error text
  #gdrive_slot: worker email used for the sheet calls; a random worker is picked when nil
  gdrive_slot ||= Gdrive.worker_emails.shuffle.first
  failed_job = self.job
  runner = failed_job.runner
  user = runner.user
  failed_job.update_attributes(:active=>false)
  update_attributes(:failed_at=>Time.now.utc,:response=>response)
  err_sheet_name = "#{failed_job.name}_stage#{idx.to_s}.err"
  #same folder as the runner sheet, with the runner's own name swapped for the err sheet name
  runner_dirs = runner.path.split("/")[0..-2]
  err_sheet_path = (runner_dirs + [err_sheet_name]).join("/")
  status_msg = "Failed at #{Time.now.utc.to_s}"
  #drop any previous err sheet, then start from a fresh one
  stale_sheet = Gsheet.find_by_path(err_sheet_path,gdrive_slot)
  stale_sheet.delete if stale_sheet
  err_sheet = Gsheet.find_or_create_by_path(err_sheet_path,gdrive_slot)
  err_body = nil
  if response['err_url']
    err_body = Dataset.read_by_url(response['err_url'],user.name)
  elsif response['err_str']
    err_body = response['err_str']
  end
  #"response" becomes the header row of the err sheet
  err_txt = ["response","\n",err_body].join
  err_sheet.write(err_txt,user.name)
  #exception will be first row below "response" header, remaining rows are the backtrace
  err_rows = err_txt.split("\n")
  exc_to_s = err_rows[1]
  backtrace = err_rows[2..-1]
  update_status(status_msg)
  #raise the exception so it bubbles up to resque
  raise Exception,exc_to_s,backtrace
end
|
150
166
|
|
151
167
|
def enqueue!
|
@@ -180,5 +196,61 @@ module Mobilize
|
|
180
196
|
s = self
|
181
197
|
Mobilize::Resque.active_paths.include?(s.path)
|
182
198
|
end
|
199
|
+
|
200
|
+
def target
  #resolves params['target'] through this stage's own handler
  #(Mobilize::<Handler>.path_to_dst(target_path, stage_path)) and returns the result.
  #raises if no target is given, if the target is a url ("<handler>://<path>")
  #for a different handler, or if the handler cannot resolve the path
  s = self
  target_path = s.params['target']
  #fail with a readable message instead of NoMethodError on nil.split
  raise "No target specified for stage #{s.path}" if target_path.nil?
  handler,path = target_path.split("://")
  #if the user has specified a url for a target
  #that is not this stage's handler, disallow
  if handler and path and handler != s.handler
    raise "incompatible target handler #{handler} for #{s.handler} stage"
  end
  begin
    return "Mobilize::#{s.handler.downcase.capitalize}".constantize.path_to_dst(target_path,s.path)
  rescue => exc
    raise "Could not get #{target_path} with error: #{exc.to_s}"
  end
end
|
217
|
+
|
218
|
+
def sources
  #returns an array of Datasets corresponding to
  #items listed as sources in the stage params
  #('sources' array, or a single 'source'); [] when none are given.
  #each item is either
  # - "stage1".."stage5": the out_url dataset of that stage in the same job
  # - any other path: resolved by Mobilize::<Handler>.path_to_dst, where the handler
  #   is the url scheme ("<handler>://...") or this stage's handler when there is none
  #raises "Could not get <source> ..." when an item cannot be resolved
  s = self
  params = s.params
  job = s.job
  runner = job.runner
  source_paths = if params['sources']
                   params['sources']
                 elsif params['source']
                   [params['source']]
                 end
  return [] unless source_paths.is_a?(Array) and !source_paths.empty?
  source_paths.map do |source_path|
    #\A and \z anchor the whole string; ^ and $ would also match a single line of a multi-line path
    if source_path.index(/\Astage[1-5]\z/)
      #stage arguments return the stage's output dst url
      source_stage_path = "#{runner.path}/#{job.name}/#{source_path}"
      source_stage = Stage.where(:path=>source_stage_path).first
      #a stage that does not exist or has not produced a response has no output to read
      unless source_stage and source_stage.response
        raise "Could not get #{source_path} with error: no response found for #{source_stage_path}"
      end
      Dataset.find_by_url(source_stage.response['out_url'])
    else
      handler = if source_path.index("://")
                  source_path.split("://").first
                else
                  s.handler
                end
      begin
        "Mobilize::#{handler.downcase.capitalize}".constantize.path_to_dst(source_path,s.path)
      rescue => exc
        raise "Could not get #{source_path} with error: #{exc.to_s}"
      end
    end
  end
end
|
183
255
|
end
|
184
256
|
end
|
data/mobilize-base.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
|
|
6
6
|
s.name = "mobilize-base"
|
7
7
|
s.version = Mobilize::Base::VERSION
|
8
8
|
s.authors = ["Cassio Paes-Leme"]
|
9
|
-
s.email = ["cpaesleme@
|
9
|
+
s.email = ["cpaesleme@dena.com"]
|
10
10
|
s.homepage = "http://github.com/ngmoco/mobilize-base"
|
11
11
|
s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
|
12
12
|
s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
|
data/test/base_job_rows.yml
CHANGED
data/test/mobilize-base_test.rb
CHANGED
@@ -30,38 +30,77 @@ describe "Mobilize" do
|
|
30
30
|
|
31
31
|
puts "Jobtracker created runner with 'jobs' sheet?"
|
32
32
|
r = u.runner
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
jobs_sheet_url = "gsheet://#{r.path}"
|
34
|
+
jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
|
35
|
+
jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
|
36
|
+
jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
|
37
|
+
assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
|
38
|
+
|
39
|
+
#stop Jobtracker, if you're doing this by queueing runners
|
40
|
+
#Mobilize::Jobtracker.stop!
|
41
|
+
|
42
|
+
puts "add base1 input file"
|
43
|
+
test_filename = "test_base_1"
|
44
|
+
file_url = "gfile://#{test_filename}.tsv"
|
45
|
+
test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
|
41
46
|
test_source_tsv = test_source_ha.hash_array_to_tsv
|
42
|
-
|
47
|
+
Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
|
48
|
+
rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
|
49
|
+
assert rem_tsv == test_source_tsv
|
43
50
|
|
44
|
-
puts "add row to jobs sheet, wait
|
51
|
+
puts "add row to jobs sheet, wait for stages"
|
45
52
|
test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
|
53
|
+
jobs_sheet.reload
|
46
54
|
jobs_sheet.add_or_update_rows(test_job_rows)
|
47
|
-
|
55
|
+
#wait for stages to complete
|
56
|
+
#r.enqueue!
|
57
|
+
wait_for_stages
|
48
58
|
|
49
59
|
puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
|
50
|
-
|
51
|
-
|
60
|
+
test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
|
61
|
+
test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
|
62
|
+
test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
|
52
63
|
|
53
|
-
|
64
|
+
test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
|
65
|
+
test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
|
54
66
|
|
55
|
-
|
56
|
-
[test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
|
67
|
+
assert test_1_tsv == test_2_tsv
|
57
68
|
|
58
|
-
|
59
|
-
|
69
|
+
puts "change first job to fail, wait for stages"
|
70
|
+
test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
|
71
|
+
Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
|
72
|
+
jobs_sheet.add_or_update_rows(test_job_rows)
|
60
73
|
|
61
|
-
|
62
|
-
|
63
|
-
assert test_target_sheet_2.read(user_name) == test_source_sheet.read(user_name)
|
74
|
+
#wait for stages to complete
|
75
|
+
wait_for_stages
|
64
76
|
|
77
|
+
test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
|
78
|
+
puts "jobtracker posted failing test error to sheet "
|
79
|
+
error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
|
80
|
+
assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
|
81
|
+
Mobilize::Jobtracker.stop!
|
65
82
|
end
|
66
83
|
|
84
|
+
def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
  #polls the Resque job list every wait_length seconds until either
  #no Mobilize::Stage job has been seen for stage_limit seconds,
  #or time_limit seconds have passed in total (which raises)
  elapsed = 0
  idle = 0
  until elapsed >= time_limit || idle >= stage_limit
    sleep wait_length
    stage_seen = Mobilize::Resque.jobs.any?{|j| j['class'] == "Mobilize::Stage"}
    if stage_seen
      #a stage is still around, restart the idle clock
      idle = 0
      puts "saw stage at #{elapsed.to_s} seconds"
    else
      idle += wait_length
      puts "#{idle.to_s} seconds since stage seen"
    end
    elapsed += wait_length
    puts "total wait time #{elapsed.to_s} seconds"
  end

  raise "Timed out before stage completion" if elapsed >= time_limit
end
|
67
106
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.2'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -177,7 +177,7 @@ description: ! "Manage your organization's workflows entirely through Google Doc
|
|
177
177
|
and -mongodb packages\n to allow seamless transport of TSV and
|
178
178
|
JSON data between any two endpoints. "
|
179
179
|
email:
|
180
|
-
- cpaesleme@
|
180
|
+
- cpaesleme@dena.com
|
181
181
|
executables: []
|
182
182
|
extensions: []
|
183
183
|
extra_rdoc_files: []
|
@@ -220,10 +220,10 @@ files:
|
|
220
220
|
- lib/samples/resque.yml
|
221
221
|
- lib/samples/resque_web.rb
|
222
222
|
- mobilize-base.gemspec
|
223
|
-
- test/base1_stage1.yml
|
224
223
|
- test/base_job_rows.yml
|
225
224
|
- test/mobilize-base_test.rb
|
226
225
|
- test/redis-test.conf
|
226
|
+
- test/test_base_1.yml
|
227
227
|
- test/test_helper.rb
|
228
228
|
homepage: http://github.com/ngmoco/mobilize-base
|
229
229
|
licenses: []
|
@@ -239,7 +239,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
239
239
|
version: '0'
|
240
240
|
segments:
|
241
241
|
- 0
|
242
|
-
hash:
|
242
|
+
hash: -2718067622627955864
|
243
243
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
244
244
|
none: false
|
245
245
|
requirements:
|
@@ -248,7 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
248
248
|
version: '0'
|
249
249
|
segments:
|
250
250
|
- 0
|
251
|
-
hash:
|
251
|
+
hash: -2718067622627955864
|
252
252
|
requirements: []
|
253
253
|
rubyforge_project: mobilize-base
|
254
254
|
rubygems_version: 1.8.24
|
@@ -257,8 +257,8 @@ specification_version: 3
|
|
257
257
|
summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
|
258
258
|
Docs
|
259
259
|
test_files:
|
260
|
-
- test/base1_stage1.yml
|
261
260
|
- test/base_job_rows.yml
|
262
261
|
- test/mobilize-base_test.rb
|
263
262
|
- test/redis-test.conf
|
263
|
+
- test/test_base_1.yml
|
264
264
|
- test/test_helper.rb
|
File without changes
|