mobilize-base 1.1.10 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +14 -9
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +4 -4
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -6
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +22 -14
- data/lib/mobilize-base/extensions/string.rb +1 -0
- data/lib/mobilize-base/handlers/email.rb +5 -6
- data/lib/mobilize-base/handlers/gbook.rb +19 -0
- data/lib/mobilize-base/handlers/gdrive.rb +13 -0
- data/lib/mobilize-base/handlers/gfile.rb +46 -14
- data/lib/mobilize-base/handlers/gridfs.rb +3 -3
- data/lib/mobilize-base/handlers/gsheet.rb +98 -39
- data/lib/mobilize-base/handlers/resque.rb +14 -9
- data/lib/mobilize-base/jobtracker.rb +20 -8
- data/lib/mobilize-base/models/dataset.rb +23 -18
- data/lib/mobilize-base/models/runner.rb +19 -18
- data/lib/mobilize-base/models/stage.rb +137 -65
- data/lib/mobilize-base/version.rb +1 -1
- data/mobilize-base.gemspec +1 -1
- data/test/base_job_rows.yml +1 -2
- data/test/mobilize-base_test.rb +60 -21
- metadata +7 -7
- /data/test/{base1_stage1.yml → test_base_1.yml} +0 -0
data/README.md
CHANGED
@@ -552,18 +552,23 @@ stage. These should be of the for `<key1>: <value1>, <key2>: <value2>`, where
 `<key>` is an unquoted string and `<value>` is a quoted string, an
 integer, an array (delimited by square braces), or a hash (delimited by
 curly braces).
-* For mobilize-base, the following
-* gsheet.
-* The
-`<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
-the Runner itself.
-
-
-
+* For mobilize-base, the following stage is available:
+* gsheet.write `source: <input_path>`, which reads the sheet.
+* The input_path should be of the form:
+* `<gbook_name>/<gsheet_name>` or just `<gsheet_name>` if the target is in
+the Runner itself.
+* `gfile://<gfile_name>` if the target is a file.
+* The file must be owned by the Gdrive owner.
+* The test uses "gfile://test_base_1.tsv".
+* The stage_name should be of the form `<stage_column>`. The test uses "stage1" for the first test
 and "base1.out" for the second test. The first
 takes the output from the first stage and the second reads it straight
 from the referenced sheet.
-
+* All stages accept a "retries" parameter, which is an integer specifying the number of times that the system will try it again before
+giving up.
+* If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
+* The tab will be headed "response" and will contain the exception and backtrace for the error.
+* The test uses "Requestor_mobilize(test)/base1.out" and
 "Runner_mobilize(test)/base2.out" for target sheets.
 
 <a name='section_Start_Run_Test'></a>
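To make the new stage syntax concrete, here is a hypothetical pair of jobs-sheet cells for this stage. The first string mirrors the one the updated test writes; the second is illustrative only (the actual base_job_rows.yml contents are not shown in this diff):

```
stage1: gsheet.write source:"gfile://test_base_1.tsv", target:base1.out, retries:3
stage2: gsheet.write source:base1.out, target:"Runner_mobilize(test)/base2.out"
```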
data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb
CHANGED
@@ -8,7 +8,7 @@ module GoogleDrive
 attempts = 0
 sleep_time = nil
 #try 5 times to make the call
-while (response.nil? or response.code.
+while (response.nil? or response.code.starts_with?("5")) and attempts < 20
 #instantiate http object, set params
 http = @proxy.new(uri.host, uri.port)
 http.use_ssl = true
@@ -21,10 +21,10 @@ module GoogleDrive
 #timeouts etc.
 nil
 end
-if response.nil?
+if response.nil? or response.code.starts_with?("4")
 attempts +=1
-
-if response.code.
+elsif
+if response.code.starts_with?("5")
 #wait 10 seconds times number of attempts squared in case of error
 sleep_time = 10 * (attempts*attempts)
 attempts += 1
data/lib/mobilize-base/extensions/google_drive/file.rb
CHANGED
@@ -13,15 +13,16 @@ module GoogleDrive
 f = self
 #admin includes workers
 return true if f.has_admin_acl?
-(Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
-
+accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
+accounts.each do |email|
+f.update_acl(email)
 end
 end
 
 def has_admin_acl?
 f = self
 curr_emails = f.acls.map{|a| a.scope}.sort
-admin_emails = Mobilize::Gdrive.admin_emails.
+admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
 if (curr_emails & admin_emails) == admin_emails
 return true
 else
@@ -40,13 +41,13 @@ module GoogleDrive
 end
 end
 
-def read(
+def read(user_name)
 f = self
-entry = f.acl_entry("#{
+entry = f.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
 if entry and ['reader','writer','owner'].include?(entry.role)
 f.download_to_string
 else
-raise "User #{
+raise "User #{user_name} is not allowed to read #{f.title}"
 end
 end
 
data/lib/mobilize-base/extensions/google_drive/worksheet.rb
CHANGED
@@ -6,11 +6,11 @@ module GoogleDrive
 header = rows.first
 return nil unless header and header.first.to_s.length>0
 #look for blank cols to indicate end of row
-
-
+col_last_i = (header.index("") || header.length)-1
+#ignore user-entered line breaks for purposes of tsv reads
+out_tsv = rows.map{|r| r[0..col_last_i].join("\t").gsub("\n","")+"\n"}.join + "\n"
 out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
 Mobilize::Gsheet.config['read_date_format'])
-
 end
 def add_headers(headers)
 headers.each_with_index do |h,h_i|
@@ -47,26 +47,30 @@ module GoogleDrive
 sheet.save
 end
 
-def merge(merge_sheet,
+def merge(merge_sheet,user_name,crop)
 #write the top left of sheet
 #with the contents of merge_sheet
 sheet = self
 sheet.reload
-entry = sheet.spreadsheet.acl_entry("#{
+entry = sheet.spreadsheet.acl_entry("#{user_name}@#{Mobilize::Gdrive.domain}")
 unless entry and ['writer','owner'].include?(entry.role)
-raise "User #{
+raise "User #{user_name} is not allowed to write to #{sheet.spreadsheet.title}"
 end
 merge_sheet.reload
 curr_rows = sheet.num_rows
 curr_cols = sheet.num_cols
 merge_rows = merge_sheet.num_rows
 merge_cols = merge_sheet.num_cols
+raise "zero sized merge sheet" if merge_rows == 0 or merge_cols == 0
 #make sure sheet is at least as big as necessary
-if
+#or as small as necessary if crop is specified
+if merge_rows > curr_rows or
+(merge_rows < curr_rows and crop==true)
 sheet.max_rows = merge_rows
 sheet.save
 end
-if merge_cols > curr_cols
+if merge_cols > curr_cols or
+(merge_cols < curr_cols and crop==true)
 sheet.max_cols = merge_cols
 sheet.save
 end
@@ -94,7 +98,7 @@ module GoogleDrive
 end
 end
 
-def write(tsv,user)
+def write(tsv,user,crop=true)
 sheet = self
 entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
 unless entry and ['writer','owner'].include?(entry.role)
@@ -110,11 +114,14 @@ module GoogleDrive
 curr_rows = sheet.num_rows
 curr_cols = sheet.num_cols
 #make sure sheet is at least as big as necessary
-
+#or small as necessary if crop
+if tsvrows.length > curr_rows or
+(tsvrows.length < curr_rows and crop==true)
 sheet.max_rows = tsvrows.length
 sheet.save
 end
-if headers.length
+if headers.length > curr_cols or
+(tsvrows.length < curr_rows and crop==true)
 sheet.max_cols = headers.length
 sheet.save
 end
@@ -124,13 +131,13 @@ module GoogleDrive
 tsvrows[batch_start..batch_end].each_with_index do |row,row_i|
 rowcols = row.split("\t")
 rowcols.each_with_index do |col_v,col_i|
-sheet[row_i+batch_start+1,col_i+1]= %{#{col_v}}
+sheet[row_i + batch_start + 1, col_i + 1]= %{#{col_v}}
 end
 end
 sheet.save
 batch_start += (batch_length + 1)
-rows_written+=batch_length
-if batch_start>tsvrows.length+1
+rows_written += batch_length
+if batch_start>tsvrows.length + 1
 break
 end
 end
@@ -141,6 +148,7 @@ module GoogleDrive
 sheet.reload
 #loading remote data for checksum
 rem_tsv = sheet.to_tsv
+return true if rem_tsv.to_s.length==0
 rem_table = rem_tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
 loc_table = tsv.split("\n").map{|r| r.split("\t").map{|v| v.googlesafe}}
 re_col_vs = []
data/lib/mobilize-base/handlers/email.rb
CHANGED
@@ -12,13 +12,12 @@ module Mobilize
 :authentication => 'plain',
 :enable_starttls_auto => true }
 
-def write(
-bod="",
-recipient=Jobtracker.admin_emails.join(","))
+def write(params)
 mail(:from=>Gdrive.owner_email,
-:to=>
-:subject=>
-:body=>
+:to=>params['to'],
+:subject=>params['subject'],
+:body=>params['body'],
+:bcc=>params['bcc'])
 end
 end
 end
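A minimal sketch of how the reworked hash-based mailer might be invoked after this change. The hash keys and the `Email.write(...).deliver` call come from this diff; the recipient address and message text are illustrative:

```ruby
# Sketch only: assumes the mobilize-base environment is loaded.
notif = {
  'to'      => "analyst@example.com",                        # illustrative recipient
  'subject' => "1 new failed jobs, 1 failures",
  'body'    => "Runner path and failure details go here",    # illustrative body
  'bcc'     => Mobilize::Jobtracker.admin_emails.join(",")
}
Mobilize::Email.write(notif).deliver
```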
data/lib/mobilize-base/handlers/gbook.rb
CHANGED
@@ -3,7 +3,24 @@ module Mobilize
 def Gbook.find_all_by_path(path,gdrive_slot)
 Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
 end
+
+def Gbook.find_by_http_url(http_url,gdrive_slot)
+key = http_url.split("key=").last.split("#").first
+Gdrive.root(gdrive_slot).spreadsheet_by_key(key)
+end
+
 def Gbook.find_by_path(path,gdrive_slot)
+#first try to find a dataset with the URL
+dst = Dataset.find_by_handler_and_path('gbook',path)
+if dst and dst.http_url.to_s.length>0
+book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
+#doesn't count if it's deleted
+if book.entry_hash[:deleted]
+book = nil
+else
+return book
+end
+end
 books = Gbook.find_all_by_path(path,gdrive_slot)
 dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
 book = nil
@@ -15,6 +32,7 @@ module Mobilize
 bkey = b.resource_id.split(":").last
 if bkey == dkey
 book = b
+dst.update_attributes(:http_url=>book.human_url)
 else
 #delete the invalid book
 b.delete
@@ -25,6 +43,7 @@ module Mobilize
 #If it's a new dst or if there are multiple books
 #take the first
 book = books.first
+dst.update_attributes(:http_url=>book.human_url) if book
 end
 return book
 end
data/lib/mobilize-base/handlers/gdrive.rb
CHANGED
@@ -80,5 +80,18 @@ module Mobilize
 def Gdrive.books(gdrive_slot=nil,params={})
 Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
 end
+
+#email management - used to make sure not too many emails get used at the same time
+def Gdrive.slot_worker_by_path(path)
+working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
+Gdrive.workers.sort_by{rand}.each do |w|
+unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
+Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
+return [w['name'],Gdrive.domain].join("@")
+end
+end
+#return false if none are available
+return false
+end
 end
 end
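A sketch of the reserve/release pattern the handlers in this diff follow around gdrive_slot accounts; the stage path below is illustrative and `unslot_worker_by_path` is the release call used by the gsheet handler later in this diff:

```ruby
# Sketch only: reserve a worker account for a stage, do the Drive work, then release it.
stage_path = "Runner_mobilize(test)/jobs/base1/stage1"   # illustrative path
gdrive_slot = Mobilize::Gdrive.slot_worker_by_path(stage_path)
if gdrive_slot
  # ... read or write Google Drive data using gdrive_slot as the account ...
  Mobilize::Gdrive.unslot_worker_by_path(stage_path)
else
  # no worker account is free; stage methods in this diff return nil and re-enqueue
end
```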
data/lib/mobilize-base/handlers/gfile.rb
CHANGED
@@ -1,5 +1,47 @@
 module Mobilize
 module Gfile
+def Gfile.path_to_dst(path,stage_path)
+#don't need the ://
+path = path.split("://").last if path.index("://")
+if Gfile.find_by_path(path)
+handler = "gfile"
+Dataset.find_or_create_by_url("#{handler}://#{path}")
+else
+raise "unable to find #{path}"
+end
+end
+
+def Gfile.read_by_dataset_path(dst_path,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+file = Gfile.find_by_path(dst_path)
+file.read(user_name) if file
+end
+
+def Gfile.write_by_dataset_path(dst_path,string,user_name,*args)
+#ignores *args as all files must be created and owned by owner
+file = Gfile.find_by_path(dst_path)
+file.delete if file
+owner_root = Gdrive.root(Gdrive.owner_email)
+file = owner_root.upload_from_string(string,
+dst_path,
+:content_type=>"test/plain",
+:convert=>false)
+file.add_admin_acl
+#make sure user is owner or can edit
+u = User.where(:name=>user_name).first
+entry = file.acl_entry(u.email)
+unless entry and ['writer','owner'].include?(entry.role)
+file.update_acl(u.email)
+end
+#update http url for file
+dst = Dataset.find_by_handler_and_path("gfile",dst_path)
+dst.update_attributes(:http_url=>file.human_url)
+true
+end
+
 def Gfile.add_admin_acl_by_path(path)
 file = Gfile.find_by_path(path)
 file.add_admin_acl
@@ -18,18 +60,6 @@ module Mobilize
 file.update_acl(gdrive_slot,role)
 end
 
-def Gfile.read_by_stage_path(stage_path)
-#reserve gdrive_slot account for read
-gdrive_slot = Gdrive.slot_worker_by_path(s.path)
-return false unless gdrive_slot
-s = Stage.where(:path=>stage_path)
-gfile_path = s.params['file']
-out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
-#use Gridfs to cache result
-out_url = "gridfs://#{s.path}/out"
-Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
-end
-
 def Gfile.find_by_path(path)
 #file must be owned by owner
 gdrive_slot = Gdrive.owner_email
@@ -55,8 +85,10 @@ module Mobilize
 end
 #always make sure dataset http URL is up to date
 #and that it has admin acl
-
-
+if file
+dst.update_attributes(:http_url=>file.human_url)
+file.add_admin_acl
+end
 return file
 end
 end
data/lib/mobilize-base/handlers/gridfs.rb
CHANGED
@@ -11,7 +11,7 @@ module Mobilize
 return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
 end
 
-def Gridfs.read_by_dataset_path(dst_path,
+def Gridfs.read_by_dataset_path(dst_path,user_name,*args)
 begin
 zs=Gridfs.grid.open(dst_path,'r').read
 return ::Zlib::Inflate.inflate(zs)
@@ -20,10 +20,10 @@ module Mobilize
 end
 end
 
-def Gridfs.write_by_dataset_path(dst_path,string,
+def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
 zs = ::Zlib::Deflate.deflate(string)
 raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-curr_zs = Gridfs.read_by_dataset_path(dst_path,
+curr_zs = Gridfs.read_by_dataset_path(dst_path,user_name).to_s
 #write a new version when there is a change
 if curr_zs != zs
 Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
data/lib/mobilize-base/handlers/gsheet.rb
CHANGED
@@ -9,9 +9,62 @@ module Mobilize
 Gsheet.config['max_cells']
 end
 
+# converts a source path or target path to a dst in the context of handler and stage
+def Gsheet.path_to_dst(path,stage_path)
+s = Stage.where(:path=>stage_path).first
+params = s.params
+target_path = params['target']
+#take random slot if one is not available
+gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
+#if this is the target, it doesn't have to exist already
+is_target = true if path == target_path
+#don't need the ://
+path = path.split("://").last if path.index("://")
+if path.split("/").length == 2
+if is_target or Gsheet.find_by_path(path,gdrive_slot)
+#user has specified path to a sheet
+return Dataset.find_or_create_by_url("gsheet://#{path}")
+else
+raise "unable to find #{path}"
+end
+else
+#user has specified a sheet
+runner_title = stage_path.split("/").first
+r = Runner.find_by_title(runner_title)
+if is_target or r.gbook(gdrive_slot).worksheets.map{|w| w.title}.include?(path)
+handler = "gsheet"
+path = "#{runner_title}/#{path}"
+elsif Gfile.find_by_path(path,gdrive_slot)
+handler = "gfile"
+path = "#{path}"
+else
+raise "unable to find #{path}"
+end
+return Dataset.find_or_create_by_url("#{handler}://#{path}")
+end
+end
+
+def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
+sheet.read(user_name) if sheet
+end
+
+def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
+#expects gdrive slot as first arg, otherwise chooses random
+gdrive_slot,crop = args
+worker_emails = Gdrive.worker_emails.sort_by{rand}
+gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+crop ||= true
+Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
+end
+
 def Gsheet.write(path,tsv,gdrive_slot)
 sheet = Gsheet.find_or_create_by_path(path,gdrive_slot)
-sheet.write(tsv)
+sheet.write(tsv,Gdrive.owner_name)
 end
 
 def Gsheet.find_by_path(path,gdrive_slot)
@@ -32,32 +85,9 @@ module Mobilize
 return sheet
 end
 
-def Gsheet.
-#reserve gdrive_slot account for read
-gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
-return false unless gdrive_slot
-s = Stage.where(:path=>stage_path).first
-user = s.job.runner.user.name
-source_dst = s.source_dsts(gdrive_slot).first
-out_tsv = source_dst.read(user)
-#use Gridfs to cache result
-out_url = "gridfs://#{s.path}/out"
-Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
-end
-
-def Gsheet.write_by_stage_path(stage_path)
-gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
-#return blank response if there are no slots available
-return nil unless gdrive_slot
-s = Stage.where(:path=>stage_path).first
-user = s.job.runner.user
-target_path = s.params['target']
-target_path = "#{s.job.runner.title}/#{target_path}" unless target_path.index("/")
-source_dst = s.source_dsts(gdrive_slot).first
-tsv = source_dst.read(user.name)
-sheet_name = target_path.split("/").last
-temp_path = [stage_path.gridsafe,sheet_name].join("/")
+def Gsheet.write_temp(target_path,gdrive_slot,tsv)
 #find and delete temp sheet, if any
+temp_path = [target_path.gridsafe,"temp"].join("/")
 temp_sheet = Gsheet.find_by_path(temp_path,gdrive_slot)
 temp_sheet.delete if temp_sheet
 #write data to temp sheet
@@ -70,28 +100,57 @@ module Mobilize
 return nil
 end
 temp_sheet.check_and_fix(tsv)
+temp_sheet
+end
+
+def Gsheet.write_target(target_path,tsv,user_name,gdrive_slot,crop=true)
+#write to temp sheet first, to ensure google compatibility
+#and fix any discrepancies due to spradsheet assumptions
+temp_sheet = Gsheet.write_temp(target_path,gdrive_slot,tsv)
+#try to find target sheet
 target_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
+u = User.where(:name=>user_name).first
 unless target_sheet
 #only give the user edit permissions if they're the ones
 #creating it
 target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
-target_sheet.spreadsheet.update_acl(
+target_sheet.spreadsheet.update_acl(user_email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
 target_sheet.delete_sheet1
 end
-#
-#
-
-target_sheet.merge(temp_sheet,user.name)
-rescue
-return nil
-end
+#pass it crop param to determine whether to shrink target sheet to fit data
+#default is yes
+target_sheet.merge(temp_sheet,user_name,crop)
 #delete the temp sheet's book
 temp_sheet.spreadsheet.delete
-
-
-
-
-
+target_sheet
+end
+
+def Gsheet.write_by_stage_path(stage_path)
+gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
+#return blank response if there are no slots available
+return nil unless gdrive_slot
+s = Stage.where(:path=>stage_path).first
+u = s.job.runner.user
+crop = s.params['crop'] || true
+begin
+#get tsv to write from stage
+source = s.sources.first
+raise "Need source for gsheet write" unless source
+tsv = source.read(u.name,gdrive_slot)
+raise "No data found in #{source.url}" unless tsv
+Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
+Gdrive.unslot_worker_by_path(stage_path)
+#update status
+stdout = "Write successful for #{s.target.url}"
+stderr = nil
+s.update_status(stdout)
+signal = 0
+rescue => exc
+stdout = nil
+stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
+signal = 500
+end
+return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
 end
 end
 end
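A sketch of how the new path_to_dst resolution lines up with the three source/target forms the README describes; the runner, sheet, and file names are illustrative, and the gfile form is routed to the Gfile handler by the Stage#sources logic shown later in this diff:

```ruby
# Sketch only: assumes a stage at the illustrative path below already exists.
stage_path = "Runner_mobilize(test)/jobs/base1/stage1"
# "<gbook_name>/<gsheet_name>" resolves directly to a gsheet:// dataset in that book
Mobilize::Gsheet.path_to_dst("Requestor_mobilize(test)/base1.out", stage_path)
# a bare "<gsheet_name>" resolves against the stage's own Runner book
Mobilize::Gsheet.path_to_dst("base1.out", stage_path)
# a "gfile://<name>" source is handled by the Gfile handler instead
Mobilize::Gfile.path_to_dst("gfile://test_base_1.tsv", stage_path)
```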
data/lib/mobilize-base/handlers/resque.rb
CHANGED
@@ -103,23 +103,28 @@ module Mobilize
 end
 end
 
-def Resque.
+def Resque.new_failures_by_email
 fjobs = {}
-
+exc_to_s = Hash.new(0)
 Resque.failures.each_with_index do |f,f_i|
 #skip if already notified
 next if f['notified']
-
-
-
-
-
-fjobs[
+stage_path = f['payload']['args'].first
+s = Stage.where(:path=>stage_path).first
+email = s.job.runner.user.email
+exc_to_s = f['error']
+if fjobs[email].nil?
+fjobs[email] = {stage_path => {exc_to_s => 1}}
+elsif fjobs[email][stage_path].nil?
+fjobs[email][stage_path] = {exc_to_s => 1}
+elsif fjobs[email][stage_path][exc_to_s].nil?
+fjobs[email][stage_path][exc_to_s] = 1
 else
-fjobs[
+fjobs[email][stage_path][exc_to_s] += 1
 end
 #add notified flag to redis
 f['notified'] = true
+#tag stage with email
 ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
 end
 return fjobs
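Per the code above, Resque.new_failures_by_email returns a nested hash of the shape {email => {stage_path => {exception_string => count}}}; a sketch with illustrative values (the exception string matches the one asserted in the updated test):

```ruby
# Sketch only: illustrative return value of Mobilize::Resque.new_failures_by_email.
{
  "analyst@example.com" => {
    "Runner_mobilize(test)/jobs/base1/stage1" => {
      "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail" => 1
    }
  }
}
```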
data/lib/mobilize-base/jobtracker.rb
CHANGED
@@ -163,27 +163,39 @@ module Mobilize
 if Jobtracker.notif_due?
 notifs = []
 if Jobtracker.failures.length>0
-
-
+failure_hash = Resque.new_failures_by_email
+failure_hash.each do |email,stage_paths|
 n = {}
-n['
+n['subject'] = "#{stage_paths.keys.length.to_s} new failed jobs, #{stage_paths.values.map{|v| v.values}.flatten.sum.to_s} failures"
 #one row per exception type, with the job name
-n['body'] =
+n['body'] = stage_paths.map do |path,exceptions|
+exceptions.map do |exc_to_s,times|
+[path," : ",exc_to_s,", ",times," times"].join
+end
+end.flatten.join("\n\n")
+u = User.where(:name=>email.split("@").first).first
+runner_dst = Dataset.find_by_url("gsheet://#{u.runner.path}")
+n['body'] += "\n\n#{runner_dst.http_url}" if runner_dst and runner_dst.http_url
+n['to'] = email
+n['bcc'] = Jobtracker.admin_emails.join(",")
 notifs << n
 end
 end
 lws = Jobtracker.max_run_time_workers
 if lws.length>0
 n = {}
-n['
+n['subject'] = "#{lws.length.to_s} max run time jobs"
 n['body'] = lws.map{|w| %{spec:#{w['spec']} stg:#{w['stg']} runat:#{w['runat'].to_s}}}.join("\n\n")
+n['to'] = Jobtracker.admin_emails.join(",")
 notifs << n
 end
+#deliver each email generated
 notifs.each do |notif|
-Email.write(
-Jobtracker.last_notification=Time.now.utc.to_s
-Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
+Email.write(notif).deliver
 end
+#update notification time so JT knows to wait a while
+Jobtracker.last_notification = Time.now.utc.to_s
+Jobtracker.update_status("Sent notification at #{Jobtracker.last_notification}")
 end
 return true
 end
data/lib/mobilize-base/models/dataset.rb
CHANGED
@@ -13,9 +13,23 @@ module Mobilize
 
 index({ handler: 1, path: 1}, { unique: true})
 
-def
+def url
+s = self
+"#{s.handler}://#{s.path}"
+end
+
+def read(user_name,*args)
 dst = self
-
+dst.update_attributes(:last_read_at=>Time.now.utc)
+"Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user_name,*args)
+end
+
+def write(string,user_name,*args)
+dst = self
+"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user_name,*args)
+dst.raw_size = string.length
+dst.save!
+return true
 end
 
 def Dataset.find_by_url(url)
@@ -38,24 +52,15 @@ module Mobilize
 return dst
 end
 
-def Dataset.
-dst = Dataset.
-dst.
-url
+def Dataset.read_by_url(url,user_name,*args)
+dst = Dataset.find_by_url(url)
+dst.read(user_name,*args) if dst
 end
 
-def
-dst =
-dst.
-
-end
-
-def write(string,user)
-dst = self
-"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
-dst.raw_size = string.length
-dst.save!
-return true
+def Dataset.write_by_url(url,string,user_name,*args)
+dst = Dataset.find_or_create_by_url(url)
+dst.write(string,user_name,*args)
+url
 end
 end
 end
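A short sketch of the URL-based Dataset API introduced here, mirroring the calls the updated test makes; the file name, TSV content, and user name are illustrative:

```ruby
# Sketch only: write a dataset by URL, read it back, and inspect its canonical url.
file_url = "gfile://test_base_1.tsv"
Mobilize::Dataset.write_by_url(file_url, "name\tvalue\nfoo\t1\n", "mobilize")
tsv = Mobilize::Dataset.read_by_url(file_url, "mobilize")
Mobilize::Dataset.find_by_url(file_url).url   #=> "gfile://test_base_1.tsv"
```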
data/lib/mobilize-base/models/runner.rb
CHANGED
@@ -15,11 +15,6 @@ module Mobilize
 %w{name active trigger status stage1 stage2 stage3 stage4 stage5}
 end
 
-def cached_at
-r = self
-Dataset.find_or_create_by_path(r.path).cached_at
-end
-
 def title
 r = self
 r.path.split("/").first
@@ -34,6 +29,9 @@ module Mobilize
 Runner.where(:path=>path).first
 end
 
+def Runner.find_by_title(title)
+Runner.where(:path=>"#{title}/jobs").first
+end
 def Runner.perform(id,*args)
 r = Runner.find_by_path(id)
 #get gdrive slot for read
@@ -53,7 +51,9 @@ module Mobilize
 begin
 if j.is_due?
 j.update_attributes(:active=>false) if j.trigger=='once'
-j.stages.first
+s = j.stages.first
+s.update_attributes(:retries_done=>0)
+s.enqueue!
 end
 rescue ScriptError, StandardError => exc
 r.update_status("Failed to enqueue #{j.path} with #{exc.to_s}")
@@ -73,11 +73,6 @@ module Mobilize
 Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
 end
 
-def cache
-r = self
-Dataset.find_or_create_by_url("gridfs://#{r.path}")
-end
-
 def gbook(gdrive_slot)
 r = self
 title = r.path.split("/").first
@@ -86,17 +81,20 @@ module Mobilize
 
 def gsheet(gdrive_slot)
 r = self
+u = r.user
 jobs_sheet = Gsheet.find_by_path(r.path,gdrive_slot)
 #make sure the user has a runner with a jobs sheet and has write privileges on the spreadsheet
-unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(
+unless (jobs_sheet and jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="writer"})
 #only give the user edit permissions if they're the ones
 #creating it
 jobs_sheet = Gsheet.find_or_create_by_path(r.path,gdrive_slot)
-unless jobs_sheet.spreadsheet.acl_entry(
-jobs_sheet.spreadsheet.update_acl(
+unless jobs_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
+jobs_sheet.spreadsheet.update_acl(u.email,"writer")
 end
 end
 jobs_sheet.add_headers(r.headers)
+#add url to dataset
+Dataset.find_or_create_by_url("gsheet://#{r.path}").update_attributes(:http_url=>jobs_sheet.spreadsheet.human_url)
 begin;jobs_sheet.delete_sheet1;rescue;end #don't care if sheet1 deletion fails
 return jobs_sheet
 end
@@ -104,8 +102,6 @@ module Mobilize
 def read_gsheet(gdrive_slot)
 r = self
 gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
-#cache in DB
-r.cache.write(gsheet_tsv,Gdrive.owner_name)
 #turn it into a hash array
 gsheet_jobs = gsheet_tsv.tsv_to_hash_array
 #go through each job, update relevant job with its params
@@ -122,8 +118,13 @@ module Mobilize
 stage_string = rj["stage#{s_idx.to_s}"]
 s = Stage.find_by_path("#{j.path}/stage#{s_idx.to_s}")
 if stage_string.to_s.length==0
-#delete this stage
-
+#delete this stage and all stages after
+if s
+j = s.job
+j.stages[(s.idx-1)..-1].each{|ps| ps.delete}
+#just in case
+s.delete
+end
 break
 elsif s.nil?
 #create this stage
data/lib/mobilize-base/models/stage.rb
CHANGED
@@ -7,7 +7,8 @@ module Mobilize
 field :call, type: String
 field :param_string, type: Array
 field :status, type: String
-field :
+field :response, type: Hash
+field :retries_done, type: Fixnum
 field :completed_at, type: Time
 field :started_at, type: Time
 field :failed_at, type: Time
@@ -25,7 +26,15 @@ module Mobilize
 #allowing you to determine its size
 #before committing to a read or write
 s = self
-Dataset.find_by_url(s.out_url) if s.out_url
+Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
+end
+
+def err_dst
+#this gives a dataset that points to the output
+#allowing you to determine its size
+#before committing to a read or write
+s = self
+Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
 end
 
 def params
@@ -68,84 +77,91 @@ module Mobilize
 
 def Stage.perform(id,*args)
 s = Stage.where(:path=>id).first
-j = s.job
 s.update_attributes(:started_at=>Time.now.utc)
 s.update_status(%{Starting at #{Time.now.utc}})
-
-
-
-
-
-
-s.enqueue!
-return false
-end
-rescue ScriptError, StandardError => exc
-j.update_attributes(:active=>false)
-s.update_attributes(:failed_at=>Time.now.utc)
-s.update_status("Failed at #{Time.now.utc.to_s}")
-raise exc
+#get response by running method
+response = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
+unless response
+#re-queue self if no response
+s.enqueue!
+return false
 end
-
+if response['signal'] == 0
+s.complete(response)
+elsif s.retries_done.to_i < s.params['retries'].to_i
+#retry
+s.update_attributes(:retries_done => s.retries_done.to_i + 1, :response => response)
+s.update_status(%{Retry #{s.retries_done.to_s} at #{Time.now.utc}})
+s.enqueue!
+else
+#sleep as much as user specifies
+sleep s['delay'].to_i
+s.fail(response)
+end
+return true
+end
+
+def complete(response)
+s = self
+s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
 s.update_status("Completed at #{Time.now.utc.to_s}")
+j = s.job
 if s.idx == j.stages.length
 #check for any dependent jobs, if there are, enqueue them
 r = j.runner
-dep_jobs = r.jobs.select
+dep_jobs = r.jobs.select do |dj|
+dj.active==true and
+dj.trigger.strip.downcase == "after #{j.name}"
+end
 #put begin/rescue so all dependencies run
-dep_jobs.each
+dep_jobs.each do |dj|
+begin
+unless dj.is_working?
+dj.stages.first.update_attributes(:retries_done=>0)
+dj.stages.first.enqueue!
+end
+rescue
+#job won't run if error, log it a failure
+response = {"err_str" => "Unable to enqueue first stage of #{dj.path}"}
+dj.stages.first.fail(response)
+end
+end
 else
 #queue up next stage
+s.next.update_attributes(:retries_done=>0)
 s.next.enqueue!
 end
-
+true
 end
 
-def
-#
-
-#or dataset pointers for other handlers
+def fail(response,gdrive_slot=nil)
+#get random worker if one is not provided
+gdrive_slot ||= Gdrive.worker_emails.sort_by{rand}.first
 s = self
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-runner_sheet = r.gbook(gdrive_slot).worksheet_by_title(source_path)
-out_tsv = if runner_sheet
-runner_sheet.read(user)
-else
-#check for gfile. will fail if there isn't one.
-Gfile.find_by_path(source_path).read(user)
-end
-end
-#use Gridfs to cache gdrive results
-file_name = source_path.split("/").last
-out_url = "gridfs://#{s.path}/#{file_name}"
-Dataset.write_by_url(out_url,out_tsv,user)
-dsts << Dataset.find_by_url(out_url)
-end
-end
-return dsts
+j = s.job
+r = j.runner
+u = r.user
+j.update_attributes(:active=>false)
+s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
+stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
+target_path = (r.path.split("/")[0..-2] + [stage_name]).join("/")
+status_msg = "Failed at #{Time.now.utc.to_s}"
+#read err txt, add err sheet, write to it
+err_sheet = Gsheet.find_by_path(target_path,gdrive_slot)
+err_sheet.delete if err_sheet
+err_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
+err_txt = if response['err_url']
+Dataset.read_by_url(response['err_url'],u.name)
+elsif response['err_str']
+response['err_str']
+end
+err_txt = ["response","\n",err_txt].join
+err_sheet.write(err_txt,u.name)
+#exception will be first row below "response" header
+exc_to_s,backtrace = err_txt.split("\n").ie{|ea| [ea[1], ea[2..-1]]}
+s.update_status(status_msg)
+#raise the exception so it bubbles up to resque
+raise Exception,exc_to_s,backtrace
 end
 
 def enqueue!
@@ -180,5 +196,61 @@ module Mobilize
 s = self
 Mobilize::Resque.active_paths.include?(s.path)
 end
+
+def target
+s = self
+params = s.params
+target_path = params['target']
+handler,path = target_path.split("://")
+#if the user has specified a url for a target
+#that is not this stage's handler, disallow
+if handler and path and handler != s.handler
+raise "incompatible target handler #{handler} for #{s.handler} stage"
+else
+begin
+return "Mobilize::#{s.handler.downcase.capitalize}".constantize.path_to_dst(target_path,s.path)
+rescue => exc
+raise "Could not get #{target_path} with error: #{exc.to_s}"
+end
+end
+end
+
+def sources
+#returns an array of Datasets corresponding to
+#items listed as sources in the stage params
+s = self
+params = s.params
+job = s.job
+runner = job.runner
+source_paths = if params['sources']
+params['sources']
+elsif params['source']
+[params['source']]
+end
+return [] if (source_paths.class!=Array or source_paths.length==0)
+dsts = []
+source_paths.each do |source_path|
+if source_path.index(/^stage[1-5]$/)
+#stage arguments return the stage's output dst url
+source_stage_path = "#{runner.path}/#{job.name}/#{source_path}"
+source_stage = Stage.where(:path=>source_stage_path).first
+source_stage_out_url = source_stage.response['out_url']
+dsts << Dataset.find_by_url(source_stage_out_url)
+else
+handler = if source_path.index("://")
+source_path.split("://").first
+else
+s.handler
+end
+begin
+stage_path = s.path
+dsts << "Mobilize::#{handler.downcase.capitalize}".constantize.path_to_dst(source_path,stage_path)
+rescue => exc
+raise "Could not get #{source_path} with error: #{exc.to_s}"
+end
+end
+end
+return dsts
+end
 end
 end
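For readers writing their own handlers against the reworked Stage model, here is a sketch of the response contract Stage.perform now expects from a handler's `<call>_by_stage_path` method: return nil to re-enqueue, or a hash whose 'signal' of 0 completes the stage; any other signal retries until params['retries'] is exhausted and then fails, writing err_str/err_url to a "<job>_stage<idx>.err" tab. The handler name below is illustrative, not part of the gem:

```ruby
# Sketch only: a hypothetical handler following the contract implied by this diff.
module Mobilize
  module Mynewhandler   # illustrative handler name
    def Mynewhandler.write_by_stage_path(stage_path)
      begin
        # ... do the stage's work for the Stage at stage_path here ...
        stdout, stderr, signal = "Write successful", nil, 0
      rescue => exc
        stdout = nil
        stderr = [exc.to_s, "\n", exc.backtrace.join("\n")].join
        signal = 500
      end
      {'out_str' => stdout, 'err_str' => stderr, 'signal' => signal}
    end
  end
end
```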
data/mobilize-base.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
 s.name = "mobilize-base"
 s.version = Mobilize::Base::VERSION
 s.authors = ["Cassio Paes-Leme"]
-s.email = ["cpaesleme@
+s.email = ["cpaesleme@dena.com"]
 s.homepage = "http://github.com/ngmoco/mobilize-base"
 s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
 s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
data/test/base_job_rows.yml
CHANGED
data/test/mobilize-base_test.rb
CHANGED
@@ -30,38 +30,77 @@ describe "Mobilize" do
 
 puts "Jobtracker created runner with 'jobs' sheet?"
 r = u.runner
-
-
-
-
-
-
-
-
+jobs_sheet_url = "gsheet://#{r.path}"
+jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
+jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
+jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
+assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
+
+#stop Jobtracker, if you're doing this by queueing runners
+#Mobilize::Jobtracker.stop!
+
+puts "add base1 input file"
+test_filename = "test_base_1"
+file_url = "gfile://#{test_filename}.tsv"
+test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
 test_source_tsv = test_source_ha.hash_array_to_tsv
-
+Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
+rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
+assert rem_tsv == test_source_tsv
 
-puts "add row to jobs sheet, wait
+puts "add row to jobs sheet, wait for stages"
 test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
+jobs_sheet.reload
 jobs_sheet.add_or_update_rows(test_job_rows)
-
+#wait for stages to complete
+#r.enqueue!
+wait_for_stages
 
 puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
-
-
+test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
+test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
+test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
 
-
+test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
+test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
 
-
-[test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
+assert test_1_tsv == test_2_tsv
 
-
-
+puts "change first job to fail, wait for stages"
+test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
+Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
+jobs_sheet.add_or_update_rows(test_job_rows)
 
-
-
-assert test_target_sheet_2.read(user_name) == test_source_sheet.read(user_name)
+#wait for stages to complete
+wait_for_stages
 
+test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
+puts "jobtracker posted failing test error to sheet "
+error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
+assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
+Mobilize::Jobtracker.stop!
 end
 
+def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
+time = 0
+time_since_stage = 0
+#check for 10 min
+while time < time_limit and time_since_stage < stage_limit
+sleep wait_length
+job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
+if job_classes.include?("Mobilize::Stage")
+time_since_stage = 0
+puts "saw stage at #{time.to_s} seconds"
+else
+time_since_stage += wait_length
+puts "#{time_since_stage.to_s} seconds since stage seen"
+end
+time += wait_length
+puts "total wait time #{time.to_s} seconds"
+end
+
+if time >= time_limit
+raise "Timed out before stage completion"
+end
+end
 end
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-base
 version: !ruby/object:Gem::Version
-version: 1.
+version: '1.2'
 prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-
+date: 2013-03-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: rake
@@ -177,7 +177,7 @@ description: ! "Manage your organization's workflows entirely through Google Doc
 and -mongodb packages\n to allow seamless transport of TSV and
 JSON data between any two endpoints. "
 email:
-- cpaesleme@
+- cpaesleme@dena.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -220,10 +220,10 @@ files:
 - lib/samples/resque.yml
 - lib/samples/resque_web.rb
 - mobilize-base.gemspec
-- test/base1_stage1.yml
 - test/base_job_rows.yml
 - test/mobilize-base_test.rb
 - test/redis-test.conf
+- test/test_base_1.yml
 - test/test_helper.rb
 homepage: http://github.com/ngmoco/mobilize-base
 licenses: []
@@ -239,7 +239,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
 version: '0'
 segments:
 - 0
-hash:
+hash: -2718067622627955864
 required_rubygems_version: !ruby/object:Gem::Requirement
 none: false
 requirements:
@@ -248,7 +248,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 version: '0'
 segments:
 - 0
-hash:
+hash: -2718067622627955864
 requirements: []
 rubyforge_project: mobilize-base
 rubygems_version: 1.8.24
@@ -257,8 +257,8 @@ specification_version: 3
 summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
 Docs
 test_files:
-- test/base1_stage1.yml
 - test/base_job_rows.yml
 - test/mobilize-base_test.rb
 - test/redis-test.conf
+- test/test_base_1.yml
 - test/test_helper.rb
data/test/{base1_stage1.yml → test_base_1.yml}
RENAMED
File without changes