mobilize-base 1.0.84 → 1.0.91
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +2 -2
- data/lib/mobilize-base/handlers/gbook.rb +4 -4
- data/lib/mobilize-base/handlers/gfile.rb +4 -1
- data/lib/mobilize-base/handlers/gridfs.rb +7 -7
- data/lib/mobilize-base/handlers/gsheet.rb +12 -6
- data/lib/mobilize-base/handlers/resque.rb +11 -6
- data/lib/mobilize-base/jobtracker.rb +7 -5
- data/lib/mobilize-base/models/dataset.rb +25 -26
- data/lib/mobilize-base/models/runner.rb +3 -3
- data/lib/mobilize-base/models/stage.rb +14 -26
- data/lib/mobilize-base/models/user.rb +1 -1
- data/lib/mobilize-base/version.rb +1 -1
- data/test/mobilize-base_test.rb +4 -4
- metadata +4 -4
@@ -8,7 +8,7 @@ module GoogleDrive
|
|
8
8
|
attempts = 0
|
9
9
|
sleep_time = nil
|
10
10
|
#try up to 20 times to make the call
|
11
|
-
while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts <
|
11
|
+
while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 20
|
12
12
|
#instantiate http object, set params
|
13
13
|
http = @proxy.new(uri.host, uri.port)
|
14
14
|
http.use_ssl = true
|
@@ -40,7 +40,7 @@ module GoogleDrive
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
43
|
-
raise "No response after
|
43
|
+
raise "No response after 20 attempts" if response.nil?
|
44
44
|
raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
|
45
45
|
return response
|
46
46
|
end
|
@@ -8,10 +8,10 @@ module Mobilize
|
|
8
8
|
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
9
9
|
#there should only be one book with each path; otherwise we have a failure
|
10
10
|
book = nil
|
11
|
-
if books.length>1 and dst.
|
11
|
+
if books.length>1 and dst.http_url.to_s.length>0
|
12
12
|
#some idiot process created a duplicate book.
|
13
13
|
#Fix by renaming all but one with dst entry's key
|
14
|
-
dkey = dst.
|
14
|
+
dkey = dst.http_url.split("key=").last
|
15
15
|
books.each do |b|
|
16
16
|
bkey = b.resource_id.split(":").last
|
17
17
|
if bkey == dkey
|
@@ -30,9 +30,9 @@ module Mobilize
|
|
30
30
|
book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
|
31
31
|
("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
|
32
32
|
end
|
33
|
-
#always make sure book dataset URL is up to date
|
33
|
+
#always make sure book dataset http URL is up to date
|
34
34
|
#and that book has admin acl
|
35
|
-
dst.update_attributes(:
|
35
|
+
dst.update_attributes(:http_url=>book.human_url)
|
36
36
|
book.add_admin_acl
|
37
37
|
return book
|
38
38
|
end
|
@@ -28,7 +28,10 @@ module Mobilize
|
|
28
28
|
return false unless gdrive_slot
|
29
29
|
s = Stage.where(:path=>stage_path)
|
30
30
|
gfile_path = s.params['file']
|
31
|
-
Gfile.find_by_path(gfile_path,gdrive_slot).read
|
31
|
+
out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
|
32
|
+
#use Gridfs to cache result
|
33
|
+
out_url = "gridfs://#{s.path}/out"
|
34
|
+
Dataset.write_to_url(out_url,out_tsv)
|
32
35
|
end
|
33
36
|
end
|
34
37
|
end
|
@@ -11,29 +11,29 @@ module Mobilize
|
|
11
11
|
return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
|
12
12
|
end
|
13
13
|
|
14
|
-
def Gridfs.
|
14
|
+
def Gridfs.read_by_dataset_path(dst_path)
|
15
15
|
begin
|
16
|
-
zs=Gridfs.grid.open(
|
16
|
+
zs=Gridfs.grid.open(dst_path,'r').read
|
17
17
|
return ::Zlib::Inflate.inflate(zs)
|
18
18
|
rescue
|
19
19
|
return nil
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def Gridfs.
|
23
|
+
def Gridfs.write_by_dataset_path(dst_path,string)
|
24
24
|
zs = ::Zlib::Deflate.deflate(string)
|
25
25
|
raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
|
26
|
-
curr_zs = Gridfs.
|
26
|
+
curr_zs = Gridfs.read_by_dataset_path(dst_path).to_s
|
27
27
|
#write a new version when there is a change
|
28
28
|
if curr_zs != zs
|
29
|
-
Gridfs.grid.open(
|
29
|
+
Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
|
30
30
|
end
|
31
31
|
return true
|
32
32
|
end
|
33
33
|
|
34
|
-
def Gridfs.delete(
|
34
|
+
def Gridfs.delete(dst_path)
|
35
35
|
begin
|
36
|
-
Gridfs.grid.delete(
|
36
|
+
Gridfs.grid.delete(dst_path)
|
37
37
|
return true
|
38
38
|
rescue
|
39
39
|
return nil
|
@@ -38,13 +38,16 @@ module Mobilize
|
|
38
38
|
return false unless gdrive_slot
|
39
39
|
s = Stage.where(:path=>stage_path).first
|
40
40
|
gsheet_path = s.params['source']
|
41
|
-
Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
|
41
|
+
out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
|
42
|
+
#use Gridfs to cache result
|
43
|
+
out_url = "gridfs://#{s.path}/out"
|
44
|
+
Dataset.write_to_url(out_url,out_tsv)
|
42
45
|
end
|
43
46
|
|
44
47
|
def Gsheet.write_by_stage_path(stage_path)
|
45
48
|
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
46
|
-
#return
|
47
|
-
return
|
49
|
+
#return blank response if there are no slots available
|
50
|
+
return nil unless gdrive_slot
|
48
51
|
s = Stage.where(:path=>stage_path).first
|
49
52
|
source = s.params['source']
|
50
53
|
target_path = s.params['target']
|
@@ -55,7 +58,7 @@ module Mobilize
|
|
55
58
|
end
|
56
59
|
source_stage_path = "#{s.job.runner.path}/#{source_job_name || s.job.name}/#{source_stage_name}"
|
57
60
|
source_stage = Stage.where(:path=>source_stage_path).first
|
58
|
-
tsv = source_stage.
|
61
|
+
tsv = source_stage.out_dst.read
|
59
62
|
sheet_name = target_path.split("/").last
|
60
63
|
temp_path = [stage_path.gridsafe,sheet_name].join("/")
|
61
64
|
temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
|
@@ -65,8 +68,11 @@ module Mobilize
|
|
65
68
|
target_sheet.merge(temp_sheet)
|
66
69
|
#delete the temp sheet's book
|
67
70
|
temp_sheet.spreadsheet.delete
|
68
|
-
"Write successful for #{target_path}"
|
69
|
-
|
71
|
+
status = "Write successful for #{target_path}"
|
72
|
+
s.update_status(status)
|
73
|
+
#use Gridfs to cache result
|
74
|
+
out_url = "gridfs://#{s.path}/out"
|
75
|
+
Dataset.write_to_url(out_url,status)
|
70
76
|
end
|
71
77
|
end
|
72
78
|
end
|
@@ -51,12 +51,12 @@ module Mobilize
|
|
51
51
|
|
52
52
|
def Resque.active_paths
|
53
53
|
#first argument of the payload is the runner / stage path unless the worker is Jobtracker
|
54
|
-
Resque.jobs('active').map{|j| j['args'].first unless
|
54
|
+
Resque.jobs('active').compact.map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
|
55
55
|
end
|
56
56
|
|
57
57
|
#Resque workers and methods to find
|
58
58
|
def Resque.find_worker_by_path(path)
|
59
|
-
Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
|
59
|
+
Resque.workers('working').select{|w| w.job and w.job['payload'] and w.job['payload']['args'].first == path}.first
|
60
60
|
end
|
61
61
|
|
62
62
|
def Resque.set_worker_args_by_path(path,args)
|
@@ -106,7 +106,9 @@ module Mobilize
|
|
106
106
|
def Resque.failure_report
|
107
107
|
fjobs = {}
|
108
108
|
excs = Hash.new(0)
|
109
|
-
Resque.failures.
|
109
|
+
Resque.failures.each_with_index do |f,f_i|
|
110
|
+
#skip if already notified
|
111
|
+
next if f['notified']
|
110
112
|
sname = f['payload']['args'].first
|
111
113
|
excs = f['error']
|
112
114
|
if fjobs[sname].nil?
|
@@ -116,6 +118,9 @@ module Mobilize
|
|
116
118
|
else
|
117
119
|
fjobs[sname][excs] += 1
|
118
120
|
end
|
121
|
+
#add notified flag to redis
|
122
|
+
f['notified'] = true
|
123
|
+
::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
|
119
124
|
end
|
120
125
|
return fjobs
|
121
126
|
end
|
@@ -134,9 +139,9 @@ module Mobilize
|
|
134
139
|
if count.to_i > idle_pids.length or count == 0
|
135
140
|
return false
|
136
141
|
elsif count
|
137
|
-
"kill #{idle_pids[0..count-1].join(" ")}".bash
|
142
|
+
"kill #{idle_pids[0..count-1].join(" ")}".bash(false)
|
138
143
|
else
|
139
|
-
"kill #{idle_pids.join(" ")}".bash
|
144
|
+
"kill #{idle_pids.join(" ")}".bash(false)
|
140
145
|
end
|
141
146
|
return true
|
142
147
|
end
|
@@ -148,7 +153,7 @@ module Mobilize
|
|
148
153
|
if idle_stale_pids.length == 0
|
149
154
|
return false
|
150
155
|
else
|
151
|
-
"kill #{idle_stale_pids.join(" ")}".bash
|
156
|
+
"kill #{idle_stale_pids.join(" ")}".bash(false)
|
152
157
|
end
|
153
158
|
return true
|
154
159
|
end
|
@@ -163,12 +163,14 @@ module Mobilize
|
|
163
163
|
if Jobtracker.notif_due?
|
164
164
|
notifs = []
|
165
165
|
if Jobtracker.failures.length>0
|
166
|
-
n = {}
|
167
166
|
jfcs = Resque.failure_report
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
167
|
+
unless jfcs=={} #no new failures
|
168
|
+
n = {}
|
169
|
+
n['subj'] = "#{jfcs.keys.length.to_s} new failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
|
170
|
+
#one row per exception type, with the job name
|
171
|
+
n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
|
172
|
+
notifs << n
|
173
|
+
end
|
172
174
|
end
|
173
175
|
lws = Jobtracker.max_run_time_workers
|
174
176
|
if lws.length>0
|
@@ -4,7 +4,7 @@ module Mobilize
|
|
4
4
|
include Mongoid::Timestamps
|
5
5
|
field :handler, type: String
|
6
6
|
field :path, type: String
|
7
|
-
field :
|
7
|
+
field :http_url, type: String
|
8
8
|
field :raw_size, type: Fixnum
|
9
9
|
field :last_cached_at, type: Time
|
10
10
|
field :last_cache_handler, type: String
|
@@ -18,6 +18,16 @@ module Mobilize
|
|
18
18
|
return "Mobilize::#{dst.handler.humanize}".constantize.read_by_path(dst.path)
|
19
19
|
end
|
20
20
|
|
21
|
+
def Dataset.find_by_url(url)
|
22
|
+
handler,path = url.split("://")
|
23
|
+
Dataset.find_by_handler_and_path(handler,path)
|
24
|
+
end
|
25
|
+
|
26
|
+
def Dataset.find_or_create_by_url(url)
|
27
|
+
handler,path = url.split("://")
|
28
|
+
Dataset.find_or_create_by_handler_and_path(handler,path)
|
29
|
+
end
|
30
|
+
|
21
31
|
def Dataset.find_by_handler_and_path(handler,path)
|
22
32
|
Dataset.where(handler: handler, path: path).first
|
23
33
|
end
|
@@ -28,41 +38,30 @@ module Mobilize
|
|
28
38
|
return dst
|
29
39
|
end
|
30
40
|
|
31
|
-
def
|
32
|
-
dst =
|
33
|
-
|
34
|
-
|
35
|
-
dst.save!
|
36
|
-
return true
|
41
|
+
def Dataset.write_to_url(url,string)
|
42
|
+
dst = Dataset.find_or_create_by_url(url)
|
43
|
+
dst.write(string)
|
44
|
+
url
|
37
45
|
end
|
38
46
|
|
39
|
-
def
|
47
|
+
def read
|
40
48
|
dst = self
|
41
|
-
|
49
|
+
dst.update_attributes(:last_read_at=>Time.now.utc)
|
50
|
+
"Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path)
|
42
51
|
end
|
43
52
|
|
44
|
-
def
|
53
|
+
def write(string)
|
45
54
|
dst = self
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
raise "Cache invalid or not found for #{cache_handler}://#{dst.path}"
|
51
|
-
end
|
55
|
+
"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string)
|
56
|
+
dst.raw_size = string.length
|
57
|
+
dst.save!
|
58
|
+
return true
|
52
59
|
end
|
53
60
|
|
54
|
-
def
|
61
|
+
def delete
|
55
62
|
dst = self
|
56
|
-
"Mobilize::#{
|
57
|
-
dst.update_attributes(:last_cached_at=>Time.now.utc,
|
58
|
-
:last_cache_handler=>cache_handler.to_s.downcase,
|
59
|
-
:cache_expire_at=>expire_at,
|
60
|
-
:size=>string.length)
|
63
|
+
"Mobilize::#{dst.handler.humanize}".constantize.delete_by_dataset_path(dst.path)
|
61
64
|
return true
|
62
65
|
end
|
63
|
-
|
64
|
-
def delete_cache(cache_handler="gridfs")
|
65
|
-
return "Mobilize::#{cache_handler.humanize}".constantize.delete(dst.handler, dst.path)
|
66
|
-
end
|
67
66
|
end
|
68
67
|
end
|
@@ -71,9 +71,9 @@ module Mobilize
|
|
71
71
|
Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
|
72
72
|
end
|
73
73
|
|
74
|
-
def
|
74
|
+
def cache
|
75
75
|
r = self
|
76
|
-
r.
|
76
|
+
Dataset.find_or_create_by_url("gridfs://#{r.path}")
|
77
77
|
end
|
78
78
|
|
79
79
|
def gsheet(gdrive_slot)
|
@@ -88,7 +88,7 @@ module Mobilize
|
|
88
88
|
r = self
|
89
89
|
gsheet_tsv = r.gsheet(gdrive_slot).to_tsv
|
90
90
|
#cache in DB
|
91
|
-
r.
|
91
|
+
r.cache.write(gsheet_tsv)
|
92
92
|
#turn it into a hash array
|
93
93
|
gsheet_jobs = gsheet_tsv.tsv_to_hash_array
|
94
94
|
#go through each job, update relevant job with its params
|
@@ -7,6 +7,7 @@ module Mobilize
|
|
7
7
|
field :call, type: String
|
8
8
|
field :param_string, type: Array
|
9
9
|
field :status, type: String
|
10
|
+
field :out_url, type: String
|
10
11
|
field :completed_at, type: Time
|
11
12
|
field :started_at, type: Time
|
12
13
|
field :failed_at, type: Time
|
@@ -19,19 +20,12 @@ module Mobilize
|
|
19
20
|
s.path.split("/").last.gsub("stage","").to_i
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
+
def out_dst
|
24
|
+
#this gives a dataset that points to the output
|
25
|
+
#allowing you to determine its size
|
26
|
+
#before committing to a read or write
|
23
27
|
s = self
|
24
|
-
Dataset.
|
25
|
-
end
|
26
|
-
|
27
|
-
def stderr_dataset
|
28
|
-
s = self
|
29
|
-
Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stderr")
|
30
|
-
end
|
31
|
-
|
32
|
-
def log_dataset
|
33
|
-
s = self
|
34
|
-
Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/log")
|
28
|
+
Dataset.find_by_url(s.out_url) if s.out_url
|
35
29
|
end
|
36
30
|
|
37
31
|
def params
|
@@ -79,27 +73,21 @@ module Mobilize
|
|
79
73
|
j = s.job
|
80
74
|
s.update_attributes(:started_at=>Time.now.utc)
|
81
75
|
s.update_status(%{Starting at #{Time.now.utc}})
|
82
|
-
stdout, stderr = [nil,nil]
|
83
76
|
begin
|
84
|
-
|
85
|
-
|
86
|
-
s.
|
77
|
+
#get response by running method
|
78
|
+
s.out_url = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
|
79
|
+
s.save!
|
80
|
+
unless s.out_url
|
81
|
+
#re-queue self if no response
|
82
|
+
s.enqueue!
|
83
|
+
return false
|
84
|
+
end
|
87
85
|
rescue ScriptError, StandardError => exc
|
88
|
-
stderr = [exc.to_s,exc.backtrace.to_s].join("\n")
|
89
|
-
#record the failure in Job so it appears on Runner, turn it off
|
90
|
-
#so it doesn't run again
|
91
86
|
j.update_attributes(:active=>false)
|
92
87
|
s.update_attributes(:failed_at=>Time.now.utc)
|
93
88
|
s.update_status("Failed at #{Time.now.utc.to_s}")
|
94
89
|
raise exc
|
95
90
|
end
|
96
|
-
if stdout == false
|
97
|
-
#re-queue self if output is false
|
98
|
-
s.enqueue!
|
99
|
-
return false
|
100
|
-
end
|
101
|
-
#write output to cache
|
102
|
-
s.stdout_dataset.write_cache(stdout)
|
103
91
|
s.update_attributes(:completed_at=>Time.now.utc)
|
104
92
|
s.update_status("Completed at #{Time.now.utc.to_s}")
|
105
93
|
if s.idx == j.stages.length
|
data/test/mobilize-base_test.rb
CHANGED
@@ -41,10 +41,10 @@ describe "Mobilize" do
|
|
41
41
|
test_source_tsv = test_source_ha.hash_array_to_tsv
|
42
42
|
test_source_sheet.write(test_source_tsv)
|
43
43
|
|
44
|
-
puts "add row to jobs sheet, wait
|
44
|
+
puts "add row to jobs sheet, wait 150s"
|
45
45
|
test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
|
46
46
|
jobs_sheet.add_or_update_rows(test_job_rows)
|
47
|
-
sleep
|
47
|
+
sleep 150
|
48
48
|
|
49
49
|
puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
|
50
50
|
test_target_sheet_1 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
|
@@ -52,11 +52,11 @@ describe "Mobilize" do
|
|
52
52
|
|
53
53
|
assert test_target_sheet_1.to_tsv == test_source_sheet.to_tsv
|
54
54
|
|
55
|
-
puts "delete both output sheets, set first job to active=true"
|
55
|
+
puts "delete both output sheets, set first job to active=true, wait 120s"
|
56
56
|
[test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
|
57
57
|
|
58
58
|
jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
|
59
|
-
sleep
|
59
|
+
sleep 120
|
60
60
|
|
61
61
|
test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
|
62
62
|
puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.91
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -238,7 +238,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
238
238
|
version: '0'
|
239
239
|
segments:
|
240
240
|
- 0
|
241
|
-
hash:
|
241
|
+
hash: -136156851409089699
|
242
242
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
243
243
|
none: false
|
244
244
|
requirements:
|
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
247
247
|
version: '0'
|
248
248
|
segments:
|
249
249
|
- 0
|
250
|
-
hash:
|
250
|
+
hash: -136156851409089699
|
251
251
|
requirements: []
|
252
252
|
rubyforge_project: mobilize-base
|
253
253
|
rubygems_version: 1.8.24
|