mobilize-base 1.0.84 → 1.0.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +2 -2
- data/lib/mobilize-base/handlers/gbook.rb +4 -4
- data/lib/mobilize-base/handlers/gfile.rb +4 -1
- data/lib/mobilize-base/handlers/gridfs.rb +7 -7
- data/lib/mobilize-base/handlers/gsheet.rb +12 -6
- data/lib/mobilize-base/handlers/resque.rb +11 -6
- data/lib/mobilize-base/jobtracker.rb +7 -5
- data/lib/mobilize-base/models/dataset.rb +25 -26
- data/lib/mobilize-base/models/runner.rb +3 -3
- data/lib/mobilize-base/models/stage.rb +14 -26
- data/lib/mobilize-base/models/user.rb +1 -1
- data/lib/mobilize-base/version.rb +1 -1
- data/test/mobilize-base_test.rb +4 -4
- metadata +4 -4
@@ -8,7 +8,7 @@ module GoogleDrive
|
|
8
8
|
attempts = 0
|
9
9
|
sleep_time = nil
|
10
10
|
#try 5 times to make the call
|
11
|
-
while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts <
|
11
|
+
while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 20
|
12
12
|
#instantiate http object, set params
|
13
13
|
http = @proxy.new(uri.host, uri.port)
|
14
14
|
http.use_ssl = true
|
@@ -40,7 +40,7 @@ module GoogleDrive
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
43
|
-
raise "No response after
|
43
|
+
raise "No response after 20 attempts" if response.nil?
|
44
44
|
raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
|
45
45
|
return response
|
46
46
|
end
|
@@ -8,10 +8,10 @@ module Mobilize
|
|
8
8
|
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
9
9
|
#there should only be one book with each path, otherwise we have fail
|
10
10
|
book = nil
|
11
|
-
if books.length>1 and dst.
|
11
|
+
if books.length>1 and dst.http_url.to_s.length>0
|
12
12
|
#some idiot process created a duplicate book.
|
13
13
|
#Fix by renaming all but one with dst entry's key
|
14
|
-
dkey = dst.
|
14
|
+
dkey = dst.http_url.split("key=").last
|
15
15
|
books.each do |b|
|
16
16
|
bkey = b.resource_id.split(":").last
|
17
17
|
if bkey == dkey
|
@@ -30,9 +30,9 @@ module Mobilize
|
|
30
30
|
book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
|
31
31
|
("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
|
32
32
|
end
|
33
|
-
#always make sure book dataset URL is up to date
|
33
|
+
#always make sure book dataset http URL is up to date
|
34
34
|
#and that book has admin acl
|
35
|
-
dst.update_attributes(:
|
35
|
+
dst.update_attributes(:http_url=>book.human_url)
|
36
36
|
book.add_admin_acl
|
37
37
|
return book
|
38
38
|
end
|
@@ -28,7 +28,10 @@ module Mobilize
|
|
28
28
|
return false unless gdrive_slot
|
29
29
|
s = Stage.where(:path=>stage_path)
|
30
30
|
gfile_path = s.params['file']
|
31
|
-
Gfile.find_by_path(gfile_path,gdrive_slot).read
|
31
|
+
out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
|
32
|
+
#use Gridfs to cache result
|
33
|
+
out_url = "gridfs://#{s.path}/out"
|
34
|
+
Dataset.write_to_url(out_url,out_tsv)
|
32
35
|
end
|
33
36
|
end
|
34
37
|
end
|
@@ -11,29 +11,29 @@ module Mobilize
|
|
11
11
|
return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
|
12
12
|
end
|
13
13
|
|
14
|
-
def Gridfs.
|
14
|
+
def Gridfs.read_by_dataset_path(dst_path)
|
15
15
|
begin
|
16
|
-
zs=Gridfs.grid.open(
|
16
|
+
zs=Gridfs.grid.open(dst_path,'r').read
|
17
17
|
return ::Zlib::Inflate.inflate(zs)
|
18
18
|
rescue
|
19
19
|
return nil
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def Gridfs.
|
23
|
+
def Gridfs.write_by_dataset_path(dst_path,string)
|
24
24
|
zs = ::Zlib::Deflate.deflate(string)
|
25
25
|
raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
|
26
|
-
curr_zs = Gridfs.
|
26
|
+
curr_zs = Gridfs.read_by_dataset_path(dst_path).to_s
|
27
27
|
#write a new version when there is a change
|
28
28
|
if curr_zs != zs
|
29
|
-
Gridfs.grid.open(
|
29
|
+
Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
|
30
30
|
end
|
31
31
|
return true
|
32
32
|
end
|
33
33
|
|
34
|
-
def Gridfs.delete(
|
34
|
+
def Gridfs.delete(dst_path)
|
35
35
|
begin
|
36
|
-
Gridfs.grid.delete(
|
36
|
+
Gridfs.grid.delete(dst_path)
|
37
37
|
return true
|
38
38
|
rescue
|
39
39
|
return nil
|
@@ -38,13 +38,16 @@ module Mobilize
|
|
38
38
|
return false unless gdrive_slot
|
39
39
|
s = Stage.where(:path=>stage_path).first
|
40
40
|
gsheet_path = s.params['source']
|
41
|
-
Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
|
41
|
+
out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
|
42
|
+
#use Gridfs to cache result
|
43
|
+
out_url = "gridfs://#{s.path}/out"
|
44
|
+
Dataset.write_to_url(out_url,out_tsv)
|
42
45
|
end
|
43
46
|
|
44
47
|
def Gsheet.write_by_stage_path(stage_path)
|
45
48
|
gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
|
46
|
-
#return
|
47
|
-
return
|
49
|
+
#return blank response if there are no slots available
|
50
|
+
return nil unless gdrive_slot
|
48
51
|
s = Stage.where(:path=>stage_path).first
|
49
52
|
source = s.params['source']
|
50
53
|
target_path = s.params['target']
|
@@ -55,7 +58,7 @@ module Mobilize
|
|
55
58
|
end
|
56
59
|
source_stage_path = "#{s.job.runner.path}/#{source_job_name || s.job.name}/#{source_stage_name}"
|
57
60
|
source_stage = Stage.where(:path=>source_stage_path).first
|
58
|
-
tsv = source_stage.
|
61
|
+
tsv = source_stage.out_dst.read
|
59
62
|
sheet_name = target_path.split("/").last
|
60
63
|
temp_path = [stage_path.gridsafe,sheet_name].join("/")
|
61
64
|
temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
|
@@ -65,8 +68,11 @@ module Mobilize
|
|
65
68
|
target_sheet.merge(temp_sheet)
|
66
69
|
#delete the temp sheet's book
|
67
70
|
temp_sheet.spreadsheet.delete
|
68
|
-
"Write successful for #{target_path}"
|
69
|
-
|
71
|
+
status = "Write successful for #{target_path}"
|
72
|
+
s.update_status(status)
|
73
|
+
#use Gridfs to cache result
|
74
|
+
out_url = "gridfs://#{s.path}/out"
|
75
|
+
Dataset.write_to_url(out_url,status)
|
70
76
|
end
|
71
77
|
end
|
72
78
|
end
|
@@ -51,12 +51,12 @@ module Mobilize
|
|
51
51
|
|
52
52
|
def Resque.active_paths
|
53
53
|
#first argument of the payload is the runner / stage path unless the worker is Jobtracker
|
54
|
-
Resque.jobs('active').map{|j| j['args'].first unless
|
54
|
+
Resque.jobs('active').compact.map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
|
55
55
|
end
|
56
56
|
|
57
57
|
#Resque workers and methods to find
|
58
58
|
def Resque.find_worker_by_path(path)
|
59
|
-
Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
|
59
|
+
Resque.workers('working').select{|w| w.job and w.job['payload'] and w.job['payload']['args'].first == path}.first
|
60
60
|
end
|
61
61
|
|
62
62
|
def Resque.set_worker_args_by_path(path,args)
|
@@ -106,7 +106,9 @@ module Mobilize
|
|
106
106
|
def Resque.failure_report
|
107
107
|
fjobs = {}
|
108
108
|
excs = Hash.new(0)
|
109
|
-
Resque.failures.
|
109
|
+
Resque.failures.each_with_index do |f,f_i|
|
110
|
+
#skip if already notified
|
111
|
+
next if f['notified']
|
110
112
|
sname = f['payload']['args'].first
|
111
113
|
excs = f['error']
|
112
114
|
if fjobs[sname].nil?
|
@@ -116,6 +118,9 @@ module Mobilize
|
|
116
118
|
else
|
117
119
|
fjobs[sname][excs] += 1
|
118
120
|
end
|
121
|
+
#add notified flag to redis
|
122
|
+
f['notified'] = true
|
123
|
+
::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
|
119
124
|
end
|
120
125
|
return fjobs
|
121
126
|
end
|
@@ -134,9 +139,9 @@ module Mobilize
|
|
134
139
|
if count.to_i > idle_pids.length or count == 0
|
135
140
|
return false
|
136
141
|
elsif count
|
137
|
-
"kill #{idle_pids[0..count-1].join(" ")}".bash
|
142
|
+
"kill #{idle_pids[0..count-1].join(" ")}".bash(false)
|
138
143
|
else
|
139
|
-
"kill #{idle_pids.join(" ")}".bash
|
144
|
+
"kill #{idle_pids.join(" ")}".bash(false)
|
140
145
|
end
|
141
146
|
return true
|
142
147
|
end
|
@@ -148,7 +153,7 @@ module Mobilize
|
|
148
153
|
if idle_stale_pids.length == 0
|
149
154
|
return false
|
150
155
|
else
|
151
|
-
"kill #{idle_stale_pids.join(" ")}".bash
|
156
|
+
"kill #{idle_stale_pids.join(" ")}".bash(false)
|
152
157
|
end
|
153
158
|
return true
|
154
159
|
end
|
@@ -163,12 +163,14 @@ module Mobilize
|
|
163
163
|
if Jobtracker.notif_due?
|
164
164
|
notifs = []
|
165
165
|
if Jobtracker.failures.length>0
|
166
|
-
n = {}
|
167
166
|
jfcs = Resque.failure_report
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
167
|
+
unless jfcs=={} #no new failures
|
168
|
+
n = {}
|
169
|
+
n['subj'] = "#{jfcs.keys.length.to_s} new failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
|
170
|
+
#one row per exception type, with the job name
|
171
|
+
n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
|
172
|
+
notifs << n
|
173
|
+
end
|
172
174
|
end
|
173
175
|
lws = Jobtracker.max_run_time_workers
|
174
176
|
if lws.length>0
|
@@ -4,7 +4,7 @@ module Mobilize
|
|
4
4
|
include Mongoid::Timestamps
|
5
5
|
field :handler, type: String
|
6
6
|
field :path, type: String
|
7
|
-
field :
|
7
|
+
field :http_url, type: String
|
8
8
|
field :raw_size, type: Fixnum
|
9
9
|
field :last_cached_at, type: Time
|
10
10
|
field :last_cache_handler, type: String
|
@@ -18,6 +18,16 @@ module Mobilize
|
|
18
18
|
return "Mobilize::#{dst.handler.humanize}".constantize.read_by_path(dst.path)
|
19
19
|
end
|
20
20
|
|
21
|
+
def Dataset.find_by_url(url)
|
22
|
+
handler,path = url.split("://")
|
23
|
+
Dataset.find_by_handler_and_path(handler,path)
|
24
|
+
end
|
25
|
+
|
26
|
+
def Dataset.find_or_create_by_url(url)
|
27
|
+
handler,path = url.split("://")
|
28
|
+
Dataset.find_or_create_by_handler_and_path(handler,path)
|
29
|
+
end
|
30
|
+
|
21
31
|
def Dataset.find_by_handler_and_path(handler,path)
|
22
32
|
Dataset.where(handler: handler, path: path).first
|
23
33
|
end
|
@@ -28,41 +38,30 @@ module Mobilize
|
|
28
38
|
return dst
|
29
39
|
end
|
30
40
|
|
31
|
-
def
|
32
|
-
dst =
|
33
|
-
|
34
|
-
|
35
|
-
dst.save!
|
36
|
-
return true
|
41
|
+
def Dataset.write_to_url(url,string)
|
42
|
+
dst = Dataset.find_or_create_by_url(url)
|
43
|
+
dst.write(string)
|
44
|
+
url
|
37
45
|
end
|
38
46
|
|
39
|
-
def
|
47
|
+
def read
|
40
48
|
dst = self
|
41
|
-
|
49
|
+
dst.update_attributes(:last_read_at=>Time.now.utc)
|
50
|
+
"Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path)
|
42
51
|
end
|
43
52
|
|
44
|
-
def
|
53
|
+
def write(string)
|
45
54
|
dst = self
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
raise "Cache invalid or not found for #{cache_handler}://#{dst.path}"
|
51
|
-
end
|
55
|
+
"Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string)
|
56
|
+
dst.raw_size = string.length
|
57
|
+
dst.save!
|
58
|
+
return true
|
52
59
|
end
|
53
60
|
|
54
|
-
def
|
61
|
+
def delete
|
55
62
|
dst = self
|
56
|
-
"Mobilize::#{
|
57
|
-
dst.update_attributes(:last_cached_at=>Time.now.utc,
|
58
|
-
:last_cache_handler=>cache_handler.to_s.downcase,
|
59
|
-
:cache_expire_at=>expire_at,
|
60
|
-
:size=>string.length)
|
63
|
+
"Mobilize::#{dst.handler.humanize}".constantize.delete_by_dataset_path(dst.path)
|
61
64
|
return true
|
62
65
|
end
|
63
|
-
|
64
|
-
def delete_cache(cache_handler="gridfs")
|
65
|
-
return "Mobilize::#{cache_handler.humanize}".constantize.delete(dst.handler, dst.path)
|
66
|
-
end
|
67
66
|
end
|
68
67
|
end
|
@@ -71,9 +71,9 @@ module Mobilize
|
|
71
71
|
Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
|
72
72
|
end
|
73
73
|
|
74
|
-
def
|
74
|
+
def cache
|
75
75
|
r = self
|
76
|
-
r.
|
76
|
+
Dataset.find_or_create_by_url("gridfs://#{r.path}")
|
77
77
|
end
|
78
78
|
|
79
79
|
def gsheet(gdrive_slot)
|
@@ -88,7 +88,7 @@ module Mobilize
|
|
88
88
|
r = self
|
89
89
|
gsheet_tsv = r.gsheet(gdrive_slot).to_tsv
|
90
90
|
#cache in DB
|
91
|
-
r.
|
91
|
+
r.cache.write(gsheet_tsv)
|
92
92
|
#turn it into a hash array
|
93
93
|
gsheet_jobs = gsheet_tsv.tsv_to_hash_array
|
94
94
|
#go through each job, update relevant job with its params
|
@@ -7,6 +7,7 @@ module Mobilize
|
|
7
7
|
field :call, type: String
|
8
8
|
field :param_string, type: Array
|
9
9
|
field :status, type: String
|
10
|
+
field :out_url, type: String
|
10
11
|
field :completed_at, type: Time
|
11
12
|
field :started_at, type: Time
|
12
13
|
field :failed_at, type: Time
|
@@ -19,19 +20,12 @@ module Mobilize
|
|
19
20
|
s.path.split("/").last.gsub("stage","").to_i
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
+
def out_dst
|
24
|
+
#this gives a dataset that points to the output
|
25
|
+
#allowing you to determine its size
|
26
|
+
#before committing to a read or write
|
23
27
|
s = self
|
24
|
-
Dataset.
|
25
|
-
end
|
26
|
-
|
27
|
-
def stderr_dataset
|
28
|
-
s = self
|
29
|
-
Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stderr")
|
30
|
-
end
|
31
|
-
|
32
|
-
def log_dataset
|
33
|
-
s = self
|
34
|
-
Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/log")
|
28
|
+
Dataset.find_by_url(s.out_url) if s.out_url
|
35
29
|
end
|
36
30
|
|
37
31
|
def params
|
@@ -79,27 +73,21 @@ module Mobilize
|
|
79
73
|
j = s.job
|
80
74
|
s.update_attributes(:started_at=>Time.now.utc)
|
81
75
|
s.update_status(%{Starting at #{Time.now.utc}})
|
82
|
-
stdout, stderr = [nil,nil]
|
83
76
|
begin
|
84
|
-
|
85
|
-
|
86
|
-
s.
|
77
|
+
#get response by running method
|
78
|
+
s.out_url = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
|
79
|
+
s.save!
|
80
|
+
unless s.out_url
|
81
|
+
#re-queue self if no response
|
82
|
+
s.enqueue!
|
83
|
+
return false
|
84
|
+
end
|
87
85
|
rescue ScriptError, StandardError => exc
|
88
|
-
stderr = [exc.to_s,exc.backtrace.to_s].join("\n")
|
89
|
-
#record the failure in Job so it appears on Runner, turn it off
|
90
|
-
#so it doesn't run again
|
91
86
|
j.update_attributes(:active=>false)
|
92
87
|
s.update_attributes(:failed_at=>Time.now.utc)
|
93
88
|
s.update_status("Failed at #{Time.now.utc.to_s}")
|
94
89
|
raise exc
|
95
90
|
end
|
96
|
-
if stdout == false
|
97
|
-
#re-queue self if output is false
|
98
|
-
s.enqueue!
|
99
|
-
return false
|
100
|
-
end
|
101
|
-
#write output to cache
|
102
|
-
s.stdout_dataset.write_cache(stdout)
|
103
91
|
s.update_attributes(:completed_at=>Time.now.utc)
|
104
92
|
s.update_status("Completed at #{Time.now.utc.to_s}")
|
105
93
|
if s.idx == j.stages.length
|
data/test/mobilize-base_test.rb
CHANGED
@@ -41,10 +41,10 @@ describe "Mobilize" do
|
|
41
41
|
test_source_tsv = test_source_ha.hash_array_to_tsv
|
42
42
|
test_source_sheet.write(test_source_tsv)
|
43
43
|
|
44
|
-
puts "add row to jobs sheet, wait
|
44
|
+
puts "add row to jobs sheet, wait 150s"
|
45
45
|
test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
|
46
46
|
jobs_sheet.add_or_update_rows(test_job_rows)
|
47
|
-
sleep
|
47
|
+
sleep 150
|
48
48
|
|
49
49
|
puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
|
50
50
|
test_target_sheet_1 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
|
@@ -52,11 +52,11 @@ describe "Mobilize" do
|
|
52
52
|
|
53
53
|
assert test_target_sheet_1.to_tsv == test_source_sheet.to_tsv
|
54
54
|
|
55
|
-
puts "delete both output sheets, set first job to active=true"
|
55
|
+
puts "delete both output sheets, set first job to active=true, wait 120s"
|
56
56
|
[test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
|
57
57
|
|
58
58
|
jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
|
59
|
-
sleep
|
59
|
+
sleep 120
|
60
60
|
|
61
61
|
test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
|
62
62
|
puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.91
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-01-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -238,7 +238,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
238
238
|
version: '0'
|
239
239
|
segments:
|
240
240
|
- 0
|
241
|
-
hash:
|
241
|
+
hash: -136156851409089699
|
242
242
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
243
243
|
none: false
|
244
244
|
requirements:
|
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
247
247
|
version: '0'
|
248
248
|
segments:
|
249
249
|
- 0
|
250
|
-
hash:
|
250
|
+
hash: -136156851409089699
|
251
251
|
requirements: []
|
252
252
|
rubyforge_project: mobilize-base
|
253
253
|
rubygems_version: 1.8.24
|