mobilize-base 1.29 → 1.33
- data/README.md +19 -10
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -7
- data/lib/mobilize-base/extensions/resque-server/views/queues.erb +59 -0
- data/lib/mobilize-base/extensions/resque-server/views/working.erb +85 -0
- data/lib/mobilize-base/extensions/string.rb +12 -4
- data/lib/mobilize-base/extensions/yaml.rb +10 -7
- data/lib/mobilize-base/handlers/gbook.rb +25 -38
- data/lib/mobilize-base/handlers/gdrive.rb +8 -0
- data/lib/mobilize-base/handlers/gfile.rb +4 -2
- data/lib/mobilize-base/handlers/gridfs.rb +19 -24
- data/lib/mobilize-base/handlers/gsheet.rb +13 -7
- data/lib/mobilize-base/handlers/resque.rb +9 -3
- data/lib/mobilize-base/helpers/job_helper.rb +54 -0
- data/lib/mobilize-base/helpers/jobtracker_helper.rb +147 -0
- data/lib/mobilize-base/helpers/runner_helper.rb +83 -0
- data/lib/mobilize-base/helpers/stage_helper.rb +38 -0
- data/lib/mobilize-base/jobtracker.rb +16 -192
- data/lib/mobilize-base/models/job.rb +36 -48
- data/lib/mobilize-base/models/runner.rb +22 -122
- data/lib/mobilize-base/models/stage.rb +14 -35
- data/lib/mobilize-base/tasks.rb +16 -3
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +5 -1
- data/lib/samples/gdrive.yml +6 -0
- data/lib/samples/gridfs.yml +0 -3
- data/lib/samples/gsheet.yml +4 -4
- data/mobilize-base.gemspec +3 -3
- data/test/base_job_rows.yml +5 -1
- data/test/mobilize-base_test.rb +20 -61
- data/test/test_helper.rb +79 -0
- metadata +57 -25
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -162,9 +162,15 @@ the same domain, and all Users should have emails in this domain.
 * an owner name and password. You can set up separate owners
   for different environments as in the below file, which will keep your
   mission critical workers from getting rate-limit errors.
+* one admin_group_name, which the owner and all admins should be added to -- this
+  group will need read permissions to read from and edit permissions to write
+  to files.
 * one or more admins with email attributes -- these will be for people
   who should be given write permissions to all Mobilize books in the
   environment for maintenance purposes.
+* one worker_group_name, which the owner and all workers should be added to -- this
+  group will need read permissions to read from and edit permissions to write
+  to files.
 * one or more workers with name and pw attributes -- they will be used
   to queue up google reads and writes. This can be the same as the owner
   account for testing purposes or low-volume environments.
@@ -182,8 +188,10 @@ development:
   owner:
     name: owner_development
     pw: google_drive_password
+  admin_group_name: admins_development
   admins:
     - name: admin
+  worker_group_name: workers_development
   workers:
     - name: worker_development001
       pw: worker001_google_drive_password
@@ -194,8 +202,10 @@ test:
   owner:
     name: owner_test
     pw: google_drive_password
+  admin_group_name: admins_test
   admins:
     - name: admin
+  worker_group_name: workers_test
   workers:
     - name: worker_test001
       pw: worker001_google_drive_password
@@ -206,8 +216,10 @@ production:
   owner:
     name: owner_production
     pw: google_drive_password
+  admin_group_name: admins_production
   admins:
     - name: admin
+  worker_group_name: workers_production
   workers:
     - name: worker_production001
       pw: worker001_google_drive_password
@@ -220,9 +232,8 @@ production:
 
 gsheet.yml needs:
 * max_cells, which is the number of cells a sheet is allowed to have
-  written to it at one time. Default is
-
-  you try to write more than that.
+  written to it at one time. Default is 50k cells, which is about how
+  much you can write before things start breaking.
 * Because Google Docs ties date formatting to the Locale for the
   spreadsheet, there are 2 date format parameters:
   * read_date_format, which is the format that should be read FROM google
@@ -356,22 +367,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
 Mobilize stores cached data in MongoDB Gridfs.
 It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
 
-* max_versions - the number of __different__ versions of data to keep
-  for a given cache. Default is 10. This is meant mostly to allow you to
-  restore Runners from cache if necessary.
 * max_compressed_write_size - the amount of compressed data Gridfs will
   allow. If you try to write more than this, an exception will be thrown.
 
 ``` yml
 ---
 development:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 test:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 production:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 ```
 
@@ -567,7 +572,11 @@ from the referenced sheet.
 * All stages accept retry parameters:
   * retries: an integer specifying the number of times that the system will try it again before giving up.
   * delay: an integer specifying the number of seconds between retries.
-  * always_on: if
+  * always_on: if false, turns the job off on stage failures.
+    Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
+  * notify: by default, the stage owner will be notified on failure.
+    * if false, will not notify the stage owner in the event of a failure.
+    * If it's an email address, will email the specified person.
 * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
 * The tab will be headed "response" and will contain the exception and backtrace for the error.
 * The test uses "Requestor_mobilize(test)/base1.out" and

data/lib/mobilize-base/extensions/google_drive/file.rb
CHANGED
@@ -13,7 +13,7 @@ module GoogleDrive
     f = self
     #admin includes workers
     return true if f.has_admin_acl?
-    accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
+    accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
     accounts.each do |email|
       f.update_acl(email)
     end
@@ -21,9 +21,9 @@ module GoogleDrive
 
   def has_admin_acl?
     f = self
-    curr_emails = f.acls.map{|a| a.scope}.sort
-    admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
-    if (curr_emails & admin_emails) == admin_emails
+    curr_emails = f.acls.map{|a| a.scope}.compact.sort
+    admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
+    if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
       return true
     else
       return false
@@ -32,9 +32,9 @@ module GoogleDrive
 
   def has_worker_acl?
     f = self
-    curr_emails = f.acls.map{|a| a.scope}.sort
+    curr_emails = f.acls.map{|a| a.scope}.compact.sort
     worker_emails = Mobilize::Gdrive.worker_emails.sort
-    if (curr_emails & worker_emails) == worker_emails
+    if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
       return true
     else
       return false
@@ -84,7 +84,7 @@ module GoogleDrive
   end
   def acl_entry(email)
     f = self
-    f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
+    f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
   end
   def entry_hash
     f = self
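
The intent of the .compact and .uniq additions is easier to see in isolation: the check passes when every admin and worker email already appears among the file's ACL scopes, and nil scopes or duplicated addresses should not break that comparison. A minimal standalone sketch with hypothetical addresses (no GoogleDrive objects involved):

``` ruby
# Standalone sketch of the subset test used by has_admin_acl? / has_worker_acl?.
# The email addresses are hypothetical; real values come from the gdrive.yml config.
curr_emails  = ["admin@example.com", nil, "worker1@example.com"].compact.sort
admin_emails = (["admin@example.com"] + ["worker1@example.com", "worker1@example.com"]).uniq

# .compact drops nil scopes (e.g. link-sharing ACL entries with no email),
# .uniq keeps a duplicated owner/worker address from breaking the comparison.
covered = curr_emails == admin_emails || (curr_emails & admin_emails) == admin_emails
puts covered # => true
```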

data/lib/mobilize-base/extensions/resque-server/views/queues.erb
ADDED
@@ -0,0 +1,59 @@
+<% @subtabs = resque.queues unless partial? || params[:id].nil? %>
+
+<% if queue = params[:id] %>
+
+  <h1>Pending jobs on <span class='hl'><%= queue %></span></h1>
+  <form method="POST" action="<%=u "/queues/#{queue}/remove" %>" class='remove-queue'>
+    <input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
+  </form>
+  <p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
+  <table class='jobs'>
+    <tr>
+      <th>Class</th>
+      <th>Args</th>
+    </tr>
+    <% for job in (jobs = resque.peek(queue, start, 20)) %>
+      <tr>
+        <td class='class'><%= job['class'] %></td>
+        <td class='args'><%=h job['args'].inspect %></td>
+      </tr>
+    <% end %>
+    <% if jobs.empty? %>
+      <tr>
+        <td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
+      </tr>
+    <% end %>
+  </table>
+  <%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
+<% else %>
+
+  <h1 class='wi'>Queues</h1>
+  <p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
+  <table class='queues'>
+    <tr>
+      <th>Name</th>
+      <th>Jobs</th>
+    </tr>
+    <!-- only show nonzero length queues-->
+    <% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
+      <tr>
+        <td class='queue'><a class="queue" href="<%= u "queues/#{queue}" %>"><%= queue %></a></td>
+        <td class='size'><%= resque.size queue %></td>
+      </tr>
+    <% end %>
+    <% if failed_multiple_queues? %>
+      <% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
+        <tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
+          <td class='queue failed'><a class="queue" href="<%= u "failed/#{queue}" %>"><%= queue %></a></td>
+          <td class='size'><%= Resque::Failure.count(queue) %></td>
+        </tr>
+      <% end %>
+    <% else %>
+      <tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
+        <td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
+        <td class='size'><%= Resque::Failure.count %></td>
+      </tr>
+    <% end %>
+  </table>
+
+<% end %>

data/lib/mobilize-base/extensions/resque-server/views/working.erb
ADDED
@@ -0,0 +1,85 @@
+<% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
+  <h1><%= worker %>'s job</h1>
+
+  <table>
+    <tr>
+      <th> </th>
+      <th>Where</th>
+      <th>Queue</th>
+      <th>Started</th>
+      <th>Class</th>
+      <th>Args</th>
+    </tr>
+    <tr>
+      <td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
+      <% host, pid, _ = worker.to_s.split(':') %>
+      <td><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+      <% data = worker.job %>
+      <% queue = data['queue'] %>
+      <td><a class="queue" href="<%=u "/queues/#{queue}" %>"><%= queue %></a></td>
+      <td><span class="time"><%= data['run_at'] %></span></td>
+      <td>
+        <code><%= data['payload']['class'] %></code>
+      </td>
+      <td><%=h data['payload']['args'].inspect %></td>
+    </tr>
+  </table>
+
+<% else %>
+
+  <%
+    workers = resque.working
+    jobs = workers.collect {|w| w.job }
+    worker_jobs = workers.zip(jobs)
+    worker_jobs = worker_jobs.reject { |w, j| w.idle? }
+  %>
+
+  <h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
+  <p class='intro'>The list below contains all workers which are currently running a job.</p>
+  <table class='workers'>
+    <tr>
+      <th> </th>
+      <th>Where</th>
+      <th>Queue</th>
+      <th>Processing</th>
+    </tr>
+    <% if worker_jobs.empty? %>
+      <tr>
+        <td colspan="4" class='no-data'>Nothing is happening right now...</td>
+      </tr>
+    <% end %>
+
+    <% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
+      <tr>
+        <td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
+        <% host, pid, queues = worker.to_s.split(':') %>
+        <td class='where'><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+        <td class='queues queue'>
+          <a class="queue-tag" href="<%=u "/queues/#{job['queue']}" %>"><%= job['queue'] %></a>
+        </td>
+        <td class='process'>
+          <% if job['queue']
+               job_stats = begin
+                 j = job
+                 args_hash = j['payload']['args'][1]
+                 args_array = args_hash.map{|k,v| "#{k} : #{v}" }.join("</code><br><code>") if args_hash.class==Hash
+                 args = [args_array].compact.join("")
+                 path = j['payload']['args'].first
+                 [path,args].join("</code><br><code>")
+               rescue => exc
+                 [exc.to_s,exc.backtrace.join("<br>")].join("<br>")
+               end
+          %>
+            <%=job_stats%>
+            </code>
+            <br>
+            <small><a class="queue time" href="<%=u "/working/#{worker}" %>"><%= job['run_at'] %></a></small>
+          <% else %>
+            <span class='waiting'>Waiting for a job...</span>
+          <% end %>
+        </td>
+      </tr>
+    <% end %>
+  </table>
+
+<% end %>

data/lib/mobilize-base/extensions/string.rb
CHANGED
@@ -11,11 +11,19 @@ class String
   def opp
     pp self
   end
+  def to_md5
+    Digest::MD5.hexdigest(self)
+  end
   def bash(except=true)
-
-
-
-
+    str = self
+    out_str,err_str = []
+    status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
+      out_str = stdout.read
+      err_str = stderr.read
+    end
+    exit_status = status.exitstatus
+    raise err_str if (exit_status !=0 and except==true)
+    return out_str
   end
   def escape_regex
     str = self
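
A hedged usage sketch of the new String helpers; it assumes the extensions above are loaded via the gem and that the open4 gem is available for #bash. The commands and the mobilize-base require are assumptions for illustration:

``` ruby
require 'digest/md5'
require 'open4'          # gem used by the bash helper
require 'mobilize-base'  # assumption: loads the String extensions shown above

# to_md5 fingerprints a string (used later to name Gridfs temp files)
puts "some cache key".to_md5          # => 32-character hex digest

# bash shells out, returns stdout, and raises stderr on a nonzero exit status
puts "echo hello".bash                # => "hello\n"
puts "ls /no/such/dir".bash(false)    # except=false: no raise, returns stdout (empty here)
```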

data/lib/mobilize-base/extensions/yaml.rb
CHANGED
@@ -10,13 +10,16 @@ module YAML
     #make sure urls have their colon spaces fixed
     result_hash={}
     easy_hash.each do |k,v|
-
-
-
-
-
-
-
+      #fucking yaml puts spaces in front of the key
+      #or something
+      strip_k = k.strip
+      result_hash[strip_k] = if v.class==String
+                               v.gsub(": //","://")
+                             elsif v.class==Array
+                               v.map{|av| av.to_s.gsub(": //","://")}
+                             else
+                               v
+                             end
     end
     return result_hash
   end
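
A small sketch of what this cleanup guards against: keys can arrive with stray leading whitespace and URL values can pick up a space after the scheme colon. Pure-Ruby illustration with made-up keys and values:

``` ruby
# Hypothetical raw hash as it might come back from a loosely formatted YAML block.
easy_hash = {" mongoid_url" => "mongodb: //localhost:27017",
             "admins"       => ["http: //example.com/a", "http: //example.com/b"]}

result_hash = {}
easy_hash.each do |k, v|
  strip_k = k.strip                                # drop the stray leading space on the key
  result_hash[strip_k] = if v.class == String
                           v.gsub(": //", "://")   # repair "scheme: //host" back to "scheme://host"
                         elsif v.class == Array
                           v.map { |av| av.to_s.gsub(": //", "://") }
                         else
                           v
                         end
end
puts result_hash.inspect
# => {"mongoid_url"=>"mongodb://localhost:27017", "admins"=>["http://example.com/a", "http://example.com/b"]}
```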

data/lib/mobilize-base/handlers/gbook.rb
CHANGED
@@ -14,57 +14,44 @@ module Mobilize
       dst = Dataset.find_by_handler_and_path('gbook',path)
       if dst and dst.http_url.to_s.length>0
         book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
-
-
-
-
-        book = nil
-      else
-        return book
-      end
-      rescue
-        #use regular process if book entry hash fails
-        book = nil
+        if book
+          return book
+        else
+          raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
         end
       end
+      #try to find books by title
       books = Gbook.find_all_by_path(path,gdrive_slot)
-
-      book
-
-
-      #
-
-
-
-        if bkey == dkey
-          book = b
-          dst.update_attributes(:http_url=>book.human_url)
-        else
-          #delete the invalid book
-          b.delete
-          ("Deleted duplicate book #{path}").oputs
-        end
-      end
-      else
-        #If it's a new dst or if there are multiple books
-        #take the first
-        book = books.first
-        dst.update_attributes(:http_url=>book.human_url) if book
+      #sort by publish date; if entry hash retrieval fails (as it does)
+      #assume the book was published now
+      book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
+      if book
+        #we know dataset will have blank url since it wasn't picked up above
+        dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+        api_url = book.human_url.split("&").first
+        dst.update_attributes(:http_url=>api_url)
       end
       return book
     end
+
     def Gbook.find_or_create_by_path(path,gdrive_slot)
       book = Gbook.find_by_path(path,gdrive_slot)
-      dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
       if book.nil?
         #always use owner email to make sure all books are owned by owner account
         book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
         ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
+        #check to make sure the dataset has a blank url; if not, error out
+        dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+        if dst.http_url.to_s.length>0
+          #add acls to book regardless
+          book.add_admin_acl
+          raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
+        else
+          api_url = book.human_url.split("&").first
+          dst.update_attributes(:http_url=>api_url)
+          book.add_admin_acl
+        end
       end
-      #always make sure book dataset http URL is up to date
-      #and that book has admin acl
-      dst.update_attributes(:http_url=>book.human_url)
-      book.add_admin_acl
       return book
     end
   end
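
Duplicate-book handling now keys off the spreadsheet's publish date instead of deleting extra books: the earliest published entry wins, and books whose entry hash cannot be read sort as if published now. A toy sketch of that selection with stub objects (no Google API calls):

``` ruby
require 'time'
require 'ostruct'

# Stub books standing in for GoogleDrive spreadsheets; entry_hash[:published] is the
# field the real code sorts on, and one stub raises the way entry_hash sometimes does.
older  = OpenStruct.new(:entry_hash => {:published => "2013-01-01T00:00:00.000Z"})
newer  = OpenStruct.new(:entry_hash => {:published => "2013-06-01T00:00:00.000Z"})
broken = Object.new
def broken.entry_hash; raise "entry hash retrieval failed"; end

books = [newer, broken, older]
book  = books.sort_by do |b|
  begin
    b.entry_hash[:published]
  rescue
    Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z") # failed lookups sort as "published now"
  end
end.first
puts book.equal?(older) # => true; the earliest published book wins
```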

data/lib/mobilize-base/handlers/gdrive.rb
CHANGED
@@ -37,6 +37,14 @@ module Mobilize
       end
     end
 
+    def Gdrive.worker_group_name
+      Gdrive.config['worker_group_name']
+    end
+
+    def Gdrive.admin_group_name
+      Gdrive.config['admin_group_name']
+    end
+
     def Gdrive.worker_emails
       Gdrive.workers.map{|w| [w['name'],Gdrive.domain].join("@")}
     end

data/lib/mobilize-base/handlers/gfile.rb
CHANGED
@@ -38,7 +38,8 @@ module Mobilize
       end
       #update http url for file
       dst = Dataset.find_by_handler_and_path("gfile",dst_path)
-
+      api_url = file.human_url.split("&").first
+      dst.update_attributes(:http_url=>api_url)
       true
     end
 
@@ -86,7 +87,8 @@ module Mobilize
       #always make sure dataset http URL is up to date
       #and that it has admin acl
       if file
-
+        api_url = file.human_url.split("&").first
+        dst.update_attributes(:http_url=>api_url)
         file.add_admin_acl
       end
       return file

data/lib/mobilize-base/handlers/gridfs.rb
CHANGED
@@ -1,43 +1,38 @@
+require 'tempfile'
 module Mobilize
   module Gridfs
     def Gridfs.config
       Base.config('gridfs')
     end
 
-    def Gridfs.
-
-
-
-      return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
+    def Gridfs.read_by_dataset_path(dst_path,*args)
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      zs = curr_file.data if curr_file
+      return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
     end
 
-    def Gridfs.
-      begin
-        zs=Gridfs.grid.open(dst_path,'r').read
-        return ::Zlib::Inflate.inflate(zs)
-      rescue
-        return nil
-      end
-    end
-
-    def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
+    def Gridfs.write_by_dataset_path(dst_path,string,*args)
       zs = ::Zlib::Deflate.deflate(string)
       raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-
-
+      #find and delete existing file
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      curr_zs = curr_file.data if curr_file
+      #overwrite when there is a change
       if curr_zs != zs
-
+        Mongoid::GridFs.delete(curr_file.id) if curr_file
+        #create temp file w zstring
+        temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
+        temp_file.print(zs)
+        temp_file.close
+        #put data in file
+        Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
      end
       return true
     end
 
     def Gridfs.delete(dst_path)
-
-
-      return true
-    rescue
-      return nil
-    end
+      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+      curr_file.delete
     end
   end
 end
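
The rewritten handler compresses with Zlib and stages the bytes through a Tempfile before handing them to Mongoid::GridFs. A hedged sketch of just the compress-and-stage portion using only the standard library; the Mongoid::GridFs.put call from the diff is assumed and left as a comment:

``` ruby
require 'zlib'
require 'tempfile'
require 'digest/md5'

string   = "col_a\tcol_b\n1\t2\n"
dst_path = "mobilize/example/cache"       # hypothetical dataset path

zs = Zlib::Deflate.deflate(string)
# the handler raises before writing if the compressed payload exceeds the configured limit
max_compressed_write_size = 1_000_000_000 # ~1GB, as in gridfs.yml
raise "compressed string too large for Gridfs write" if zs.length > max_compressed_write_size

# stage the compressed bytes in a uniquely named temp file, as write_by_dataset_path does
temp_file = Tempfile.new(Digest::MD5.hexdigest("#{string}#{Time.now.to_f}"))
temp_file.print(zs)
temp_file.close

# Mongoid::GridFs.put(temp_file.path, :filename => dst_path) would store it under dst_path;
# reading back is the inverse of the deflate step:
puts Zlib::Inflate.inflate(File.binread(temp_file.path)) == string # => true
```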

data/lib/mobilize-base/handlers/gsheet.rb
CHANGED
@@ -81,15 +81,16 @@ module Mobilize
 
     def Gsheet.write_temp(target_path,gdrive_slot,tsv)
       #find and delete temp sheet, if any
-
-
-
-
-      temp_sheet =
+      temp_book_title = target_path.gridsafe
+      #create book and sheet
+      temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
+      rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
+      temp_sheet = temp_book.add_worksheet("temp",rows,cols)
       #this step has a tendency to fail; if it does,
       #don't fail the stage, mark it as false
       begin
-
+        gdrive_user = gdrive_slot.split("@").first
+        temp_sheet.write(tsv,gdrive_user)
       rescue
         return nil
       end
@@ -132,9 +133,14 @@ module Mobilize
       raise "Need source for gsheet write" unless source
       tsv = source.read(u.name,gdrive_slot)
       raise "No data source found for #{source.url}" unless tsv
-
+      tsv_row_count = tsv.to_s.split("\n").length
+      tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
+      tsv_cell_count = tsv_row_count * tsv_col_count
+      stdout = if tsv_row_count == 0
         #soft error; no data to write. Stage will complete.
         "Write skipped for #{s.target.url}"
+      elsif tsv_cell_count > Gsheet.max_cells
+        raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
       else
         Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
         #update status
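
The new guard computes the sheet's cell count from the TSV before writing and compares it against max_cells (50k by default per the README). A pure-Ruby sketch of that arithmetic with a hypothetical limit and sample data:

``` ruby
max_cells = 50_000                                    # hypothetical, mirrors the gsheet.yml default
tsv = ([(1..26).to_a.join("\t")] * 1500).join("\n")   # 1500 rows x 26 cols of sample data

tsv_row_count  = tsv.to_s.split("\n").length
tsv_col_count  = tsv.to_s.split("\n").first.to_s.split("\t").length
tsv_cell_count = tsv_row_count * tsv_col_count

if tsv_row_count == 0
  puts "Write skipped"                                # soft error; the stage still completes
elsif tsv_cell_count > max_cells
  raise "Too many datapoints; you have #{tsv_cell_count}, max is #{max_cells}"
else
  puts "#{tsv_cell_count} cells is within the limit; proceed with the write"
end
```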

data/lib/mobilize-base/handlers/resque.rb
CHANGED
@@ -25,7 +25,7 @@ module Mobilize
       return idle_workers if state == 'idle'
       stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
       return stale_workers if state == 'stale'
-      timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['
+      timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
       return timeout_workers if state == 'timeout'
       raise "invalid state #{state}"
     end
@@ -113,8 +113,14 @@ module Mobilize
       stage_path = f['payload']['args'].first
       email = begin
         s = Stage.where(:path=>stage_path).first
-        s.
-
+        if s.params['notify'].to_s=="false"
+          next
+        elsif s.params['notify'].index("@")
+          s.params['notify']
+        else
+          s.job.runner.user.email
+        end
+      rescue ScriptError, StandardError
         #jobs without stages are sent to first admin
         Jobtracker.admin_emails.first
       end
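
The failure-notification change routes the alert based on the stage's notify param: false suppresses it, an email address overrides the recipient, and anything else falls back to the stage owner. A standalone sketch of that branch (method and addresses are hypothetical):

``` ruby
# Resolve who should be emailed about a failed stage, given its notify param.
# owner_email stands in for s.job.runner.user.email in the real handler.
def notification_email(notify_param, owner_email)
  if notify_param.to_s == "false"
    nil                          # notifications turned off for this stage
  elsif notify_param.to_s.index("@")
    notify_param                 # explicit recipient named in the stage params
  else
    owner_email                  # default: the stage owner
  end
end

puts notification_email("false", "owner@example.com").inspect       # => nil
puts notification_email("oncall@example.com", "owner@example.com")  # => oncall@example.com
puts notification_email(nil, "owner@example.com")                   # => owner@example.com
```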

data/lib/mobilize-base/helpers/job_helper.rb
ADDED
@@ -0,0 +1,54 @@
+#this module adds convenience methods to the Job model
+module Mobilize
+  module JobHelper
+    def name
+      j = self
+      j.path.split("/").last
+    end
+
+    def stages
+      j = self
+      #starts with the job path, followed by a slash
+      Stage.where(:path=>/^#{j.path.escape_regex}\//).to_a.sort_by{|s| s.path}
+    end
+
+    def status
+      #last stage status
+      j = self
+      j.active_stage.status if j.active_stage
+    end
+
+    def active_stage
+      j = self
+      #latest started at or first
+      j.stages.select{|s| s.started_at}.sort_by{|s| s.started_at}.last || j.stages.first
+    end
+
+    def completed_at
+      j = self
+      j.stages.last.completed_at if j.stages.last
+    end
+
+    def failed_at
+      j = self
+      j.active_stage.failed_at if j.active_stage
+    end
+
+    def status_at
+      j = self
+      j.active_stage.status_at if j.active_stage
+    end
+
+    #convenience methods
+    def runner
+      j = self
+      runner_path = j.path.split("/")[0..-2].join("/")
+      return Runner.where(:path=>runner_path).first
+    end
+
+    def is_working?
+      j = self
+      j.stages.select{|s| s.is_working?}.compact.length>0
+    end
+  end
+end
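
Taken together, these helpers let callers ask a Job about its stages without issuing Stage queries directly. A hedged usage sketch; the lookup path is hypothetical, but the methods come straight from the module above (assumed to be mixed into the Job model):

``` ruby
# Hypothetical lookup; the exact path format is an assumption for illustration.
job = Mobilize::Job.where(:path => "Requestor_mobilize(test)/base1").first

if job
  puts job.name           # last path segment, e.g. "base1"
  puts job.stages.length  # stages found via the /^job_path\// prefix match
  puts job.status.inspect # status of the active (latest started) stage
  puts job.runner.path    # parent runner, derived by dropping the last path segment
  puts job.is_working?    # true if any stage is currently working
end
```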