mobilize-base 1.29 → 1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -10
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -7
- data/lib/mobilize-base/extensions/resque-server/views/queues.erb +59 -0
- data/lib/mobilize-base/extensions/resque-server/views/working.erb +85 -0
- data/lib/mobilize-base/extensions/string.rb +12 -4
- data/lib/mobilize-base/extensions/yaml.rb +10 -7
- data/lib/mobilize-base/handlers/gbook.rb +25 -38
- data/lib/mobilize-base/handlers/gdrive.rb +8 -0
- data/lib/mobilize-base/handlers/gfile.rb +4 -2
- data/lib/mobilize-base/handlers/gridfs.rb +19 -24
- data/lib/mobilize-base/handlers/gsheet.rb +13 -7
- data/lib/mobilize-base/handlers/resque.rb +9 -3
- data/lib/mobilize-base/helpers/job_helper.rb +54 -0
- data/lib/mobilize-base/helpers/jobtracker_helper.rb +147 -0
- data/lib/mobilize-base/helpers/runner_helper.rb +83 -0
- data/lib/mobilize-base/helpers/stage_helper.rb +38 -0
- data/lib/mobilize-base/jobtracker.rb +16 -192
- data/lib/mobilize-base/models/job.rb +36 -48
- data/lib/mobilize-base/models/runner.rb +22 -122
- data/lib/mobilize-base/models/stage.rb +14 -35
- data/lib/mobilize-base/tasks.rb +16 -3
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +5 -1
- data/lib/samples/gdrive.yml +6 -0
- data/lib/samples/gridfs.yml +0 -3
- data/lib/samples/gsheet.yml +4 -4
- data/mobilize-base.gemspec +3 -3
- data/test/base_job_rows.yml +5 -1
- data/test/mobilize-base_test.rb +20 -61
- data/test/test_helper.rb +79 -0
- metadata +57 -25
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -162,9 +162,15 @@ the same domain, and all Users should have emails in this domain.
|
|
162
162
|
* an owner name and password. You can set up separate owners
|
163
163
|
for different environments as in the below file, which will keep your
|
164
164
|
mission critical workers from getting rate-limit errors.
|
165
|
+
* one admin_group_name, which the owner and all admins should be added to -- this
|
166
|
+
group will need read permissions to read from and edit permissions to write
|
167
|
+
to files.
|
165
168
|
* one or more admins with email attributes -- these will be for people
|
166
169
|
who should be given write permissions to all Mobilize books in the
|
167
170
|
environment for maintenance purposes.
|
171
|
+
* one worker_group_name, which the owner and all workers should be added to -- this
|
172
|
+
group will need read permissions to read from and edit permissions to write
|
173
|
+
to files.
|
168
174
|
* one or more workers with name and pw attributes -- they will be used
|
169
175
|
to queue up google reads and writes. This can be the same as the owner
|
170
176
|
account for testing purposes or low-volume environments.
|
@@ -182,8 +188,10 @@ development:
|
|
182
188
|
owner:
|
183
189
|
name: owner_development
|
184
190
|
pw: google_drive_password
|
191
|
+
admin_group_name: admins_development
|
185
192
|
admins:
|
186
193
|
- name: admin
|
194
|
+
worker_group_name: workers_development
|
187
195
|
workers:
|
188
196
|
- name: worker_development001
|
189
197
|
pw: worker001_google_drive_password
|
@@ -194,8 +202,10 @@ test:
|
|
194
202
|
owner:
|
195
203
|
name: owner_test
|
196
204
|
pw: google_drive_password
|
205
|
+
admin_group_name: admins_test
|
197
206
|
admins:
|
198
207
|
- name: admin
|
208
|
+
worker_group_name: workers_test
|
199
209
|
workers:
|
200
210
|
- name: worker_test001
|
201
211
|
pw: worker001_google_drive_password
|
@@ -206,8 +216,10 @@ production:
|
|
206
216
|
owner:
|
207
217
|
name: owner_production
|
208
218
|
pw: google_drive_password
|
219
|
+
admin_group_name: admins_production
|
209
220
|
admins:
|
210
221
|
- name: admin
|
222
|
+
worker_group_name: workers_production
|
211
223
|
workers:
|
212
224
|
- name: worker_production001
|
213
225
|
pw: worker001_google_drive_password
|
@@ -220,9 +232,8 @@ production:
|
|
220
232
|
|
221
233
|
gsheet.yml needs:
|
222
234
|
* max_cells, which is the number of cells a sheet is allowed to have
|
223
|
-
written to it at one time. Default is
|
224
|
-
|
225
|
-
you try to write more than that.
|
235
|
+
written to it at one time. Default is 50k cells, which is about how
|
236
|
+
much you can write before things start breaking.
|
226
237
|
* Because Google Docs ties date formatting to the Locale for the
|
227
238
|
spreadsheet, there are 2 date format parameters:
|
228
239
|
* read_date_format, which is the format that should be read FROM google
|
@@ -356,22 +367,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
|
|
356
367
|
Mobilize stores cached data in MongoDB Gridfs.
|
357
368
|
It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
|
358
369
|
|
359
|
-
* max_versions - the number of __different__ versions of data to keep
|
360
|
-
for a given cache. Default is 10. This is meant mostly to allow you to
|
361
|
-
restore Runners from cache if necessary.
|
362
370
|
* max_compressed_write_size - the amount of compressed data Gridfs will
|
363
371
|
allow. If you try to write more than this, an exception will be thrown.
|
364
372
|
|
365
373
|
``` yml
|
366
374
|
---
|
367
375
|
development:
|
368
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
369
376
|
max_compressed_write_size: 1000000000 #~1GB
|
370
377
|
test:
|
371
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
372
378
|
max_compressed_write_size: 1000000000 #~1GB
|
373
379
|
production:
|
374
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
375
380
|
max_compressed_write_size: 1000000000 #~1GB
|
376
381
|
```
|
377
382
|
|
@@ -567,7 +572,11 @@ from the referenced sheet.
|
|
567
572
|
* All stages accept retry parameters:
|
568
573
|
* retries: an integer specifying the number of times that the system will try it again before giving up.
|
569
574
|
* delay: an integer specifying the number of seconds between retries.
|
570
|
-
* always_on: if
|
575
|
+
* always_on: if false, turns the job off on stage failures.
|
576
|
+
Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
|
577
|
+
* notify: by default, the stage owner will be notified on failure.
|
578
|
+
* if false, will not notify the stage owner in the event of a failure.
|
579
|
+
* If it's an email address, will email the specified person.
|
571
580
|
* If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
|
572
581
|
* The tab will be headed "response" and will contain the exception and backtrace for the error.
|
573
582
|
* The test uses "Requestor_mobilize(test)/base1.out" and
|
@@ -13,7 +13,7 @@ module GoogleDrive
|
|
13
13
|
f = self
|
14
14
|
#admin includes workers
|
15
15
|
return true if f.has_admin_acl?
|
16
|
-
accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
16
|
+
accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
|
17
17
|
accounts.each do |email|
|
18
18
|
f.update_acl(email)
|
19
19
|
end
|
@@ -21,9 +21,9 @@ module GoogleDrive
|
|
21
21
|
|
22
22
|
def has_admin_acl?
|
23
23
|
f = self
|
24
|
-
curr_emails = f.acls.map{|a| a.scope}.sort
|
25
|
-
admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
26
|
-
if (curr_emails & admin_emails) == admin_emails
|
24
|
+
curr_emails = f.acls.map{|a| a.scope}.compact.sort
|
25
|
+
admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
|
26
|
+
if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
|
27
27
|
return true
|
28
28
|
else
|
29
29
|
return false
|
@@ -32,9 +32,9 @@ module GoogleDrive
|
|
32
32
|
|
33
33
|
def has_worker_acl?
|
34
34
|
f = self
|
35
|
-
curr_emails = f.acls.map{|a| a.scope}.sort
|
35
|
+
curr_emails = f.acls.map{|a| a.scope}.compact.sort
|
36
36
|
worker_emails = Mobilize::Gdrive.worker_emails.sort
|
37
|
-
if (curr_emails & worker_emails) == worker_emails
|
37
|
+
if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
|
38
38
|
return true
|
39
39
|
else
|
40
40
|
return false
|
@@ -84,7 +84,7 @@ module GoogleDrive
|
|
84
84
|
end
|
85
85
|
# Returns the first ACL entry of this file whose scope type is 'group' or
# 'user' and whose scope equals +email+; returns nil when no entry matches.
def acl_entry(email)
  # entries without a scope are never a match, even for a nil email
  acls.find do |acl|
    ['group','user'].include?(acl.scope_type) && acl.scope && acl.scope == email
  end
end
|
89
89
|
def entry_hash
|
90
90
|
f = self
|
@@ -0,0 +1,59 @@
|
|
1
|
+
<%# Resque-web "Queues" tab. When params[:id] is present it shows up to 20 pending jobs of that queue, starting at params[:start]; otherwise it lists every non-empty queue followed by the failure queue(s). %>
<%# Queue names and job classes go through the h helper before being written into the page: params[:id] is request input and must not reach the HTML unescaped. %>
<% @subtabs = resque.queues unless partial? || params[:id].nil? %>

<% if queue = params[:id] %>

  <h1>Pending jobs on <span class='hl'><%=h queue %></span></h1>
  <form method="POST" action="<%=h u("/queues/#{queue}/remove") %>" class='remove-queue'>
    <input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
  </form>
  <p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
  <table class='jobs'>
    <tr>
      <th>Class</th>
      <th>Args</th>
    </tr>
    <% jobs = resque.peek(queue, start, 20) %>
    <% jobs.each do |job| %>
    <tr>
      <td class='class'><%=h job['class'] %></td>
      <td class='args'><%=h job['args'].inspect %></td>
    </tr>
    <% end %>
    <% if jobs.empty? %>
    <tr>
      <td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
    </tr>
    <% end %>
  </table>
  <%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
<% else %>

  <h1 class='wi'>Queues</h1>
  <p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
  <table class='queues'>
    <tr>
      <th>Name</th>
      <th>Jobs</th>
    </tr>
    <!-- only show nonzero length queues-->
    <% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
    <tr>
      <td class='queue'><a class="queue" href="<%=h u("queues/#{queue}") %>"><%=h queue %></a></td>
      <td class='size'><%= resque.size queue %></td>
    </tr>
    <% end %>
    <% if failed_multiple_queues? %>
      <% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
      <tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
        <td class='queue failed'><a class="queue" href="<%=h u("failed/#{queue}") %>"><%=h queue %></a></td>
        <td class='size'><%= Resque::Failure.count(queue) %></td>
      </tr>
      <% end %>
    <% else %>
    <tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
      <td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
      <td class='size'><%= Resque::Failure.count %></td>
    </tr>
    <% end %>
  </table>

<% end %>
|
@@ -0,0 +1,85 @@
|
|
1
|
+
<%# Resque-web "Working" tab. With params[:id] naming a worker that currently has a job it shows that single job; otherwise it lists every non-idle worker with the stage path and parameters of the job it is processing. %>
<%# Values taken from job payloads, worker ids and exceptions are escaped with the h helper before being written into the page. %>
<% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
  <h1><%=h worker.to_s %>'s job</h1>

  <table>
    <tr>
      <th> </th>
      <th>Where</th>
      <th>Queue</th>
      <th>Started</th>
      <th>Class</th>
      <th>Args</th>
    </tr>
    <tr>
      <td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
      <% host, pid, _ = worker.to_s.split(':') %>
      <td><a href="<%=h u("/workers/#{worker}") %>"><%=h host %>:<%=h pid %></a></td>
      <% data = worker.job %>
      <% queue = data['queue'] %>
      <td><a class="queue" href="<%=h u("/queues/#{queue}") %>"><%=h queue %></a></td>
      <td><span class="time"><%= data['run_at'] %></span></td>
      <td>
        <code><%=h data['payload']['class'] %></code>
      </td>
      <td><%=h data['payload']['args'].inspect %></td>
    </tr>
  </table>

<% else %>

  <%
    workers = resque.working
    jobs = workers.collect {|w| w.job }
    worker_jobs = workers.zip(jobs)
    worker_jobs = worker_jobs.reject { |w, j| w.idle? }
  %>

  <h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
  <p class='intro'>The list below contains all workers which are currently running a job.</p>
  <table class='workers'>
    <tr>
      <th> </th>
      <th>Where</th>
      <th>Queue</th>
      <th>Processing</th>
    </tr>
    <% if worker_jobs.empty? %>
    <tr>
      <td colspan="4" class='no-data'>Nothing is happening right now...</td>
    </tr>
    <% end %>

    <% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
    <tr>
      <td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
      <% host, pid, _ = worker.to_s.split(':') %>
      <td class='where'><a href="<%=h u("/workers/#{worker}") %>"><%=h host %>:<%=h pid %></a></td>
      <td class='queues queue'>
        <a class="queue-tag" href="<%=h u("/queues/#{job['queue']}") %>"><%=h job['queue'] %></a>
      </td>
      <td class='process'>
        <% if job['queue'] %>
          <%
            # One <code> element per line: the first payload argument, then one
            # "key : value" line per entry when the second argument is a Hash.
            # Each line is escaped before the markup separators are inserted.
            job_stats = begin
              payload_args = job['payload']['args']
              stage_params = payload_args[1]
              stat_lines = [payload_args.first]
              stat_lines.concat(stage_params.map{|k,v| "#{k} : #{v}" }) if stage_params.is_a?(Hash)
              stat_lines.map{|line| h(line.to_s) }.join("</code><br><code>")
            rescue => exc
              # show the error in place of the job details rather than failing the page
              ([exc.to_s] + exc.backtrace.to_a).map{|line| h(line.to_s) }.join("<br>")
            end
          %>
          <code><%=job_stats%></code>
          <br>
          <small><a class="queue time" href="<%=h u("/working/#{worker}") %>"><%= job['run_at'] %></a></small>
        <% else %>
          <span class='waiting'>Waiting for a job...</span>
        <% end %>
      </td>
    </tr>
    <% end %>
  </table>

<% end %>
|
@@ -11,11 +11,19 @@ class String
|
|
11
11
|
# Pretty-prints this string with Kernel#pp.
def opp
  pp(self)
end
|
14
|
+
# Hex-encoded MD5 digest of this string's bytes.
def to_md5
  md5 = Digest::MD5.new
  md5.update(self)
  md5.hexdigest
end
|
14
17
|
# Runs this string as a shell command through Open4.popen4 and returns
# everything the command wrote to stdout.
#
# except - when exactly +true+ (the default) and the command exits non-zero,
#          raises a RuntimeError whose message is the command's stderr output.
def bash(except=true)
  out_str = err_str = nil
  status = Open4.popen4(self) do |_pid,stdin,stdout,stderr|
    # nothing is ever written to the child, so signal EOF right away
    stdin.close
    # drain stderr concurrently: reading stdout to EOF first would block
    # forever if the child fills the stderr pipe buffer in the meantime
    err_reader = Thread.new { stderr.read }
    out_str = stdout.read
    err_str = err_reader.value
  end
  raise err_str if status.exitstatus != 0 && except == true
  return out_str
end
|
20
28
|
def escape_regex
|
21
29
|
str = self
|
@@ -10,13 +10,16 @@ module YAML
|
|
10
10
|
#make sure urls have their colon spaces fixed
|
11
11
|
result_hash={}
|
12
12
|
easy_hash.each do |k,v|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
#fucking yaml puts spaces in front of the key
|
14
|
+
#or something
|
15
|
+
strip_k = k.strip
|
16
|
+
result_hash[strip_k] = if v.class==String
|
17
|
+
v.gsub(": //","://")
|
18
|
+
elsif v.class==Array
|
19
|
+
v.map{|av| av.to_s.gsub(": //","://")}
|
20
|
+
else
|
21
|
+
v
|
22
|
+
end
|
20
23
|
end
|
21
24
|
return result_hash
|
22
25
|
end
|
@@ -14,57 +14,44 @@ module Mobilize
|
|
14
14
|
dst = Dataset.find_by_handler_and_path('gbook',path)
|
15
15
|
if dst and dst.http_url.to_s.length>0
|
16
16
|
book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
book = nil
|
22
|
-
else
|
23
|
-
return book
|
24
|
-
end
|
25
|
-
rescue
|
26
|
-
#use regular process if book entry hash fails
|
27
|
-
book = nil
|
17
|
+
if book
|
18
|
+
return book
|
19
|
+
else
|
20
|
+
raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
|
28
21
|
end
|
29
22
|
end
|
23
|
+
#try to find books by title
|
30
24
|
books = Gbook.find_all_by_path(path,gdrive_slot)
|
31
|
-
|
32
|
-
book
|
33
|
-
|
34
|
-
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
if bkey == dkey
|
40
|
-
book = b
|
41
|
-
dst.update_attributes(:http_url=>book.human_url)
|
42
|
-
else
|
43
|
-
#delete the invalid book
|
44
|
-
b.delete
|
45
|
-
("Deleted duplicate book #{path}").oputs
|
46
|
-
end
|
47
|
-
end
|
48
|
-
else
|
49
|
-
#If it's a new dst or if there are multiple books
|
50
|
-
#take the first
|
51
|
-
book = books.first
|
52
|
-
dst.update_attributes(:http_url=>book.human_url) if book
|
25
|
+
#sort by publish date; if entry hash retrieval fails (as it does)
|
26
|
+
#assume the book was published now
|
27
|
+
book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
|
28
|
+
if book
|
29
|
+
#we know dataset will have blank url since it wasn't picked up above
|
30
|
+
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
31
|
+
api_url = book.human_url.split("&").first
|
32
|
+
dst.update_attributes(:http_url=>api_url)
|
53
33
|
end
|
54
34
|
return book
|
55
35
|
end
|
36
|
+
|
56
37
|
# Returns the book Gbook.find_by_path locates for +path+. When there is none,
# creates a spreadsheet named +path+ under the owner account, gives it the
# admin acl and records its url on the 'gbook' dataset for +path+.
# Raises if that dataset already carries a url.
def Gbook.find_or_create_by_path(path,gdrive_slot)
  found_book = Gbook.find_by_path(path,gdrive_slot)
  return found_book unless found_book.nil?
  #always use owner email to make sure all books are owned by owner account
  new_book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{new_book.human_url}").oputs
  #the dataset must have a blank url at this point; if not, error out
  dataset = Dataset.find_or_create_by_handler_and_path('gbook',path)
  unless dataset.http_url.to_s.empty?
    #add acls to book regardless
    new_book.add_admin_acl
    raise "Book #{path} is already assigned to #{dataset.http_url}; please update the record with #{new_book.human_url}"
  end
  #store the url without anything after the first "&"
  dataset.update_attributes(:http_url=>new_book.human_url.split("&").first)
  new_book.add_admin_acl
  return new_book
end
|
70
57
|
end
|
@@ -37,6 +37,14 @@ module Mobilize
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
+
# Value of the 'worker_group_name' key in the gdrive config.
def Gdrive.worker_group_name
  gdrive_config = Gdrive.config
  gdrive_config['worker_group_name']
end
|
43
|
+
|
44
|
+
# Value of the 'admin_group_name' key in the gdrive config.
def Gdrive.admin_group_name
  gdrive_config = Gdrive.config
  gdrive_config['admin_group_name']
end
|
47
|
+
|
40
48
|
# Email address of every configured worker: the worker's 'name' joined to
# Gdrive.domain with "@".
def Gdrive.worker_emails
  Gdrive.workers.map do |worker|
    name_and_domain = [worker['name'],Gdrive.domain]
    name_and_domain.join("@")
  end
end
|
@@ -38,7 +38,8 @@ module Mobilize
|
|
38
38
|
end
|
39
39
|
#update http url for file
|
40
40
|
dst = Dataset.find_by_handler_and_path("gfile",dst_path)
|
41
|
-
|
41
|
+
api_url = file.human_url.split("&").first
|
42
|
+
dst.update_attributes(:http_url=>api_url)
|
42
43
|
true
|
43
44
|
end
|
44
45
|
|
@@ -86,7 +87,8 @@ module Mobilize
|
|
86
87
|
#always make sure dataset http URL is up to date
|
87
88
|
#and that it has admin acl
|
88
89
|
if file
|
89
|
-
|
90
|
+
api_url = file.human_url.split("&").first
|
91
|
+
dst.update_attributes(:http_url=>api_url)
|
90
92
|
file.add_admin_acl
|
91
93
|
end
|
92
94
|
return file
|
@@ -1,43 +1,38 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
module Mobilize
  # Reads and writes zlib-compressed strings as GridFS files through
  # Mongoid::GridFs, using the dataset path as the GridFS filename.
  module Gridfs
    # Returns the 'gridfs' section of the Mobilize configuration.
    def Gridfs.config
      Base.config('gridfs')
    end

    # Returns the inflated contents of the file stored under +dst_path+,
    # or nil when there is no such file or its data is empty.
    def Gridfs.read_by_dataset_path(dst_path,*args)
      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
      zs = curr_file.data if curr_file
      return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
    end

    # Deflates +string+ and stores it under +dst_path+, replacing any existing
    # file whose compressed data differs. Returns true.
    # Raises when the compressed size exceeds the configured
    # 'max_compressed_write_size'.
    def Gridfs.write_by_dataset_path(dst_path,string,*args)
      zs = ::Zlib::Deflate.deflate(string)
      raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
      #find existing file
      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
      curr_zs = curr_file.data if curr_file
      #overwrite when there is a change
      if curr_zs != zs
        Mongoid::GridFs.delete(curr_file.id) if curr_file
        #Tempfile already generates a unique name, so a fixed prefix is enough
        temp_file = ::Tempfile.new('mobilize_gridfs')
        begin
          #deflated data is binary; avoid any newline translation
          temp_file.binmode
          temp_file.write(zs)
          temp_file.close
          #put data in file
          Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
        ensure
          #close and remove the temp file even when the put fails
          temp_file.close!
        end
      end
      return true
    end

    # Deletes the file stored under +dst_path+. Does nothing and returns nil
    # when no such file exists.
    def Gridfs.delete(dst_path)
      curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
      curr_file.delete if curr_file
    end
  end
end
|
@@ -81,15 +81,16 @@ module Mobilize
|
|
81
81
|
|
82
82
|
def Gsheet.write_temp(target_path,gdrive_slot,tsv)
|
83
83
|
#find and delete temp sheet, if any
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
temp_sheet =
|
84
|
+
temp_book_title = target_path.gridsafe
|
85
|
+
#create book and sheet
|
86
|
+
temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
|
87
|
+
rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
|
88
|
+
temp_sheet = temp_book.add_worksheet("temp",rows,cols)
|
89
89
|
#this step has a tendency to fail; if it does,
|
90
90
|
#don't fail the stage, mark it as false
|
91
91
|
begin
|
92
|
-
|
92
|
+
gdrive_user = gdrive_slot.split("@").first
|
93
|
+
temp_sheet.write(tsv,gdrive_user)
|
93
94
|
rescue
|
94
95
|
return nil
|
95
96
|
end
|
@@ -132,9 +133,14 @@ module Mobilize
|
|
132
133
|
raise "Need source for gsheet write" unless source
|
133
134
|
tsv = source.read(u.name,gdrive_slot)
|
134
135
|
raise "No data source found for #{source.url}" unless tsv
|
135
|
-
|
136
|
+
tsv_row_count = tsv.to_s.split("\n").length
|
137
|
+
tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
|
138
|
+
tsv_cell_count = tsv_row_count * tsv_col_count
|
139
|
+
stdout = if tsv_row_count == 0
|
136
140
|
#soft error; no data to write. Stage will complete.
|
137
141
|
"Write skipped for #{s.target.url}"
|
142
|
+
elsif tsv_cell_count > Gsheet.max_cells
|
143
|
+
raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
|
138
144
|
else
|
139
145
|
Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
|
140
146
|
#update status
|
@@ -25,7 +25,7 @@ module Mobilize
|
|
25
25
|
return idle_workers if state == 'idle'
|
26
26
|
stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
|
27
27
|
return stale_workers if state == 'stale'
|
28
|
-
timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['
|
28
|
+
timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
|
29
29
|
return timeout_workers if state == 'timeout'
|
30
30
|
raise "invalid state #{state}"
|
31
31
|
end
|
@@ -113,8 +113,14 @@ module Mobilize
|
|
113
113
|
stage_path = f['payload']['args'].first
|
114
114
|
email = begin
|
115
115
|
s = Stage.where(:path=>stage_path).first
|
116
|
-
s.
|
117
|
-
|
116
|
+
if s.params['notify'].to_s=="false"
|
117
|
+
next
|
118
|
+
elsif s.params['notify'].index("@")
|
119
|
+
s.params['notify']
|
120
|
+
else
|
121
|
+
s.job.runner.user.email
|
122
|
+
end
|
123
|
+
rescue ScriptError, StandardError
|
118
124
|
#jobs without stages are sent to first admin
|
119
125
|
Jobtracker.admin_emails.first
|
120
126
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#this module adds convenience methods to the Job model
|
2
|
+
module Mobilize
  # Convenience methods for the Job model. Every method works from the job's
  # +path+; stages and the runner are looked up by path.
  module JobHelper
    # Last "/"-separated segment of the job path.
    def name
      path.split("/").last
    end

    # All stages whose path starts with the job path followed by a slash,
    # sorted by path.
    def stages
      Stage.where(:path=>/^#{path.escape_regex}\//).to_a.sort_by{|s| s.path}
    end

    # Status of the active stage; nil when the job has no stages.
    def status
      stage = active_stage
      stage.status if stage
    end

    # The stage with the latest started_at, or the first stage when none has
    # started. Stages are fetched once and reused for both cases.
    def active_stage
      job_stages = stages
      job_stages.select{|s| s.started_at}.sort_by{|s| s.started_at}.last || job_stages.first
    end

    # completed_at of the last stage; nil when the job has no stages.
    def completed_at
      last_stage = stages.last
      last_stage.completed_at if last_stage
    end

    # failed_at of the active stage; nil when the job has no stages.
    def failed_at
      stage = active_stage
      stage.failed_at if stage
    end

    # status_at of the active stage; nil when the job has no stages.
    def status_at
      stage = active_stage
      stage.status_at if stage
    end

    # The Runner whose path is the job path minus its last segment.
    def runner
      runner_path = path.split("/")[0..-2].join("/")
      return Runner.where(:path=>runner_path).first
    end

    # True when at least one stage reports is_working?.
    def is_working?
      stages.any?{|s| s.is_working?}
    end
  end
end
|