mobilize-base 1.2 → 1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +10 -11
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -7
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +7 -2
- data/lib/mobilize-base/extensions/resque-server/views/queues.erb +59 -0
- data/lib/mobilize-base/extensions/resque-server/views/working.erb +85 -0
- data/lib/mobilize-base/extensions/string.rb +12 -4
- data/lib/mobilize-base/extensions/yaml.rb +11 -7
- data/lib/mobilize-base/handlers/gbook.rb +24 -31
- data/lib/mobilize-base/handlers/gfile.rb +5 -3
- data/lib/mobilize-base/handlers/gridfs.rb +19 -24
- data/lib/mobilize-base/handlers/gsheet.rb +25 -20
- data/lib/mobilize-base/handlers/resque.rb +16 -4
- data/lib/mobilize-base/helpers/job_helper.rb +54 -0
- data/lib/mobilize-base/helpers/runner_helper.rb +83 -0
- data/lib/mobilize-base/helpers/stage_helper.rb +38 -0
- data/lib/mobilize-base/jobtracker.rb +13 -5
- data/lib/mobilize-base/models/job.rb +36 -48
- data/lib/mobilize-base/models/runner.rb +24 -123
- data/lib/mobilize-base/models/stage.rb +14 -43
- data/lib/mobilize-base/tasks.rb +16 -3
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +5 -1
- data/lib/samples/gridfs.yml +0 -3
- data/lib/samples/gsheet.yml +4 -4
- data/mobilize-base.gemspec +4 -5
- data/test/mobilize-base_test.rb +1 -0
- metadata +21 -32
data/README.md
CHANGED
@@ -220,9 +220,8 @@ production:
|
|
220
220
|
|
221
221
|
gsheet.yml needs:
|
222
222
|
* max_cells, which is the number of cells a sheet is allowed to have
|
223
|
-
written to it at one time. Default is
|
224
|
-
|
225
|
-
you try to write more than that.
|
223
|
+
written to it at one time. Default is 50k cells, which is about how
|
224
|
+
much you can write before things start breaking.
|
226
225
|
* Because Google Docs ties date formatting to the Locale for the
|
227
226
|
spreadsheet, there are 2 date format parameters:
|
228
227
|
* read_date_format, which is the format that should be read FROM google
|
@@ -356,22 +355,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
|
|
356
355
|
Mobilize stores cached data in MongoDB Gridfs.
|
357
356
|
It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
|
358
357
|
|
359
|
-
* max_versions - the number of __different__ versions of data to keep
|
360
|
-
for a given cache. Default is 10. This is meant mostly to allow you to
|
361
|
-
restore Runners from cache if necessary.
|
362
358
|
* max_compressed_write_size - the amount of compressed data Gridfs will
|
363
359
|
allow. If you try to write more than this, an exception will be thrown.
|
364
360
|
|
365
361
|
``` yml
|
366
362
|
---
|
367
363
|
development:
|
368
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
369
364
|
max_compressed_write_size: 1000000000 #~1GB
|
370
365
|
test:
|
371
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
372
366
|
max_compressed_write_size: 1000000000 #~1GB
|
373
367
|
production:
|
374
|
-
max_versions: 10 #number of versions of cache to keep in gridfs
|
375
368
|
max_compressed_write_size: 1000000000 #~1GB
|
376
369
|
```
|
377
370
|
|
@@ -564,8 +557,14 @@ the Runner itself.
|
|
564
557
|
and "base1.out" for the second test. The first
|
565
558
|
takes the output from the first stage and the second reads it straight
|
566
559
|
from the referenced sheet.
|
567
|
-
* All stages accept
|
568
|
-
giving up.
|
560
|
+
* All stages accept retry parameters:
|
561
|
+
* retries: an integer specifying the number of times the system will retry the stage before giving up.
|
562
|
+
* delay: an integer specifying the number of seconds between retries.
|
563
|
+
* always_on: if false, turns the job off on stage failures.
|
564
|
+
Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
|
565
|
+
* notify: by default, the stage owner will be notified on failure.
|
566
|
+
* If false, the stage owner will not be notified in the event of a failure.
|
567
|
+
* If it's an email address, the failure notification will be sent to that address.
|
569
568
|
* If a stage fails after all retries, it will output its standard error to a tab in the Runner named with the name of the job, the name of the stage, and a ".err" extension.
|
570
569
|
* The tab will be headed "response" and will contain the exception and backtrace for the error.
|
571
570
|
* The test uses "Requestor_mobilize(test)/base1.out" and
|
@@ -13,7 +13,7 @@ module GoogleDrive
|
|
13
13
|
f = self
|
14
14
|
#admin includes workers
|
15
15
|
return true if f.has_admin_acl?
|
16
|
-
accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
16
|
+
accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
|
17
17
|
accounts.each do |email|
|
18
18
|
f.update_acl(email)
|
19
19
|
end
|
@@ -21,9 +21,9 @@ module GoogleDrive
|
|
21
21
|
|
22
22
|
def has_admin_acl?
|
23
23
|
f = self
|
24
|
-
curr_emails = f.acls.map{|a| a.scope}.sort
|
25
|
-
admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
|
26
|
-
if (curr_emails & admin_emails) == admin_emails
|
24
|
+
curr_emails = f.acls.map{|a| a.scope}.compact.sort
|
25
|
+
admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
|
26
|
+
if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
|
27
27
|
return true
|
28
28
|
else
|
29
29
|
return false
|
@@ -32,9 +32,9 @@ module GoogleDrive
|
|
32
32
|
|
33
33
|
def has_worker_acl?
|
34
34
|
f = self
|
35
|
-
curr_emails = f.acls.map{|a| a.scope}.sort
|
35
|
+
curr_emails = f.acls.map{|a| a.scope}.compact.sort
|
36
36
|
worker_emails = Mobilize::Gdrive.worker_emails.sort
|
37
|
-
if (curr_emails & worker_emails) == worker_emails
|
37
|
+
if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
|
38
38
|
return true
|
39
39
|
else
|
40
40
|
return false
|
@@ -84,7 +84,7 @@ module GoogleDrive
|
|
84
84
|
end
|
85
85
|
def acl_entry(email)
|
86
86
|
f = self
|
87
|
-
f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
|
87
|
+
f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
|
88
88
|
end
|
89
89
|
def entry_hash
|
90
90
|
f = self
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module GoogleDrive
|
2
2
|
class Worksheet
|
3
|
-
def to_tsv
|
3
|
+
def to_tsv(gsub_line_breaks="\n")
|
4
4
|
sheet = self
|
5
5
|
rows = sheet.rows
|
6
6
|
header = rows.first
|
@@ -8,7 +8,12 @@ module GoogleDrive
|
|
8
8
|
#look for blank cols to indicate end of row
|
9
9
|
col_last_i = (header.index("") || header.length)-1
|
10
10
|
#ignore user-entered line breaks for purposes of tsv reads
|
11
|
-
out_tsv = rows.map
|
11
|
+
out_tsv = rows.map do |r|
|
12
|
+
row = r[0..col_last_i].join("\t")
|
13
|
+
row.gsub!("\n",gsub_line_breaks)
|
14
|
+
row = row + "\n"
|
15
|
+
row
|
16
|
+
end.join + "\n"
|
12
17
|
out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
|
13
18
|
Mobilize::Gsheet.config['read_date_format'])
|
14
19
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
<% @subtabs = resque.queues unless partial? || params[:id].nil? %>
|
2
|
+
|
3
|
+
<% if queue = params[:id] %>
|
4
|
+
|
5
|
+
<h1>Pending jobs on <span class='hl'><%= queue %></span></h1>
|
6
|
+
<form method="POST" action="<%=u "/queues/#{queue}/remove" %>" class='remove-queue'>
|
7
|
+
<input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
|
8
|
+
</form>
|
9
|
+
<p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
|
10
|
+
<table class='jobs'>
|
11
|
+
<tr>
|
12
|
+
<th>Class</th>
|
13
|
+
<th>Args</th>
|
14
|
+
</tr>
|
15
|
+
<% for job in (jobs = resque.peek(queue, start, 20)) %>
|
16
|
+
<tr>
|
17
|
+
<td class='class'><%= job['class'] %></td>
|
18
|
+
<td class='args'><%=h job['args'].inspect %></td>
|
19
|
+
</tr>
|
20
|
+
<% end %>
|
21
|
+
<% if jobs.empty? %>
|
22
|
+
<tr>
|
23
|
+
<td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
|
24
|
+
</tr>
|
25
|
+
<% end %>
|
26
|
+
</table>
|
27
|
+
<%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
|
28
|
+
<% else %>
|
29
|
+
|
30
|
+
<h1 class='wi'>Queues</h1>
|
31
|
+
<p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
|
32
|
+
<table class='queues'>
|
33
|
+
<tr>
|
34
|
+
<th>Name</th>
|
35
|
+
<th>Jobs</th>
|
36
|
+
</tr>
|
37
|
+
<!-- only show nonzero length queues-->
|
38
|
+
<% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
|
39
|
+
<tr>
|
40
|
+
<td class='queue'><a class="queue" href="<%= u "queues/#{queue}" %>"><%= queue %></a></td>
|
41
|
+
<td class='size'><%= resque.size queue %></td>
|
42
|
+
</tr>
|
43
|
+
<% end %>
|
44
|
+
<% if failed_multiple_queues? %>
|
45
|
+
<% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
|
46
|
+
<tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
|
47
|
+
<td class='queue failed'><a class="queue" href="<%= u "failed/#{queue}" %>"><%= queue %></a></td>
|
48
|
+
<td class='size'><%= Resque::Failure.count(queue) %></td>
|
49
|
+
</tr>
|
50
|
+
<% end %>
|
51
|
+
<% else %>
|
52
|
+
<tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
|
53
|
+
<td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
|
54
|
+
<td class='size'><%= Resque::Failure.count %></td>
|
55
|
+
</tr>
|
56
|
+
<% end %>
|
57
|
+
</table>
|
58
|
+
|
59
|
+
<% end %>
|
@@ -0,0 +1,85 @@
|
|
1
|
+
<% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
|
2
|
+
<h1><%= worker %>'s job</h1>
|
3
|
+
|
4
|
+
<table>
|
5
|
+
<tr>
|
6
|
+
<th> </th>
|
7
|
+
<th>Where</th>
|
8
|
+
<th>Queue</th>
|
9
|
+
<th>Started</th>
|
10
|
+
<th>Class</th>
|
11
|
+
<th>Args</th>
|
12
|
+
</tr>
|
13
|
+
<tr>
|
14
|
+
<td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
|
15
|
+
<% host, pid, _ = worker.to_s.split(':') %>
|
16
|
+
<td><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
|
17
|
+
<% data = worker.job %>
|
18
|
+
<% queue = data['queue'] %>
|
19
|
+
<td><a class="queue" href="<%=u "/queues/#{queue}" %>"><%= queue %></a></td>
|
20
|
+
<td><span class="time"><%= data['run_at'] %></span></td>
|
21
|
+
<td>
|
22
|
+
<code><%= data['payload']['class'] %></code>
|
23
|
+
</td>
|
24
|
+
<td><%=h data['payload']['args'].inspect %></td>
|
25
|
+
</tr>
|
26
|
+
</table>
|
27
|
+
|
28
|
+
<% else %>
|
29
|
+
|
30
|
+
<%
|
31
|
+
workers = resque.working
|
32
|
+
jobs = workers.collect {|w| w.job }
|
33
|
+
worker_jobs = workers.zip(jobs)
|
34
|
+
worker_jobs = worker_jobs.reject { |w, j| w.idle? }
|
35
|
+
%>
|
36
|
+
|
37
|
+
<h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
|
38
|
+
<p class='intro'>The list below contains all workers which are currently running a job.</p>
|
39
|
+
<table class='workers'>
|
40
|
+
<tr>
|
41
|
+
<th> </th>
|
42
|
+
<th>Where</th>
|
43
|
+
<th>Queue</th>
|
44
|
+
<th>Processing</th>
|
45
|
+
</tr>
|
46
|
+
<% if worker_jobs.empty? %>
|
47
|
+
<tr>
|
48
|
+
<td colspan="4" class='no-data'>Nothing is happening right now...</td>
|
49
|
+
</tr>
|
50
|
+
<% end %>
|
51
|
+
|
52
|
+
<% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
|
53
|
+
<tr>
|
54
|
+
<td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
|
55
|
+
<% host, pid, queues = worker.to_s.split(':') %>
|
56
|
+
<td class='where'><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
|
57
|
+
<td class='queues queue'>
|
58
|
+
<a class="queue-tag" href="<%=u "/queues/#{job['queue']}" %>"><%= job['queue'] %></a>
|
59
|
+
</td>
|
60
|
+
<td class='process'>
|
61
|
+
<% if job['queue']
|
62
|
+
job_stats = begin
|
63
|
+
j = job
|
64
|
+
args_hash = j['payload']['args'][1]
|
65
|
+
args_array = args_hash.map{|k,v| "#{k} : #{v}" }.join("</code><br><code>") if args_hash.class==Hash
|
66
|
+
args = [args_array].compact.join("")
|
67
|
+
path = j['payload']['args'].first
|
68
|
+
[path,args].join("</code><br><code>")
|
69
|
+
rescue => exc
|
70
|
+
[exc.to_s,exc.backtrace.join("<br>")].join("<br>")
|
71
|
+
end
|
72
|
+
%>
|
73
|
+
<%=job_stats%>
|
74
|
+
</code>
|
75
|
+
<br>
|
76
|
+
<small><a class="queue time" href="<%=u "/working/#{worker}" %>"><%= job['run_at'] %></a></small>
|
77
|
+
<% else %>
|
78
|
+
<span class='waiting'>Waiting for a job...</span>
|
79
|
+
<% end %>
|
80
|
+
</td>
|
81
|
+
</tr>
|
82
|
+
<% end %>
|
83
|
+
</table>
|
84
|
+
|
85
|
+
<% end %>
|
@@ -11,11 +11,19 @@ class String
|
|
11
11
|
def opp
|
12
12
|
pp self
|
13
13
|
end
|
14
|
+
def to_md5
|
15
|
+
Digest::MD5.hexdigest(self)
|
16
|
+
end
|
14
17
|
def bash(except=true)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
18
|
+
str = self
|
19
|
+
out_str,err_str = []
|
20
|
+
status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
|
21
|
+
out_str = stdout.read
|
22
|
+
err_str = stderr.read
|
23
|
+
end
|
24
|
+
exit_status = status.exitstatus
|
25
|
+
raise err_str if (exit_status !=0 and except==true)
|
26
|
+
return out_str
|
19
27
|
end
|
20
28
|
def escape_regex
|
21
29
|
str = self
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'yaml'
|
1
2
|
module YAML
|
2
3
|
def YAML.easy_load(string)
|
3
4
|
begin
|
@@ -9,13 +10,16 @@ module YAML
|
|
9
10
|
#make sure urls have their colon spaces fixed
|
10
11
|
result_hash={}
|
11
12
|
easy_hash.each do |k,v|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
#fucking yaml puts spaces in front of the key
|
14
|
+
#or something
|
15
|
+
strip_k = k.strip
|
16
|
+
result_hash[strip_k] = if v.class==String
|
17
|
+
v.gsub(": //","://")
|
18
|
+
elsif v.class==Array
|
19
|
+
v.map{|av| av.to_s.gsub(": //","://")}
|
20
|
+
else
|
21
|
+
v
|
22
|
+
end
|
19
23
|
end
|
20
24
|
return result_hash
|
21
25
|
end
|
@@ -14,51 +14,44 @@ module Mobilize
|
|
14
14
|
dst = Dataset.find_by_handler_and_path('gbook',path)
|
15
15
|
if dst and dst.http_url.to_s.length>0
|
16
16
|
book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
|
17
|
-
|
18
|
-
if book.entry_hash[:deleted]
|
19
|
-
book = nil
|
20
|
-
else
|
17
|
+
if book
|
21
18
|
return book
|
19
|
+
else
|
20
|
+
raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
|
22
21
|
end
|
23
22
|
end
|
23
|
+
#try to find books by title
|
24
24
|
books = Gbook.find_all_by_path(path,gdrive_slot)
|
25
|
-
|
26
|
-
book
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
if bkey == dkey
|
34
|
-
book = b
|
35
|
-
dst.update_attributes(:http_url=>book.human_url)
|
36
|
-
else
|
37
|
-
#delete the invalid book
|
38
|
-
b.delete
|
39
|
-
("Deleted duplicate book #{path}").oputs
|
40
|
-
end
|
41
|
-
end
|
42
|
-
else
|
43
|
-
#If it's a new dst or if there are multiple books
|
44
|
-
#take the first
|
45
|
-
book = books.first
|
46
|
-
dst.update_attributes(:http_url=>book.human_url) if book
|
25
|
+
#sort by publish date; if entry hash retrieval fails (as it does)
|
26
|
+
#assume the book was published now
|
27
|
+
book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
|
28
|
+
if book
|
29
|
+
#we know dataset will have blank url since it wasn't picked up above
|
30
|
+
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
31
|
+
api_url = book.human_url.split("&").first
|
32
|
+
dst.update_attributes(:http_url=>api_url)
|
47
33
|
end
|
48
34
|
return book
|
49
35
|
end
|
36
|
+
|
50
37
|
def Gbook.find_or_create_by_path(path,gdrive_slot)
|
51
38
|
book = Gbook.find_by_path(path,gdrive_slot)
|
52
|
-
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
53
39
|
if book.nil?
|
54
40
|
#always use owner email to make sure all books are owned by owner account
|
55
41
|
book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
|
56
42
|
("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
|
43
|
+
#check to make sure the dataset has a blank url; if not, error out
|
44
|
+
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
45
|
+
if dst.http_url.to_s.length>0
|
46
|
+
#add acls to book regardless
|
47
|
+
book.add_admin_acl
|
48
|
+
raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
|
49
|
+
else
|
50
|
+
api_url = book.human_url.split("&").first
|
51
|
+
dst.update_attributes(:http_url=>api_url)
|
52
|
+
book.add_admin_acl
|
53
|
+
end
|
57
54
|
end
|
58
|
-
#always make sure book dataset http URL is up to date
|
59
|
-
#and that book has admin acl
|
60
|
-
dst.update_attributes(:http_url=>book.human_url)
|
61
|
-
book.add_admin_acl
|
62
55
|
return book
|
63
56
|
end
|
64
57
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gfile
|
3
|
-
def Gfile.path_to_dst(path,stage_path)
|
3
|
+
def Gfile.path_to_dst(path,stage_path,gdrive_slot)
|
4
4
|
#don't need the ://
|
5
5
|
path = path.split("://").last if path.index("://")
|
6
6
|
if Gfile.find_by_path(path)
|
@@ -38,7 +38,8 @@ module Mobilize
|
|
38
38
|
end
|
39
39
|
#update http url for file
|
40
40
|
dst = Dataset.find_by_handler_and_path("gfile",dst_path)
|
41
|
-
|
41
|
+
api_url = file.human_url.split("&").first
|
42
|
+
dst.update_attributes(:http_url=>api_url)
|
42
43
|
true
|
43
44
|
end
|
44
45
|
|
@@ -86,7 +87,8 @@ module Mobilize
|
|
86
87
|
#always make sure dataset http URL is up to date
|
87
88
|
#and that it has admin acl
|
88
89
|
if file
|
89
|
-
|
90
|
+
api_url = file.human_url.split("&").first
|
91
|
+
dst.update_attributes(:http_url=>api_url)
|
90
92
|
file.add_admin_acl
|
91
93
|
end
|
92
94
|
return file
|
@@ -1,43 +1,38 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
module Mobilize
|
2
3
|
module Gridfs
|
3
4
|
def Gridfs.config
|
4
5
|
Base.config('gridfs')
|
5
6
|
end
|
6
7
|
|
7
|
-
def Gridfs.
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
|
8
|
+
def Gridfs.read_by_dataset_path(dst_path,*args)
|
9
|
+
curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
|
10
|
+
zs = curr_file.data if curr_file
|
11
|
+
return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
|
12
12
|
end
|
13
13
|
|
14
|
-
def Gridfs.
|
15
|
-
begin
|
16
|
-
zs=Gridfs.grid.open(dst_path,'r').read
|
17
|
-
return ::Zlib::Inflate.inflate(zs)
|
18
|
-
rescue
|
19
|
-
return nil
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
|
14
|
+
def Gridfs.write_by_dataset_path(dst_path,string,*args)
|
24
15
|
zs = ::Zlib::Deflate.deflate(string)
|
25
16
|
raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
|
26
|
-
|
27
|
-
|
17
|
+
#find and delete existing file
|
18
|
+
curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
|
19
|
+
curr_zs = curr_file.data if curr_file
|
20
|
+
#overwrite when there is a change
|
28
21
|
if curr_zs != zs
|
29
|
-
|
22
|
+
Mongoid::GridFs.delete(curr_file.id) if curr_file
|
23
|
+
#create temp file w zstring
|
24
|
+
temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
|
25
|
+
temp_file.print(zs)
|
26
|
+
temp_file.close
|
27
|
+
#put data in file
|
28
|
+
Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
|
30
29
|
end
|
31
30
|
return true
|
32
31
|
end
|
33
32
|
|
34
33
|
def Gridfs.delete(dst_path)
|
35
|
-
|
36
|
-
|
37
|
-
return true
|
38
|
-
rescue
|
39
|
-
return nil
|
40
|
-
end
|
34
|
+
curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
|
35
|
+
curr_file.delete
|
41
36
|
end
|
42
37
|
end
|
43
38
|
end
|
@@ -10,12 +10,10 @@ module Mobilize
|
|
10
10
|
end
|
11
11
|
|
12
12
|
# converts a source path or target path to a dst in the context of handler and stage
|
13
|
-
def Gsheet.path_to_dst(path,stage_path)
|
13
|
+
def Gsheet.path_to_dst(path,stage_path,gdrive_slot)
|
14
14
|
s = Stage.where(:path=>stage_path).first
|
15
15
|
params = s.params
|
16
16
|
target_path = params['target']
|
17
|
-
#take random slot if one is not available
|
18
|
-
gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
|
19
17
|
#if this is the target, it doesn't have to exist already
|
20
18
|
is_target = true if path == target_path
|
21
19
|
#don't need the ://
|
@@ -46,9 +44,7 @@ module Mobilize
|
|
46
44
|
|
47
45
|
def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
|
48
46
|
#expects gdrive slot as first arg, otherwise chooses random
|
49
|
-
gdrive_slot = args
|
50
|
-
worker_emails = Gdrive.worker_emails.sort_by{rand}
|
51
|
-
gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
|
47
|
+
gdrive_slot = args.to_a.first
|
52
48
|
sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
|
53
49
|
sheet.read(user_name) if sheet
|
54
50
|
end
|
@@ -56,8 +52,6 @@ module Mobilize
|
|
56
52
|
def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
|
57
53
|
#expects gdrive slot as first arg, otherwise chooses random
|
58
54
|
gdrive_slot,crop = args
|
59
|
-
worker_emails = Gdrive.worker_emails.sort_by{rand}
|
60
|
-
gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
|
61
55
|
crop ||= true
|
62
56
|
Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
|
63
57
|
end
|
@@ -87,15 +81,16 @@ module Mobilize
|
|
87
81
|
|
88
82
|
def Gsheet.write_temp(target_path,gdrive_slot,tsv)
|
89
83
|
#find and delete temp sheet, if any
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
temp_sheet =
|
84
|
+
temp_book_title = target_path.gridsafe
|
85
|
+
#create book and sheet
|
86
|
+
temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
|
87
|
+
rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
|
88
|
+
temp_sheet = temp_book.add_worksheet("temp",rows,cols)
|
95
89
|
#this step has a tendency to fail; if it does,
|
96
90
|
#don't fail the stage, mark it as false
|
97
91
|
begin
|
98
|
-
|
92
|
+
gdrive_user = gdrive_slot.split("@").first
|
93
|
+
temp_sheet.write(tsv,gdrive_user)
|
99
94
|
rescue
|
100
95
|
return nil
|
101
96
|
end
|
@@ -114,7 +109,7 @@ module Mobilize
|
|
114
109
|
#only give the user edit permissions if they're the ones
|
115
110
|
#creating it
|
116
111
|
target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
|
117
|
-
target_sheet.spreadsheet.update_acl(
|
112
|
+
target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
|
118
113
|
target_sheet.delete_sheet1
|
119
114
|
end
|
120
115
|
#pass it crop param to determine whether to shrink target sheet to fit data
|
@@ -134,14 +129,24 @@ module Mobilize
|
|
134
129
|
crop = s.params['crop'] || true
|
135
130
|
begin
|
136
131
|
#get tsv to write from stage
|
137
|
-
source = s.sources.first
|
132
|
+
source = s.sources(gdrive_slot).first
|
138
133
|
raise "Need source for gsheet write" unless source
|
139
134
|
tsv = source.read(u.name,gdrive_slot)
|
140
|
-
raise "No data found
|
141
|
-
|
135
|
+
raise "No data source found for #{source.url}" unless tsv
|
136
|
+
tsv_row_count = tsv.to_s.split("\n").length
|
137
|
+
tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
|
138
|
+
tsv_cell_count = tsv_row_count * tsv_col_count
|
139
|
+
stdout = if tsv_row_count == 0
|
140
|
+
#soft error; no data to write. Stage will complete.
|
141
|
+
"Write skipped for #{s.target.url}"
|
142
|
+
elsif tsv_cell_count > Gsheet.max_cells
|
143
|
+
raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
|
144
|
+
else
|
145
|
+
Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
|
146
|
+
#update status
|
147
|
+
"Write successful for #{s.target.url}"
|
148
|
+
end
|
142
149
|
Gdrive.unslot_worker_by_path(stage_path)
|
143
|
-
#update status
|
144
|
-
stdout = "Write successful for #{s.target.url}"
|
145
150
|
stderr = nil
|
146
151
|
s.update_status(stdout)
|
147
152
|
signal = 0
|
@@ -25,7 +25,7 @@ module Mobilize
|
|
25
25
|
return idle_workers if state == 'idle'
|
26
26
|
stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
|
27
27
|
return stale_workers if state == 'stale'
|
28
|
-
timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['
|
28
|
+
timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
|
29
29
|
return timeout_workers if state == 'timeout'
|
30
30
|
raise "invalid state #{state}"
|
31
31
|
end
|
@@ -109,16 +109,28 @@ module Mobilize
|
|
109
109
|
Resque.failures.each_with_index do |f,f_i|
|
110
110
|
#skip if already notified
|
111
111
|
next if f['notified']
|
112
|
+
#try to send message to stage owner, where appropriate
|
112
113
|
stage_path = f['payload']['args'].first
|
113
|
-
|
114
|
-
|
114
|
+
email = begin
|
115
|
+
s = Stage.where(:path=>stage_path).first
|
116
|
+
if s.params['notify'].to_s=="false"
|
117
|
+
next
|
118
|
+
elsif s.params['notify'].index("@")
|
119
|
+
s.params['notify']
|
120
|
+
else
|
121
|
+
s.job.runner.user.email
|
122
|
+
end
|
123
|
+
rescue
|
124
|
+
#jobs without stages are sent to first admin
|
125
|
+
Jobtracker.admin_emails.first
|
126
|
+
end
|
115
127
|
exc_to_s = f['error']
|
116
128
|
if fjobs[email].nil?
|
117
129
|
fjobs[email] = {stage_path => {exc_to_s => 1}}
|
118
130
|
elsif fjobs[email][stage_path].nil?
|
119
131
|
fjobs[email][stage_path] = {exc_to_s => 1}
|
120
132
|
elsif fjobs[email][stage_path][exc_to_s].nil?
|
121
|
-
fjobs[email][stage_path][exc_to_s] = 1
|
133
|
+
fjobs[email][stage_path][exc_to_s] = 1
|
122
134
|
else
|
123
135
|
fjobs[email][stage_path][exc_to_s] += 1
|
124
136
|
end
|