mobilize-base 1.2 → 1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +10 -11
- data/lib/mobilize-base/extensions/google_drive/file.rb +7 -7
- data/lib/mobilize-base/extensions/google_drive/worksheet.rb +7 -2
- data/lib/mobilize-base/extensions/resque-server/views/queues.erb +59 -0
- data/lib/mobilize-base/extensions/resque-server/views/working.erb +85 -0
- data/lib/mobilize-base/extensions/string.rb +12 -4
- data/lib/mobilize-base/extensions/yaml.rb +11 -7
- data/lib/mobilize-base/handlers/gbook.rb +24 -31
- data/lib/mobilize-base/handlers/gfile.rb +5 -3
- data/lib/mobilize-base/handlers/gridfs.rb +19 -24
- data/lib/mobilize-base/handlers/gsheet.rb +25 -20
- data/lib/mobilize-base/handlers/resque.rb +16 -4
- data/lib/mobilize-base/helpers/job_helper.rb +54 -0
- data/lib/mobilize-base/helpers/runner_helper.rb +83 -0
- data/lib/mobilize-base/helpers/stage_helper.rb +38 -0
- data/lib/mobilize-base/jobtracker.rb +13 -5
- data/lib/mobilize-base/models/job.rb +36 -48
- data/lib/mobilize-base/models/runner.rb +24 -123
- data/lib/mobilize-base/models/stage.rb +14 -43
- data/lib/mobilize-base/tasks.rb +16 -3
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +5 -1
- data/lib/samples/gridfs.yml +0 -3
- data/lib/samples/gsheet.yml +4 -4
- data/mobilize-base.gemspec +4 -5
- data/test/mobilize-base_test.rb +1 -0
- metadata +21 -32
data/README.md
CHANGED
@@ -220,9 +220,8 @@ production:
 
 gsheet.yml needs:
 * max_cells, which is the number of cells a sheet is allowed to have
-  written to it at one time. Default is
-
-  you try to write more than that.
+  written to it at one time. Default is 50k cells, which is about how
+  much you can write before things start breaking.
 * Because Google Docs ties date formatting to the Locale for the
   spreadsheet, there are 2 date format parameters:
   * read_date_format, which is the format that should be read FROM google
@@ -356,22 +355,16 @@ mobilize_base:resque_web task, as detailed in [Start Resque-Web](#section_Start_
 Mobilize stores cached data in MongoDB Gridfs.
 It needs the below parameters, which can be found in the [lib/samples][git_samples] folder.
 
-* max_versions - the number of __different__ versions of data to keep
-  for a given cache. Default is 10. This is meant mostly to allow you to
-  restore Runners from cache if necessary.
 * max_compressed_write_size - the amount of compressed data Gridfs will
   allow. If you try to write more than this, an exception will be thrown.
 
 ``` yml
 ---
 development:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 test:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 production:
-  max_versions: 10 #number of versions of cache to keep in gridfs
   max_compressed_write_size: 1000000000 #~1GB
 ```
 
@@ -564,8 +557,14 @@ the Runner itself.
 and "base1.out" for the second test. The first
 takes the output from the first stage and the second reads it straight
 from the referenced sheet.
-* All stages accept
-  giving up.
+* All stages accept retry parameters:
+  * retries: an integer specifying the number of times that the system will try it again before giving up.
+  * delay: an integer specifying the number of seconds between retries.
+  * always_on: if false, turns the job off on stage failures.
+    Otherwise the job will retry from the beginning with the same frequency as the Runner refresh rate.
+  * notify: by default, the stage owner will be notified on failure.
+    * if false, will not notify the stage owner in the event of a failure.
+    * If it's an email address, will email the specified person.
 * If a stage fails after all retries, it will output its standard error to a tab in the Runner with the name of the job, the name of the stage, and a ".err" extension
 * The tab will be headed "response" and will contain the exception and backtrace for the error.
 * The test uses "Requestor_mobilize(test)/base1.out" and
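Editor's note: as a sketch of how the retry parameters described in the README hunk above might be written in practice, the stage string below is purely illustrative; the handler, paths, and values are assumptions, not taken from this diff.

```ruby
# Hypothetical stage cell in a Runner's jobs sheet (names and paths invented);
# retries/delay/always_on/notify follow the README bullets above.
stage2 = 'gsheet.write source:"stage1", target:"base1.out", ' \
         'retries:3, delay:60, always_on:false, notify:"owner@example.com"'
```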
data/lib/mobilize-base/extensions/google_drive/file.rb
CHANGED
@@ -13,7 +13,7 @@ module GoogleDrive
 f = self
 #admin includes workers
 return true if f.has_admin_acl?
-accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
+accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
 accounts.each do |email|
 f.update_acl(email)
 end
@@ -21,9 +21,9 @@ module GoogleDrive
 
 def has_admin_acl?
 f = self
-curr_emails = f.acls.map{|a| a.scope}.sort
-admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
-if (curr_emails & admin_emails) == admin_emails
+curr_emails = f.acls.map{|a| a.scope}.compact.sort
+admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).uniq
+if curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails
 return true
 else
 return false
@@ -32,9 +32,9 @@ module GoogleDrive
 
 def has_worker_acl?
 f = self
-curr_emails = f.acls.map{|a| a.scope}.sort
+curr_emails = f.acls.map{|a| a.scope}.compact.sort
 worker_emails = Mobilize::Gdrive.worker_emails.sort
-if (curr_emails & worker_emails) == worker_emails
+if curr_emails == worker_emails or (curr_emails & worker_emails) == worker_emails
 return true
 else
 return false
@@ -84,7 +84,7 @@ module GoogleDrive
 end
 def acl_entry(email)
 f = self
-f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
+f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
 end
 def entry_hash
 f = self
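Editor's note: a quick illustration of why the `.compact`/`.uniq` changes above matter; the email values are invented. A nil ACL scope or a duplicated admin email previously broke the array comparison even when every required account was present.

```ruby
# Illustrative values only; mirrors the comparison in has_admin_acl? above.
curr_emails  = ["a@example.com", nil, "b@example.com"].compact.sort
admin_emails = (["a@example.com"] + ["b@example.com", "a@example.com"]).uniq
curr_emails == admin_emails or (curr_emails & admin_emails) == admin_emails  # => true
```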
data/lib/mobilize-base/extensions/google_drive/worksheet.rb
CHANGED
@@ -1,6 +1,6 @@
 module GoogleDrive
 class Worksheet
-def to_tsv
+def to_tsv(gsub_line_breaks="\n")
 sheet = self
 rows = sheet.rows
 header = rows.first
@@ -8,7 +8,12 @@ module GoogleDrive
 #look for blank cols to indicate end of row
 col_last_i = (header.index("") || header.length)-1
 #ignore user-entered line breaks for purposes of tsv reads
-out_tsv = rows.map
+out_tsv = rows.map do |r|
+row = r[0..col_last_i].join("\t")
+row.gsub!("\n",gsub_line_breaks)
+row = row + "\n"
+row
+end.join + "\n"
 out_tsv.tsv_convert_dates(Mobilize::Gsheet.config['sheet_date_format'],
 Mobilize::Gsheet.config['read_date_format'])
 end
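Editor's note: a hedged usage sketch of the new optional parameter above; the worksheet object and the choice of replacement string are assumptions.

```ruby
# ws is assumed to be a GoogleDrive::Worksheet loaded through mobilize-base.
# Replace user-entered line breaks inside cells with a space so they cannot
# split TSV rows on read.
tsv = ws.to_tsv(" ")
```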
data/lib/mobilize-base/extensions/resque-server/views/queues.erb
ADDED
@@ -0,0 +1,59 @@
+<% @subtabs = resque.queues unless partial? || params[:id].nil? %>
+
+<% if queue = params[:id] %>
+
+<h1>Pending jobs on <span class='hl'><%= queue %></span></h1>
+<form method="POST" action="<%=u "/queues/#{queue}/remove" %>" class='remove-queue'>
+<input type='submit' name='' value='Remove Queue' onclick='return confirm("Are you absolutely sure? This cannot be undone.");' />
+</form>
+<p class='sub'>Showing <%= start = params[:start].to_i %> to <%= start + 20 %> of <b><%=size = resque.size(queue)%></b> jobs</p>
+<table class='jobs'>
+<tr>
+<th>Class</th>
+<th>Args</th>
+</tr>
+<% for job in (jobs = resque.peek(queue, start, 20)) %>
+<tr>
+<td class='class'><%= job['class'] %></td>
+<td class='args'><%=h job['args'].inspect %></td>
+</tr>
+<% end %>
+<% if jobs.empty? %>
+<tr>
+<td class='no-data' colspan='2'>There are no pending jobs in this queue</td>
+</tr>
+<% end %>
+</table>
+<%= partial :next_more, :start => start, :size => size, :per_page => 20 %>
+<% else %>
+
+<h1 class='wi'>Queues</h1>
+<p class='intro'>The list below contains all the registered queues with the number of jobs currently in the queue. Select a queue from above to view all jobs currently pending on the queue.</p>
+<table class='queues'>
+<tr>
+<th>Name</th>
+<th>Jobs</th>
+</tr>
+<!-- only show nonzero length queues-->
+<% resque.queues.select{|q| resque.size(q)>0}.sort_by { |q| q.to_s }.each do |queue| %>
+<tr>
+<td class='queue'><a class="queue" href="<%= u "queues/#{queue}" %>"><%= queue %></a></td>
+<td class='size'><%= resque.size queue %></td>
+</tr>
+<% end %>
+<% if failed_multiple_queues? %>
+<% Resque::Failure.queues.sort_by { |q| q.to_s }.each_with_index do |queue, i| %>
+<tr class="<%= Resque::Failure.count(queue).zero? ? "failed" : "failure" %><%= " first_failure" if i.zero? %>">
+<td class='queue failed'><a class="queue" href="<%= u "failed/#{queue}" %>"><%= queue %></a></td>
+<td class='size'><%= Resque::Failure.count(queue) %></td>
+</tr>
+<% end %>
+<% else %>
+<tr class="<%= Resque::Failure.count.zero? ? "failed" : "failure" %>">
+<td class='queue failed'><a class="queue" href="<%= u :failed %>">failed</a></td>
+<td class='size'><%= Resque::Failure.count %></td>
+</tr>
+<% end %>
+</table>
+
+<% end %>
data/lib/mobilize-base/extensions/resque-server/views/working.erb
ADDED
@@ -0,0 +1,85 @@
+<% if params[:id] && (worker = Resque::Worker.find(params[:id])) && worker.job %>
+<h1><%= worker %>'s job</h1>
+
+<table>
+<tr>
+<th> </th>
+<th>Where</th>
+<th>Queue</th>
+<th>Started</th>
+<th>Class</th>
+<th>Args</th>
+</tr>
+<tr>
+<td><img src="<%=u 'working.png' %>" alt="working" title="working"></td>
+<% host, pid, _ = worker.to_s.split(':') %>
+<td><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+<% data = worker.job %>
+<% queue = data['queue'] %>
+<td><a class="queue" href="<%=u "/queues/#{queue}" %>"><%= queue %></a></td>
+<td><span class="time"><%= data['run_at'] %></span></td>
+<td>
+<code><%= data['payload']['class'] %></code>
+</td>
+<td><%=h data['payload']['args'].inspect %></td>
+</tr>
+</table>
+
+<% else %>
+
+<%
+workers = resque.working
+jobs = workers.collect {|w| w.job }
+worker_jobs = workers.zip(jobs)
+worker_jobs = worker_jobs.reject { |w, j| w.idle? }
+%>
+
+<h1 class='wi'><%= worker_jobs.size %> of <%= resque.workers.size %> Workers Working</h1>
+<p class='intro'>The list below contains all workers which are currently running a job.</p>
+<table class='workers'>
+<tr>
+<th> </th>
+<th>Where</th>
+<th>Queue</th>
+<th>Processing</th>
+</tr>
+<% if worker_jobs.empty? %>
+<tr>
+<td colspan="4" class='no-data'>Nothing is happening right now...</td>
+</tr>
+<% end %>
+
+<% worker_jobs.sort_by {|w, j| j['run_at'] ? j['run_at'] : '' }.each do |worker, job| %>
+<tr>
+<td class='icon'><img src="<%=u state = worker.state %>.png" alt="<%= state %>" title="<%= state %>"></td>
+<% host, pid, queues = worker.to_s.split(':') %>
+<td class='where'><a href="<%=u "/workers/#{worker}" %>"><%= host %>:<%= pid %></a></td>
+<td class='queues queue'>
+<a class="queue-tag" href="<%=u "/queues/#{job['queue']}" %>"><%= job['queue'] %></a>
+</td>
+<td class='process'>
+<% if job['queue']
+job_stats = begin
+j = job
+args_hash = j['payload']['args'][1]
+args_array = args_hash.map{|k,v| "#{k} : #{v}" }.join("</code><br><code>") if args_hash.class==Hash
+args = [args_array].compact.join("")
+path = j['payload']['args'].first
+[path,args].join("</code><br><code>")
+rescue => exc
+[exc.to_s,exc.backtrace.join("<br>")].join("<br>")
+end
+%>
+<%=job_stats%>
+</code>
+<br>
+<small><a class="queue time" href="<%=u "/working/#{worker}" %>"><%= job['run_at'] %></a></small>
+<% else %>
+<span class='waiting'>Waiting for a job...</span>
+<% end %>
+</td>
+</tr>
+<% end %>
+</table>
+
+<% end %>
data/lib/mobilize-base/extensions/string.rb
CHANGED
@@ -11,11 +11,19 @@ class String
 def opp
 pp self
 end
+def to_md5
+Digest::MD5.hexdigest(self)
+end
 def bash(except=true)
-
-
-
-
+str = self
+out_str,err_str = []
+status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
+out_str = stdout.read
+err_str = stderr.read
+end
+exit_status = status.exitstatus
+raise err_str if (exit_status !=0 and except==true)
+return out_str
 end
 def escape_regex
 str = self
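Editor's note: a brief usage sketch for the two String helpers added above; the command and input string are illustrative, and the open4 gem plus Digest are assumed to be loaded by mobilize-base.

```ruby
# Illustrative only:
"echo hello".bash          # => "hello\n"; raises with the command's stderr on nonzero exit
"runner cache key".to_md5  # => 32-character hex digest, used e.g. for temp file names in Gridfs writes
```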
data/lib/mobilize-base/extensions/yaml.rb
CHANGED
@@ -1,3 +1,4 @@
+require 'yaml'
 module YAML
 def YAML.easy_load(string)
 begin
@@ -9,13 +10,16 @@ module YAML
 #make sure urls have their colon spaces fixed
 result_hash={}
 easy_hash.each do |k,v|
-
-
-
-
-
-
-
+#fucking yaml puts spaces in front of the key
+#or something
+strip_k = k.strip
+result_hash[strip_k] = if v.class==String
+v.gsub(": //","://")
+elsif v.class==Array
+v.map{|av| av.to_s.gsub(": //","://")}
+else
+v
+end
 end
 return result_hash
 end
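Editor's note: a hedged sketch of what the easy_load cleanup above does; the input string is invented, and the pre-processing easy_load applies before this loop is not shown in the diff.

```ruby
require 'yaml'
# Assumed behavior per the new branch logic: keys are stripped, and any ": //"
# introduced into string or array values is repaired back to "://".
YAML.easy_load("gdrive_url: https://docs.google.com\nretries: 3")
# => {"gdrive_url"=>"https://docs.google.com", "retries"=>3} (assumed result)
```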
data/lib/mobilize-base/handlers/gbook.rb
CHANGED
@@ -14,51 +14,44 @@ module Mobilize
 dst = Dataset.find_by_handler_and_path('gbook',path)
 if dst and dst.http_url.to_s.length>0
 book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
-
-if book.entry_hash[:deleted]
-book = nil
-else
+if book
 return book
+else
+raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
 end
 end
+#try to find books by title
 books = Gbook.find_all_by_path(path,gdrive_slot)
-
-book
-
-
-#
-
-
-
-if bkey == dkey
-book = b
-dst.update_attributes(:http_url=>book.human_url)
-else
-#delete the invalid book
-b.delete
-("Deleted duplicate book #{path}").oputs
-end
-end
-else
-#If it's a new dst or if there are multiple books
-#take the first
-book = books.first
-dst.update_attributes(:http_url=>book.human_url) if book
+#sort by publish date; if entry hash retrieval fails (as it does)
+#assume the book was published now
+book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
+if book
+#we know dataset will have blank url since it wasn't picked up above
+dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+api_url = book.human_url.split("&").first
+dst.update_attributes(:http_url=>api_url)
 end
 return book
 end
+
 def Gbook.find_or_create_by_path(path,gdrive_slot)
 book = Gbook.find_by_path(path,gdrive_slot)
-dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
 if book.nil?
 #always use owner email to make sure all books are owned by owner account
 book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
 ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
+#check to make sure the dataset has a blank url; if not, error out
+dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
+if dst.http_url.to_s.length>0
+#add acls to book regardless
+book.add_admin_acl
+raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
+else
+api_url = book.human_url.split("&").first
+dst.update_attributes(:http_url=>api_url)
+book.add_admin_acl
+end
 end
-#always make sure book dataset http URL is up to date
-#and that book has admin acl
-dst.update_attributes(:http_url=>book.human_url)
-book.add_admin_acl
 return book
 end
 end
data/lib/mobilize-base/handlers/gfile.rb
CHANGED
@@ -1,6 +1,6 @@
 module Mobilize
 module Gfile
-def Gfile.path_to_dst(path,stage_path)
+def Gfile.path_to_dst(path,stage_path,gdrive_slot)
 #don't need the ://
 path = path.split("://").last if path.index("://")
 if Gfile.find_by_path(path)
@@ -38,7 +38,8 @@ module Mobilize
 end
 #update http url for file
 dst = Dataset.find_by_handler_and_path("gfile",dst_path)
-
+api_url = file.human_url.split("&").first
+dst.update_attributes(:http_url=>api_url)
 true
 end
 
@@ -86,7 +87,8 @@ module Mobilize
 #always make sure dataset http URL is up to date
 #and that it has admin acl
 if file
-
+api_url = file.human_url.split("&").first
+dst.update_attributes(:http_url=>api_url)
 file.add_admin_acl
 end
 return file
data/lib/mobilize-base/handlers/gridfs.rb
CHANGED
@@ -1,43 +1,38 @@
+require 'tempfile'
 module Mobilize
 module Gridfs
 def Gridfs.config
 Base.config('gridfs')
 end
 
-def Gridfs.
-
-
-
-return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
+def Gridfs.read_by_dataset_path(dst_path,*args)
+curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+zs = curr_file.data if curr_file
+return ::Zlib::Inflate.inflate(zs) if zs.to_s.length>0
 end
 
-def Gridfs.
-begin
-zs=Gridfs.grid.open(dst_path,'r').read
-return ::Zlib::Inflate.inflate(zs)
-rescue
-return nil
-end
-end
-
-def Gridfs.write_by_dataset_path(dst_path,string,user_name,*args)
+def Gridfs.write_by_dataset_path(dst_path,string,*args)
 zs = ::Zlib::Deflate.deflate(string)
 raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-
-
+#find and delete existing file
+curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+curr_zs = curr_file.data if curr_file
+#overwrite when there is a change
 if curr_zs != zs
-
+Mongoid::GridFs.delete(curr_file.id) if curr_file
+#create temp file w zstring
+temp_file = ::Tempfile.new("#{string}#{Time.now.to_f}".to_md5)
+temp_file.print(zs)
+temp_file.close
+#put data in file
+Mongoid::GridFs.put(temp_file.path,:filename=>dst_path)
 end
 return true
 end
 
 def Gridfs.delete(dst_path)
-
-
-return true
-rescue
-return nil
-end
+curr_file = Mongoid::GridFs::Fs::File.where(:filename=>dst_path).first
+curr_file.delete
 end
 end
 end
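Editor's note: a round-trip sketch through the new Mongoid::GridFs-backed helpers above; it assumes a configured Mongoid connection and the rest of mobilize-base is loaded, and the dataset path is invented (mobilize-base normally derives it from a Dataset record).

```ruby
# Illustrative only:
path = "gridfs://example/runner_cache"
Mobilize::Gridfs.write_by_dataset_path(path, "col_a\tcol_b\n1\t2\n")
Mobilize::Gridfs.read_by_dataset_path(path)  # => "col_a\tcol_b\n1\t2\n"
Mobilize::Gridfs.delete(path)
```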
data/lib/mobilize-base/handlers/gsheet.rb
CHANGED
@@ -10,12 +10,10 @@ module Mobilize
 end
 
 # converts a source path or target path to a dst in the context of handler and stage
-def Gsheet.path_to_dst(path,stage_path)
+def Gsheet.path_to_dst(path,stage_path,gdrive_slot)
 s = Stage.where(:path=>stage_path).first
 params = s.params
 target_path = params['target']
-#take random slot if one is not available
-gdrive_slot = Gdrive.slot_worker_by_path(stage_path) || Gdrive.worker_emails.sort_by{rand}.first
 #if this is the target, it doesn't have to exist already
 is_target = true if path == target_path
 #don't need the ://
@@ -46,9 +44,7 @@ module Mobilize
 
 def Gsheet.read_by_dataset_path(dst_path,user_name,*args)
 #expects gdrive slot as first arg, otherwise chooses random
-gdrive_slot = args
-worker_emails = Gdrive.worker_emails.sort_by{rand}
-gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
+gdrive_slot = args.to_a.first
 sheet = Gsheet.find_by_path(dst_path,gdrive_slot)
 sheet.read(user_name) if sheet
 end
@@ -56,8 +52,6 @@ module Mobilize
 def Gsheet.write_by_dataset_path(dst_path,tsv,user_name,*args)
 #expects gdrive slot as first arg, otherwise chooses random
 gdrive_slot,crop = args
-worker_emails = Gdrive.worker_emails.sort_by{rand}
-gdrive_slot = worker_emails.first unless worker_emails.include?(gdrive_slot)
 crop ||= true
 Gsheet.write_target(dst_path,tsv,user_name,gdrive_slot,crop)
 end
@@ -87,15 +81,16 @@ module Mobilize
 
 def Gsheet.write_temp(target_path,gdrive_slot,tsv)
 #find and delete temp sheet, if any
-
-
-
-
-temp_sheet =
+temp_book_title = target_path.gridsafe
+#create book and sheet
+temp_book = Gdrive.root(gdrive_slot).create_spreadsheet(temp_book_title)
+rows, cols = tsv.split("\n").ie{|t| [t.length,t.first.split("\t").length]}
+temp_sheet = temp_book.add_worksheet("temp",rows,cols)
 #this step has a tendency to fail; if it does,
 #don't fail the stage, mark it as false
 begin
-
+gdrive_user = gdrive_slot.split("@").first
+temp_sheet.write(tsv,gdrive_user)
 rescue
 return nil
 end
@@ -114,7 +109,7 @@ module Mobilize
 #only give the user edit permissions if they're the ones
 #creating it
 target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
-target_sheet.spreadsheet.update_acl(
+target_sheet.spreadsheet.update_acl(u.email,"writer") unless target_sheet.spreadsheet.acl_entry(u.email).ie{|e| e and e.role=="owner"}
 target_sheet.delete_sheet1
 end
 #pass it crop param to determine whether to shrink target sheet to fit data
@@ -134,14 +129,24 @@ module Mobilize
 crop = s.params['crop'] || true
 begin
 #get tsv to write from stage
-source = s.sources.first
+source = s.sources(gdrive_slot).first
 raise "Need source for gsheet write" unless source
 tsv = source.read(u.name,gdrive_slot)
-raise "No data found
-
+raise "No data source found for #{source.url}" unless tsv
+tsv_row_count = tsv.to_s.split("\n").length
+tsv_col_count = tsv.to_s.split("\n").first.to_s.split("\t").length
+tsv_cell_count = tsv_row_count * tsv_col_count
+stdout = if tsv_row_count == 0
+#soft error; no data to write. Stage will complete.
+"Write skipped for #{s.target.url}"
+elsif tsv_cell_count > Gsheet.max_cells
+raise "Too many datapoints; you have #{tsv_cell_count.to_s}, max is #{Gsheet.max_cells.to_s}"
+else
+Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot,crop)
+#update status
+"Write successful for #{s.target.url}"
+end
 Gdrive.unslot_worker_by_path(stage_path)
-#update status
-stdout = "Write successful for #{s.target.url}"
 stderr = nil
 s.update_status(stdout)
 signal = 0
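Editor's note: a worked example of the new cell-count guard above; the row/column counts and the 50k limit are illustrative (max_cells comes from gsheet.yml, per the README).

```ruby
# Illustrative numbers only.
tsv_row_count  = 1_000
tsv_col_count  = 60
tsv_cell_count = tsv_row_count * tsv_col_count  # => 60_000
# With max_cells set to 50_000, this write raises the
# "Too many datapoints" error instead of attempting the sheet write.
```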
data/lib/mobilize-base/handlers/resque.rb
CHANGED
@@ -25,7 +25,7 @@ module Mobilize
 return idle_workers if state == 'idle'
 stale_workers = workers.select{|w| Time.parse(w.started) < Jobtracker.deployed_at}
 return stale_workers if state == 'stale'
-timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['
+timeout_workers = workers.select{|w| w.job['payload'] and w.job['payload']['class']!='Jobtracker' and w.job['run_at'] < (Time.now.utc - Jobtracker.max_run_time)}
 return timeout_workers if state == 'timeout'
 raise "invalid state #{state}"
 end
@@ -109,16 +109,28 @@ module Mobilize
 Resque.failures.each_with_index do |f,f_i|
 #skip if already notified
 next if f['notified']
+#try to send message to stage owner, where appropriate
 stage_path = f['payload']['args'].first
-
-
+email = begin
+s = Stage.where(:path=>stage_path).first
+if s.params['notify'].to_s=="false"
+next
+elsif s.params['notify'].index("@")
+s.params['notify']
+else
+s.job.runner.user.email
+end
+rescue
+#jobs without stages are sent to first admin
+Jobtracker.admin_emails.first
+end
 exc_to_s = f['error']
 if fjobs[email].nil?
 fjobs[email] = {stage_path => {exc_to_s => 1}}
 elsif fjobs[email][stage_path].nil?
 fjobs[email][stage_path] = {exc_to_s => 1}
 elsif fjobs[email][stage_path][exc_to_s].nil?
-fjobs[email][stage_path][exc_to_s] = 1
+fjobs[email][stage_path][exc_to_s] = 1
 else
 fjobs[email][stage_path][exc_to_s] += 1
 end