mobilize-base 1.0.2 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/LICENSE.txt +202 -20
- data/README.md +219 -138
- data/Rakefile +1 -2
- data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
- data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
- data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
- data/lib/mobilize-base/extensions/resque.rb +18 -24
- data/lib/mobilize-base/extensions/string.rb +12 -0
- data/lib/mobilize-base/handlers/gbook.rb +14 -47
- data/lib/mobilize-base/handlers/gdrive.rb +17 -18
- data/lib/mobilize-base/handlers/gfile.rb +18 -39
- data/lib/mobilize-base/handlers/gridfs.rb +43 -0
- data/lib/mobilize-base/handlers/gsheet.rb +48 -99
- data/lib/mobilize-base/jobtracker.rb +29 -15
- data/lib/mobilize-base/models/dataset.rb +33 -35
- data/lib/mobilize-base/models/job.rb +21 -168
- data/lib/mobilize-base/models/runner.rb +178 -0
- data/lib/mobilize-base/models/task.rb +137 -0
- data/lib/mobilize-base/models/user.rb +47 -0
- data/lib/mobilize-base/rakes.rb +59 -0
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +20 -9
- data/lib/samples/gdrive.yml +12 -12
- data/lib/samples/gridfs.yml +9 -0
- data/lib/samples/gsheet.yml +6 -0
- data/lib/samples/jobtracker.yml +9 -9
- data/lib/samples/mongoid.yml +3 -3
- data/mobilize-base.gemspec +1 -1
- data/test/base1_task1.yml +3 -0
- data/test/base_job_rows.yml +13 -0
- data/test/mobilize-base_test.rb +59 -0
- metadata +20 -9
- data/lib/mobilize-base/handlers/mongodb.rb +0 -32
- data/lib/mobilize-base/models/requestor.rb +0 -232
- data/lib/mobilize-base/tasks.rb +0 -43
- data/test/mobilize_test.rb +0 -108
@@ -17,6 +17,18 @@ class String
|
|
17
17
|
raise stderr.read if (stderr.read.length>0 and except==true)
|
18
18
|
return stdout.read
|
19
19
|
end
|
20
|
+
# Returns a copy of the string with regular-expression metacharacters
# prefixed by a backslash, so the result can be embedded in a pattern and
# still match the original text literally.
#
# Escaped characters: \ [ ] { } / ^ $ . | ? * + ( ) and the space character.
# The backslash itself is included so that input which already contains
# backslashes is not reinterpreted as an escape sequence.
#
# Returns a new String; the receiver is not modified.
def escape_regex
  #single pass: each metacharacter is prefixed exactly once, and the block
  #form keeps the replacement out of gsub's backreference handling
  gsub(/[\\\[\]{}\/^$. |?*+()]/) { |c| "\\#{c}" }
end
|
28
|
+
# Returns a lowercased copy of the string with every character outside
# a-z and 0-9 removed. The receiver is not modified.
def gridsafe
  lowered = downcase
  #strip runs of anything that is not a lowercase letter or digit
  lowered.gsub(/[^a-z0-9]+/, "")
end
|
20
32
|
def googlesafe
|
21
33
|
v=self
|
22
34
|
return "" if v.to_s==""
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gbook
|
3
|
-
def Gbook.
|
4
|
-
Gdrive.books(
|
3
|
+
def Gbook.find_all_by_path(path,gdrive_slot)
|
4
|
+
Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
|
5
5
|
end
|
6
|
-
def Gbook.
|
7
|
-
books = Gdrive.books(
|
8
|
-
|
6
|
+
def Gbook.find_or_create_by_path(path,gdrive_slot)
|
7
|
+
books = Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
|
8
|
+
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
9
|
+
#there should only be one book with each path, otherwise we have fail
|
9
10
|
book = nil
|
10
|
-
if books.length>1
|
11
|
+
if books.length>1 and dst.url.to_s.length>0
|
11
12
|
#some idiot process created a duplicate book.
|
12
13
|
#Fix by renaming all but one with dst entry's key
|
13
|
-
dst = Dataset.find_by_handler_and_name('gbook',title)
|
14
14
|
dkey = dst.url.split("key=").last
|
15
15
|
books.each do |b|
|
16
16
|
bkey = b.resource_id.split(":").last
|
@@ -19,55 +19,22 @@ module Mobilize
|
|
19
19
|
else
|
20
20
|
#delete the invalid book
|
21
21
|
b.delete
|
22
|
-
("Deleted duplicate book #{
|
22
|
+
("Deleted duplicate book #{path}").oputs
|
23
23
|
end
|
24
24
|
end
|
25
25
|
else
|
26
26
|
book = books.first
|
27
27
|
end
|
28
28
|
if book.nil?
|
29
|
-
#
|
30
|
-
|
31
|
-
book
|
32
|
-
("Created book #{title} at #{Time.now.utc.to_s}").oputs
|
29
|
+
#always use owner email to make sure all books are owned by owner account
|
30
|
+
book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
|
31
|
+
("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
|
33
32
|
end
|
34
|
-
#
|
35
|
-
#
|
36
|
-
|
37
|
-
sheet1 = sheets.select{|s| s.title == "Sheet1"}.first
|
38
|
-
#http
|
39
|
-
sheet1.delete if sheet1
|
40
|
-
end
|
41
|
-
#always make sure books have admin acl
|
33
|
+
#always make sure book dataset URL is up to date
|
34
|
+
#and that book has admin acl
|
35
|
+
dst.update_attributes(:url=>book.human_url)
|
42
36
|
book.add_admin_acl
|
43
37
|
return book
|
44
38
|
end
|
45
|
-
|
46
|
-
def Gbook.find_or_create_by_dst_id(dst_id,email=nil)
|
47
|
-
#creates by title, updates acl, updates dataset with url
|
48
|
-
dst = Dataset.find(dst_id)
|
49
|
-
r = Requestor.find(dst.requestor_id)
|
50
|
-
book = nil
|
51
|
-
#http
|
52
|
-
book = Gdrive.root.spreadsheet_by_url(dst.url) if dst.url
|
53
|
-
#manually try 5 times to validate sheet since we can't just try again and again
|
54
|
-
5.times.each do
|
55
|
-
begin
|
56
|
-
book.resource_id
|
57
|
-
#if no error then break loop
|
58
|
-
break
|
59
|
-
rescue=>exc
|
60
|
-
if book.nil? or exc.to_s.index('Invalid document id')
|
61
|
-
book = Gbook.find_or_create_by_title(dst.name,email)
|
62
|
-
#if invalid doc then update url w new book and break loop
|
63
|
-
dst.update_attributes(:url=>book.human_url)
|
64
|
-
break
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
#add requestor write access
|
69
|
-
book.update_acl(r.email)
|
70
|
-
return book
|
71
|
-
end
|
72
39
|
end
|
73
40
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gdrive
|
3
3
|
def Gdrive.config
|
4
|
-
Base.config('gdrive')
|
4
|
+
Base.config('gdrive')
|
5
5
|
end
|
6
6
|
|
7
7
|
def Gdrive.domain
|
@@ -9,7 +9,7 @@ module Mobilize
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def Gdrive.owner_email
|
12
|
-
Gdrive.config['owner']['
|
12
|
+
[Gdrive.config['owner']['name'],Gdrive.domain].join("@")
|
13
13
|
end
|
14
14
|
|
15
15
|
def Gdrive.password(email)
|
@@ -29,44 +29,43 @@ module Mobilize
|
|
29
29
|
if email.nil?
|
30
30
|
Gdrive.config['workers']
|
31
31
|
else
|
32
|
-
Gdrive.workers.select{|w| w['
|
32
|
+
Gdrive.workers.select{|w| [w['name'],Gdrive.domain].join("@") == email}.first
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
def Gdrive.worker_emails
|
37
|
-
Gdrive.workers.map{|w| w['
|
37
|
+
Gdrive.workers.map{|w| [w['name'],Gdrive.domain].join("@")}
|
38
38
|
end
|
39
39
|
|
40
40
|
def Gdrive.admin_emails
|
41
|
-
Gdrive.admins.map{|w| w['
|
41
|
+
Gdrive.admins.map{|w| [w['name'],Gdrive.domain].join("@")}
|
42
42
|
end
|
43
43
|
|
44
44
|
#email management - used to make sure not too many emails get used at the same time
|
45
|
-
def Gdrive.
|
46
|
-
|
45
|
+
def Gdrive.slot_worker_by_path(path)
|
46
|
+
working_slots = Mobilize::Resque.jobs('working').map{|j| j['gdrive_slot'] if j['gdrive_slot']}.compact
|
47
47
|
Gdrive.workers.sort_by{rand}.each do |w|
|
48
|
-
|
49
|
-
Mobilize::Resque.
|
50
|
-
return w['
|
48
|
+
unless working_slots.include?([w['name'],Gdrive.domain].join("@"))
|
49
|
+
Mobilize::Resque.set_worker_args_by_path(path,{'gdrive_slot'=>[w['name'],Gdrive.domain].join("@")})
|
50
|
+
return [w['name'],Gdrive.domain].join("@")
|
51
51
|
end
|
52
52
|
end
|
53
53
|
#return false if none are available
|
54
54
|
return false
|
55
55
|
end
|
56
56
|
|
57
|
-
def Gdrive.root(
|
58
|
-
|
59
|
-
|
60
|
-
GoogleDrive.login(email,pw)
|
57
|
+
def Gdrive.root(gdrive_slot=nil)
|
58
|
+
pw = Gdrive.password(gdrive_slot)
|
59
|
+
GoogleDrive.login(gdrive_slot,pw)
|
61
60
|
end
|
62
61
|
|
63
|
-
def Gdrive.files(
|
64
|
-
root = Gdrive.root(
|
62
|
+
def Gdrive.files(gdrive_slot=nil,params={})
|
63
|
+
root = Gdrive.root(gdrive_slot)
|
65
64
|
root.files(params)
|
66
65
|
end
|
67
66
|
|
68
|
-
def Gdrive.books(
|
69
|
-
Gdrive.files(
|
67
|
+
def Gdrive.books(gdrive_slot=nil,params={})
|
68
|
+
Gdrive.files(gdrive_slot,params).select{|f| f.class==GoogleDrive::Spreadsheet}
|
70
69
|
end
|
71
70
|
end
|
72
71
|
end
|
@@ -1,55 +1,34 @@
|
|
1
1
|
module Mobilize
|
2
|
-
|
3
|
-
def Gfile.
|
4
|
-
|
5
|
-
end
|
6
|
-
|
7
|
-
def Gfile.find_by_dst_id(dst_id,email=nil)
|
8
|
-
dst = Dataset.find(dst_id)
|
9
|
-
Gfile.find_by_title(dst.path,email)
|
10
|
-
end
|
11
|
-
|
12
|
-
def Gfile.add_admin_acl_by_dst_id(dst_id)
|
13
|
-
#adds admins and workers as writers
|
14
|
-
file = Gfile.find_by_dst_id(dst_id)
|
2
|
+
module Gfile
|
3
|
+
#Looks up the file titled +path+ and calls add_admin_acl on it.
#  path        - exact title of the file to look up
#  gdrive_slot - optional account handed to Gfile.find_by_path for the lookup
#Returns true. Raises RuntimeError when no file with that title is found.
def Gfile.add_admin_acl_by_path(path,gdrive_slot=nil)
  #find_by_path takes the slot as its second argument; pass it explicitly
  file = Gfile.find_by_path(path,gdrive_slot)
  raise "File #{path} not found" unless file
  file.add_admin_acl
  return true
end
|
18
8
|
|
19
|
-
def Gfile.
|
20
|
-
file = Gfile.
|
21
|
-
file.add_admin_acl
|
22
|
-
return true
|
23
|
-
end
|
24
|
-
|
25
|
-
def Gfile.add_worker_acl_by_title(title)
|
26
|
-
file = Gfile.find_by_title(title)
|
9
|
+
#Looks up the file titled +path+ and calls add_worker_acl on it.
#  path        - exact title of the file to look up
#  gdrive_slot - optional account handed to Gfile.find_by_path for the lookup
#Returns true. Raises RuntimeError when no file with that title is found.
def Gfile.add_worker_acl_by_path(path,gdrive_slot=nil)
  #find_by_path takes the slot as its second argument; pass it explicitly
  file = Gfile.find_by_path(path,gdrive_slot)
  raise "File #{path} not found" unless file
  file.add_worker_acl
  return true
end
|
30
14
|
|
31
|
-
def Gfile.
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
def Gfile.update_acl_by_title(title,email,role="writer",edit_email=nil)
|
37
|
-
file = Gfile.find_by_title(title,edit_email)
|
38
|
-
raise "File #{title} not found" unless file
|
39
|
-
file.update_acl(email,role)
|
15
|
+
def Gfile.update_acl_by_path(path,gdrive_slot,role="writer",target_email=nil)
|
16
|
+
file = Gfile.find_by_path(path,target_email)
|
17
|
+
raise "File #{path} not found" unless file
|
18
|
+
file.update_acl(gdrive_slot,role)
|
40
19
|
end
|
41
20
|
|
42
|
-
def Gfile.
|
43
|
-
|
21
|
+
#Returns the first file whose title exactly matches +path+, or nil when
#Gdrive.files returns no match.
#  path        - exact title to search for
#  gdrive_slot - account passed through to Gdrive.files; optional, mirroring
#                the nil default Gdrive.files itself declares
def Gfile.find_by_path(path,gdrive_slot=nil)
  Gdrive.files(gdrive_slot,{"title"=>path,"title-exact"=>"true"}).first
end
|
45
24
|
|
46
|
-
def Gfile.
|
47
|
-
|
25
|
+
#Reads the contents of the file named by the first param of the task
#stored at +task_path+.
#Returns false when Gdrive.slot_worker_by_path yields no free slot;
#otherwise returns whatever the located file's #read returns.
def Gfile.read_by_task_path(task_path)
  #reserve gdrive_slot account for read; the slot is keyed by the task path
  #itself, so it can be requested before the task record is loaded
  gdrive_slot = Gdrive.slot_worker_by_path(task_path)
  return false unless gdrive_slot
  #where returns a query, so take the first matching task
  t = Task.where(:path=>task_path).first
  gfile_path = t.params.first
  Gfile.find_by_path(gfile_path,gdrive_slot).read
end
|
49
|
-
|
50
|
-
def Gfile.read_by_job_id(job_id)
|
51
|
-
j = Job.find(job_id)
|
52
|
-
end
|
53
|
-
|
54
33
|
end
|
55
34
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Mobilize
  #Stores strings in Mongo GridFS, zlib-compressed, under the gridsafe form
  #of the given path.
  module Gridfs
    #Returns the 'gridfs' section of the Base configuration.
    def Gridfs.config
      Base.config('gridfs')
    end

    #Builds a GridFileSystem handle from the 'default' Mongoid session,
    #using its database name and the first configured host:port.
    #A new Mongo connection is opened on every call.
    def Gridfs.grid
      session = ::Mongoid.configure.sessions['default']
      database_name = session['database']
      host,port = session['hosts'].first.split(":")
      return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
    end

    #Returns the uncompressed string stored under +path+, or nil when the
    #entry cannot be opened, read or inflated (any StandardError is swallowed).
    def Gridfs.read(path)
      begin
        zs=Gridfs.grid.open(path.gridsafe,'r').read
        return ::Zlib::Inflate.inflate(zs)
      rescue
        return nil
      end
    end

    #Compresses +string+ and stores it under +path+, writing a new version
    #only when the content differs from what is currently stored.
    #Raises RuntimeError when the compressed payload exceeds the configured
    #'max_compressed_write_size'. Returns true otherwise.
    def Gridfs.write(path,string)
      zs = ::Zlib::Deflate.deflate(string)
      raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
      #read returns the inflated content, so compare against the uncompressed
      #input rather than the deflated payload
      current = Gridfs.read(path)
      #inflate yields a binary string; align encodings so == compares bytes
      unchanged = !current.nil? && current.dup.force_encoding(string.encoding) == string
      #write a new version when there is a change
      unless unchanged
        Gridfs.grid.open(path.gridsafe,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
      end
      return true
    end

    #Deletes the entry stored under +path+.
    #Returns true on success, nil when the delete raises a StandardError.
    def Gridfs.delete(path)
      begin
        Gridfs.grid.delete(path.gridsafe)
        return true
      rescue
        return nil
      end
    end
  end
end
|
@@ -1,122 +1,71 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gsheet
|
3
3
|
|
4
|
-
def Gsheet.
|
5
|
-
|
4
|
+
def Gsheet.config
|
5
|
+
Base.config('gsheet')
|
6
6
|
end
|
7
7
|
|
8
|
-
def Gsheet.
|
9
|
-
|
10
|
-
sheet.to_tsv
|
8
|
+
def Gsheet.max_cells
|
9
|
+
Gsheet.config['max_cells']
|
11
10
|
end
|
12
11
|
|
13
|
-
def Gsheet.write(
|
14
|
-
sheet = Gsheet.
|
12
|
+
def Gsheet.write(path,tsv,gdrive_slot)
|
13
|
+
sheet = Gsheet.find_or_create_by_path(path,gdrive_slot)
|
15
14
|
sheet.write(tsv)
|
16
15
|
end
|
17
16
|
|
18
|
-
def Gsheet.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
sheets
|
17
|
+
def Gsheet.find_by_path(path,gdrive_slot)
|
18
|
+
book_path,sheet_name = path.split("/")
|
19
|
+
book = Gdrive.books(gdrive_slot,{"title"=>book_path,"title-exact"=>"true"}).first
|
20
|
+
return book.worksheet_by_title(sheet_name) if book
|
23
21
|
end
|
24
22
|
|
25
|
-
def Gsheet.
|
26
|
-
|
27
|
-
book = Gbook.
|
28
|
-
|
29
|
-
sheet = book.worksheets.select{|w| w.title==sheet_title}.first
|
23
|
+
def Gsheet.find_or_create_by_path(path,gdrive_slot,rows=100,cols=20)
|
24
|
+
book_path,sheet_name = path.split("/")
|
25
|
+
book = Gbook.find_or_create_by_path(book_path,gdrive_slot)
|
26
|
+
sheet = book.worksheet_by_title(sheet_name)
|
30
27
|
if sheet.nil?
|
31
|
-
|
32
|
-
|
33
|
-
("Created sheet #{name} at #{Time.now.utc.to_s}").oputs
|
28
|
+
sheet = book.add_worksheet(sheet_name,rows,cols)
|
29
|
+
("Created gsheet #{path} at #{Time.now.utc.to_s}").oputs
|
34
30
|
end
|
31
|
+
Dataset.find_or_create_by_handler_and_path("gsheet",path)
|
35
32
|
return sheet
|
36
33
|
end
|
37
34
|
|
38
|
-
def Gsheet.
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
r.find_or_create_gbook_by_title(book_title,email)
|
46
|
-
#add admin write access
|
47
|
-
sheet = Gsheet.find_or_create_by_name(name)
|
48
|
-
sheet_title = nil
|
49
|
-
return sheet
|
50
|
-
end
|
51
|
-
|
52
|
-
def Gsheet.read_by_dst_id(dst_id,email=nil)
|
53
|
-
dst = Dataset.find(dst_id)
|
54
|
-
name = dst.name
|
55
|
-
sheet = Gsheet.find_or_create_by_name(name,email)
|
56
|
-
output = sheet.to_tsv
|
57
|
-
return output
|
58
|
-
end
|
59
|
-
|
60
|
-
def Gsheet.read_by_job_id(job_id)
|
61
|
-
j = Job.find(job_id)
|
62
|
-
#reserve email account for read
|
63
|
-
email = Gdrive.get_worker_email_by_mongo_id(job_id)
|
64
|
-
return false unless email
|
65
|
-
#pull tsv from cache
|
66
|
-
j.dataset_array.first.read_cache
|
35
|
+
def Gsheet.read_by_task_path(task_path)
|
36
|
+
#reserve gdrive_slot account for read
|
37
|
+
gdrive_slot = Gdrive.slot_worker_by_path(task_path)
|
38
|
+
return false unless gdrive_slot
|
39
|
+
t = Task.where(:path=>task_path).first
|
40
|
+
gsheet_path = t.params.first
|
41
|
+
Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
|
67
42
|
end
|
68
43
|
|
69
|
-
def Gsheet.
|
70
|
-
|
71
|
-
#see if this is a specific cell
|
72
|
-
name = dst.name
|
73
|
-
return false unless email
|
74
|
-
#create temp tab, write data to it, checksum it against the source
|
75
|
-
temp_sheet = Gsheet.find_or_create_by_name("#{name}_temp")
|
76
|
-
temp_sheet.write(tsv)
|
77
|
-
#delete current sheet, replace it with temp one
|
78
|
-
sheet = Gsheet.find_or_create_by_name(dst.name)
|
79
|
-
title = sheet.title
|
80
|
-
#http
|
81
|
-
sheet.delete
|
82
|
-
begin
|
83
|
-
temp_sheet.rename(title)
|
84
|
-
rescue
|
85
|
-
#need this because sometimes it gets confused and tries to rename twice
|
86
|
-
end
|
87
|
-
"Write successful for #{write_name}".oputs
|
88
|
-
return true
|
89
|
-
end
|
90
|
-
|
91
|
-
def Gsheet.write_by_job_id(job_id)
|
92
|
-
j = Job.find(job_id)
|
93
|
-
r = j.requestor
|
94
|
-
dest_name = if j.destination.split("/").length==1
|
95
|
-
"#{r.jobspec_title}#{"/"}#{j.destination}"
|
96
|
-
else
|
97
|
-
j.destination
|
98
|
-
end
|
99
|
-
sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheet',dest_name)
|
100
|
-
sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
|
101
|
-
email = Gdrive.get_worker_email_by_mongo_id(job_id)
|
44
|
+
def Gsheet.write_by_task_path(task_path)
|
45
|
+
gdrive_slot = Gdrive.slot_worker_by_path(task_path)
|
102
46
|
#return false if there are no emails available
|
103
|
-
return false unless
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
temp_sheet.
|
118
|
-
|
119
|
-
|
47
|
+
return false unless gdrive_slot
|
48
|
+
t = Task.where(:path=>task_path).first
|
49
|
+
source = t.params.first
|
50
|
+
target_path = t.params.second
|
51
|
+
source_job_name, source_task_name = if source.index("/")
|
52
|
+
source.split("/")
|
53
|
+
else
|
54
|
+
[nil, source]
|
55
|
+
end
|
56
|
+
source_task_path = "#{t.job.runner.path}/#{source_job_name || t.job.name}/#{source_task_name}"
|
57
|
+
source_task = Task.where(:path=>source_task_path).first
|
58
|
+
tsv = source_task.stdout_dataset.read_cache
|
59
|
+
sheet_name = target_path.split("/").last
|
60
|
+
temp_path = [task_path.gridsafe,sheet_name].join("/")
|
61
|
+
temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
|
62
|
+
temp_sheet.write(tsv)
|
63
|
+
temp_sheet.check_and_fix(tsv)
|
64
|
+
target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
|
65
|
+
target_sheet.merge(temp_sheet)
|
66
|
+
#delete the temp sheet's book
|
67
|
+
temp_sheet.spreadsheet.delete
|
68
|
+
"Write successful for #{target_path}".oputs
|
120
69
|
return true
|
121
70
|
end
|
122
71
|
end
|