mobilize-base 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/LICENSE.txt +202 -20
- data/README.md +219 -138
- data/Rakefile +1 -2
- data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
- data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
- data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
- data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
- data/lib/mobilize-base/extensions/resque.rb +18 -24
- data/lib/mobilize-base/extensions/string.rb +12 -0
- data/lib/mobilize-base/handlers/gbook.rb +14 -47
- data/lib/mobilize-base/handlers/gdrive.rb +17 -18
- data/lib/mobilize-base/handlers/gfile.rb +18 -39
- data/lib/mobilize-base/handlers/gridfs.rb +43 -0
- data/lib/mobilize-base/handlers/gsheet.rb +48 -99
- data/lib/mobilize-base/jobtracker.rb +29 -15
- data/lib/mobilize-base/models/dataset.rb +33 -35
- data/lib/mobilize-base/models/job.rb +21 -168
- data/lib/mobilize-base/models/runner.rb +178 -0
- data/lib/mobilize-base/models/task.rb +137 -0
- data/lib/mobilize-base/models/user.rb +47 -0
- data/lib/mobilize-base/rakes.rb +59 -0
- data/lib/mobilize-base/version.rb +1 -1
- data/lib/mobilize-base.rb +20 -9
- data/lib/samples/gdrive.yml +12 -12
- data/lib/samples/gridfs.yml +9 -0
- data/lib/samples/gsheet.yml +6 -0
- data/lib/samples/jobtracker.yml +9 -9
- data/lib/samples/mongoid.yml +3 -3
- data/mobilize-base.gemspec +1 -1
- data/test/base1_task1.yml +3 -0
- data/test/base_job_rows.yml +13 -0
- data/test/mobilize-base_test.rb +59 -0
- metadata +20 -9
- data/lib/mobilize-base/handlers/mongodb.rb +0 -32
- data/lib/mobilize-base/models/requestor.rb +0 -232
- data/lib/mobilize-base/tasks.rb +0 -43
- data/test/mobilize_test.rb +0 -108
@@ -17,6 +17,18 @@ class String
|
|
17
17
|
raise stderr.read if (stderr.read.length>0 and except==true)
|
18
18
|
return stdout.read
|
19
19
|
end
|
20
|
+
# Returns a copy of the receiver in which every occurrence of the characters
# [ / ^ $ . (space) | ? * + ( ) is prefixed with a backslash.
# The receiver itself is left untouched.
def escape_regex
  escaped = self.clone
  "[\/^$. |?*+()".each_char do |meta|
    # plain-string pattern: each character is replaced literally, one pass per character
    escaped.gsub!(meta, "\\#{meta}")
  end
  escaped
end
|
28
|
+
# Returns the receiver lowercased with every character outside a-z and 0-9 removed.
def gridsafe
  lowered = self.downcase
  lowered.gsub(%r{[^a-z0-9]}, "")
end
|
20
32
|
def googlesafe
|
21
33
|
v=self
|
22
34
|
return "" if v.to_s==""
|
@@ -1,16 +1,16 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gbook
|
3
|
-
def Gbook.
|
4
|
-
Gdrive.books(
|
3
|
+
# Returns whatever Gdrive.books yields for an exact-title query on +path+,
# using +gdrive_slot+ as the account passed through to Gdrive.
def Gbook.find_all_by_path(path,gdrive_slot)
  exact_title_query = {"title"=>path,"title-exact"=>"true"}
  Gdrive.books(gdrive_slot, exact_title_query)
end
|
6
|
-
def Gbook.
|
7
|
-
books = Gdrive.books(
|
8
|
-
|
6
|
+
def Gbook.find_or_create_by_path(path,gdrive_slot)
|
7
|
+
books = Gdrive.books(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
|
8
|
+
dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
|
9
|
+
#there should only be one book with each path, otherwise we have fail
|
9
10
|
book = nil
|
10
|
-
if books.length>1
|
11
|
+
if books.length>1 and dst.url.to_s.length>0
|
11
12
|
#some idiot process created a duplicate book.
|
12
13
|
#Fix by renaming all but one with dst entry's key
|
13
|
-
dst = Dataset.find_by_handler_and_name('gbook',title)
|
14
14
|
dkey = dst.url.split("key=").last
|
15
15
|
books.each do |b|
|
16
16
|
bkey = b.resource_id.split(":").last
|
@@ -19,55 +19,22 @@ module Mobilize
|
|
19
19
|
else
|
20
20
|
#delete the invalid book
|
21
21
|
b.delete
|
22
|
-
("Deleted duplicate book #{
|
22
|
+
("Deleted duplicate book #{path}").oputs
|
23
23
|
end
|
24
24
|
end
|
25
25
|
else
|
26
26
|
book = books.first
|
27
27
|
end
|
28
28
|
if book.nil?
|
29
|
-
#
|
30
|
-
|
31
|
-
book
|
32
|
-
("Created book #{title} at #{Time.now.utc.to_s}").oputs
|
29
|
+
#always use owner email to make sure all books are owned by owner account
|
30
|
+
book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
|
31
|
+
("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
|
33
32
|
end
|
34
|
-
#
|
35
|
-
#
|
36
|
-
|
37
|
-
sheet1 = sheets.select{|s| s.title == "Sheet1"}.first
|
38
|
-
#http
|
39
|
-
sheet1.delete if sheet1
|
40
|
-
end
|
41
|
-
#always make sure books have admin acl
|
33
|
+
#always make sure book dataset URL is up to date
|
34
|
+
#and that book has admin acl
|
35
|
+
dst.update_attributes(:url=>book.human_url)
|
42
36
|
book.add_admin_acl
|
43
37
|
return book
|
44
38
|
end
|
45
|
-
|
46
|
-
def Gbook.find_or_create_by_dst_id(dst_id,email=nil)
|
47
|
-
#creates by title, updates acl, updates dataset with url
|
48
|
-
dst = Dataset.find(dst_id)
|
49
|
-
r = Requestor.find(dst.requestor_id)
|
50
|
-
book = nil
|
51
|
-
#http
|
52
|
-
book = Gdrive.root.spreadsheet_by_url(dst.url) if dst.url
|
53
|
-
#manually try 5 times to validate sheet since we can't just try again and again
|
54
|
-
5.times.each do
|
55
|
-
begin
|
56
|
-
book.resource_id
|
57
|
-
#if no error then break loop
|
58
|
-
break
|
59
|
-
rescue=>exc
|
60
|
-
if book.nil? or exc.to_s.index('Invalid document id')
|
61
|
-
book = Gbook.find_or_create_by_title(dst.name,email)
|
62
|
-
#if invalid doc then update url w new book and break loop
|
63
|
-
dst.update_attributes(:url=>book.human_url)
|
64
|
-
break
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
#add requestor write access
|
69
|
-
book.update_acl(r.email)
|
70
|
-
return book
|
71
|
-
end
|
72
39
|
end
|
73
40
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gdrive
|
3
3
|
# Returns the 'gdrive' section of the Base configuration.
def Gdrive.config
  return Base.config('gdrive')
end
|
6
6
|
|
7
7
|
def Gdrive.domain
|
@@ -9,7 +9,7 @@ module Mobilize
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Builds the owner's email address by joining the configured owner name
# and Gdrive.domain with "@".
def Gdrive.owner_email
  owner_name = Gdrive.config['owner']['name']
  [owner_name, Gdrive.domain].join("@")
end
|
14
14
|
|
15
15
|
def Gdrive.password(email)
|
@@ -29,44 +29,43 @@ module Mobilize
|
|
29
29
|
if email.nil?
|
30
30
|
Gdrive.config['workers']
|
31
31
|
else
|
32
|
-
Gdrive.workers.select{|w| w['
|
32
|
+
Gdrive.workers.select{|w| [w['name'],Gdrive.domain].join("@") == email}.first
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
36
|
# Returns one "name@domain" address per entry in Gdrive.workers.
def Gdrive.worker_emails
  Gdrive.workers.map do |worker|
    [worker['name'], Gdrive.domain].join("@")
  end
end
|
39
39
|
|
40
40
|
# Returns one "name@domain" address per entry in Gdrive.admins.
def Gdrive.admin_emails
  Gdrive.admins.map do |admin|
    [admin['name'], Gdrive.domain].join("@")
  end
end
|
43
43
|
|
44
44
|
#email management - used to make sure not too many emails get used at the same time
|
45
|
-
def Gdrive.
|
46
|
-
|
45
|
+
# Picks a worker account that no 'working' Resque job currently lists as its
# 'gdrive_slot', records it on the worker for +path+ via
# Mobilize::Resque.set_worker_args_by_path, and returns its email address.
# Workers are tried in random order. Returns false when every worker is in use.
def Gdrive.slot_worker_by_path(path)
  #slots already claimed by jobs that are currently working
  busy_slots = Mobilize::Resque.jobs('working').map { |job| job['gdrive_slot'] || nil }.compact
  Gdrive.workers.sort_by { rand }.each do |worker|
    slot_email = [worker['name'], Gdrive.domain].join("@")
    next if busy_slots.include?(slot_email)
    Mobilize::Resque.set_worker_args_by_path(path, {'gdrive_slot' => slot_email})
    return slot_email
  end
  #return false if none are available
  false
end
|
56
56
|
|
57
|
-
def Gdrive.root(
|
58
|
-
|
59
|
-
|
60
|
-
GoogleDrive.login(email,pw)
|
57
|
+
# Logs in to GoogleDrive as +gdrive_slot+, using the password that
# Gdrive.password returns for that same value, and returns the login result.
def Gdrive.root(gdrive_slot=nil)
  GoogleDrive.login(gdrive_slot, Gdrive.password(gdrive_slot))
end
|
62
61
|
|
63
|
-
def Gdrive.files(
|
64
|
-
root = Gdrive.root(
|
62
|
+
# Returns the files reported by the Gdrive.root session for +gdrive_slot+,
# forwarding +params+ unchanged to its files call.
def Gdrive.files(gdrive_slot=nil,params={})
  Gdrive.root(gdrive_slot).files(params)
end
|
67
66
|
|
68
|
-
def Gdrive.books(
|
69
|
-
Gdrive.files(
|
67
|
+
# Returns only those results of Gdrive.files whose class is exactly
# GoogleDrive::Spreadsheet.
def Gdrive.books(gdrive_slot=nil,params={})
  candidates = Gdrive.files(gdrive_slot, params)
  candidates.select do |entry|
    entry.class == GoogleDrive::Spreadsheet
  end
end
|
71
70
|
end
|
72
71
|
end
|
@@ -1,55 +1,34 @@
|
|
1
1
|
module Mobilize
|
2
|
-
|
3
|
-
def Gfile.
|
4
|
-
|
5
|
-
end
|
6
|
-
|
7
|
-
def Gfile.find_by_dst_id(dst_id,email=nil)
|
8
|
-
dst = Dataset.find(dst_id)
|
9
|
-
Gfile.find_by_title(dst.path,email)
|
10
|
-
end
|
11
|
-
|
12
|
-
def Gfile.add_admin_acl_by_dst_id(dst_id)
|
13
|
-
#adds admins and workers as writers
|
14
|
-
file = Gfile.find_by_dst_id(dst_id)
|
2
|
+
module Gfile
|
3
|
+
# Calls add_admin_acl on the file titled +path+ and returns true.
#
# path        - exact title of the file to look up.
# gdrive_slot - optional account forwarded to Gfile.find_by_path; defaults to
#               nil, which is also the default Gdrive.files uses.
#
# The lookup previously passed a single argument to the two-argument
# Gfile.find_by_path, which raises ArgumentError before any work is done.
# Raises "File <path> not found" when the lookup returns nothing, matching
# Gfile.update_acl_by_path.
def Gfile.add_admin_acl_by_path(path,gdrive_slot=nil)
  file = Gfile.find_by_path(path,gdrive_slot)
  raise "File #{path} not found" unless file
  file.add_admin_acl
  return true
end
|
18
8
|
|
19
|
-
def Gfile.
|
20
|
-
file = Gfile.
|
21
|
-
file.add_admin_acl
|
22
|
-
return true
|
23
|
-
end
|
24
|
-
|
25
|
-
def Gfile.add_worker_acl_by_title(title)
|
26
|
-
file = Gfile.find_by_title(title)
|
9
|
+
# Calls add_worker_acl on the file titled +path+ and returns true.
#
# path        - exact title of the file to look up.
# gdrive_slot - optional account forwarded to Gfile.find_by_path; defaults to
#               nil, which is also the default Gdrive.files uses.
#
# The lookup previously passed a single argument to the two-argument
# Gfile.find_by_path, which raises ArgumentError before any work is done.
# Raises "File <path> not found" when the lookup returns nothing, matching
# Gfile.update_acl_by_path.
def Gfile.add_worker_acl_by_path(path,gdrive_slot=nil)
  file = Gfile.find_by_path(path,gdrive_slot)
  raise "File #{path} not found" unless file
  file.add_worker_acl
  return true
end
|
30
14
|
|
31
|
-
def Gfile.
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
def Gfile.update_acl_by_title(title,email,role="writer",edit_email=nil)
|
37
|
-
file = Gfile.find_by_title(title,edit_email)
|
38
|
-
raise "File #{title} not found" unless file
|
39
|
-
file.update_acl(email,role)
|
15
|
+
# Looks up the file titled +path+ and calls update_acl(gdrive_slot, role) on it,
# returning that call's result. Raises "File <path> not found" when the lookup
# returns nothing.
# NOTE(review): target_email is handed to Gfile.find_by_path in its gdrive_slot
# position, while gdrive_slot is what update_acl receives - the two parameter
# names look swapped relative to how they are used; confirm against callers.
def Gfile.update_acl_by_path(path,gdrive_slot,role="writer",target_email=nil)
  found = Gfile.find_by_path(path, target_email)
  unless found
    raise "File #{path} not found"
  end
  found.update_acl(gdrive_slot, role)
end
|
41
20
|
|
42
|
-
def Gfile.
|
43
|
-
|
21
|
+
# Returns the first file whose title exactly matches +path+, or nil when the
# query returns no files.
#
# path        - exact title to search for.
# gdrive_slot - account forwarded to Gdrive.files. Defaults to nil so the
#               signature lines up with Gdrive.files/Gdrive.books and so the
#               one-argument calls made elsewhere in this module no longer
#               raise ArgumentError.
def Gfile.find_by_path(path,gdrive_slot=nil)
  Gdrive.files(gdrive_slot,{"title"=>path,"title-exact"=>"true"}).first
end
|
45
24
|
|
46
|
-
def Gfile.
|
47
|
-
|
25
|
+
# Reads the file named by the first param of the task stored at +task_path+.
#
# Returns false when Gdrive.slot_worker_by_path has no free account;
# otherwise returns the result of calling read on the located file.
#
# Two defects are corrected here, bringing this in line with
# Gsheet.read_by_task_path:
# * the slot was reserved with `t.path` before `t` had been assigned, which
#   raises NameError; the incoming task_path is used instead.
# * Task.where returns a query result, not a task, so `.first` is needed
#   before asking for params.
def Gfile.read_by_task_path(task_path)
  #reserve gdrive_slot account for read
  gdrive_slot = Gdrive.slot_worker_by_path(task_path)
  return false unless gdrive_slot
  t = Task.where(:path=>task_path).first
  gfile_path = t.params.first
  Gfile.find_by_path(gfile_path,gdrive_slot).read
end
|
49
|
-
|
50
|
-
def Gfile.read_by_job_id(job_id)
|
51
|
-
j = Job.find(job_id)
|
52
|
-
end
|
53
|
-
|
54
33
|
end
|
55
34
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Mobilize
  # Stores and retrieves zlib-compressed strings in a Mongo GridFS,
  # keyed by the gridsafe form of a path.
  module Gridfs
    # Returns the 'gridfs' section of the Base configuration.
    def Gridfs.config
      Base.config('gridfs')
    end

    # Builds a Mongo::GridFileSystem for the database and first host listed in
    # Mongoid's 'default' session. A new connection is opened on every call.
    def Gridfs.grid
      session = ::Mongoid.configure.sessions['default']
      database_name = session['database']
      host,port = session['hosts'].first.split(":")
      return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
    end

    # Returns the inflated content stored under +path+, or nil when anything
    # goes wrong while opening, reading or inflating it.
    def Gridfs.read(path)
      begin
        zs=Gridfs.grid.open(path.gridsafe,'r').read
        return ::Zlib::Inflate.inflate(zs)
      rescue
        return nil
      end
    end

    # Compresses +string+ and stores it under +path+, but only when the content
    # differs from what is already stored. Always returns true.
    #
    # Raises "compressed string too large for Gridfs write" when the compressed
    # size exceeds config['max_compressed_write_size'].
    #
    # The change check compares uncompressed content with uncompressed content.
    # It used to compare the inflated stored value against the freshly
    # compressed string, which never match, so every call wrote a new version.
    def Gridfs.write(path,string)
      zs = ::Zlib::Deflate.deflate(string)
      raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
      #Gridfs.read applies gridsafe itself, so the raw path is passed
      current = Gridfs.read(path)
      #inflate returns binary data, so compare bytes regardless of encoding
      unchanged = !current.nil? &&
        current.dup.force_encoding(Encoding::BINARY) == string.dup.force_encoding(Encoding::BINARY)
      #write a new version when there is a change
      unless unchanged
        Gridfs.grid.open(path.gridsafe,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
      end
      return true
    end

    # Deletes the entry stored under +path+. Returns true on success and nil
    # when the delete raises.
    def Gridfs.delete(path)
      begin
        Gridfs.grid.delete(path.gridsafe)
        return true
      rescue
        return nil
      end
    end
  end
end
|
@@ -1,122 +1,71 @@
|
|
1
1
|
module Mobilize
|
2
2
|
module Gsheet
|
3
3
|
|
4
|
-
def Gsheet.
|
5
|
-
|
4
|
+
# Returns the 'gsheet' section of the Base configuration.
def Gsheet.config
  return Base.config('gsheet')
end
|
7
7
|
|
8
|
-
def Gsheet.
|
9
|
-
|
10
|
-
sheet.to_tsv
|
8
|
+
# Returns the 'max_cells' entry of the gsheet configuration.
def Gsheet.max_cells
  settings = Gsheet.config
  settings['max_cells']
end
|
12
11
|
|
13
|
-
def Gsheet.write(
|
14
|
-
sheet = Gsheet.
|
12
|
+
# Finds or creates the sheet at +path+ using +gdrive_slot+, then writes +tsv+
# to it and returns the result of that write call.
def Gsheet.write(path,tsv,gdrive_slot)
  Gsheet.find_or_create_by_path(path, gdrive_slot).write(tsv)
end
|
17
16
|
|
18
|
-
def Gsheet.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
sheets
|
17
|
+
# Splits +path+ on "/" into a book title and a sheet name, looks up the first
# book with exactly that title and returns its worksheet of that name.
# Returns nil when no such book exists.
def Gsheet.find_by_path(path,gdrive_slot)
  book_title, tab_title = path.split("/")
  matches = Gdrive.books(gdrive_slot, {"title"=>book_title,"title-exact"=>"true"})
  workbook = matches.first
  if workbook
    workbook.worksheet_by_title(tab_title)
  end
end
|
24
22
|
|
25
|
-
def Gsheet.
|
26
|
-
|
27
|
-
book = Gbook.
|
28
|
-
|
29
|
-
sheet = book.worksheets.select{|w| w.title==sheet_title}.first
|
23
|
+
# Returns the worksheet at +path+ ("book/sheet"), creating the book and/or the
# worksheet when missing. A new worksheet is added with +rows+ x +cols+ and a
# creation message is emitted via oputs. A 'gsheet' Dataset for +path+ is
# found or created on every call.
def Gsheet.find_or_create_by_path(path,gdrive_slot,rows=100,cols=20)
  book_title, tab_title = path.split("/")
  workbook = Gbook.find_or_create_by_path(book_title, gdrive_slot)
  worksheet = workbook.worksheet_by_title(tab_title)
  if worksheet.nil?
    worksheet = workbook.add_worksheet(tab_title, rows, cols)
    created_msg = "Created gsheet #{path} at #{Time.now.utc.to_s}"
    created_msg.oputs
  end
  Dataset.find_or_create_by_handler_and_path("gsheet", path)
  worksheet
end
|
37
34
|
|
38
|
-
def Gsheet.
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
r.find_or_create_gbook_by_title(book_title,email)
|
46
|
-
#add admin write access
|
47
|
-
sheet = Gsheet.find_or_create_by_name(name)
|
48
|
-
sheet_title = nil
|
49
|
-
return sheet
|
50
|
-
end
|
51
|
-
|
52
|
-
def Gsheet.read_by_dst_id(dst_id,email=nil)
|
53
|
-
dst = Dataset.find(dst_id)
|
54
|
-
name = dst.name
|
55
|
-
sheet = Gsheet.find_or_create_by_name(name,email)
|
56
|
-
output = sheet.to_tsv
|
57
|
-
return output
|
58
|
-
end
|
59
|
-
|
60
|
-
def Gsheet.read_by_job_id(job_id)
|
61
|
-
j = Job.find(job_id)
|
62
|
-
#reserve email account for read
|
63
|
-
email = Gdrive.get_worker_email_by_mongo_id(job_id)
|
64
|
-
return false unless email
|
65
|
-
#pull tsv from cache
|
66
|
-
j.dataset_array.first.read_cache
|
35
|
+
# Returns the tsv of the sheet named by the first param of the task stored at
# +task_path+, or false when Gdrive.slot_worker_by_path has no free account.
def Gsheet.read_by_task_path(task_path)
  #reserve gdrive_slot account for read
  gdrive_slot = Gdrive.slot_worker_by_path(task_path)
  return false unless gdrive_slot
  task = Task.where(:path=>task_path).first
  sheet_path = task.params.first
  sheet = Gsheet.find_by_path(sheet_path, gdrive_slot)
  sheet.to_tsv
end
|
68
43
|
|
69
|
-
def Gsheet.
|
70
|
-
|
71
|
-
#see if this is a specific cell
|
72
|
-
name = dst.name
|
73
|
-
return false unless email
|
74
|
-
#create temp tab, write data to it, checksum it against the source
|
75
|
-
temp_sheet = Gsheet.find_or_create_by_name("#{name}_temp")
|
76
|
-
temp_sheet.write(tsv)
|
77
|
-
#delete current sheet, replace it with temp one
|
78
|
-
sheet = Gsheet.find_or_create_by_name(dst.name)
|
79
|
-
title = sheet.title
|
80
|
-
#http
|
81
|
-
sheet.delete
|
82
|
-
begin
|
83
|
-
temp_sheet.rename(title)
|
84
|
-
rescue
|
85
|
-
#need this because sometimes it gets confused and tries to rename twice
|
86
|
-
end
|
87
|
-
"Write successful for #{write_name}".oputs
|
88
|
-
return true
|
89
|
-
end
|
90
|
-
|
91
|
-
def Gsheet.write_by_job_id(job_id)
|
92
|
-
j = Job.find(job_id)
|
93
|
-
r = j.requestor
|
94
|
-
dest_name = if j.destination.split("/").length==1
|
95
|
-
"#{r.jobspec_title}#{"/"}#{j.destination}"
|
96
|
-
else
|
97
|
-
j.destination
|
98
|
-
end
|
99
|
-
sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheet',dest_name)
|
100
|
-
sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
|
101
|
-
email = Gdrive.get_worker_email_by_mongo_id(job_id)
|
44
|
+
# Writes the cached stdout of a source task into the target sheet named by the
# task stored at +task_path+.
#
# The task's first param names the source ("task" or "job/task"); its second
# param is the target sheet path. The data is first written to a temp sheet in
# a book titled with the gridsafe form of +task_path+, checked with
# check_and_fix, merged into the target sheet, and the temp book is deleted.
#
# Returns false when no gdrive slot is available, true otherwise.
def Gsheet.write_by_task_path(task_path)
  gdrive_slot = Gdrive.slot_worker_by_path(task_path)
  #return false if there are no emails available
  return false unless gdrive_slot
  task = Task.where(:path=>task_path).first
  source = task.params.first
  target_path = task.params.second
  #a source without "/" refers to a task in the same job
  job_part, task_part = source.index("/") ? source.split("/") : [nil, source]
  source_task_path = "#{task.job.runner.path}/#{job_part || task.job.name}/#{task_part}"
  source_task = Task.where(:path=>source_task_path).first
  tsv = source_task.stdout_dataset.read_cache
  tab_title = target_path.split("/").last
  temp_path = [task_path.gridsafe, tab_title].join("/")
  #stage the data in a temp sheet and verify it before touching the target
  staging_sheet = Gsheet.find_or_create_by_path(temp_path, gdrive_slot)
  staging_sheet.write(tsv)
  staging_sheet.check_and_fix(tsv)
  destination_sheet = Gsheet.find_or_create_by_path(target_path, gdrive_slot)
  destination_sheet.merge(staging_sheet)
  #delete the temp sheet's book
  staging_sheet.spreadsheet.delete
  "Write successful for #{target_path}".oputs
  true
end
|
122
71
|
end
|