mobilize-base 1.36 → 1.293

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/README.md +666 -1
  2. data/lib/mobilize-base.rb +1 -12
  3. data/lib/mobilize-base/extensions/array.rb +3 -8
  4. data/lib/mobilize-base/extensions/google_drive/acl.rb +1 -1
  5. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +1 -2
  6. data/lib/mobilize-base/extensions/google_drive/file.rb +37 -11
  7. data/lib/mobilize-base/extensions/string.rb +6 -11
  8. data/lib/mobilize-base/extensions/yaml.rb +7 -10
  9. data/lib/mobilize-base/handlers/gbook.rb +38 -25
  10. data/lib/mobilize-base/handlers/gdrive.rb +4 -20
  11. data/lib/mobilize-base/handlers/gfile.rb +10 -64
  12. data/lib/mobilize-base/handlers/gridfs.rb +24 -19
  13. data/lib/mobilize-base/handlers/gsheet.rb +29 -45
  14. data/lib/mobilize-base/handlers/resque.rb +10 -17
  15. data/lib/mobilize-base/jobtracker.rb +196 -22
  16. data/lib/mobilize-base/models/job.rb +77 -107
  17. data/lib/mobilize-base/models/runner.rb +122 -36
  18. data/lib/mobilize-base/models/stage.rb +37 -18
  19. data/lib/mobilize-base/tasks.rb +13 -50
  20. data/lib/mobilize-base/version.rb +1 -1
  21. data/lib/samples/gdrive.yml +0 -15
  22. data/lib/samples/gridfs.yml +3 -0
  23. data/lib/samples/gsheet.yml +4 -4
  24. data/lib/samples/jobtracker.yml +6 -0
  25. data/mobilize-base.gemspec +3 -3
  26. data/test/base_job_rows.yml +11 -0
  27. data/test/mobilize-base_test.rb +106 -0
  28. data/test/test_base_1.yml +3 -0
  29. data/test/test_helper.rb +0 -155
  30. metadata +24 -36
  31. data/lib/mobilize-base/extensions/time.rb +0 -20
  32. data/lib/mobilize-base/helpers/job_helper.rb +0 -54
  33. data/lib/mobilize-base/helpers/jobtracker_helper.rb +0 -143
  34. data/lib/mobilize-base/helpers/runner_helper.rb +0 -83
  35. data/lib/mobilize-base/helpers/stage_helper.rb +0 -38
  36. data/lib/samples/gfile.yml +0 -9
  37. data/test/fixtures/base1_stage1.in.yml +0 -10
  38. data/test/fixtures/integration_expected.yml +0 -25
  39. data/test/fixtures/integration_jobs.yml +0 -12
  40. data/test/fixtures/is_due.yml +0 -97
  41. data/test/integration/mobilize-base_test.rb +0 -57
  42. data/test/unit/mobilize-base_test.rb +0 -33
data/lib/mobilize-base.rb CHANGED
@@ -3,7 +3,6 @@ require "mobilize-base/extensions/array"
3
3
  require "mobilize-base/extensions/hash"
4
4
  require "mobilize-base/extensions/object"
5
5
  require "mobilize-base/extensions/string"
6
- require "mobilize-base/extensions/time"
7
6
  require "mobilize-base/extensions/yaml"
8
7
  #this is the base of the mobilize object, any methods that should be
9
8
  #made available application-wide go over here
@@ -17,9 +16,6 @@ module Mobilize
17
16
  ENV['PWD']
18
17
  end
19
18
  end
20
- def Base.home_dir
21
- File.expand_path('..',File.dirname(__FILE__))
22
- end
23
19
  def Base.config_dir
24
20
  ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/"
25
21
  end
@@ -60,25 +56,18 @@ module Mobilize
60
56
  raise "Could not find #{log_dir} folder for logs"
61
57
  end
62
58
  end
63
- def Base.handlers
64
- Dir.entries(File.dirname(__FILE__) + "/mobilize-base/handlers").select{|e| e.ends_with?(".rb")}.map{|e| e.split(".").first}
65
- end
66
59
  end
67
60
  end
68
61
  mongoid_config_path = "#{Mobilize::Base.root}/#{Mobilize::Base.config_dir}mongoid.yml"
69
62
  if File.exists?(mongoid_config_path)
63
+ require 'mongo'
70
64
  require 'mongoid'
71
- require 'mongoid-grid_fs'
72
65
  Mongoid.load!(mongoid_config_path, Mobilize::Base.env)
73
66
  require "mobilize-base/models/dataset"
74
67
  require "mobilize-base/models/user"
75
- require "mobilize-base/helpers/runner_helper"
76
68
  require "mobilize-base/models/runner"
77
- require "mobilize-base/helpers/job_helper"
78
69
  require "mobilize-base/models/job"
79
- require "mobilize-base/helpers/stage_helper"
80
70
  require "mobilize-base/models/stage"
81
-
82
71
  end
83
72
  require 'google_drive'
84
73
  require 'resque'
@@ -12,16 +12,11 @@ class Array
12
12
  return self.inject{|sum,x| sum + x }
13
13
  end
14
14
  def hash_array_to_tsv
15
- ha = self
16
- if ha.first.nil? or ha.first.class!=Hash
15
+ if self.first.nil? or self.first.class!=Hash
17
16
  return ""
18
17
  end
19
- max_row_length = ha.map{|h| h.keys.length}.max
20
- header_keys = ha.select{|h| h.keys.length==max_row_length}.first.keys
21
- header = header_keys.join("\t")
22
- rows = ha.map do |r|
23
- header_keys.map{|k| r[k]}.join("\t")
24
- end
18
+ header = self.first.keys.join("\t")
19
+ rows = self.map{|r| r.values.join("\t")}
25
20
  ([header] + rows).join("\n")
26
21
  end
27
22
  end
@@ -14,7 +14,7 @@ module GoogleDrive
14
14
  def push(entry)
15
15
  #do not send email notifications
16
16
  entry = AclEntry.new(entry) if entry.is_a?(Hash)
17
- url_suffix = ((@acls_feed_url.index("?") ? "&" : "?") + "send-notification-emails=false")
17
+ url_suffix = "?send-notification-emails=false"
18
18
  header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
19
19
  doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
20
20
  entry.params = entry_to_params(doc.root)
@@ -8,7 +8,7 @@ module GoogleDrive
8
8
  attempts = 0
9
9
  sleep_time = nil
10
10
  #try 5 times to make the call
11
- while (response.nil? or response.code.starts_with?("5")) and attempts < Mobilize::Gdrive.max_api_retries
11
+ while (response.nil? or response.code.starts_with?("5")) and attempts < 20
12
12
  #instantiate http object, set params
13
13
  http = @proxy.new(uri.host, uri.port)
14
14
  http.use_ssl = true
@@ -16,7 +16,6 @@ module GoogleDrive
16
16
  #set 600 to allow for large downloads
17
17
  http.read_timeout = 600
18
18
  response = begin
19
- puts "#{Time.now.utc} Gdrive API #{method.to_s}: #{uri.to_s} #{extra_header.to_s}"
20
19
  clf.http_call(http, method, uri, data, extra_header, auth)
21
20
  rescue
22
21
  #timeouts etc.
@@ -3,16 +3,42 @@ module GoogleDrive
3
3
 
4
4
  def add_worker_acl
5
5
  f = self
6
- email = "#{Mobilize::Gdrive.worker_group_name}@#{Mobilize::Gdrive.domain}"
7
- f.update_acl(email,"group")
6
+ return true if f.has_worker_acl?
7
+ Mobilize::Gdrive.worker_emails.each do |a|
8
+ f.update_acl(a)
9
+ end
8
10
  end
9
11
 
10
12
  def add_admin_acl
11
13
  f = self
12
- email = "#{Mobilize::Gdrive.admin_group_name}@#{Mobilize::Gdrive.domain}"
13
- f.update_acl(email,"group")
14
- #if adding acl ,must currently add workers as well
15
- f.add_worker_acl
14
+ #admin includes workers
15
+ return true if f.has_admin_acl?
16
+ accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
17
+ accounts.each do |email|
18
+ f.update_acl(email)
19
+ end
20
+ end
21
+
22
+ def has_admin_acl?
23
+ f = self
24
+ curr_emails = f.acls.map{|a| a.scope}.sort
25
+ admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
26
+ if (curr_emails & admin_emails) == admin_emails
27
+ return true
28
+ else
29
+ return false
30
+ end
31
+ end
32
+
33
+ def has_worker_acl?
34
+ f = self
35
+ curr_emails = f.acls.map{|a| a.scope}.sort
36
+ worker_emails = Mobilize::Gdrive.worker_emails.sort
37
+ if (curr_emails & worker_emails) == worker_emails
38
+ return true
39
+ else
40
+ return false
41
+ end
16
42
  end
17
43
 
18
44
  def read(user_name)
@@ -25,7 +51,7 @@ module GoogleDrive
25
51
  end
26
52
  end
27
53
 
28
- def update_acl(email,scope_type="user",role="writer")
54
+ def update_acl(email,role="writer")
29
55
  f = self
30
56
  #need these flags for HTTP retries
31
57
  #create req_acl hash to add to current acl
@@ -38,16 +64,16 @@ module GoogleDrive
38
64
  if entry.role != role
39
65
  #for whatever reason
40
66
  f.acl.delete(entry)
41
- f.acl.push({:scope_type=>scope_type,:scope=>email,:role=>role})
67
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
42
68
  end
43
69
  elsif !['reader','writer','owner'].include?(role)
44
70
  raise "Invalid role #{role}"
45
71
  end
46
72
  else
47
73
  begin
48
- f.acl.push({:scope_type=>scope_type,:scope=>email,:role=>role})
74
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
49
75
  rescue => exc
50
- raise exc unless exc.to_s.index("already has access")
76
+ raise exc unless exc.to_s.index("user already has access")
51
77
  end
52
78
  end
53
79
  return true
@@ -58,7 +84,7 @@ module GoogleDrive
58
84
  end
59
85
  def acl_entry(email)
60
86
  f = self
61
- f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
87
+ f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
62
88
  end
63
89
  def entry_hash
64
90
  f = self
@@ -11,18 +11,13 @@ class String
11
11
  def opp
12
12
  pp self
13
13
  end
14
- def to_md5
15
- Digest::MD5.hexdigest(self)
16
- end
17
14
  def bash(except=true)
18
15
  str = self
19
- out_str,err_str = []
20
- status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
21
- out_str = stdout.read
22
- err_str = stderr.read
23
- end
24
- exit_status = status.exitstatus
25
- raise err_str if (exit_status !=0 and except==true)
16
+ pid,stdin,stdout,stderr = Open4.popen4(str)
17
+ pid,stdin = [nil,nil]
18
+ err_str = stderr.read if stderr
19
+ out_str = stdout.read if stdout
20
+ raise err_str if (err_str.length>0 and except==true)
26
21
  return out_str
27
22
  end
28
23
  def escape_regex
@@ -43,7 +38,7 @@ class String
43
38
  return v if v.to_s.strip==""
44
39
  #normalize numbers by removing '$', '%', ',', ' '
45
40
  vnorm = v.to_s.norm_num
46
- vdigits = vnorm.split(".").last.to_s.length
41
+ vdigits = vnorm.split(".").last.length
47
42
  if vnorm.to_f.to_s=="Infinity"
48
43
  #do nothing
49
44
  elsif ("%.#{vdigits}f" % vnorm.to_f.to_s)==vnorm
@@ -10,16 +10,13 @@ module YAML
10
10
  #make sure urls have their colon spaces fixed
11
11
  result_hash={}
12
12
  easy_hash.each do |k,v|
13
- #fucking yaml puts spaces in front of the key
14
- #or something
15
- strip_k = k.strip
16
- result_hash[strip_k] = if v.class==String
17
- v.gsub(": //","://")
18
- elsif v.class==Array
19
- v.map{|av| av.to_s.gsub(": //","://")}
20
- else
21
- v
22
- end
13
+ result_hash[k] = if v.class==String
14
+ v.gsub(": //","://")
15
+ elsif v.class==Array
16
+ v.map{|av| av.to_s.gsub(": //","://")}
17
+ else
18
+ v
19
+ end
23
20
  end
24
21
  return result_hash
25
22
  end
@@ -14,44 +14,57 @@ module Mobilize
14
14
  dst = Dataset.find_by_handler_and_path('gbook',path)
15
15
  if dst and dst.http_url.to_s.length>0
16
16
  book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
17
- if book
18
- return book
19
- else
20
- raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
17
+ begin
18
+ #doesn't count if it's deleted
19
+ #or if its name can't be accessed
20
+ if book.entry_hash[:deleted]
21
+ book = nil
22
+ else
23
+ return book
24
+ end
25
+ rescue
26
+ #use regular process if book entry hash fails
27
+ book = nil
21
28
  end
22
29
  end
23
- #try to find books by title
24
30
  books = Gbook.find_all_by_path(path,gdrive_slot)
25
- #sort by publish date; if entry hash retrieval fails (as it does)
26
- #assume the book was published now
27
- book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
28
- if book
29
- #we know dataset will have blank url since it wasn't picked up above
30
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
31
- api_url = book.human_url.split("&").first
32
- dst.update_attributes(:http_url=>api_url)
31
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
32
+ book = nil
33
+ if books.length>1 and dst.http_url.to_s.length>0
34
+ #some idiot process or malicious user created a duplicate book.
35
+ #Fix by deleting all but the one with dst entry's key
36
+ dkey = dst.http_url.split("key=").last
37
+ books.each do |b|
38
+ bkey = b.resource_id.split(":").last
39
+ if bkey == dkey
40
+ book = b
41
+ dst.update_attributes(:http_url=>book.human_url)
42
+ else
43
+ #delete the invalid book
44
+ b.delete
45
+ ("Deleted duplicate book #{path}").oputs
46
+ end
47
+ end
48
+ else
49
+ #If it's a new dst or if there are multiple books
50
+ #take the first
51
+ book = books.first
52
+ dst.update_attributes(:http_url=>book.human_url) if book
33
53
  end
34
54
  return book
35
55
  end
36
-
37
56
  def Gbook.find_or_create_by_path(path,gdrive_slot)
38
57
  book = Gbook.find_by_path(path,gdrive_slot)
58
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
39
59
  if book.nil?
40
60
  #always use owner email to make sure all books are owned by owner account
41
61
  book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
42
62
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
43
- #check to make sure the dataset has a blank url; if not, error out
44
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
45
- if dst.http_url.to_s.length>0
46
- #add acls to book regardless
47
- book.add_admin_acl
48
- raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
49
- else
50
- api_url = book.human_url.split("&").first
51
- dst.update_attributes(:http_url=>api_url)
52
- book.add_admin_acl
53
- end
54
63
  end
64
+ #always make sure book dataset http URL is up to date
65
+ #and that book has admin acl
66
+ dst.update_attributes(:http_url=>book.human_url)
67
+ book.add_admin_acl
55
68
  return book
56
69
  end
57
70
  end
@@ -25,18 +25,6 @@ module Mobilize
25
25
  end
26
26
  end
27
27
 
28
- def Gdrive.max_api_retries
29
- Gdrive.config['max_api_retries']
30
- end
31
-
32
- def Gdrive.max_file_write_retries
33
- Gdrive.config['max_file_write_retries']
34
- end
35
-
36
- def Gdrive.file_write_retry_delay
37
- Gdrive.config['file_write_retry_delay']
38
- end
39
-
40
28
  def Gdrive.admins
41
29
  Gdrive.config['admins']
42
30
  end
@@ -49,18 +37,14 @@ module Mobilize
49
37
  end
50
38
  end
51
39
 
52
- def Gdrive.worker_group_name
53
- Gdrive.config['worker_group_name']
54
- end
55
-
56
- def Gdrive.admin_group_name
57
- Gdrive.config['admin_group_name']
58
- end
59
-
60
40
  def Gdrive.worker_emails
61
41
  Gdrive.workers.map{|w| [w['name'],Gdrive.domain].join("@")}
62
42
  end
63
43
 
44
+ def Gdrive.admin_emails
45
+ Gdrive.admins.map{|w| [w['name'],Gdrive.domain].join("@")}
46
+ end
47
+
64
48
  #email management - used to make sure not too many emails get used at the same time
65
49
  def Gdrive.slot_worker_by_path(path)
66
50
  working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
@@ -1,23 +1,9 @@
1
1
  module Mobilize
2
2
  module Gfile
3
-
4
- def Gfile.config
5
- Base.config('gfile')
6
- end
7
-
8
- def Gfile.max_length
9
- Gfile.config['max_length']
10
- end
11
-
12
3
  def Gfile.path_to_dst(path,stage_path,gdrive_slot)
13
- s = Stage.where(:path=>stage_path).first
14
- params = s.params
15
- target_path = params['target']
16
- #if this is the target, it doesn't have to exist already
17
- is_target = true if path == target_path
18
4
  #don't need the ://
19
5
  path = path.split("://").last if path.index("://")
20
- if is_target or Gfile.find_by_path(path)
6
+ if Gfile.find_by_path(path)
21
7
  handler = "gfile"
22
8
  Dataset.find_or_create_by_url("#{handler}://#{path}")
23
9
  else
@@ -44,10 +30,15 @@ module Mobilize
44
30
  :content_type=>"test/plain",
45
31
  :convert=>false)
46
32
  file.add_admin_acl
33
+ #make sure user is owner or can edit
34
+ u = User.where(:name=>user_name).first
35
+ entry = file.acl_entry(u.email)
36
+ unless entry and ['writer','owner'].include?(entry.role)
37
+ file.update_acl(u.email)
38
+ end
47
39
  #update http url for file
48
40
  dst = Dataset.find_by_handler_and_path("gfile",dst_path)
49
- api_url = file.human_url.split("&").first
50
- dst.update_attributes(:http_url=>api_url)
41
+ dst.update_attributes(:http_url=>file.human_url)
51
42
  true
52
43
  end
53
44
 
@@ -66,7 +57,7 @@ module Mobilize
66
57
  def Gfile.update_acl_by_path(path,gdrive_slot,role="writer",target_email=nil)
67
58
  file = Gfile.find_by_path(path,target_email)
68
59
  raise "File #{path} not found" unless file
69
- file.update_acl(gdrive_slot,"user",role)
60
+ file.update_acl(gdrive_slot,role)
70
61
  end
71
62
 
72
63
  def Gfile.find_by_path(path)
@@ -95,55 +86,10 @@ module Mobilize
95
86
  #always make sure dataset http URL is up to date
96
87
  #and that it has admin acl
97
88
  if file
98
- api_url = file.human_url.split("&").first
99
- dst.update_attributes(:http_url=>api_url)
89
+ dst.update_attributes(:http_url=>file.human_url)
100
90
  file.add_admin_acl
101
91
  end
102
92
  return file
103
93
  end
104
-
105
- def Gfile.write_by_stage_path(stage_path)
106
- gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
107
- #return blank response if there are no slots available
108
- return nil unless gdrive_slot
109
- s = Stage.where(:path=>stage_path).first
110
- u = s.job.runner.user
111
- retries = 0
112
- stdout,stderr = []
113
- while stdout.nil? and stderr.nil? and retries < Gdrive.max_file_write_retries
114
- begin
115
- #get tsv to write from stage
116
- source = s.sources(gdrive_slot).first
117
- raise "Need source for gfile write" unless source
118
- tsv = source.read(u.name,gdrive_slot)
119
- raise "No data source found for #{source.url}" unless tsv.to_s.length>0
120
- if tsv.length > Gfile.max_length
121
- raise "Too much data; you have #{tsv.length.to_s}, max is #{Gfile.max_length.to_s}"
122
- end
123
- stdout = if tsv.length == 0
124
- #soft error; no data to write. Stage will complete.
125
- "Write skipped for #{s.target.url}"
126
- else
127
- Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot)
128
- #update status
129
- "Write successful for #{s.target.url}"
130
- end
131
- Gdrive.unslot_worker_by_path(stage_path)
132
- stderr = nil
133
- s.update_status(stdout)
134
- signal = 0
135
- rescue => exc
136
- if retries < Gdrive.max_file_write_retries
137
- retries +=1
138
- sleep Gdrive.file_write_retry_delay
139
- else
140
- stdout = nil
141
- stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
142
- signal = 500
143
- end
144
- end
145
- end
146
- return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
147
- end
148
94
  end
149
95
  end