mobilize-base 1.36 → 1.293

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/README.md +666 -1
  2. data/lib/mobilize-base.rb +1 -12
  3. data/lib/mobilize-base/extensions/array.rb +3 -8
  4. data/lib/mobilize-base/extensions/google_drive/acl.rb +1 -1
  5. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +1 -2
  6. data/lib/mobilize-base/extensions/google_drive/file.rb +37 -11
  7. data/lib/mobilize-base/extensions/string.rb +6 -11
  8. data/lib/mobilize-base/extensions/yaml.rb +7 -10
  9. data/lib/mobilize-base/handlers/gbook.rb +38 -25
  10. data/lib/mobilize-base/handlers/gdrive.rb +4 -20
  11. data/lib/mobilize-base/handlers/gfile.rb +10 -64
  12. data/lib/mobilize-base/handlers/gridfs.rb +24 -19
  13. data/lib/mobilize-base/handlers/gsheet.rb +29 -45
  14. data/lib/mobilize-base/handlers/resque.rb +10 -17
  15. data/lib/mobilize-base/jobtracker.rb +196 -22
  16. data/lib/mobilize-base/models/job.rb +77 -107
  17. data/lib/mobilize-base/models/runner.rb +122 -36
  18. data/lib/mobilize-base/models/stage.rb +37 -18
  19. data/lib/mobilize-base/tasks.rb +13 -50
  20. data/lib/mobilize-base/version.rb +1 -1
  21. data/lib/samples/gdrive.yml +0 -15
  22. data/lib/samples/gridfs.yml +3 -0
  23. data/lib/samples/gsheet.yml +4 -4
  24. data/lib/samples/jobtracker.yml +6 -0
  25. data/mobilize-base.gemspec +3 -3
  26. data/test/base_job_rows.yml +11 -0
  27. data/test/mobilize-base_test.rb +106 -0
  28. data/test/test_base_1.yml +3 -0
  29. data/test/test_helper.rb +0 -155
  30. metadata +24 -36
  31. data/lib/mobilize-base/extensions/time.rb +0 -20
  32. data/lib/mobilize-base/helpers/job_helper.rb +0 -54
  33. data/lib/mobilize-base/helpers/jobtracker_helper.rb +0 -143
  34. data/lib/mobilize-base/helpers/runner_helper.rb +0 -83
  35. data/lib/mobilize-base/helpers/stage_helper.rb +0 -38
  36. data/lib/samples/gfile.yml +0 -9
  37. data/test/fixtures/base1_stage1.in.yml +0 -10
  38. data/test/fixtures/integration_expected.yml +0 -25
  39. data/test/fixtures/integration_jobs.yml +0 -12
  40. data/test/fixtures/is_due.yml +0 -97
  41. data/test/integration/mobilize-base_test.rb +0 -57
  42. data/test/unit/mobilize-base_test.rb +0 -33
data/lib/mobilize-base.rb CHANGED
@@ -3,7 +3,6 @@ require "mobilize-base/extensions/array"
3
3
  require "mobilize-base/extensions/hash"
4
4
  require "mobilize-base/extensions/object"
5
5
  require "mobilize-base/extensions/string"
6
- require "mobilize-base/extensions/time"
7
6
  require "mobilize-base/extensions/yaml"
8
7
  #this is the base of the mobilize object, any methods that should be
9
8
  #made available application-wide go over here
@@ -17,9 +16,6 @@ module Mobilize
17
16
  ENV['PWD']
18
17
  end
19
18
  end
20
- def Base.home_dir
21
- File.expand_path('..',File.dirname(__FILE__))
22
- end
23
19
  def Base.config_dir
24
20
  ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/"
25
21
  end
@@ -60,25 +56,18 @@ module Mobilize
60
56
  raise "Could not find #{log_dir} folder for logs"
61
57
  end
62
58
  end
63
- def Base.handlers
64
- Dir.entries(File.dirname(__FILE__) + "/mobilize-base/handlers").select{|e| e.ends_with?(".rb")}.map{|e| e.split(".").first}
65
- end
66
59
  end
67
60
  end
68
61
  mongoid_config_path = "#{Mobilize::Base.root}/#{Mobilize::Base.config_dir}mongoid.yml"
69
62
  if File.exists?(mongoid_config_path)
63
+ require 'mongo'
70
64
  require 'mongoid'
71
- require 'mongoid-grid_fs'
72
65
  Mongoid.load!(mongoid_config_path, Mobilize::Base.env)
73
66
  require "mobilize-base/models/dataset"
74
67
  require "mobilize-base/models/user"
75
- require "mobilize-base/helpers/runner_helper"
76
68
  require "mobilize-base/models/runner"
77
- require "mobilize-base/helpers/job_helper"
78
69
  require "mobilize-base/models/job"
79
- require "mobilize-base/helpers/stage_helper"
80
70
  require "mobilize-base/models/stage"
81
-
82
71
  end
83
72
  require 'google_drive'
84
73
  require 'resque'
@@ -12,16 +12,11 @@ class Array
12
12
  return self.inject{|sum,x| sum + x }
13
13
  end
14
14
  def hash_array_to_tsv
15
- ha = self
16
- if ha.first.nil? or ha.first.class!=Hash
15
+ if self.first.nil? or self.first.class!=Hash
17
16
  return ""
18
17
  end
19
- max_row_length = ha.map{|h| h.keys.length}.max
20
- header_keys = ha.select{|h| h.keys.length==max_row_length}.first.keys
21
- header = header_keys.join("\t")
22
- rows = ha.map do |r|
23
- header_keys.map{|k| r[k]}.join("\t")
24
- end
18
+ header = self.first.keys.join("\t")
19
+ rows = self.map{|r| r.values.join("\t")}
25
20
  ([header] + rows).join("\n")
26
21
  end
27
22
  end
@@ -14,7 +14,7 @@ module GoogleDrive
14
14
  def push(entry)
15
15
  #do not send email notifications
16
16
  entry = AclEntry.new(entry) if entry.is_a?(Hash)
17
- url_suffix = ((@acls_feed_url.index("?") ? "&" : "?") + "send-notification-emails=false")
17
+ url_suffix = "?send-notification-emails=false"
18
18
  header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
19
19
  doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
20
20
  entry.params = entry_to_params(doc.root)
@@ -8,7 +8,7 @@ module GoogleDrive
8
8
  attempts = 0
9
9
  sleep_time = nil
10
10
  #try 5 times to make the call
11
- while (response.nil? or response.code.starts_with?("5")) and attempts < Mobilize::Gdrive.max_api_retries
11
+ while (response.nil? or response.code.starts_with?("5")) and attempts < 20
12
12
  #instantiate http object, set params
13
13
  http = @proxy.new(uri.host, uri.port)
14
14
  http.use_ssl = true
@@ -16,7 +16,6 @@ module GoogleDrive
16
16
  #set 600 to allow for large downloads
17
17
  http.read_timeout = 600
18
18
  response = begin
19
- puts "#{Time.now.utc} Gdrive API #{method.to_s}: #{uri.to_s} #{extra_header.to_s}"
20
19
  clf.http_call(http, method, uri, data, extra_header, auth)
21
20
  rescue
22
21
  #timeouts etc.
@@ -3,16 +3,42 @@ module GoogleDrive
3
3
 
4
4
  def add_worker_acl
5
5
  f = self
6
- email = "#{Mobilize::Gdrive.worker_group_name}@#{Mobilize::Gdrive.domain}"
7
- f.update_acl(email,"group")
6
+ return true if f.has_worker_acl?
7
+ Mobilize::Gdrive.worker_emails.each do |a|
8
+ f.update_acl(a)
9
+ end
8
10
  end
9
11
 
10
12
  def add_admin_acl
11
13
  f = self
12
- email = "#{Mobilize::Gdrive.admin_group_name}@#{Mobilize::Gdrive.domain}"
13
- f.update_acl(email,"group")
14
- #if adding acl ,must currently add workers as well
15
- f.add_worker_acl
14
+ #admin includes workers
15
+ return true if f.has_admin_acl?
16
+ accounts = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
17
+ accounts.each do |email|
18
+ f.update_acl(email)
19
+ end
20
+ end
21
+
22
+ def has_admin_acl?
23
+ f = self
24
+ curr_emails = f.acls.map{|a| a.scope}.sort
25
+ admin_emails = (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails)
26
+ if (curr_emails & admin_emails) == admin_emails
27
+ return true
28
+ else
29
+ return false
30
+ end
31
+ end
32
+
33
+ def has_worker_acl?
34
+ f = self
35
+ curr_emails = f.acls.map{|a| a.scope}.sort
36
+ worker_emails = Mobilize::Gdrive.worker_emails.sort
37
+ if (curr_emails & worker_emails) == worker_emails
38
+ return true
39
+ else
40
+ return false
41
+ end
16
42
  end
17
43
 
18
44
  def read(user_name)
@@ -25,7 +51,7 @@ module GoogleDrive
25
51
  end
26
52
  end
27
53
 
28
- def update_acl(email,scope_type="user",role="writer")
54
+ def update_acl(email,role="writer")
29
55
  f = self
30
56
  #need these flags for HTTP retries
31
57
  #create req_acl hash to add to current acl
@@ -38,16 +64,16 @@ module GoogleDrive
38
64
  if entry.role != role
39
65
  #for whatever reason
40
66
  f.acl.delete(entry)
41
- f.acl.push({:scope_type=>scope_type,:scope=>email,:role=>role})
67
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
42
68
  end
43
69
  elsif !['reader','writer','owner'].include?(role)
44
70
  raise "Invalid role #{role}"
45
71
  end
46
72
  else
47
73
  begin
48
- f.acl.push({:scope_type=>scope_type,:scope=>email,:role=>role})
74
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
49
75
  rescue => exc
50
- raise exc unless exc.to_s.index("already has access")
76
+ raise exc unless exc.to_s.index("user already has access")
51
77
  end
52
78
  end
53
79
  return true
@@ -58,7 +84,7 @@ module GoogleDrive
58
84
  end
59
85
  def acl_entry(email)
60
86
  f = self
61
- f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope and a.scope == email}.first
87
+ f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
62
88
  end
63
89
  def entry_hash
64
90
  f = self
@@ -11,18 +11,13 @@ class String
11
11
  def opp
12
12
  pp self
13
13
  end
14
- def to_md5
15
- Digest::MD5.hexdigest(self)
16
- end
17
14
  def bash(except=true)
18
15
  str = self
19
- out_str,err_str = []
20
- status = Open4.popen4(str) do |pid,stdin,stdout,stderr|
21
- out_str = stdout.read
22
- err_str = stderr.read
23
- end
24
- exit_status = status.exitstatus
25
- raise err_str if (exit_status !=0 and except==true)
16
+ pid,stdin,stdout,stderr = Open4.popen4(str)
17
+ pid,stdin = [nil,nil]
18
+ err_str = stderr.read if stderr
19
+ out_str = stdout.read if stdout
20
+ raise err_str if (err_str.length>0 and except==true)
26
21
  return out_str
27
22
  end
28
23
  def escape_regex
@@ -43,7 +38,7 @@ class String
43
38
  return v if v.to_s.strip==""
44
39
  #normalize numbers by removing '$', '%', ',', ' '
45
40
  vnorm = v.to_s.norm_num
46
- vdigits = vnorm.split(".").last.to_s.length
41
+ vdigits = vnorm.split(".").last.length
47
42
  if vnorm.to_f.to_s=="Infinity"
48
43
  #do nothing
49
44
  elsif ("%.#{vdigits}f" % vnorm.to_f.to_s)==vnorm
@@ -10,16 +10,13 @@ module YAML
10
10
  #make sure urls have their colon spaces fixed
11
11
  result_hash={}
12
12
  easy_hash.each do |k,v|
13
- #fucking yaml puts spaces in front of the key
14
- #or something
15
- strip_k = k.strip
16
- result_hash[strip_k] = if v.class==String
17
- v.gsub(": //","://")
18
- elsif v.class==Array
19
- v.map{|av| av.to_s.gsub(": //","://")}
20
- else
21
- v
22
- end
13
+ result_hash[k] = if v.class==String
14
+ v.gsub(": //","://")
15
+ elsif v.class==Array
16
+ v.map{|av| av.to_s.gsub(": //","://")}
17
+ else
18
+ v
19
+ end
23
20
  end
24
21
  return result_hash
25
22
  end
@@ -14,44 +14,57 @@ module Mobilize
14
14
  dst = Dataset.find_by_handler_and_path('gbook',path)
15
15
  if dst and dst.http_url.to_s.length>0
16
16
  book = Gbook.find_by_http_url(dst.http_url,gdrive_slot)
17
- if book
18
- return book
19
- else
20
- raise "Could not find book #{path} with url #{dst.http_url}, please check dataset"
17
+ begin
18
+ #doesn't count if it's deleted
19
+ #or if its name can't be accessed
20
+ if book.entry_hash[:deleted]
21
+ book = nil
22
+ else
23
+ return book
24
+ end
25
+ rescue
26
+ #use regular process if book entry hash fails
27
+ book = nil
21
28
  end
22
29
  end
23
- #try to find books by title
24
30
  books = Gbook.find_all_by_path(path,gdrive_slot)
25
- #sort by publish date; if entry hash retrieval fails (as it does)
26
- #assume the book was published now
27
- book = books.sort_by{|b| begin b.entry_hash[:published];rescue;Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z");end;}.first
28
- if book
29
- #we know dataset will have blank url since it wasn't picked up above
30
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
31
- api_url = book.human_url.split("&").first
32
- dst.update_attributes(:http_url=>api_url)
31
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
32
+ book = nil
33
+ if books.length>1 and dst.http_url.to_s.length>0
34
+ #some idiot process or malicious user created a duplicate book.
35
+ #Fix by deleting all but the one with dst entry's key
36
+ dkey = dst.http_url.split("key=").last
37
+ books.each do |b|
38
+ bkey = b.resource_id.split(":").last
39
+ if bkey == dkey
40
+ book = b
41
+ dst.update_attributes(:http_url=>book.human_url)
42
+ else
43
+ #delete the invalid book
44
+ b.delete
45
+ ("Deleted duplicate book #{path}").oputs
46
+ end
47
+ end
48
+ else
49
+ #If it's a new dst or if there are multiple books
50
+ #take the first
51
+ book = books.first
52
+ dst.update_attributes(:http_url=>book.human_url) if book
33
53
  end
34
54
  return book
35
55
  end
36
-
37
56
  def Gbook.find_or_create_by_path(path,gdrive_slot)
38
57
  book = Gbook.find_by_path(path,gdrive_slot)
58
+ dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
39
59
  if book.nil?
40
60
  #always use owner email to make sure all books are owned by owner account
41
61
  book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
42
62
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
43
- #check to make sure the dataset has a blank url; if not, error out
44
- dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
45
- if dst.http_url.to_s.length>0
46
- #add acls to book regardless
47
- book.add_admin_acl
48
- raise "Book #{path} is already assigned to #{dst.http_url}; please update the record with #{book.human_url}"
49
- else
50
- api_url = book.human_url.split("&").first
51
- dst.update_attributes(:http_url=>api_url)
52
- book.add_admin_acl
53
- end
54
63
  end
64
+ #always make sure book dataset http URL is up to date
65
+ #and that book has admin acl
66
+ dst.update_attributes(:http_url=>book.human_url)
67
+ book.add_admin_acl
55
68
  return book
56
69
  end
57
70
  end
@@ -25,18 +25,6 @@ module Mobilize
25
25
  end
26
26
  end
27
27
 
28
- def Gdrive.max_api_retries
29
- Gdrive.config['max_api_retries']
30
- end
31
-
32
- def Gdrive.max_file_write_retries
33
- Gdrive.config['max_file_write_retries']
34
- end
35
-
36
- def Gdrive.file_write_retry_delay
37
- Gdrive.config['file_write_retry_delay']
38
- end
39
-
40
28
  def Gdrive.admins
41
29
  Gdrive.config['admins']
42
30
  end
@@ -49,18 +37,14 @@ module Mobilize
49
37
  end
50
38
  end
51
39
 
52
- def Gdrive.worker_group_name
53
- Gdrive.config['worker_group_name']
54
- end
55
-
56
- def Gdrive.admin_group_name
57
- Gdrive.config['admin_group_name']
58
- end
59
-
60
40
  def Gdrive.worker_emails
61
41
  Gdrive.workers.map{|w| [w['name'],Gdrive.domain].join("@")}
62
42
  end
63
43
 
44
+ def Gdrive.admin_emails
45
+ Gdrive.admins.map{|w| [w['name'],Gdrive.domain].join("@")}
46
+ end
47
+
64
48
  #email management - used to make sure not too many emails get used at the same time
65
49
  def Gdrive.slot_worker_by_path(path)
66
50
  working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['gdrive_slot'];rescue;nil;end}.compact.uniq
@@ -1,23 +1,9 @@
1
1
  module Mobilize
2
2
  module Gfile
3
-
4
- def Gfile.config
5
- Base.config('gfile')
6
- end
7
-
8
- def Gfile.max_length
9
- Gfile.config['max_length']
10
- end
11
-
12
3
  def Gfile.path_to_dst(path,stage_path,gdrive_slot)
13
- s = Stage.where(:path=>stage_path).first
14
- params = s.params
15
- target_path = params['target']
16
- #if this is the target, it doesn't have to exist already
17
- is_target = true if path == target_path
18
4
  #don't need the ://
19
5
  path = path.split("://").last if path.index("://")
20
- if is_target or Gfile.find_by_path(path)
6
+ if Gfile.find_by_path(path)
21
7
  handler = "gfile"
22
8
  Dataset.find_or_create_by_url("#{handler}://#{path}")
23
9
  else
@@ -44,10 +30,15 @@ module Mobilize
44
30
  :content_type=>"test/plain",
45
31
  :convert=>false)
46
32
  file.add_admin_acl
33
+ #make sure user is owner or can edit
34
+ u = User.where(:name=>user_name).first
35
+ entry = file.acl_entry(u.email)
36
+ unless entry and ['writer','owner'].include?(entry.role)
37
+ file.update_acl(u.email)
38
+ end
47
39
  #update http url for file
48
40
  dst = Dataset.find_by_handler_and_path("gfile",dst_path)
49
- api_url = file.human_url.split("&").first
50
- dst.update_attributes(:http_url=>api_url)
41
+ dst.update_attributes(:http_url=>file.human_url)
51
42
  true
52
43
  end
53
44
 
@@ -66,7 +57,7 @@ module Mobilize
66
57
  def Gfile.update_acl_by_path(path,gdrive_slot,role="writer",target_email=nil)
67
58
  file = Gfile.find_by_path(path,target_email)
68
59
  raise "File #{path} not found" unless file
69
- file.update_acl(gdrive_slot,"user",role)
60
+ file.update_acl(gdrive_slot,role)
70
61
  end
71
62
 
72
63
  def Gfile.find_by_path(path)
@@ -95,55 +86,10 @@ module Mobilize
95
86
  #always make sure dataset http URL is up to date
96
87
  #and that it has admin acl
97
88
  if file
98
- api_url = file.human_url.split("&").first
99
- dst.update_attributes(:http_url=>api_url)
89
+ dst.update_attributes(:http_url=>file.human_url)
100
90
  file.add_admin_acl
101
91
  end
102
92
  return file
103
93
  end
104
-
105
- def Gfile.write_by_stage_path(stage_path)
106
- gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
107
- #return blank response if there are no slots available
108
- return nil unless gdrive_slot
109
- s = Stage.where(:path=>stage_path).first
110
- u = s.job.runner.user
111
- retries = 0
112
- stdout,stderr = []
113
- while stdout.nil? and stderr.nil? and retries < Gdrive.max_file_write_retries
114
- begin
115
- #get tsv to write from stage
116
- source = s.sources(gdrive_slot).first
117
- raise "Need source for gfile write" unless source
118
- tsv = source.read(u.name,gdrive_slot)
119
- raise "No data source found for #{source.url}" unless tsv.to_s.length>0
120
- if tsv.length > Gfile.max_length
121
- raise "Too much data; you have #{tsv.length.to_s}, max is #{Gfile.max_length.to_s}"
122
- end
123
- stdout = if tsv.length == 0
124
- #soft error; no data to write. Stage will complete.
125
- "Write skipped for #{s.target.url}"
126
- else
127
- Dataset.write_by_url(s.target.url,tsv,u.name,gdrive_slot)
128
- #update status
129
- "Write successful for #{s.target.url}"
130
- end
131
- Gdrive.unslot_worker_by_path(stage_path)
132
- stderr = nil
133
- s.update_status(stdout)
134
- signal = 0
135
- rescue => exc
136
- if retries < Gdrive.max_file_write_retries
137
- retries +=1
138
- sleep Gdrive.file_write_retry_delay
139
- else
140
- stdout = nil
141
- stderr = [exc.to_s,"\n",exc.backtrace.join("\n")].join
142
- signal = 500
143
- end
144
- end
145
- end
146
- return {'out_str'=>stdout, 'err_str'=>stderr, 'signal' => signal}
147
- end
148
94
  end
149
95
  end