mobilize-base 1.0.2 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE.txt +202 -20
  3. data/README.md +219 -138
  4. data/Rakefile +1 -2
  5. data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
  6. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
  7. data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
  8. data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
  9. data/lib/mobilize-base/extensions/resque.rb +18 -24
  10. data/lib/mobilize-base/extensions/string.rb +12 -0
  11. data/lib/mobilize-base/handlers/gbook.rb +14 -47
  12. data/lib/mobilize-base/handlers/gdrive.rb +17 -18
  13. data/lib/mobilize-base/handlers/gfile.rb +18 -39
  14. data/lib/mobilize-base/handlers/gridfs.rb +43 -0
  15. data/lib/mobilize-base/handlers/gsheet.rb +48 -99
  16. data/lib/mobilize-base/jobtracker.rb +29 -15
  17. data/lib/mobilize-base/models/dataset.rb +33 -35
  18. data/lib/mobilize-base/models/job.rb +21 -168
  19. data/lib/mobilize-base/models/runner.rb +178 -0
  20. data/lib/mobilize-base/models/task.rb +137 -0
  21. data/lib/mobilize-base/models/user.rb +47 -0
  22. data/lib/mobilize-base/rakes.rb +59 -0
  23. data/lib/mobilize-base/version.rb +1 -1
  24. data/lib/mobilize-base.rb +20 -9
  25. data/lib/samples/gdrive.yml +12 -12
  26. data/lib/samples/gridfs.yml +9 -0
  27. data/lib/samples/gsheet.yml +6 -0
  28. data/lib/samples/jobtracker.yml +9 -9
  29. data/lib/samples/mongoid.yml +3 -3
  30. data/mobilize-base.gemspec +1 -1
  31. data/test/base1_task1.yml +3 -0
  32. data/test/base_job_rows.yml +13 -0
  33. data/test/mobilize-base_test.rb +59 -0
  34. metadata +20 -9
  35. data/lib/mobilize-base/handlers/mongodb.rb +0 -32
  36. data/lib/mobilize-base/models/requestor.rb +0 -232
  37. data/lib/mobilize-base/tasks.rb +0 -43
  38. data/test/mobilize_test.rb +0 -108
data/Rakefile CHANGED
@@ -17,8 +17,7 @@ require "bundler/gem_tasks"
17
17
  # Setup
18
18
  #
19
19
  $LOAD_PATH.unshift 'lib'
20
- require 'mobilize-base/tasks'
21
-
20
+ require 'mobilize-base/rakes'
22
21
 
23
22
  #
24
23
  # Tests
@@ -0,0 +1,25 @@
1
+ module GoogleDrive
2
+ class Acl
3
+ def update_role(entry, role) #:nodoc:
4
+ #do not send email notifications
5
+ url_suffix = "?send-notification-emails=false"
6
+ header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
7
+ doc = @session.request(
8
+ :put, %{#{entry.edit_url}#{url_suffix}}, :data => entry.to_xml(), :header => header, :auth => :writely)
9
+
10
+ entry.params = entry_to_params(doc.root)
11
+ return entry
12
+ end
13
+
14
+ def push(entry)
15
+ #do not send email notifications
16
+ entry = AclEntry.new(entry) if entry.is_a?(Hash)
17
+ url_suffix = "?send-notification-emails=false"
18
+ header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
19
+ doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
20
+ entry.params = entry_to_params(doc.root)
21
+ @acls.push(entry)
22
+ return entry
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,49 @@
1
+ module GoogleDrive
2
+ class ClientLoginFetcher
3
+ def request_raw(method, url, data, extra_header, auth)
4
+ #this is patched to handle server errors due to http chaos
5
+ uri = URI.parse(url)
6
+ response = nil
7
+ attempts = 0
8
+ sleep_time = nil
9
+ #try 5 times to make the call
10
+ while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
+ #instantiate http object, set params
12
+ http = @proxy.new(uri.host, uri.port)
13
+ http.use_ssl = true
14
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
15
+ #set 600 to allow for large downloads
16
+ http.read_timeout = 600
17
+ response = self.http_call(http, method, uri, data, extra_header, auth)
18
+ if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
19
+ if response.body.downcase.index("rate limit") or response.body.downcase.index("captcha")
20
+ if sleep_time
21
+ sleep_time = sleep_time * attempts
22
+ else
23
+ sleep_time = (rand*100).to_i
24
+ end
25
+ else
26
+ sleep_time = 10
27
+ end
28
+ attempts += 1
29
+ puts "Sleeping for #{sleep_time.to_s} due to #{response.body}"
30
+ sleep sleep_time
31
+ end
32
+ end
33
+ raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
34
+ return response
35
+ end
36
+ def http_call(http, method, uri, data, extra_header, auth)
37
+ http.read_timeout = 600
38
+ http.start() do
39
+ path = uri.path + (uri.query ? "?#{uri.query}" : "")
40
+ header = auth_header(auth).merge(extra_header)
41
+ if method == :delete || method == :get
42
+ http.__send__(method, path, header)
43
+ else
44
+ http.__send__(method, path, data, header)
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,80 @@
1
+ module GoogleDrive
2
+ class File
3
+
4
+ def add_worker_acl
5
+ f = self
6
+ return true if f.has_worker_acl?
7
+ Mobilize::Gdrive.worker_emails.each do |a|
8
+ f.update_acl(a)
9
+ end
10
+ end
11
+
12
+ def add_admin_acl
13
+ f = self
14
+ #admin includes workers
15
+ return true if f.has_admin_acl?
16
+ (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).each do |a|
17
+ f.update_acl(a)
18
+ end
19
+ end
20
+
21
+ def has_admin_acl?
22
+ f = self
23
+ curr_emails = f.acls.map{|a| a.scope}.sort
24
+ admin_emails = Mobilize::Gdrive.admin_emails.sort
25
+ if (curr_emails & admin_emails) == admin_emails
26
+ return true
27
+ else
28
+ return false
29
+ end
30
+ end
31
+
32
+ def has_worker_acl?
33
+ f = self
34
+ curr_emails = f.acls.map{|a| a.scope}.sort
35
+ worker_emails = Mobilize::Gdrive.worker_emails.sort
36
+ if (curr_emails & worker_emails) == worker_emails
37
+ return true
38
+ else
39
+ return false
40
+ end
41
+ end
42
+
43
+ def update_acl(email,role="writer")
44
+ f = self
45
+ #need these flags for HTTP retries
46
+ #create req_acl hash to add to current acl
47
+ if entry = f.acl_entry(email)
48
+ if [nil,"none","delete"].include?(role)
49
+ f.acl.delete(entry)
50
+ elsif entry.role != role and ['reader','writer','owner'].include?(role)
51
+ entry.role=role
52
+ f.acl.update_role(entry,entry.role)
53
+ elsif !['reader','writer','owner'].include?(role)
54
+ raise "Invalid role #{role}"
55
+ end
56
+ else
57
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
58
+ end
59
+ return true
60
+ end
61
+ def acls
62
+ f = self
63
+ f.acl.to_enum.to_a
64
+ end
65
+ def acl_entry(email)
66
+ f = self
67
+ f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
68
+ end
69
+
70
+ def entry_hash
71
+ f = self
72
+ dfe_xml = f.document_feed_entry.to_xml
73
+ begin
74
+ Hash.from_xml(dfe_xml)[:entry]
75
+ rescue
76
+ {}
77
+ end
78
+ end
79
+ end
80
+ end
@@ -1,154 +1,4 @@
1
1
  module GoogleDrive
2
- class ClientLoginFetcher
3
- def request_raw(method, url, data, extra_header, auth)
4
- #this is patched to handle server errors due to http chaos
5
- uri = URI.parse(url)
6
- response = nil
7
- attempts = 0
8
- sleep_time = nil
9
- #try 5 times to make the call
10
- while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
- #instantiate http object, set params
12
- http = @proxy.new(uri.host, uri.port)
13
- http.use_ssl = true
14
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
15
- #set 600 to allow for large downloads
16
- http.read_timeout = 600
17
- response = self.http_call(http, method, uri, data, extra_header, auth)
18
- if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
19
- if response.body.downcase.index("rate limit") or response.body.downcase.index("captcha")
20
- if sleep_time
21
- sleep_time = sleep_time * attempts
22
- else
23
- sleep_time = (rand*100).to_i
24
- end
25
- else
26
- sleep_time = 10
27
- end
28
- attempts += 1
29
- puts "Sleeping for #{sleep_time.to_s} due to #{response.body}"
30
- sleep sleep_time
31
- end
32
- end
33
- raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
34
- return response
35
- end
36
- def http_call(http, method, uri, data, extra_header, auth)
37
- http.read_timeout = 600
38
- http.start() do
39
- path = uri.path + (uri.query ? "?#{uri.query}" : "")
40
- header = auth_header(auth).merge(extra_header)
41
- if method == :delete || method == :get
42
- http.__send__(method, path, header)
43
- else
44
- http.__send__(method, path, data, header)
45
- end
46
- end
47
- end
48
- end
49
- class Acl
50
- def update_role(entry, role) #:nodoc:
51
- #do not send email notifications
52
- url_suffix = "?send-notification-emails=false"
53
- header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
54
- doc = @session.request(
55
- :put, %{#{entry.edit_url}#{url_suffix}}, :data => entry.to_xml(), :header => header, :auth => :writely)
56
-
57
- entry.params = entry_to_params(doc.root)
58
- return entry
59
- end
60
-
61
- def push(entry)
62
- #do not send email notifications
63
- entry = AclEntry.new(entry) if entry.is_a?(Hash)
64
- url_suffix = "?send-notification-emails=false"
65
- header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
66
- doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
67
- entry.params = entry_to_params(doc.root)
68
- @acls.push(entry)
69
- return entry
70
- end
71
- end
72
-
73
- class File
74
-
75
- def add_worker_acl
76
- f = self
77
- return true if f.has_worker_acl?
78
- Mobilize::Gdrive.worker_emails.each do |a|
79
- f.update_acl(a)
80
- end
81
- end
82
-
83
- def add_admin_acl
84
- f = self
85
- #admin includes workers
86
- return true if f.has_admin_acl?
87
- (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).each do |a|
88
- f.update_acl(a)
89
- end
90
- end
91
-
92
- def has_admin_acl?
93
- f = self
94
- curr_emails = f.acls.map{|a| a.scope}.sort
95
- admin_emails = Mobilize::Gdrive.admin_emails.sort
96
- if (curr_emails & admin_emails) == admin_emails
97
- return true
98
- else
99
- return false
100
- end
101
- end
102
-
103
- def has_worker_acl?
104
- f = self
105
- curr_emails = f.acls.map{|a| a.scope}.sort
106
- worker_emails = Mobilize::Gdrive.worker_emails.sort
107
- if (curr_emails & worker_emails) == worker_emails
108
- return true
109
- else
110
- return false
111
- end
112
- end
113
-
114
- def update_acl(email,role="writer")
115
- f = self
116
- #need these flags for HTTP retries
117
- #create req_acl hash to add to current acl
118
- if entry = f.acl_entry(email)
119
- if [nil,"none","delete"].include?(role)
120
- f.acl.delete(entry)
121
- elsif entry.role != role and ['reader','writer','owner'].include?(role)
122
- entry.role=role
123
- f.acl.update_role(entry,entry.role)
124
- elsif !['reader','writer','owner'].include?(role)
125
- raise "Invalid role #{role}"
126
- end
127
- else
128
- f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
129
- end
130
- return true
131
- end
132
- def acls
133
- f = self
134
- f.acl.to_enum.to_a
135
- end
136
- def acl_entry(email)
137
- f = self
138
- f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
139
- end
140
-
141
- def entry_hash
142
- f = self
143
- dfe_xml = f.document_feed_entry.to_xml
144
- begin
145
- Hash.from_xml(dfe_xml)[:entry]
146
- rescue
147
- {}
148
- end
149
- end
150
- end
151
-
152
2
  class Worksheet
153
3
  def to_tsv
154
4
  sheet = self
@@ -163,8 +13,17 @@ module GoogleDrive
163
13
  headers.each_with_index do |h,h_i|
164
14
  self[1,h_i+1] = h
165
15
  end
16
+ self.save
17
+ end
18
+ def delete_sheet1
19
+ sheet = self
20
+ #delete sheet1
21
+ sheet1 = sheet.spreadsheet.worksheet_by_title("Sheet1") || sheet.spreadsheet.worksheet_by_title("Sheet 1")
22
+ if sheet1
23
+ sheet1.delete
24
+ return true
25
+ end
166
26
  end
167
-
168
27
  def add_or_update_rows(upd_rows)
169
28
  sheet = self
170
29
  curr_rows = sheet.to_tsv.tsv_to_hash_array
@@ -185,7 +44,40 @@ module GoogleDrive
185
44
  sheet.save
186
45
  end
187
46
 
188
- def write(tsv,check=true,job_id=nil)
47
+ def merge(merge_sheet)
48
+ #write the top left of sheet
49
+ #with the contents of merge_sheet
50
+ sheet = self
51
+ sheet.reload
52
+ merge_sheet.reload
53
+ curr_rows = sheet.num_rows
54
+ curr_cols = sheet.num_cols
55
+ merge_rows = merge_sheet.num_rows
56
+ merge_cols = merge_sheet.num_cols
57
+ #make sure sheet is at least as big as necessary
58
+ if merge_rows > curr_rows
59
+ sheet.max_rows = merge_rows
60
+ sheet.save
61
+ end
62
+ if merge_cols > curr_cols
63
+ sheet.max_cols = merge_cols
64
+ sheet.save
65
+ end
66
+ batch_start = 0
67
+ batch_length = 80
68
+ merge_sheet.rows.each_with_index do |row,row_i|
69
+ row.each_with_index do |val,col_i|
70
+ sheet[row_i+1,col_i+1] = val
71
+ end
72
+ if row_i > batch_start + batch_length
73
+ sheet.save
74
+ batch_start += (batch_length+1)
75
+ end
76
+ end
77
+ sheet.save
78
+ end
79
+
80
+ def write(tsv)
189
81
  sheet = self
190
82
  tsvrows = tsv.split("\n")
191
83
  #no rows, no write
@@ -196,8 +88,6 @@ module GoogleDrive
196
88
  rows_written = 0
197
89
  curr_rows = sheet.num_rows
198
90
  curr_cols = sheet.num_cols
199
- pct_tens_complete =["0"]
200
- curr_pct_complete = "00"
201
91
  #make sure sheet is at least as big as necessary
202
92
  if tsvrows.length != curr_rows
203
93
  sheet.max_rows = tsvrows.length
@@ -220,29 +110,12 @@ module GoogleDrive
220
110
  batch_start += (batch_length + 1)
221
111
  rows_written+=batch_length
222
112
  if batch_start>tsvrows.length+1
223
- if job_id
224
- newstatus = "100 pct written at #{Time.now.utc}"
225
- Mobilize::Job.find(job_id).update_status(newstatus)
226
- end
227
- break
228
- else
229
- #pad digit
230
- curr_pct_complete = "%02d" % ((rows_written+1).to_f*100/tsvrows.length.to_f).round(0)
231
- if !pct_tens_complete.include?(curr_pct_complete.first)
232
- if job_id
233
- newstatus = "#{curr_pct_complete} pct written at #{Time.now.utc}"
234
- Mobilize::Job.find(job_id).update_status(newstatus)
235
- newstatus.oputs
236
- pct_tens_complete << curr_pct_complete.first
237
- end
238
- end
113
+ break
239
114
  end
240
115
  end
241
- #checksum it against the source
242
- sheet.checksum(tsv) if check
243
116
  true
244
117
  end
245
- def checksum(tsv)
118
+ def check_and_fix(tsv)
246
119
  sheet = self
247
120
  sheet.reload
248
121
  #loading remote data for checksum
@@ -1,7 +1,7 @@
1
1
  module Mobilize
2
2
  module Resque
3
3
  def Resque.config
4
- Base.config('resque')[Base.env]
4
+ Base.config('resque')
5
5
  end
6
6
 
7
7
  def Resque.queue_name
@@ -47,36 +47,27 @@ module Mobilize
47
47
  return working_jobs + queued_jobs + failed_jobs if state == 'all'
48
48
  end
49
49
 
50
- def Resque.active_mongo_ids
51
- #first argument of the payload is the mongo id in Mongo unless the worker is Jobtracker
50
+ def Resque.active_paths
51
+ #first argument of the payload is the runner / task path unless the worker is Jobtracker
52
52
  Resque.jobs('active').map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
53
53
  end
54
54
 
55
55
  #Resque workers and methods to find
56
- def Resque.find_worker_by_mongo_id(mongo_id)
57
- Resque.workers('working').select{|w| w.job['payload']['args'][0] == mongo_id}.first
56
+ def Resque.find_worker_by_path(path)
57
+ Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
58
58
  end
59
59
 
60
- def Resque.update_job_status(mongo_id,msg)
60
+ def Resque.set_worker_args_by_path(path,args)
61
61
  #this only works on working workers
62
- worker = Resque.find_worker_by_mongo_id(mongo_id)
62
+ worker = Resque.find_worker_by_path(path)
63
+ args_string = args.map{|k,v| "#{k}: #{v}"}.join(";")
63
64
  #also fire a log, cap logfiles at 10 MB
64
- if !worker
65
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[no worker for #{mongo_id}: #{Time.now.utc}] status: #{msg}")
66
- return false
67
- end
68
- Resque.set_worker_args(worker,{"status"=>msg})
69
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[#{worker} #{Time.now.utc}] status: #{msg}")
70
- return true
71
- end
72
-
73
- def Resque.update_job_email(mongo_id,email)
74
- #this only works on working workers
75
- worker = Resque.find_worker_by_mongo_id(mongo_id)
65
+ worker_string = worker ? worker.to_s : "no worker"
66
+ info_msg = "[#{worker_string} for #{path}: #{Time.now.utc}] #{args_string}"
67
+ Logger.new(Resque.log_path, 10, 1024*1000*10).info(info_msg)
76
68
  return false unless worker
77
- Resque.set_worker_args(worker,{"email"=>email})
78
- #also fire a log, cap logfiles at 10 MB
79
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[#{worker} #{Time.now.utc}] email: #{email}")
69
+ Resque.set_worker_args(worker,args)
70
+ return true
80
71
  end
81
72
 
82
73
  def Resque.get_worker_args(worker)
@@ -114,7 +105,7 @@ module Mobilize
114
105
  fjobs = {}
115
106
  excs = Hash.new(0)
116
107
  Resque.failures.each do |f|
117
- sname = f['payload']['class'] + ("=>" + f['payload']['args'].second['name'].to_s if f['payload']['args'].second).to_s
108
+ sname = f['payload']['args'].first
118
109
  excs = f['error']
119
110
  if fjobs[sname].nil?
120
111
  fjobs[sname] = {excs => 1}
@@ -129,7 +120,10 @@ module Mobilize
129
120
 
130
121
  def Resque.start_workers(count=1)
131
122
  count.times do
132
- "(cd #{Base.root};rake MOBILIZE_ENV=#{Base.env} mobilize:work) >> #{Resque.log_path} 2>&1 &".bash
123
+ dir_envs = "MOBILIZE_ENV=#{Base.env} " +
124
+ "MOBILIZE_CONFIG_DIR=#{Base.config_dir} " +
125
+ "MOBILIZE_LOG_DIR=#{Base.log_dir}"
126
+ "(cd #{Base.root};rake #{dir_envs} mobilize:work) >> #{Resque.log_path} 2>&1 &".bash
133
127
  end
134
128
  end
135
129