mobilize-base 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE.txt +202 -20
  3. data/README.md +219 -138
  4. data/Rakefile +1 -2
  5. data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
  6. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
  7. data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
  8. data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
  9. data/lib/mobilize-base/extensions/resque.rb +18 -24
  10. data/lib/mobilize-base/extensions/string.rb +12 -0
  11. data/lib/mobilize-base/handlers/gbook.rb +14 -47
  12. data/lib/mobilize-base/handlers/gdrive.rb +17 -18
  13. data/lib/mobilize-base/handlers/gfile.rb +18 -39
  14. data/lib/mobilize-base/handlers/gridfs.rb +43 -0
  15. data/lib/mobilize-base/handlers/gsheet.rb +48 -99
  16. data/lib/mobilize-base/jobtracker.rb +29 -15
  17. data/lib/mobilize-base/models/dataset.rb +33 -35
  18. data/lib/mobilize-base/models/job.rb +21 -168
  19. data/lib/mobilize-base/models/runner.rb +178 -0
  20. data/lib/mobilize-base/models/task.rb +137 -0
  21. data/lib/mobilize-base/models/user.rb +47 -0
  22. data/lib/mobilize-base/rakes.rb +59 -0
  23. data/lib/mobilize-base/version.rb +1 -1
  24. data/lib/mobilize-base.rb +20 -9
  25. data/lib/samples/gdrive.yml +12 -12
  26. data/lib/samples/gridfs.yml +9 -0
  27. data/lib/samples/gsheet.yml +6 -0
  28. data/lib/samples/jobtracker.yml +9 -9
  29. data/lib/samples/mongoid.yml +3 -3
  30. data/mobilize-base.gemspec +1 -1
  31. data/test/base1_task1.yml +3 -0
  32. data/test/base_job_rows.yml +13 -0
  33. data/test/mobilize-base_test.rb +59 -0
  34. metadata +20 -9
  35. data/lib/mobilize-base/handlers/mongodb.rb +0 -32
  36. data/lib/mobilize-base/models/requestor.rb +0 -232
  37. data/lib/mobilize-base/tasks.rb +0 -43
  38. data/test/mobilize_test.rb +0 -108
data/Rakefile CHANGED
@@ -17,8 +17,7 @@ require "bundler/gem_tasks"
17
17
  # Setup
18
18
  #
19
19
  $LOAD_PATH.unshift 'lib'
20
- require 'mobilize-base/tasks'
21
-
20
+ require 'mobilize-base/rakes'
22
21
 
23
22
  #
24
23
  # Tests
@@ -0,0 +1,25 @@
1
+ module GoogleDrive
2
+ class Acl
3
+ def update_role(entry, role) #:nodoc:
4
+ #do not send email notifications
5
+ url_suffix = "?send-notification-emails=false"
6
+ header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
7
+ doc = @session.request(
8
+ :put, %{#{entry.edit_url}#{url_suffix}}, :data => entry.to_xml(), :header => header, :auth => :writely)
9
+
10
+ entry.params = entry_to_params(doc.root)
11
+ return entry
12
+ end
13
+
14
+ def push(entry)
15
+ #do not send email notifications
16
+ entry = AclEntry.new(entry) if entry.is_a?(Hash)
17
+ url_suffix = "?send-notification-emails=false"
18
+ header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
19
+ doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
20
+ entry.params = entry_to_params(doc.root)
21
+ @acls.push(entry)
22
+ return entry
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,49 @@
1
+ module GoogleDrive
2
+ class ClientLoginFetcher
3
+ def request_raw(method, url, data, extra_header, auth)
4
+ #this is patched to handle server errors due to http chaos
5
+ uri = URI.parse(url)
6
+ response = nil
7
+ attempts = 0
8
+ sleep_time = nil
9
+ #try 5 times to make the call
10
+ while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
+ #instantiate http object, set params
12
+ http = @proxy.new(uri.host, uri.port)
13
+ http.use_ssl = true
14
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
15
+ #set 600 to allow for large downloads
16
+ http.read_timeout = 600
17
+ response = self.http_call(http, method, uri, data, extra_header, auth)
18
+ if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
19
+ if response.body.downcase.index("rate limit") or response.body.downcase.index("captcha")
20
+ if sleep_time
21
+ sleep_time = sleep_time * attempts
22
+ else
23
+ sleep_time = (rand*100).to_i
24
+ end
25
+ else
26
+ sleep_time = 10
27
+ end
28
+ attempts += 1
29
+ puts "Sleeping for #{sleep_time.to_s} due to #{response.body}"
30
+ sleep sleep_time
31
+ end
32
+ end
33
+ raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
34
+ return response
35
+ end
36
+ def http_call(http, method, uri, data, extra_header, auth)
37
+ http.read_timeout = 600
38
+ http.start() do
39
+ path = uri.path + (uri.query ? "?#{uri.query}" : "")
40
+ header = auth_header(auth).merge(extra_header)
41
+ if method == :delete || method == :get
42
+ http.__send__(method, path, header)
43
+ else
44
+ http.__send__(method, path, data, header)
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,80 @@
1
+ module GoogleDrive
2
+ class File
3
+
4
+ def add_worker_acl
5
+ f = self
6
+ return true if f.has_worker_acl?
7
+ Mobilize::Gdrive.worker_emails.each do |a|
8
+ f.update_acl(a)
9
+ end
10
+ end
11
+
12
+ def add_admin_acl
13
+ f = self
14
+ #admin includes workers
15
+ return true if f.has_admin_acl?
16
+ (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).each do |a|
17
+ f.update_acl(a)
18
+ end
19
+ end
20
+
21
+ def has_admin_acl?
22
+ f = self
23
+ curr_emails = f.acls.map{|a| a.scope}.sort
24
+ admin_emails = Mobilize::Gdrive.admin_emails.sort
25
+ if (curr_emails & admin_emails) == admin_emails
26
+ return true
27
+ else
28
+ return false
29
+ end
30
+ end
31
+
32
+ def has_worker_acl?
33
+ f = self
34
+ curr_emails = f.acls.map{|a| a.scope}.sort
35
+ worker_emails = Mobilize::Gdrive.worker_emails.sort
36
+ if (curr_emails & worker_emails) == worker_emails
37
+ return true
38
+ else
39
+ return false
40
+ end
41
+ end
42
+
43
+ def update_acl(email,role="writer")
44
+ f = self
45
+ #need these flags for HTTP retries
46
+ #create req_acl hash to add to current acl
47
+ if entry = f.acl_entry(email)
48
+ if [nil,"none","delete"].include?(role)
49
+ f.acl.delete(entry)
50
+ elsif entry.role != role and ['reader','writer','owner'].include?(role)
51
+ entry.role=role
52
+ f.acl.update_role(entry,entry.role)
53
+ elsif !['reader','writer','owner'].include?(role)
54
+ raise "Invalid role #{role}"
55
+ end
56
+ else
57
+ f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
58
+ end
59
+ return true
60
+ end
61
+ def acls
62
+ f = self
63
+ f.acl.to_enum.to_a
64
+ end
65
+ def acl_entry(email)
66
+ f = self
67
+ f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
68
+ end
69
+
70
+ def entry_hash
71
+ f = self
72
+ dfe_xml = f.document_feed_entry.to_xml
73
+ begin
74
+ Hash.from_xml(dfe_xml)[:entry]
75
+ rescue
76
+ {}
77
+ end
78
+ end
79
+ end
80
+ end
@@ -1,154 +1,4 @@
1
1
  module GoogleDrive
2
- class ClientLoginFetcher
3
- def request_raw(method, url, data, extra_header, auth)
4
- #this is patched to handle server errors due to http chaos
5
- uri = URI.parse(url)
6
- response = nil
7
- attempts = 0
8
- sleep_time = nil
9
- #try 5 times to make the call
10
- while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
- #instantiate http object, set params
12
- http = @proxy.new(uri.host, uri.port)
13
- http.use_ssl = true
14
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
15
- #set 600 to allow for large downloads
16
- http.read_timeout = 600
17
- response = self.http_call(http, method, uri, data, extra_header, auth)
18
- if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
19
- if response.body.downcase.index("rate limit") or response.body.downcase.index("captcha")
20
- if sleep_time
21
- sleep_time = sleep_time * attempts
22
- else
23
- sleep_time = (rand*100).to_i
24
- end
25
- else
26
- sleep_time = 10
27
- end
28
- attempts += 1
29
- puts "Sleeping for #{sleep_time.to_s} due to #{response.body}"
30
- sleep sleep_time
31
- end
32
- end
33
- raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
34
- return response
35
- end
36
- def http_call(http, method, uri, data, extra_header, auth)
37
- http.read_timeout = 600
38
- http.start() do
39
- path = uri.path + (uri.query ? "?#{uri.query}" : "")
40
- header = auth_header(auth).merge(extra_header)
41
- if method == :delete || method == :get
42
- http.__send__(method, path, header)
43
- else
44
- http.__send__(method, path, data, header)
45
- end
46
- end
47
- end
48
- end
49
- class Acl
50
- def update_role(entry, role) #:nodoc:
51
- #do not send email notifications
52
- url_suffix = "?send-notification-emails=false"
53
- header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
54
- doc = @session.request(
55
- :put, %{#{entry.edit_url}#{url_suffix}}, :data => entry.to_xml(), :header => header, :auth => :writely)
56
-
57
- entry.params = entry_to_params(doc.root)
58
- return entry
59
- end
60
-
61
- def push(entry)
62
- #do not send email notifications
63
- entry = AclEntry.new(entry) if entry.is_a?(Hash)
64
- url_suffix = "?send-notification-emails=false"
65
- header = {"GData-Version" => "3.0", "Content-Type" => "application/atom+xml"}
66
- doc = @session.request(:post, "#{@acls_feed_url}#{url_suffix}", :data => entry.to_xml(), :header => header, :auth => :writely)
67
- entry.params = entry_to_params(doc.root)
68
- @acls.push(entry)
69
- return entry
70
- end
71
- end
72
-
73
- class File
74
-
75
- def add_worker_acl
76
- f = self
77
- return true if f.has_worker_acl?
78
- Mobilize::Gdrive.worker_emails.each do |a|
79
- f.update_acl(a)
80
- end
81
- end
82
-
83
- def add_admin_acl
84
- f = self
85
- #admin includes workers
86
- return true if f.has_admin_acl?
87
- (Mobilize::Gdrive.admin_emails + Mobilize::Gdrive.worker_emails).each do |a|
88
- f.update_acl(a)
89
- end
90
- end
91
-
92
- def has_admin_acl?
93
- f = self
94
- curr_emails = f.acls.map{|a| a.scope}.sort
95
- admin_emails = Mobilize::Gdrive.admin_emails.sort
96
- if (curr_emails & admin_emails) == admin_emails
97
- return true
98
- else
99
- return false
100
- end
101
- end
102
-
103
- def has_worker_acl?
104
- f = self
105
- curr_emails = f.acls.map{|a| a.scope}.sort
106
- worker_emails = Mobilize::Gdrive.worker_emails.sort
107
- if (curr_emails & worker_emails) == worker_emails
108
- return true
109
- else
110
- return false
111
- end
112
- end
113
-
114
- def update_acl(email,role="writer")
115
- f = self
116
- #need these flags for HTTP retries
117
- #create req_acl hash to add to current acl
118
- if entry = f.acl_entry(email)
119
- if [nil,"none","delete"].include?(role)
120
- f.acl.delete(entry)
121
- elsif entry.role != role and ['reader','writer','owner'].include?(role)
122
- entry.role=role
123
- f.acl.update_role(entry,entry.role)
124
- elsif !['reader','writer','owner'].include?(role)
125
- raise "Invalid role #{role}"
126
- end
127
- else
128
- f.acl.push({:scope_type=>"user",:scope=>email,:role=>role})
129
- end
130
- return true
131
- end
132
- def acls
133
- f = self
134
- f.acl.to_enum.to_a
135
- end
136
- def acl_entry(email)
137
- f = self
138
- f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
139
- end
140
-
141
- def entry_hash
142
- f = self
143
- dfe_xml = f.document_feed_entry.to_xml
144
- begin
145
- Hash.from_xml(dfe_xml)[:entry]
146
- rescue
147
- {}
148
- end
149
- end
150
- end
151
-
152
2
  class Worksheet
153
3
  def to_tsv
154
4
  sheet = self
@@ -163,8 +13,17 @@ module GoogleDrive
163
13
  headers.each_with_index do |h,h_i|
164
14
  self[1,h_i+1] = h
165
15
  end
16
+ self.save
17
+ end
18
+ def delete_sheet1
19
+ sheet = self
20
+ #delete sheet1
21
+ sheet1 = sheet.spreadsheet.worksheet_by_title("Sheet1") || sheet.spreadsheet.worksheet_by_title("Sheet 1")
22
+ if sheet1
23
+ sheet1.delete
24
+ return true
25
+ end
166
26
  end
167
-
168
27
  def add_or_update_rows(upd_rows)
169
28
  sheet = self
170
29
  curr_rows = sheet.to_tsv.tsv_to_hash_array
@@ -185,7 +44,40 @@ module GoogleDrive
185
44
  sheet.save
186
45
  end
187
46
 
188
- def write(tsv,check=true,job_id=nil)
47
+ def merge(merge_sheet)
48
+ #write the top left of sheet
49
+ #with the contents of merge_sheet
50
+ sheet = self
51
+ sheet.reload
52
+ merge_sheet.reload
53
+ curr_rows = sheet.num_rows
54
+ curr_cols = sheet.num_cols
55
+ merge_rows = merge_sheet.num_rows
56
+ merge_cols = merge_sheet.num_cols
57
+ #make sure sheet is at least as big as necessary
58
+ if merge_rows > curr_rows
59
+ sheet.max_rows = merge_rows
60
+ sheet.save
61
+ end
62
+ if merge_cols > curr_cols
63
+ sheet.max_cols = merge_cols
64
+ sheet.save
65
+ end
66
+ batch_start = 0
67
+ batch_length = 80
68
+ merge_sheet.rows.each_with_index do |row,row_i|
69
+ row.each_with_index do |val,col_i|
70
+ sheet[row_i+1,col_i+1] = val
71
+ end
72
+ if row_i > batch_start + batch_length
73
+ sheet.save
74
+ batch_start += (batch_length+1)
75
+ end
76
+ end
77
+ sheet.save
78
+ end
79
+
80
+ def write(tsv)
189
81
  sheet = self
190
82
  tsvrows = tsv.split("\n")
191
83
  #no rows, no write
@@ -196,8 +88,6 @@ module GoogleDrive
196
88
  rows_written = 0
197
89
  curr_rows = sheet.num_rows
198
90
  curr_cols = sheet.num_cols
199
- pct_tens_complete =["0"]
200
- curr_pct_complete = "00"
201
91
  #make sure sheet is at least as big as necessary
202
92
  if tsvrows.length != curr_rows
203
93
  sheet.max_rows = tsvrows.length
@@ -220,29 +110,12 @@ module GoogleDrive
220
110
  batch_start += (batch_length + 1)
221
111
  rows_written+=batch_length
222
112
  if batch_start>tsvrows.length+1
223
- if job_id
224
- newstatus = "100 pct written at #{Time.now.utc}"
225
- Mobilize::Job.find(job_id).update_status(newstatus)
226
- end
227
- break
228
- else
229
- #pad digit
230
- curr_pct_complete = "%02d" % ((rows_written+1).to_f*100/tsvrows.length.to_f).round(0)
231
- if !pct_tens_complete.include?(curr_pct_complete.first)
232
- if job_id
233
- newstatus = "#{curr_pct_complete} pct written at #{Time.now.utc}"
234
- Mobilize::Job.find(job_id).update_status(newstatus)
235
- newstatus.oputs
236
- pct_tens_complete << curr_pct_complete.first
237
- end
238
- end
113
+ break
239
114
  end
240
115
  end
241
- #checksum it against the source
242
- sheet.checksum(tsv) if check
243
116
  true
244
117
  end
245
- def checksum(tsv)
118
+ def check_and_fix(tsv)
246
119
  sheet = self
247
120
  sheet.reload
248
121
  #loading remote data for checksum
@@ -1,7 +1,7 @@
1
1
  module Mobilize
2
2
  module Resque
3
3
  def Resque.config
4
- Base.config('resque')[Base.env]
4
+ Base.config('resque')
5
5
  end
6
6
 
7
7
  def Resque.queue_name
@@ -47,36 +47,27 @@ module Mobilize
47
47
  return working_jobs + queued_jobs + failed_jobs if state == 'all'
48
48
  end
49
49
 
50
- def Resque.active_mongo_ids
51
- #first argument of the payload is the mongo id in Mongo unless the worker is Jobtracker
50
+ def Resque.active_paths
51
+ #first argument of the payload is the runner / task path unless the worker is Jobtracker
52
52
  Resque.jobs('active').map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
53
53
  end
54
54
 
55
55
  #Resque workers and methods to find
56
- def Resque.find_worker_by_mongo_id(mongo_id)
57
- Resque.workers('working').select{|w| w.job['payload']['args'][0] == mongo_id}.first
56
+ def Resque.find_worker_by_path(path)
57
+ Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
58
58
  end
59
59
 
60
- def Resque.update_job_status(mongo_id,msg)
60
+ def Resque.set_worker_args_by_path(path,args)
61
61
  #this only works on working workers
62
- worker = Resque.find_worker_by_mongo_id(mongo_id)
62
+ worker = Resque.find_worker_by_path(path)
63
+ args_string = args.map{|k,v| "#{k}: #{v}"}.join(";")
63
64
  #also fire a log, cap logfiles at 10 MB
64
- if !worker
65
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[no worker for #{mongo_id}: #{Time.now.utc}] status: #{msg}")
66
- return false
67
- end
68
- Resque.set_worker_args(worker,{"status"=>msg})
69
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[#{worker} #{Time.now.utc}] status: #{msg}")
70
- return true
71
- end
72
-
73
- def Resque.update_job_email(mongo_id,email)
74
- #this only works on working workers
75
- worker = Resque.find_worker_by_mongo_id(mongo_id)
65
+ worker_string = worker ? worker.to_s : "no worker"
66
+ info_msg = "[#{worker_string} for #{path}: #{Time.now.utc}] #{args_string}"
67
+ Logger.new(Resque.log_path, 10, 1024*1000*10).info(info_msg)
76
68
  return false unless worker
77
- Resque.set_worker_args(worker,{"email"=>email})
78
- #also fire a log, cap logfiles at 10 MB
79
- Logger.new(Resque.log_path, 10, 1024*1000*10).info("[#{worker} #{Time.now.utc}] email: #{email}")
69
+ Resque.set_worker_args(worker,args)
70
+ return true
80
71
  end
81
72
 
82
73
  def Resque.get_worker_args(worker)
@@ -114,7 +105,7 @@ module Mobilize
114
105
  fjobs = {}
115
106
  excs = Hash.new(0)
116
107
  Resque.failures.each do |f|
117
- sname = f['payload']['class'] + ("=>" + f['payload']['args'].second['name'].to_s if f['payload']['args'].second).to_s
108
+ sname = f['payload']['args'].first
118
109
  excs = f['error']
119
110
  if fjobs[sname].nil?
120
111
  fjobs[sname] = {excs => 1}
@@ -129,7 +120,10 @@ module Mobilize
129
120
 
130
121
  def Resque.start_workers(count=1)
131
122
  count.times do
132
- "(cd #{Base.root};rake MOBILIZE_ENV=#{Base.env} mobilize:work) >> #{Resque.log_path} 2>&1 &".bash
123
+ dir_envs = "MOBILIZE_ENV=#{Base.env} " +
124
+ "MOBILIZE_CONFIG_DIR=#{Base.config_dir} " +
125
+ "MOBILIZE_LOG_DIR=#{Base.log_dir}"
126
+ "(cd #{Base.root};rake #{dir_envs} mobilize:work) >> #{Resque.log_path} 2>&1 &".bash
133
127
  end
134
128
  end
135
129