mobilize-base 1.0.2 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE.txt +202 -20
  3. data/README.md +219 -138
  4. data/Rakefile +1 -2
  5. data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
  6. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
  7. data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
  8. data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
  9. data/lib/mobilize-base/extensions/resque.rb +18 -24
  10. data/lib/mobilize-base/extensions/string.rb +12 -0
  11. data/lib/mobilize-base/handlers/gbook.rb +14 -47
  12. data/lib/mobilize-base/handlers/gdrive.rb +17 -18
  13. data/lib/mobilize-base/handlers/gfile.rb +18 -39
  14. data/lib/mobilize-base/handlers/gridfs.rb +43 -0
  15. data/lib/mobilize-base/handlers/gsheet.rb +48 -99
  16. data/lib/mobilize-base/jobtracker.rb +29 -15
  17. data/lib/mobilize-base/models/dataset.rb +33 -35
  18. data/lib/mobilize-base/models/job.rb +21 -168
  19. data/lib/mobilize-base/models/runner.rb +178 -0
  20. data/lib/mobilize-base/models/task.rb +137 -0
  21. data/lib/mobilize-base/models/user.rb +47 -0
  22. data/lib/mobilize-base/rakes.rb +59 -0
  23. data/lib/mobilize-base/version.rb +1 -1
  24. data/lib/mobilize-base.rb +20 -9
  25. data/lib/samples/gdrive.yml +12 -12
  26. data/lib/samples/gridfs.yml +9 -0
  27. data/lib/samples/gsheet.yml +6 -0
  28. data/lib/samples/jobtracker.yml +9 -9
  29. data/lib/samples/mongoid.yml +3 -3
  30. data/mobilize-base.gemspec +1 -1
  31. data/test/base1_task1.yml +3 -0
  32. data/test/base_job_rows.yml +13 -0
  33. data/test/mobilize-base_test.rb +59 -0
  34. metadata +20 -9
  35. data/lib/mobilize-base/handlers/mongodb.rb +0 -32
  36. data/lib/mobilize-base/models/requestor.rb +0 -232
  37. data/lib/mobilize-base/tasks.rb +0 -43
  38. data/test/mobilize_test.rb +0 -108
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.homepage = ""
11
11
  s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
12
12
  s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
13
- Mobilize schedules jobs, queues workers, sends failure notifications, and
13
+ Mobilize schedules jobs, queues workers, sends failure notifications, and
14
14
  integrates mobilize-hadoop, -http, -mysql, and -mongodb packages
15
15
  to allow seamless transport of TSV and JSON data between any two endpoints. }
16
16
 
@@ -0,0 +1,3 @@
1
+ - {test_header: t1, test_header2: t1, test_header3: t1}
2
+ - {test_header: t2, test_header2: t2, test_header3: t2}
3
+ - {test_header: t3, test_header2: t3, test_header3: t3}
@@ -0,0 +1,13 @@
1
+ - name: "base1"
2
+ active: true
3
+ trigger: once
4
+ status: ""
5
+ task1: 'gsheet.read "Runner_mobilize(test)/base1_task1.in"'
6
+ task2: 'gsheet.write "base1/task1", "Runner_mobilize(test)/base1.out"'
7
+
8
+ - name: "base2"
9
+ active: true
10
+ trigger: "after base1"
11
+ status: ""
12
+ task1: 'gsheet.read "Runner_mobilize(test)/base1.out"'
13
+ task2: 'gsheet.write "task1", "Runner_mobilize(test)/base2.out"'
@@ -0,0 +1,59 @@
1
+ require 'test_helper'
2
+
3
+ describe "Mobilize" do
4
+
5
+ def before
6
+ puts 'nothing before'
7
+ end
8
+
9
+ # enqueues 4 workers on Resque
10
+ it "runs integration test" do
11
+
12
+ puts "restart test redis"
13
+ Mobilize::Jobtracker.restart_test_redis
14
+
15
+ puts "clear out test db"
16
+ Mobilize::Jobtracker.drop_test_db
17
+
18
+ puts "restart workers"
19
+ Mobilize::Jobtracker.restart_workers!
20
+
21
+ puts "build test runner"
22
+ gdrive_slot = Mobilize::Gdrive.owner_email
23
+ puts "create user 'mobilize'"
24
+ user_name = gdrive_slot.split("@").first
25
+ u = Mobilize::User.find_or_create_by_name(user_name)
26
+ assert u.email == gdrive_slot
27
+
28
+ Mobilize::Jobtracker.build_test_runner(user_name)
29
+ assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
30
+
31
+ puts "Jobtracker created runner with 'jobs' sheet?"
32
+ r = u.runner
33
+ jobs_sheet = r.gsheet(gdrive_slot)
34
+ tsv = jobs_sheet.to_tsv
35
+ assert tsv.length == 56 #headers only
36
+
37
+ puts "add base1_task1 input sheet"
38
+ test_source_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_task1.in",gdrive_slot)
39
+
40
+ test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/base1_task1.yml")*40
41
+ test_source_tsv = test_source_ha.hash_array_to_tsv
42
+ test_source_sheet.write(test_source_tsv)
43
+
44
+ puts "add row to jobs sheet, wait 120s"
45
+ test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
46
+ jobs_sheet.add_or_update_rows(test_job_rows)
47
+
48
+ puts "job row added, force enqueued runner"
49
+ r.enqueue!
50
+ sleep 120
51
+
52
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
53
+ test_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
54
+
55
+ assert test_target_sheet.to_tsv == test_source_sheet.to_tsv
56
+
57
+ end
58
+
59
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-28 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -189,7 +189,7 @@ dependencies:
189
189
  version: 3.1.1
190
190
  description: ! "Manage your organization's workflows entirely through Google Docs
191
191
  and irb.\n Mobilize schedules jobs, queues workers, sends failure
192
- notifications, and \n integrates mobilize-hadoop, -http, -mysql,
192
+ notifications, and\n integrates mobilize-hadoop, -http, -mysql,
193
193
  and -mongodb packages\n to allow seamless transport of TSV and
194
194
  JSON data between any two endpoints. "
195
195
  email:
@@ -205,7 +205,10 @@ files:
205
205
  - Rakefile
206
206
  - lib/mobilize-base.rb
207
207
  - lib/mobilize-base/extensions/array.rb
208
- - lib/mobilize-base/extensions/google_drive.rb
208
+ - lib/mobilize-base/extensions/google_drive/acl.rb
209
+ - lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb
210
+ - lib/mobilize-base/extensions/google_drive/file.rb
211
+ - lib/mobilize-base/extensions/google_drive/worksheet.rb
209
212
  - lib/mobilize-base/extensions/hash.rb
210
213
  - lib/mobilize-base/extensions/object.rb
211
214
  - lib/mobilize-base/extensions/resque.rb
@@ -214,21 +217,27 @@ files:
214
217
  - lib/mobilize-base/handlers/gbook.rb
215
218
  - lib/mobilize-base/handlers/gdrive.rb
216
219
  - lib/mobilize-base/handlers/gfile.rb
220
+ - lib/mobilize-base/handlers/gridfs.rb
217
221
  - lib/mobilize-base/handlers/gsheet.rb
218
- - lib/mobilize-base/handlers/mongodb.rb
219
222
  - lib/mobilize-base/jobtracker.rb
220
223
  - lib/mobilize-base/models/dataset.rb
221
224
  - lib/mobilize-base/models/job.rb
222
- - lib/mobilize-base/models/requestor.rb
223
- - lib/mobilize-base/tasks.rb
225
+ - lib/mobilize-base/models/runner.rb
226
+ - lib/mobilize-base/models/task.rb
227
+ - lib/mobilize-base/models/user.rb
228
+ - lib/mobilize-base/rakes.rb
224
229
  - lib/mobilize-base/tasks/mobilize-base.rake
225
230
  - lib/mobilize-base/version.rb
226
231
  - lib/samples/gdrive.yml
232
+ - lib/samples/gridfs.yml
233
+ - lib/samples/gsheet.yml
227
234
  - lib/samples/jobtracker.yml
228
235
  - lib/samples/mongoid.yml
229
236
  - lib/samples/resque.yml
230
237
  - mobilize-base.gemspec
231
- - test/mobilize_test.rb
238
+ - test/base1_task1.yml
239
+ - test/base_job_rows.yml
240
+ - test/mobilize-base_test.rb
232
241
  - test/redis-test.conf
233
242
  - test/test_helper.rb
234
243
  homepage: ''
@@ -257,7 +266,9 @@ specification_version: 3
257
266
  summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
258
267
  Docs
259
268
  test_files:
260
- - test/mobilize_test.rb
269
+ - test/base1_task1.yml
270
+ - test/base_job_rows.yml
271
+ - test/mobilize-base_test.rb
261
272
  - test/redis-test.conf
262
273
  - test/test_helper.rb
263
274
  has_rdoc:
@@ -1,32 +0,0 @@
1
- module Mobilize
2
- class Mongodb
3
-
4
- def Mongodb.grid
5
- session = ::Mongoid.configure.sessions['default']
6
- database_name = session['database']
7
- host,port = session['hosts'].first.split(":")
8
- return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
9
- end
10
-
11
- def Mongodb.read_by_filename(filename)
12
- begin
13
- zs=Mongodb.grid.open(filename,'r').read
14
- return ::Zlib::Inflate.inflate(zs)
15
- rescue
16
- "failed Mongo read for filename #{filename}".oputs
17
- return nil
18
- end
19
- end
20
-
21
- def Mongodb.write_by_filename(filename,string)
22
- zs = ::Zlib::Deflate.deflate(string)
23
- Mongodb.grid.open(filename,'w',:delete_old => true){|f| f.write(zs)}
24
- return true
25
- end
26
-
27
- def Mongodb.delete_by_filename(filename)
28
- Mongodb.grid.delete(filename)
29
- return true
30
- end
31
- end
32
- end
@@ -1,232 +0,0 @@
1
- module Mobilize
2
- class Requestor
3
- include Mongoid::Document
4
- include Mongoid::Timestamps
5
- field :email, type: String
6
- field :oauth, type: String
7
- field :name, type: String
8
- field :first_name, type: String
9
- field :last_name, type: String
10
- field :admin_role, type: String
11
- field :last_run, type: Time
12
- field :status, type: String
13
-
14
- validates_presence_of :name, :message => ' cannot be blank.'
15
- validates_uniqueness_of :name, :message => ' has already been used.'
16
-
17
- before_destroy :destroy_jobs
18
-
19
- def Requestor.find_or_create_by_name(name)
20
- r = Requestor.where(:name => name).first
21
- r = Requestor.create(:name => name) unless r
22
- return r
23
- end
24
-
25
- def Requestor.find_or_create_by_email(email)
26
- r = Requestor.where(:email => email).first
27
- r = Requestor.create(:email => email) unless r
28
- user_name = email.split("@").first
29
- r.update_attributes(:name => user_name) unless r.name.to_s.length>0
30
- return r
31
- end
32
-
33
- def Requestor.jobs_sheet_headers
34
- %w{name active schedule status last_error destination_url tasks datasets params destination}
35
- end
36
-
37
- def Requestor.perform(id,*args)
38
- r = Requestor.find(id.to_s)
39
- #reserve email account for read
40
- gdrive_email = Gdrive.get_worker_email_by_mongo_id(id)
41
- unless gdrive_email
42
- "no gdrive_email available for #{r.name}".oputs
43
- return false
44
- end
45
- jobs_sheet = r.jobs_sheet(gdrive_email)
46
- #write headers to sheet
47
- Requestor.jobs_sheet_headers.each_with_index do |h,h_i|
48
- jobs_sheet[1,h_i+1] = h
49
- end
50
- jobs_sheet.save
51
- #read the jobs sheet
52
- #record jobs in DB
53
- #deactivate jobs not in sheet
54
- r.read_jobs(gdrive_email)
55
- #queue up the jobs that are due and active
56
- r.jobs.each do |j|
57
- begin
58
- if j.active and j.is_due?
59
- #cache all datasets
60
- j.dataset_array.each do |dst|
61
- #read tsv, write to cache for job to use
62
- tsv = Gsheet.find_or_create_by_name(dst.name,gdrive_email).to_tsv
63
- r.update_status("caching #{dst.name}")
64
- dst.write_cache(tsv)
65
- end
66
- j.enqueue!
67
- end
68
- rescue ScriptError,StandardError => exc
69
- #update errors
70
- j.update_attributes(:last_error=>exc.to_s,:last_trace=>exc.backtrace.to_s)
71
- end
72
- end
73
- #write any updates to status, error, datasource_url etc.
74
- r.write_jobs(gdrive_email)
75
- r.update_attributes(:last_run=>Time.now.utc)
76
- end
77
-
78
- def jobs_sheet(gdrive_email)#gdrive_email to read with
79
- r = self
80
- r.find_or_create_gbook_by_title(r.jobspec_title,gdrive_email)
81
- jobs_name = [r.jobspec_title,"Jobs"].join("/")
82
- r.find_or_create_gsheet_by_name(jobs_name,gdrive_email)
83
- end
84
-
85
- def read_jobs(gdrive_email)
86
- r = self
87
- jobs_sheet = r.jobs_sheet(gdrive_email)
88
- rem_jobs = jobs_sheet.to_tsv.tsv_to_hash_array
89
- #go through each job, update relevant job with its params
90
- loc_jobs = []
91
- rem_jobs.each_with_index do |rj,rj_i|
92
- #skip bad rows
93
- next if (rj['name'].to_s.first == "#" or ['name','schedule','tasks','active'].select{|c| rj[c].to_s.strip==""}.length>0)
94
- j = Job.find_or_create_by_requestor_id_and_name(r.id.to_s,rj['name'])
95
- #update top line params
96
- j.update_attributes(:active => rj['active'],
97
- :schedule => rj['schedule'],
98
- :tasks => rj['tasks'],
99
- :datasets => rj['datasets'],
100
- :params => rj['params'],
101
- :destination => rj['destination'])
102
- #update laststatus with "Created job for" if job is due
103
- j.update_status("Due and active at #{Time.now.utc}") if j.is_due? and j.active
104
- #add this job to list of local ones
105
- loc_jobs << j
106
- end
107
- #deactivate requestor jobs that are not included in sheet;
108
- #this makes sure we don't run obsolete jobs
109
- (r.jobs.map{|j| j.id.to_s} - loc_jobs.map{|j| j.id.to_s}).each do |rjid|
110
- j = Job.find(rjid)
111
- if j.active
112
- j.update_attributes(:active=>false)
113
- r.update_status("Deactivated job:#{r.name}=>#{j.name}")
114
- end
115
- end
116
- r.update_status(r.name + " jobs read at #{Time.now.utc}")
117
- return true
118
- end
119
-
120
- def write_jobs(gdrive_email) #gdrive_email to update with
121
- r = self
122
- jobs_sheet = r.jobs_sheet(gdrive_email)
123
- rem_jobs = jobs_sheet.to_tsv.tsv_to_hash_array
124
- #go through each job, update relevant job with its params
125
- headers = Requestor.jobs_sheet_headers
126
- #write headers
127
- jobs_sheet.add_headers(headers)
128
- #write rows
129
- rem_jobs.each_with_index do |rj,rj_i|
130
- #skip bad rows
131
- next if (rj['name'].to_s.first == "#" or ['name','schedule','tasks','active'].select{|c| rj[c].to_s.strip==""}.length>0)
132
- if j = r.jobs(rj['name'])
133
- #update active to false if this was a run once
134
- j.update_attributes(:active=>false) if j.schedule.to_s == 'once'
135
- jobs_sheet[rj_i+2,headers.index('active')+1] = j.active.to_s
136
- jobs_sheet[rj_i+2,headers.index('status')+1] = j.status.to_s.gsub("\n",";").gsub("\t"," ")
137
- jobs_sheet[rj_i+2,headers.index('last_error')+1] = j.last_error.to_s.gsub("\n",";").gsub("\t"," ")
138
- jobs_sheet[rj_i+2,headers.index('destination_url')+1] = j.destination_url.to_s
139
- end
140
- end
141
- jobs_sheet.save
142
- r.update_status(r.name + " jobs written")
143
- return true
144
- end
145
-
146
- def jobspec_title
147
- r = self
148
- prefix = "Jobspec_"
149
- suffix = ""
150
- if Mobilize::Base.env == 'development'
151
- suffix = "_dev"
152
- elsif Mobilize::Base.env == 'test' or Mobilize::Base.env == 'pry_dev'
153
- suffix = "_test"
154
- elsif Mobilize::Base.env == 'production' or Mobilize::Base.env == 'integration'
155
- suffix = ""
156
- else
157
- raise "Invalid environment"
158
- end
159
- title = prefix + r.name + suffix
160
- return title
161
- end
162
-
163
- #Google doc helper methods
164
-
165
- def find_or_create_gbook_by_title(title,gdrive_email)
166
- r = self
167
- book_dst = Dataset.find_or_create_by_handler_and_name('gbook',title)
168
- #give dst this requestor if none
169
- book_dst.update_attributes(:requestor_id=>r.id.to_s) if book_dst.requestor_id.nil?
170
- book = Gbook.find_or_create_by_dst_id(book_dst.id.to_s,gdrive_email)
171
- return book
172
- end
173
-
174
- def find_or_create_gsheet_by_name(name,gdrive_email)
175
- r = self
176
- sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheet',name)
177
- sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
178
- sheet = Gsheet.find_or_create_by_dst_id(sheet_dst.id.to_s,gdrive_email)
179
- return sheet
180
- end
181
-
182
- def jobs(jname=nil)
183
- r = self
184
- js = Job.find_all_by_requestor_id(r.id.to_s)
185
- if jname
186
- return js.sel{|j| j.name == jname}.first
187
- else
188
- return js
189
- end
190
- end
191
-
192
- def destroy_jobs
193
- r = self
194
- r.jobs.each{|s| s.delete}
195
- end
196
-
197
- def gsheets
198
- r = self
199
- Dataset.find_all_by_handler_and_requestor_id('gsheet',r.id.to_s)
200
- end
201
-
202
- def worker
203
- r = self
204
- Mobilize::Resque.find_worker_by_mongo_id(r.id.to_s)
205
- end
206
-
207
- def update_status(msg)
208
- r = self
209
- r.update_attributes(:status=>msg)
210
- Mobilize::Resque.update_job_status(r.id.to_s,msg)
211
- return true
212
- end
213
-
214
- def is_working?
215
- r = self
216
- Mobilize::Resque.active_mongo_ids.include?(r.id.to_s)
217
- end
218
-
219
- def is_due?
220
- r = self.reload
221
- return false if r.is_working?
222
- last_due_time = Time.now.utc - Jobtracker.requestor_refresh_freq
223
- return true if r.last_run.nil? or r.last_run < last_due_time
224
- end
225
-
226
- def enqueue!
227
- r = self
228
- ::Resque::Job.create("mobilize",Requestor,r.id.to_s,{"name"=>r.name})
229
- return true
230
- end
231
- end
232
- end
@@ -1,43 +0,0 @@
1
- # require 'resque/tasks'
2
- # will give you the resque tasks
3
-
4
- namespace :mobilize do
5
-
6
- desc "Start a Resque worker"
7
- task :work do
8
- require 'resque'
9
- require 'mobilize-base'
10
-
11
- begin
12
- worker = Resque::Worker.new(Mobilize::Resque.config['queue_name'])
13
- rescue Resque::NoQueueError
14
- abort "set QUEUE env var, e.g. $ QUEUE=critical,high rake resque:work"
15
- end
16
-
17
- puts "Starting worker #{worker}"
18
-
19
- worker.work(ENV['INTERVAL'] || 5) # interval, will block
20
- end
21
-
22
- desc "Set up config and log folders and files"
23
- task :setup do
24
- sample_dir = File.dirname(__FILE__) + '/../samples/'
25
- sample_files = Dir.entries(sample_dir)
26
- config_dir = "#{ENV['PWD']}/config/"
27
- log_dir = "#{ENV['PWD']}/log/"
28
- unless File.exists?(config_dir)
29
- puts "creating config dir"
30
- `mkdir #{config_dir}`
31
- end
32
- unless File.exists?(log_dir)
33
- puts "creating log dir"
34
- `mkdir #{log_dir}`
35
- end
36
- sample_files.each do |fname|
37
- unless File.exists?("#{config_dir}#{fname}")
38
- puts "creating config/#{fname}"
39
- `cp #{sample_dir}#{fname} #{config_dir}#{fname}`
40
- end
41
- end
42
- end
43
- end