mobilize-base 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/.gitignore +5 -0
  2. data/LICENSE.txt +202 -20
  3. data/README.md +219 -138
  4. data/Rakefile +1 -2
  5. data/lib/mobilize-base/extensions/google_drive/acl.rb +25 -0
  6. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +49 -0
  7. data/lib/mobilize-base/extensions/google_drive/file.rb +80 -0
  8. data/lib/mobilize-base/extensions/{google_drive.rb → google_drive/worksheet.rb} +46 -173
  9. data/lib/mobilize-base/extensions/resque.rb +18 -24
  10. data/lib/mobilize-base/extensions/string.rb +12 -0
  11. data/lib/mobilize-base/handlers/gbook.rb +14 -47
  12. data/lib/mobilize-base/handlers/gdrive.rb +17 -18
  13. data/lib/mobilize-base/handlers/gfile.rb +18 -39
  14. data/lib/mobilize-base/handlers/gridfs.rb +43 -0
  15. data/lib/mobilize-base/handlers/gsheet.rb +48 -99
  16. data/lib/mobilize-base/jobtracker.rb +29 -15
  17. data/lib/mobilize-base/models/dataset.rb +33 -35
  18. data/lib/mobilize-base/models/job.rb +21 -168
  19. data/lib/mobilize-base/models/runner.rb +178 -0
  20. data/lib/mobilize-base/models/task.rb +137 -0
  21. data/lib/mobilize-base/models/user.rb +47 -0
  22. data/lib/mobilize-base/rakes.rb +59 -0
  23. data/lib/mobilize-base/version.rb +1 -1
  24. data/lib/mobilize-base.rb +20 -9
  25. data/lib/samples/gdrive.yml +12 -12
  26. data/lib/samples/gridfs.yml +9 -0
  27. data/lib/samples/gsheet.yml +6 -0
  28. data/lib/samples/jobtracker.yml +9 -9
  29. data/lib/samples/mongoid.yml +3 -3
  30. data/mobilize-base.gemspec +1 -1
  31. data/test/base1_task1.yml +3 -0
  32. data/test/base_job_rows.yml +13 -0
  33. data/test/mobilize-base_test.rb +59 -0
  34. metadata +20 -9
  35. data/lib/mobilize-base/handlers/mongodb.rb +0 -32
  36. data/lib/mobilize-base/models/requestor.rb +0 -232
  37. data/lib/mobilize-base/tasks.rb +0 -43
  38. data/test/mobilize_test.rb +0 -108
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.homepage = ""
11
11
  s.summary = %q{Moves datasets and schedules data transfers using MongoDB, Resque and Google Docs}
12
12
  s.description = %q{Manage your organization's workflows entirely through Google Docs and irb.
13
- Mobilize schedules jobs, queues workers, sends failure notifications, and
13
+ Mobilize schedules jobs, queues workers, sends failure notifications, and
14
14
  integrates mobilize-hadoop, -http, -mysql, and -mongodb packages
15
15
  to allow seamless transport of TSV and JSON data between any two endpoints. }
16
16
 
@@ -0,0 +1,3 @@
1
+ - {test_header: t1, test_header2: t1, test_header3: t1}
2
+ - {test_header: t2, test_header2: t2, test_header3: t2}
3
+ - {test_header: t3, test_header2: t3, test_header3: t3}
@@ -0,0 +1,13 @@
1
+ - name: "base1"
2
+ active: true
3
+ trigger: once
4
+ status: ""
5
+ task1: 'gsheet.read "Runner_mobilize(test)/base1_task1.in"'
6
+ task2: 'gsheet.write "base1/task1", "Runner_mobilize(test)/base1.out"'
7
+
8
+ - name: "base2"
9
+ active: true
10
+ trigger: "after base1"
11
+ status: ""
12
+ task1: 'gsheet.read "Runner_mobilize(test)/base1.out"'
13
+ task2: 'gsheet.write "task1", "Runner_mobilize(test)/base2.out"'
@@ -0,0 +1,59 @@
1
+ require 'test_helper'
2
+
3
+ describe "Mobilize" do
4
+
5
+ def before
6
+ puts 'nothing before'
7
+ end
8
+
9
+ # enqueues 4 workers on Resque
10
+ it "runs integration test" do
11
+
12
+ puts "restart test redis"
13
+ Mobilize::Jobtracker.restart_test_redis
14
+
15
+ puts "clear out test db"
16
+ Mobilize::Jobtracker.drop_test_db
17
+
18
+ puts "restart workers"
19
+ Mobilize::Jobtracker.restart_workers!
20
+
21
+ puts "build test runner"
22
+ gdrive_slot = Mobilize::Gdrive.owner_email
23
+ puts "create user 'mobilize'"
24
+ user_name = gdrive_slot.split("@").first
25
+ u = Mobilize::User.find_or_create_by_name(user_name)
26
+ assert u.email == gdrive_slot
27
+
28
+ Mobilize::Jobtracker.build_test_runner(user_name)
29
+ assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
30
+
31
+ puts "Jobtracker created runner with 'jobs' sheet?"
32
+ r = u.runner
33
+ jobs_sheet = r.gsheet(gdrive_slot)
34
+ tsv = jobs_sheet.to_tsv
35
+ assert tsv.length == 56 #headers only
36
+
37
+ puts "add base1_task1 input sheet"
38
+ test_source_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_task1.in",gdrive_slot)
39
+
40
+ test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/base1_task1.yml")*40
41
+ test_source_tsv = test_source_ha.hash_array_to_tsv
42
+ test_source_sheet.write(test_source_tsv)
43
+
44
+ puts "add row to jobs sheet, wait 120s"
45
+ test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
46
+ jobs_sheet.add_or_update_rows(test_job_rows)
47
+
48
+ puts "job row added, force enqueued runner"
49
+ r.enqueue!
50
+ sleep 120
51
+
52
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
53
+ test_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
54
+
55
+ assert test_target_sheet.to_tsv == test_source_sheet.to_tsv
56
+
57
+ end
58
+
59
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-28 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -189,7 +189,7 @@ dependencies:
189
189
  version: 3.1.1
190
190
  description: ! "Manage your organization's workflows entirely through Google Docs
191
191
  and irb.\n Mobilize schedules jobs, queues workers, sends failure
192
- notifications, and \n integrates mobilize-hadoop, -http, -mysql,
192
+ notifications, and\n integrates mobilize-hadoop, -http, -mysql,
193
193
  and -mongodb packages\n to allow seamless transport of TSV and
194
194
  JSON data between any two endpoints. "
195
195
  email:
@@ -205,7 +205,10 @@ files:
205
205
  - Rakefile
206
206
  - lib/mobilize-base.rb
207
207
  - lib/mobilize-base/extensions/array.rb
208
- - lib/mobilize-base/extensions/google_drive.rb
208
+ - lib/mobilize-base/extensions/google_drive/acl.rb
209
+ - lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb
210
+ - lib/mobilize-base/extensions/google_drive/file.rb
211
+ - lib/mobilize-base/extensions/google_drive/worksheet.rb
209
212
  - lib/mobilize-base/extensions/hash.rb
210
213
  - lib/mobilize-base/extensions/object.rb
211
214
  - lib/mobilize-base/extensions/resque.rb
@@ -214,21 +217,27 @@ files:
214
217
  - lib/mobilize-base/handlers/gbook.rb
215
218
  - lib/mobilize-base/handlers/gdrive.rb
216
219
  - lib/mobilize-base/handlers/gfile.rb
220
+ - lib/mobilize-base/handlers/gridfs.rb
217
221
  - lib/mobilize-base/handlers/gsheet.rb
218
- - lib/mobilize-base/handlers/mongodb.rb
219
222
  - lib/mobilize-base/jobtracker.rb
220
223
  - lib/mobilize-base/models/dataset.rb
221
224
  - lib/mobilize-base/models/job.rb
222
- - lib/mobilize-base/models/requestor.rb
223
- - lib/mobilize-base/tasks.rb
225
+ - lib/mobilize-base/models/runner.rb
226
+ - lib/mobilize-base/models/task.rb
227
+ - lib/mobilize-base/models/user.rb
228
+ - lib/mobilize-base/rakes.rb
224
229
  - lib/mobilize-base/tasks/mobilize-base.rake
225
230
  - lib/mobilize-base/version.rb
226
231
  - lib/samples/gdrive.yml
232
+ - lib/samples/gridfs.yml
233
+ - lib/samples/gsheet.yml
227
234
  - lib/samples/jobtracker.yml
228
235
  - lib/samples/mongoid.yml
229
236
  - lib/samples/resque.yml
230
237
  - mobilize-base.gemspec
231
- - test/mobilize_test.rb
238
+ - test/base1_task1.yml
239
+ - test/base_job_rows.yml
240
+ - test/mobilize-base_test.rb
232
241
  - test/redis-test.conf
233
242
  - test/test_helper.rb
234
243
  homepage: ''
@@ -257,7 +266,9 @@ specification_version: 3
257
266
  summary: Moves datasets and schedules data transfers using MongoDB, Resque and Google
258
267
  Docs
259
268
  test_files:
260
- - test/mobilize_test.rb
269
+ - test/base1_task1.yml
270
+ - test/base_job_rows.yml
271
+ - test/mobilize-base_test.rb
261
272
  - test/redis-test.conf
262
273
  - test/test_helper.rb
263
274
  has_rdoc:
@@ -1,32 +0,0 @@
1
- module Mobilize
2
- class Mongodb
3
-
4
- def Mongodb.grid
5
- session = ::Mongoid.configure.sessions['default']
6
- database_name = session['database']
7
- host,port = session['hosts'].first.split(":")
8
- return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
9
- end
10
-
11
- def Mongodb.read_by_filename(filename)
12
- begin
13
- zs=Mongodb.grid.open(filename,'r').read
14
- return ::Zlib::Inflate.inflate(zs)
15
- rescue
16
- "failed Mongo read for filename #{filename}".oputs
17
- return nil
18
- end
19
- end
20
-
21
- def Mongodb.write_by_filename(filename,string)
22
- zs = ::Zlib::Deflate.deflate(string)
23
- Mongodb.grid.open(filename,'w',:delete_old => true){|f| f.write(zs)}
24
- return true
25
- end
26
-
27
- def Mongodb.delete_by_filename(filename)
28
- Mongodb.grid.delete(filename)
29
- return true
30
- end
31
- end
32
- end
@@ -1,232 +0,0 @@
1
- module Mobilize
2
- class Requestor
3
- include Mongoid::Document
4
- include Mongoid::Timestamps
5
- field :email, type: String
6
- field :oauth, type: String
7
- field :name, type: String
8
- field :first_name, type: String
9
- field :last_name, type: String
10
- field :admin_role, type: String
11
- field :last_run, type: Time
12
- field :status, type: String
13
-
14
- validates_presence_of :name, :message => ' cannot be blank.'
15
- validates_uniqueness_of :name, :message => ' has already been used.'
16
-
17
- before_destroy :destroy_jobs
18
-
19
- def Requestor.find_or_create_by_name(name)
20
- r = Requestor.where(:name => name).first
21
- r = Requestor.create(:name => name) unless r
22
- return r
23
- end
24
-
25
- def Requestor.find_or_create_by_email(email)
26
- r = Requestor.where(:email => email).first
27
- r = Requestor.create(:email => email) unless r
28
- user_name = email.split("@").first
29
- r.update_attributes(:name => user_name) unless r.name.to_s.length>0
30
- return r
31
- end
32
-
33
- def Requestor.jobs_sheet_headers
34
- %w{name active schedule status last_error destination_url tasks datasets params destination}
35
- end
36
-
37
- def Requestor.perform(id,*args)
38
- r = Requestor.find(id.to_s)
39
- #reserve email account for read
40
- gdrive_email = Gdrive.get_worker_email_by_mongo_id(id)
41
- unless gdrive_email
42
- "no gdrive_email available for #{r.name}".oputs
43
- return false
44
- end
45
- jobs_sheet = r.jobs_sheet(gdrive_email)
46
- #write headers to sheet
47
- Requestor.jobs_sheet_headers.each_with_index do |h,h_i|
48
- jobs_sheet[1,h_i+1] = h
49
- end
50
- jobs_sheet.save
51
- #read the jobs sheet
52
- #record jobs in DB
53
- #deactivate jobs not in sheet
54
- r.read_jobs(gdrive_email)
55
- #queue up the jobs that are due and active
56
- r.jobs.each do |j|
57
- begin
58
- if j.active and j.is_due?
59
- #cache all datasets
60
- j.dataset_array.each do |dst|
61
- #read tsv, write to cache for job to use
62
- tsv = Gsheet.find_or_create_by_name(dst.name,gdrive_email).to_tsv
63
- r.update_status("caching #{dst.name}")
64
- dst.write_cache(tsv)
65
- end
66
- j.enqueue!
67
- end
68
- rescue ScriptError,StandardError => exc
69
- #update errors
70
- j.update_attributes(:last_error=>exc.to_s,:last_trace=>exc.backtrace.to_s)
71
- end
72
- end
73
- #write any updates to status, error, datasource_url etc.
74
- r.write_jobs(gdrive_email)
75
- r.update_attributes(:last_run=>Time.now.utc)
76
- end
77
-
78
- def jobs_sheet(gdrive_email)#gdrive_email to read with
79
- r = self
80
- r.find_or_create_gbook_by_title(r.jobspec_title,gdrive_email)
81
- jobs_name = [r.jobspec_title,"Jobs"].join("/")
82
- r.find_or_create_gsheet_by_name(jobs_name,gdrive_email)
83
- end
84
-
85
- def read_jobs(gdrive_email)
86
- r = self
87
- jobs_sheet = r.jobs_sheet(gdrive_email)
88
- rem_jobs = jobs_sheet.to_tsv.tsv_to_hash_array
89
- #go through each job, update relevant job with its params
90
- loc_jobs = []
91
- rem_jobs.each_with_index do |rj,rj_i|
92
- #skip bad rows
93
- next if (rj['name'].to_s.first == "#" or ['name','schedule','tasks','active'].select{|c| rj[c].to_s.strip==""}.length>0)
94
- j = Job.find_or_create_by_requestor_id_and_name(r.id.to_s,rj['name'])
95
- #update top line params
96
- j.update_attributes(:active => rj['active'],
97
- :schedule => rj['schedule'],
98
- :tasks => rj['tasks'],
99
- :datasets => rj['datasets'],
100
- :params => rj['params'],
101
- :destination => rj['destination'])
102
- #update laststatus with "Created job for" if job is due
103
- j.update_status("Due and active at #{Time.now.utc}") if j.is_due? and j.active
104
- #add this job to list of local ones
105
- loc_jobs << j
106
- end
107
- #deactivate requestor jobs that are not included in sheet;
108
- #this makes sure we don't run obsolete jobs
109
- (r.jobs.map{|j| j.id.to_s} - loc_jobs.map{|j| j.id.to_s}).each do |rjid|
110
- j = Job.find(rjid)
111
- if j.active
112
- j.update_attributes(:active=>false)
113
- r.update_status("Deactivated job:#{r.name}=>#{j.name}")
114
- end
115
- end
116
- r.update_status(r.name + " jobs read at #{Time.now.utc}")
117
- return true
118
- end
119
-
120
- def write_jobs(gdrive_email) #gdrive_email to update with
121
- r = self
122
- jobs_sheet = r.jobs_sheet(gdrive_email)
123
- rem_jobs = jobs_sheet.to_tsv.tsv_to_hash_array
124
- #go through each job, update relevant job with its params
125
- headers = Requestor.jobs_sheet_headers
126
- #write headers
127
- jobs_sheet.add_headers(headers)
128
- #write rows
129
- rem_jobs.each_with_index do |rj,rj_i|
130
- #skip bad rows
131
- next if (rj['name'].to_s.first == "#" or ['name','schedule','tasks','active'].select{|c| rj[c].to_s.strip==""}.length>0)
132
- if j = r.jobs(rj['name'])
133
- #update active to false if this was a run once
134
- j.update_attributes(:active=>false) if j.schedule.to_s == 'once'
135
- jobs_sheet[rj_i+2,headers.index('active')+1] = j.active.to_s
136
- jobs_sheet[rj_i+2,headers.index('status')+1] = j.status.to_s.gsub("\n",";").gsub("\t"," ")
137
- jobs_sheet[rj_i+2,headers.index('last_error')+1] = j.last_error.to_s.gsub("\n",";").gsub("\t"," ")
138
- jobs_sheet[rj_i+2,headers.index('destination_url')+1] = j.destination_url.to_s
139
- end
140
- end
141
- jobs_sheet.save
142
- r.update_status(r.name + " jobs written")
143
- return true
144
- end
145
-
146
- def jobspec_title
147
- r = self
148
- prefix = "Jobspec_"
149
- suffix = ""
150
- if Mobilize::Base.env == 'development'
151
- suffix = "_dev"
152
- elsif Mobilize::Base.env == 'test' or Mobilize::Base.env == 'pry_dev'
153
- suffix = "_test"
154
- elsif Mobilize::Base.env == 'production' or Mobilize::Base.env == 'integration'
155
- suffix = ""
156
- else
157
- raise "Invalid environment"
158
- end
159
- title = prefix + r.name + suffix
160
- return title
161
- end
162
-
163
- #Google doc helper methods
164
-
165
- def find_or_create_gbook_by_title(title,gdrive_email)
166
- r = self
167
- book_dst = Dataset.find_or_create_by_handler_and_name('gbook',title)
168
- #give dst this requestor if none
169
- book_dst.update_attributes(:requestor_id=>r.id.to_s) if book_dst.requestor_id.nil?
170
- book = Gbook.find_or_create_by_dst_id(book_dst.id.to_s,gdrive_email)
171
- return book
172
- end
173
-
174
- def find_or_create_gsheet_by_name(name,gdrive_email)
175
- r = self
176
- sheet_dst = Dataset.find_or_create_by_handler_and_name('gsheet',name)
177
- sheet_dst.update_attributes(:requestor_id=>r.id.to_s) if sheet_dst.requestor_id.nil?
178
- sheet = Gsheet.find_or_create_by_dst_id(sheet_dst.id.to_s,gdrive_email)
179
- return sheet
180
- end
181
-
182
- def jobs(jname=nil)
183
- r = self
184
- js = Job.find_all_by_requestor_id(r.id.to_s)
185
- if jname
186
- return js.sel{|j| j.name == jname}.first
187
- else
188
- return js
189
- end
190
- end
191
-
192
- def destroy_jobs
193
- r = self
194
- r.jobs.each{|s| s.delete}
195
- end
196
-
197
- def gsheets
198
- r = self
199
- Dataset.find_all_by_handler_and_requestor_id('gsheet',r.id.to_s)
200
- end
201
-
202
- def worker
203
- r = self
204
- Mobilize::Resque.find_worker_by_mongo_id(r.id.to_s)
205
- end
206
-
207
- def update_status(msg)
208
- r = self
209
- r.update_attributes(:status=>msg)
210
- Mobilize::Resque.update_job_status(r.id.to_s,msg)
211
- return true
212
- end
213
-
214
- def is_working?
215
- r = self
216
- Mobilize::Resque.active_mongo_ids.include?(r.id.to_s)
217
- end
218
-
219
- def is_due?
220
- r = self.reload
221
- return false if r.is_working?
222
- last_due_time = Time.now.utc - Jobtracker.requestor_refresh_freq
223
- return true if r.last_run.nil? or r.last_run < last_due_time
224
- end
225
-
226
- def enqueue!
227
- r = self
228
- ::Resque::Job.create("mobilize",Requestor,r.id.to_s,{"name"=>r.name})
229
- return true
230
- end
231
- end
232
- end
@@ -1,43 +0,0 @@
1
- # require 'resque/tasks'
2
- # will give you the resque tasks
3
-
4
- namespace :mobilize do
5
-
6
- desc "Start a Resque worker"
7
- task :work do
8
- require 'resque'
9
- require 'mobilize-base'
10
-
11
- begin
12
- worker = Resque::Worker.new(Mobilize::Resque.config['queue_name'])
13
- rescue Resque::NoQueueError
14
- abort "set QUEUE env var, e.g. $ QUEUE=critical,high rake resque:work"
15
- end
16
-
17
- puts "Starting worker #{worker}"
18
-
19
- worker.work(ENV['INTERVAL'] || 5) # interval, will block
20
- end
21
-
22
- desc "Set up config and log folders and files"
23
- task :setup do
24
- sample_dir = File.dirname(__FILE__) + '/../samples/'
25
- sample_files = Dir.entries(sample_dir)
26
- config_dir = "#{ENV['PWD']}/config/"
27
- log_dir = "#{ENV['PWD']}/log/"
28
- unless File.exists?(config_dir)
29
- puts "creating config dir"
30
- `mkdir #{config_dir}`
31
- end
32
- unless File.exists?(log_dir)
33
- puts "creating log dir"
34
- `mkdir #{log_dir}`
35
- end
36
- sample_files.each do |fname|
37
- unless File.exists?("#{config_dir}#{fname}")
38
- puts "creating config/#{fname}"
39
- `cp #{sample_dir}#{fname} #{config_dir}#{fname}`
40
- end
41
- end
42
- end
43
- end