mobilize-base 1.36 → 1.293

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/README.md +666 -1
  2. data/lib/mobilize-base.rb +1 -12
  3. data/lib/mobilize-base/extensions/array.rb +3 -8
  4. data/lib/mobilize-base/extensions/google_drive/acl.rb +1 -1
  5. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +1 -2
  6. data/lib/mobilize-base/extensions/google_drive/file.rb +37 -11
  7. data/lib/mobilize-base/extensions/string.rb +6 -11
  8. data/lib/mobilize-base/extensions/yaml.rb +7 -10
  9. data/lib/mobilize-base/handlers/gbook.rb +38 -25
  10. data/lib/mobilize-base/handlers/gdrive.rb +4 -20
  11. data/lib/mobilize-base/handlers/gfile.rb +10 -64
  12. data/lib/mobilize-base/handlers/gridfs.rb +24 -19
  13. data/lib/mobilize-base/handlers/gsheet.rb +29 -45
  14. data/lib/mobilize-base/handlers/resque.rb +10 -17
  15. data/lib/mobilize-base/jobtracker.rb +196 -22
  16. data/lib/mobilize-base/models/job.rb +77 -107
  17. data/lib/mobilize-base/models/runner.rb +122 -36
  18. data/lib/mobilize-base/models/stage.rb +37 -18
  19. data/lib/mobilize-base/tasks.rb +13 -50
  20. data/lib/mobilize-base/version.rb +1 -1
  21. data/lib/samples/gdrive.yml +0 -15
  22. data/lib/samples/gridfs.yml +3 -0
  23. data/lib/samples/gsheet.yml +4 -4
  24. data/lib/samples/jobtracker.yml +6 -0
  25. data/mobilize-base.gemspec +3 -3
  26. data/test/base_job_rows.yml +11 -0
  27. data/test/mobilize-base_test.rb +106 -0
  28. data/test/test_base_1.yml +3 -0
  29. data/test/test_helper.rb +0 -155
  30. metadata +24 -36
  31. data/lib/mobilize-base/extensions/time.rb +0 -20
  32. data/lib/mobilize-base/helpers/job_helper.rb +0 -54
  33. data/lib/mobilize-base/helpers/jobtracker_helper.rb +0 -143
  34. data/lib/mobilize-base/helpers/runner_helper.rb +0 -83
  35. data/lib/mobilize-base/helpers/stage_helper.rb +0 -38
  36. data/lib/samples/gfile.yml +0 -9
  37. data/test/fixtures/base1_stage1.in.yml +0 -10
  38. data/test/fixtures/integration_expected.yml +0 -25
  39. data/test/fixtures/integration_jobs.yml +0 -12
  40. data/test/fixtures/is_due.yml +0 -97
  41. data/test/integration/mobilize-base_test.rb +0 -57
  42. data/test/unit/mobilize-base_test.rb +0 -33
@@ -2,7 +2,6 @@ module Mobilize
2
2
  class Stage
3
3
  include Mongoid::Document
4
4
  include Mongoid::Timestamps
5
- include Mobilize::StageHelper
6
5
  field :path, type: String
7
6
  field :handler, type: String
8
7
  field :call, type: String
@@ -17,6 +16,40 @@ module Mobilize
17
16
 
18
17
  index({ path: 1})
19
18
 
19
+ def idx
20
+ s = self
21
+ s.path.split("/").last.gsub("stage","").to_i
22
+ end
23
+
24
+ def out_dst
25
+ #this gives a dataset that points to the output
26
+ #allowing you to determine its size
27
+ #before committing to a read or write
28
+ s = self
29
+ Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
30
+ end
31
+
32
+ def err_dst
33
+ #this gives a dataset that points to the error output
34
+ #allowing you to determine its size
35
+ #before committing to a read or write
36
+ s = self
37
+ Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
38
+ end
39
+
40
+ def params
41
+ s = self
42
+ p = YAML.easy_load(s.param_string)
43
+ raise "Must resolve to Hash" unless p.class==Hash
44
+ return p
45
+ end
46
+
47
+ def job
48
+ s = self
49
+ job_path = s.path.split("/")[0..-2].join("/")
50
+ Job.where(:path=>job_path).first
51
+ end
52
+
20
53
  def Stage.find_or_create_by_path(path)
21
54
  s = Stage.where(:path=>path).first
22
55
  s = Stage.create(:path=>path) unless s
@@ -44,14 +77,6 @@ module Mobilize
44
77
 
45
78
  def Stage.perform(id,*args)
46
79
  s = Stage.where(:path=>id).first
47
- #check to make sure params are parsable
48
- begin
49
- param_hash = s.params
50
- raise ScriptError if param_hash.class!=Hash
51
- rescue StandardError, ScriptError
52
- s.fail({'signal'=>500,
53
- 'err_str'=>"Unable to parse stage params, make sure you don't have issues with your quotes, commas, or colons."})
54
- end
55
80
  s.update_attributes(:started_at=>Time.now.utc)
56
81
  s.update_status(%{Starting at #{Time.now.utc}})
57
82
  #get response by running method
@@ -82,6 +107,8 @@ module Mobilize
82
107
 
83
108
  def complete(response)
84
109
  s = self
110
+ s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
111
+ s.update_status("Completed at #{Time.now.utc.to_s}")
85
112
  j = s.job
86
113
  if s.idx == j.stages.length
87
114
  #check for any dependent jobs, if there are, enqueue them
@@ -108,10 +135,6 @@ module Mobilize
108
135
  s.next.update_attributes(:retries_done=>0)
109
136
  s.next.enqueue!
110
137
  end
111
- #complete after dependencies are processed
112
- #to make sure it doesn't enqueue due to runner check
113
- s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
114
- s.update_status("Completed at #{Time.now.utc.to_s}")
115
138
  true
116
139
  end
117
140
 
@@ -122,11 +145,7 @@ module Mobilize
122
145
  j = s.job
123
146
  r = j.runner
124
147
  u = r.user
125
- begin
126
- j.update_attributes(:active=>false) if s.params['always_on'].to_s=="false"
127
- rescue StandardError, ScriptError
128
- #skip due to parse error on params
129
- end
148
+ j.update_attributes(:active=>false) unless s.params['always_on']
130
149
  s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
131
150
  stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
132
151
  target_path = (r.path.split("/")[0..-2] + [stage_name]).join("/")
@@ -1,7 +1,6 @@
1
- namespace :mobilize do
1
+ namespace :mobilize_base do
2
2
  desc "Start a Resque worker"
3
- task :work, :env do |t,args|
4
- ENV['MOBILIZE_ENV']=args.env
3
+ task :work do
5
4
  require 'mobilize-base'
6
5
  Mobilize::Base.config('jobtracker')['extensions'].each do |e|
7
6
  begin
@@ -21,77 +20,42 @@ namespace :mobilize do
21
20
  worker.work(ENV['INTERVAL'] || 5) # interval, will block
22
21
  end
23
22
  desc "Kill all Resque workers"
24
- task :kill_workers, :env do |t,args|
25
- ENV['MOBILIZE_ENV']=args.env
23
+ task :kill_workers do
26
24
  require 'mobilize-base'
27
25
  Mobilize::Jobtracker.kill_workers
28
26
  end
29
27
  desc "Kill idle workers not in sync with repo"
30
- task :kill_idle_and_stale_workers, :env do |t,args|
31
- ENV['MOBILIZE_ENV']=args.env
28
+ task :kill_idle_and_stale_workers do
32
29
  require 'mobilize-base'
33
30
  Mobilize::Jobtracker.kill_idle_and_stale_workers
34
31
  end
35
- desc "Kill idle workers"
36
- task :kill_idle_workers, :env do |t,args|
37
- ENV['MOBILIZE_ENV']=args.env
32
+ desc "Kill idle workers"
33
+ task :kill_idle_workers do
38
34
  require 'mobilize-base'
39
35
  Mobilize::Jobtracker.kill_idle_workers
40
36
  end
41
37
  desc "Make sure there are the correct # of workers, kill if too many"
42
- task :prep_workers, :env do |t,args|
43
- ENV['MOBILIZE_ENV']=args.env
38
+ task :prep_workers do
44
39
  require 'mobilize-base'
45
40
  Mobilize::Jobtracker.prep_workers
46
41
  end
47
- desc "Restart Resque workers"
48
- task :restart_workers, :env do |t,args|
49
- ENV['MOBILIZE_ENV']=args.env
50
- require 'mobilize-base'
51
- Mobilize::Jobtracker.kill_workers
52
- sleep 5
53
- Mobilize::Jobtracker.prep_workers
54
- end
55
42
  desc "Stop Jobtracker"
56
- task :stop_jobtracker, :env do |t,args|
57
- ENV['MOBILIZE_ENV']=args.env
43
+ task :stop_jobtracker do
58
44
  require 'mobilize-base'
59
45
  Mobilize::Jobtracker.stop!
60
46
  end
61
47
  desc "Start Jobtracker"
62
- task :start_jobtracker, :env do |t,args|
63
- ENV['MOBILIZE_ENV']=args.env
48
+ task :start_jobtracker do
64
49
  require 'mobilize-base'
65
50
  Mobilize::Jobtracker.start
66
51
  end
67
52
  desc "Restart Jobtracker"
68
- task :restart_jobtracker, :env do |t,args|
69
- ENV['MOBILIZE_ENV']=args.env
53
+ task :restart_jobtracker do
70
54
  require 'mobilize-base'
71
55
  Mobilize::Jobtracker.restart!
72
56
  end
73
- desc "Add a user"
74
- task :add_user, :name, :env do |t,args|
75
- ENV['MOBILIZE_ENV']=args.env
76
- require 'mobilize-base'
77
- Mobilize::User.find_or_create_by_name(args.name)
78
- end
79
- desc "Enqueue a user's runner"
80
- task :enqueue_user, :name, :env do |t,args|
81
- ENV['MOBILIZE_ENV']=args.env
82
- require 'mobilize-base'
83
- Mobilize::User.where(name: args.name).first.runner.enqueue!
84
- end
85
- desc "Enqueue a stage"
86
- task :enqueue_stage, :path, :env do |t,args|
87
- ENV['MOBILIZE_ENV']=args.env
88
- require 'mobilize-base'
89
- user,job,stage = args.path.split("/")
90
- Mobilize::Stage.where(path: "Runner_#{user}/jobs/#{job}/#{stage}").first.en
91
- end
92
57
  desc "kill all old resque web processes, start new one with resque_web.rb extension file"
93
- task :resque_web, :env do |t,args|
94
- ENV['MOBILIZE_ENV']=args.env
58
+ task :resque_web do
95
59
  require 'mobilize-base'
96
60
  port = Mobilize::Base.config('resque')['web_port']
97
61
  config_dir = (ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/")
@@ -119,15 +83,14 @@ namespace :mobilize do
119
83
  `#{command}`
120
84
  end
121
85
  desc "create indexes for all base models in mongodb"
122
- task :create_indexes, :env do |t,args|
123
- ENV['MOBILIZE_ENV']=args.env
86
+ task :create_indexes do
124
87
  require 'mobilize-base'
125
88
  ["Dataset","Job","Runner","Task","User"].each do |m|
126
89
  "Mobilize::#{m}".constantize.create_indexes
127
90
  end
128
91
  end
129
92
  desc "Set up config and log folders and files, populate from samples"
130
- task :setup_base do
93
+ task :setup do
131
94
  config_dir = (ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/")
132
95
  log_dir = (ENV['MOBILIZE_LOG_DIR'] ||= "log/")
133
96
  sample_dir = File.dirname(__FILE__) + '/../samples/'
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.36"
3
+ VERSION = "1.293"
4
4
  end
5
5
  end
@@ -1,16 +1,11 @@
1
1
  ---
2
2
  development:
3
3
  domain: host.com
4
- max_api_retries: 10
5
- file_write_retries: 5
6
- file_write_retry_delay: 30
7
4
  owner:
8
5
  name: owner_development
9
6
  pw: google_drive_password
10
- admin_group_name: admins_development
11
7
  admins:
12
8
  - name: admin
13
- worker_group_name: workers_development
14
9
  workers:
15
10
  - name: worker_development001
16
11
  pw: worker001_google_drive_password
@@ -18,16 +13,11 @@ development:
18
13
  pw: worker002_google_drive_password
19
14
  test:
20
15
  domain: host.com
21
- max_api_retries: 10
22
- file_write_retries: 5
23
- file_write_retry_delay: 30
24
16
  owner:
25
17
  name: owner_test
26
18
  pw: google_drive_password
27
- admin_group_name: admins_test
28
19
  admins:
29
20
  - name: admin
30
- worker_group_name: workers_test
31
21
  workers:
32
22
  - name: worker_test001
33
23
  pw: worker001_google_drive_password
@@ -35,16 +25,11 @@ test:
35
25
  pw: worker002_google_drive_password
36
26
  production:
37
27
  domain: host.com
38
- max_api_retries: 10
39
- file_write_retries: 5
40
- file_write_retry_delay: 30
41
28
  owner:
42
29
  name: owner_production
43
30
  pw: google_drive_password
44
- admin_group_name: admins_production
45
31
  admins:
46
32
  - name: admin
47
- worker_group_name: workers_production
48
33
  workers:
49
34
  - name: worker_production001
50
35
  pw: worker001_google_drive_password
@@ -1,7 +1,10 @@
1
1
  ---
2
2
  development:
3
+ max_versions: 10 #number of versions of cache to keep in gridfs
3
4
  max_compressed_write_size: 1000000000 #~1GB
4
5
  test:
6
+ max_versions: 10 #number of versions of cache to keep in gridfs
5
7
  max_compressed_write_size: 1000000000 #~1GB
6
8
  production:
9
+ max_versions: 10 #number of versions of cache to keep in gridfs
7
10
  max_compressed_write_size: 1000000000 #~1GB
@@ -1,17 +1,17 @@
1
1
  ---
2
2
  development:
3
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
3
+ max_cells: 400000 #current google limit is 400k cells per book
4
4
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
5
5
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
6
6
  test:
7
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
7
+ max_cells: 400000 #current google limit is 400k cells per book
8
8
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
9
9
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
10
10
  staging:
11
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
11
+ max_cells: 400000 #current google limit is 400k cells per book
12
12
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
13
13
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
14
14
  production:
15
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
15
+ max_cells: 400000 #current google limit is 400k cells per book
16
16
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
17
17
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
@@ -5,15 +5,21 @@ development:
5
5
  runner_read_freq: 300 #5 min between runner reads
6
6
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
7
7
  extensions: [] #additional Mobilize modules to load workers with
8
+ admins: #emails to send notifications to
9
+ - email: admin@host.com
8
10
  test:
9
11
  cycle_freq: 10 #time between Jobtracker sweeps
10
12
  notification_freq: 3600 #1 hour between failure/timeout notifications
11
13
  runner_read_freq: 300 #5 min between runner reads
12
14
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
13
15
  extensions: [] #additional Mobilize modules to load workers with
16
+ admins: #emails to send notifications to
17
+ - email: admin@host.com
14
18
  production:
15
19
  cycle_freq: 10 #time between Jobtracker sweeps
16
20
  notification_freq: 3600 #1 hour between failure/timeout notifications
17
21
  runner_read_freq: 300 #5 min between runner reads
18
22
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
19
23
  extensions: [] #additional Mobilize modules to load workers with
24
+ admins: #emails to send notifications to
25
+ - email: admin@host.com
@@ -22,10 +22,10 @@ Gem::Specification.new do |s|
22
22
  s.require_paths = ["lib"]
23
23
 
24
24
  s.add_runtime_dependency 'rake'
25
- s.add_runtime_dependency 'bson','1.8.4'
26
- s.add_runtime_dependency 'bson_ext','1.8.4'
25
+ s.add_runtime_dependency 'bson','1.6.1'
26
+ s.add_runtime_dependency 'bson_ext','1.6.1'
27
+ s.add_runtime_dependency 'mongo', '1.6.1'
27
28
  s.add_runtime_dependency "mongoid", "~>3.0.0"
28
- s.add_runtime_dependency 'mongoid-grid_fs'
29
29
  s.add_runtime_dependency 'resque', '1.24.0'
30
30
  s.add_runtime_dependency 'google_drive','0.3.2'
31
31
  s.add_runtime_dependency 'popen4','0.1.2'
@@ -0,0 +1,11 @@
1
+ - name: base1
2
+ active: true
3
+ trigger: once
4
+ status: ""
5
+ stage1: gsheet.write source:"gfile://test_base_1.tsv", target:base1.out
6
+
7
+ - name: base2
8
+ active: true
9
+ trigger: after base1
10
+ status: ""
11
+ stage1: gsheet.write source:base1.out, target:base2.out
@@ -0,0 +1,106 @@
1
+ require 'test_helper'
2
+
3
+ describe "Mobilize" do
4
+
5
+ def before
6
+ puts 'nothing before'
7
+ end
8
+
9
+ # enqueues 4 workers on Resque
10
+ it "runs integration test" do
11
+
12
+ puts "restart test redis"
13
+ Mobilize::Jobtracker.restart_test_redis
14
+
15
+ puts "clear out test db"
16
+ Mobilize::Jobtracker.drop_test_db
17
+
18
+ puts "restart workers"
19
+ Mobilize::Jobtracker.restart_workers!
20
+
21
+ puts "build test runner"
22
+ gdrive_slot = Mobilize::Gdrive.owner_email
23
+ puts "create user 'mobilize'"
24
+ user_name = gdrive_slot.split("@").first
25
+ u = Mobilize::User.find_or_create_by_name(user_name)
26
+ assert u.email == gdrive_slot
27
+
28
+ Mobilize::Jobtracker.build_test_runner(user_name)
29
+ assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
30
+
31
+ puts "Jobtracker created runner with 'jobs' sheet?"
32
+ r = u.runner
33
+ jobs_sheet_url = "gsheet://#{r.path}"
34
+ jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
35
+ jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
36
+ jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
37
+ assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
38
+
39
+ #stop Jobtracker, if you're doing this by queueing runners
40
+ #Mobilize::Jobtracker.stop!
41
+
42
+ puts "add base1 input file"
43
+ test_filename = "test_base_1"
44
+ file_url = "gfile://#{test_filename}.tsv"
45
+ test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
46
+ test_source_tsv = test_source_ha.hash_array_to_tsv
47
+ Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
48
+ rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
49
+ assert rem_tsv == test_source_tsv
50
+
51
+ puts "add row to jobs sheet, wait for stages"
52
+ test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
53
+ jobs_sheet.reload
54
+ jobs_sheet.add_or_update_rows(test_job_rows)
55
+ #wait for stages to complete
56
+ #r.enqueue!
57
+ wait_for_stages
58
+
59
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
60
+ test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
61
+ test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
62
+ test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
63
+
64
+ test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
65
+ test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
66
+
67
+ assert test_1_tsv == test_2_tsv
68
+
69
+ puts "change first job to fail, wait for stages"
70
+ test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
71
+ Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
72
+ jobs_sheet.add_or_update_rows(test_job_rows)
73
+
74
+ #wait for stages to complete
75
+ wait_for_stages
76
+
77
+ test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
78
+ puts "jobtracker posted failing test error to sheet "
79
+ error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
80
+ assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
81
+ Mobilize::Jobtracker.stop!
82
+ end
83
+
84
+ def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
85
+ time = 0
86
+ time_since_stage = 0
87
+ #check until time_limit (10 min by default) is reached or no stage has been seen for stage_limit seconds
88
+ while time < time_limit and time_since_stage < stage_limit
89
+ sleep wait_length
90
+ job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
91
+ if job_classes.include?("Mobilize::Stage")
92
+ time_since_stage = 0
93
+ puts "saw stage at #{time.to_s} seconds"
94
+ else
95
+ time_since_stage += wait_length
96
+ puts "#{time_since_stage.to_s} seconds since stage seen"
97
+ end
98
+ time += wait_length
99
+ puts "total wait time #{time.to_s} seconds"
100
+ end
101
+
102
+ if time >= time_limit
103
+ raise "Timed out before stage completion"
104
+ end
105
+ end
106
+ end