mobilize-base 1.36 → 1.293

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/README.md +666 -1
  2. data/lib/mobilize-base.rb +1 -12
  3. data/lib/mobilize-base/extensions/array.rb +3 -8
  4. data/lib/mobilize-base/extensions/google_drive/acl.rb +1 -1
  5. data/lib/mobilize-base/extensions/google_drive/client_login_fetcher.rb +1 -2
  6. data/lib/mobilize-base/extensions/google_drive/file.rb +37 -11
  7. data/lib/mobilize-base/extensions/string.rb +6 -11
  8. data/lib/mobilize-base/extensions/yaml.rb +7 -10
  9. data/lib/mobilize-base/handlers/gbook.rb +38 -25
  10. data/lib/mobilize-base/handlers/gdrive.rb +4 -20
  11. data/lib/mobilize-base/handlers/gfile.rb +10 -64
  12. data/lib/mobilize-base/handlers/gridfs.rb +24 -19
  13. data/lib/mobilize-base/handlers/gsheet.rb +29 -45
  14. data/lib/mobilize-base/handlers/resque.rb +10 -17
  15. data/lib/mobilize-base/jobtracker.rb +196 -22
  16. data/lib/mobilize-base/models/job.rb +77 -107
  17. data/lib/mobilize-base/models/runner.rb +122 -36
  18. data/lib/mobilize-base/models/stage.rb +37 -18
  19. data/lib/mobilize-base/tasks.rb +13 -50
  20. data/lib/mobilize-base/version.rb +1 -1
  21. data/lib/samples/gdrive.yml +0 -15
  22. data/lib/samples/gridfs.yml +3 -0
  23. data/lib/samples/gsheet.yml +4 -4
  24. data/lib/samples/jobtracker.yml +6 -0
  25. data/mobilize-base.gemspec +3 -3
  26. data/test/base_job_rows.yml +11 -0
  27. data/test/mobilize-base_test.rb +106 -0
  28. data/test/test_base_1.yml +3 -0
  29. data/test/test_helper.rb +0 -155
  30. metadata +24 -36
  31. data/lib/mobilize-base/extensions/time.rb +0 -20
  32. data/lib/mobilize-base/helpers/job_helper.rb +0 -54
  33. data/lib/mobilize-base/helpers/jobtracker_helper.rb +0 -143
  34. data/lib/mobilize-base/helpers/runner_helper.rb +0 -83
  35. data/lib/mobilize-base/helpers/stage_helper.rb +0 -38
  36. data/lib/samples/gfile.yml +0 -9
  37. data/test/fixtures/base1_stage1.in.yml +0 -10
  38. data/test/fixtures/integration_expected.yml +0 -25
  39. data/test/fixtures/integration_jobs.yml +0 -12
  40. data/test/fixtures/is_due.yml +0 -97
  41. data/test/integration/mobilize-base_test.rb +0 -57
  42. data/test/unit/mobilize-base_test.rb +0 -33
@@ -2,7 +2,6 @@ module Mobilize
2
2
  class Stage
3
3
  include Mongoid::Document
4
4
  include Mongoid::Timestamps
5
- include Mobilize::StageHelper
6
5
  field :path, type: String
7
6
  field :handler, type: String
8
7
  field :call, type: String
@@ -17,6 +16,40 @@ module Mobilize
17
16
 
18
17
  index({ path: 1})
19
18
 
19
+ def idx
20
+ s = self
21
+ s.path.split("/").last.gsub("stage","").to_i
22
+ end
23
+
24
+ def out_dst
25
+ #this gives a dataset that points to the output
26
+ #allowing you to determine its size
27
+ #before committing to a read or write
28
+ s = self
29
+ Dataset.find_by_url(s.response['out_url']) if s.response and s.response['out_url']
30
+ end
31
+
32
+ def err_dst
33
+ #this gives a dataset that points to the output
34
+ #allowing you to determine its size
35
+ #before committing to a read or write
36
+ s = self
37
+ Dataset.find_by_url(s.response['err_url']) if s.response and s.response['err_url']
38
+ end
39
+
40
+ def params
41
+ s = self
42
+ p = YAML.easy_load(s.param_string)
43
+ raise "Must resolve to Hash" unless p.class==Hash
44
+ return p
45
+ end
46
+
47
+ def job
48
+ s = self
49
+ job_path = s.path.split("/")[0..-2].join("/")
50
+ Job.where(:path=>job_path).first
51
+ end
52
+
20
53
  def Stage.find_or_create_by_path(path)
21
54
  s = Stage.where(:path=>path).first
22
55
  s = Stage.create(:path=>path) unless s
@@ -44,14 +77,6 @@ module Mobilize
44
77
 
45
78
  def Stage.perform(id,*args)
46
79
  s = Stage.where(:path=>id).first
47
- #check to make sure params are parsable
48
- begin
49
- param_hash = s.params
50
- raise ScriptError if param_hash.class!=Hash
51
- rescue StandardError, ScriptError
52
- s.fail({'signal'=>500,
53
- 'err_str'=>"Unable to parse stage params, make sure you don't have issues with your quotes, commas, or colons."})
54
- end
55
80
  s.update_attributes(:started_at=>Time.now.utc)
56
81
  s.update_status(%{Starting at #{Time.now.utc}})
57
82
  #get response by running method
@@ -82,6 +107,8 @@ module Mobilize
82
107
 
83
108
  def complete(response)
84
109
  s = self
110
+ s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
111
+ s.update_status("Completed at #{Time.now.utc.to_s}")
85
112
  j = s.job
86
113
  if s.idx == j.stages.length
87
114
  #check for any dependent jobs, if there are, enqueue them
@@ -108,10 +135,6 @@ module Mobilize
108
135
  s.next.update_attributes(:retries_done=>0)
109
136
  s.next.enqueue!
110
137
  end
111
- #complete after dependencies are processed
112
- #to make sure it doesn't enqueue due to runner check
113
- s.update_attributes(:completed_at=>Time.now.utc,:response=>response)
114
- s.update_status("Completed at #{Time.now.utc.to_s}")
115
138
  true
116
139
  end
117
140
 
@@ -122,11 +145,7 @@ module Mobilize
122
145
  j = s.job
123
146
  r = j.runner
124
147
  u = r.user
125
- begin
126
- j.update_attributes(:active=>false) if s.params['always_on'].to_s=="false"
127
- rescue StandardError, ScriptError
128
- #skip due to parse error on params
129
- end
148
+ j.update_attributes(:active=>false) unless s.params['always_on']
130
149
  s.update_attributes(:failed_at=>Time.now.utc,:response=>response)
131
150
  stage_name = "#{j.name}_stage#{s.idx.to_s}.err"
132
151
  target_path = (r.path.split("/")[0..-2] + [stage_name]).join("/")
@@ -1,7 +1,6 @@
1
- namespace :mobilize do
1
+ namespace :mobilize_base do
2
2
  desc "Start a Resque worker"
3
- task :work, :env do |t,args|
4
- ENV['MOBILIZE_ENV']=args.env
3
+ task :work do
5
4
  require 'mobilize-base'
6
5
  Mobilize::Base.config('jobtracker')['extensions'].each do |e|
7
6
  begin
@@ -21,77 +20,42 @@ namespace :mobilize do
21
20
  worker.work(ENV['INTERVAL'] || 5) # interval, will block
22
21
  end
23
22
  desc "Kill all Resque workers"
24
- task :kill_workers, :env do |t,args|
25
- ENV['MOBILIZE_ENV']=args.env
23
+ task :kill_workers do
26
24
  require 'mobilize-base'
27
25
  Mobilize::Jobtracker.kill_workers
28
26
  end
29
27
  desc "Kill idle workers not in sync with repo"
30
- task :kill_idle_and_stale_workers, :env do |t,args|
31
- ENV['MOBILIZE_ENV']=args.env
28
+ task :kill_idle_and_stale_workers do
32
29
  require 'mobilize-base'
33
30
  Mobilize::Jobtracker.kill_idle_and_stale_workers
34
31
  end
35
- desc "Kill idle workers"
36
- task :kill_idle_workers, :env do |t,args|
37
- ENV['MOBILIZE_ENV']=args.env
32
+ desc "Kill idle workers"
33
+ task :kill_idle_workers do
38
34
  require 'mobilize-base'
39
35
  Mobilize::Jobtracker.kill_idle_workers
40
36
  end
41
37
  desc "Make sure there are the correct # of workers, kill if too many"
42
- task :prep_workers, :env do |t,args|
43
- ENV['MOBILIZE_ENV']=args.env
38
+ task :prep_workers do
44
39
  require 'mobilize-base'
45
40
  Mobilize::Jobtracker.prep_workers
46
41
  end
47
- desc "Restart Resque workers"
48
- task :restart_workers, :env do |t,args|
49
- ENV['MOBILIZE_ENV']=args.env
50
- require 'mobilize-base'
51
- Mobilize::Jobtracker.kill_workers
52
- sleep 5
53
- Mobilize::Jobtracker.prep_workers
54
- end
55
42
  desc "Stop Jobtracker"
56
- task :stop_jobtracker, :env do |t,args|
57
- ENV['MOBILIZE_ENV']=args.env
43
+ task :stop_jobtracker do
58
44
  require 'mobilize-base'
59
45
  Mobilize::Jobtracker.stop!
60
46
  end
61
47
  desc "Start Jobtracker"
62
- task :start_jobtracker, :env do |t,args|
63
- ENV['MOBILIZE_ENV']=args.env
48
+ task :start_jobtracker do
64
49
  require 'mobilize-base'
65
50
  Mobilize::Jobtracker.start
66
51
  end
67
52
  desc "Restart Jobtracker"
68
- task :restart_jobtracker, :env do |t,args|
69
- ENV['MOBILIZE_ENV']=args.env
53
+ task :restart_jobtracker do
70
54
  require 'mobilize-base'
71
55
  Mobilize::Jobtracker.restart!
72
56
  end
73
- desc "Add a user"
74
- task :add_user, :name, :env do |t,args|
75
- ENV['MOBILIZE_ENV']=args.env
76
- require 'mobilize-base'
77
- Mobilize::User.find_or_create_by_name(args.name)
78
- end
79
- desc "Enqueue a user's runner"
80
- task :enqueue_user, :name, :env do |t,args|
81
- ENV['MOBILIZE_ENV']=args.env
82
- require 'mobilize-base'
83
- Mobilize::User.where(name: args.name).first.runner.enqueue!
84
- end
85
- desc "Enqueue a stage"
86
- task :enqueue_stage, :path, :env do |t,args|
87
- ENV['MOBILIZE_ENV']=args.env
88
- require 'mobilize-base'
89
- user,job,stage = args.path.split("/")
90
- Mobilize::Stage.where(path: "Runner_#{user}/jobs/#{job}/#{stage}").first.en
91
- end
92
57
  desc "kill all old resque web processes, start new one with resque_web.rb extension file"
93
- task :resque_web, :env do |t,args|
94
- ENV['MOBILIZE_ENV']=args.env
58
+ task :resque_web do
95
59
  require 'mobilize-base'
96
60
  port = Mobilize::Base.config('resque')['web_port']
97
61
  config_dir = (ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/")
@@ -119,15 +83,14 @@ namespace :mobilize do
119
83
  `#{command}`
120
84
  end
121
85
  desc "create indexes for all base models in mongodb"
122
- task :create_indexes, :env do |t,args|
123
- ENV['MOBILIZE_ENV']=args.env
86
+ task :create_indexes do
124
87
  require 'mobilize-base'
125
88
  ["Dataset","Job","Runner","Task","User"].each do |m|
126
89
  "Mobilize::#{m}".constantize.create_indexes
127
90
  end
128
91
  end
129
92
  desc "Set up config and log folders and files, populate from samples"
130
- task :setup_base do
93
+ task :setup do
131
94
  config_dir = (ENV['MOBILIZE_CONFIG_DIR'] ||= "config/mobilize/")
132
95
  log_dir = (ENV['MOBILIZE_LOG_DIR'] ||= "log/")
133
96
  sample_dir = File.dirname(__FILE__) + '/../samples/'
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.36"
3
+ VERSION = "1.293"
4
4
  end
5
5
  end
@@ -1,16 +1,11 @@
1
1
  ---
2
2
  development:
3
3
  domain: host.com
4
- max_api_retries: 10
5
- file_write_retries: 5
6
- file_write_retry_delay: 30
7
4
  owner:
8
5
  name: owner_development
9
6
  pw: google_drive_password
10
- admin_group_name: admins_development
11
7
  admins:
12
8
  - name: admin
13
- worker_group_name: workers_development
14
9
  workers:
15
10
  - name: worker_development001
16
11
  pw: worker001_google_drive_password
@@ -18,16 +13,11 @@ development:
18
13
  pw: worker002_google_drive_password
19
14
  test:
20
15
  domain: host.com
21
- max_api_retries: 10
22
- file_write_retries: 5
23
- file_write_retry_delay: 30
24
16
  owner:
25
17
  name: owner_test
26
18
  pw: google_drive_password
27
- admin_group_name: admins_test
28
19
  admins:
29
20
  - name: admin
30
- worker_group_name: workers_test
31
21
  workers:
32
22
  - name: worker_test001
33
23
  pw: worker001_google_drive_password
@@ -35,16 +25,11 @@ test:
35
25
  pw: worker002_google_drive_password
36
26
  production:
37
27
  domain: host.com
38
- max_api_retries: 10
39
- file_write_retries: 5
40
- file_write_retry_delay: 30
41
28
  owner:
42
29
  name: owner_production
43
30
  pw: google_drive_password
44
- admin_group_name: admins_production
45
31
  admins:
46
32
  - name: admin
47
- worker_group_name: workers_production
48
33
  workers:
49
34
  - name: worker_production001
50
35
  pw: worker001_google_drive_password
@@ -1,7 +1,10 @@
1
1
  ---
2
2
  development:
3
+ max_versions: 10 #number of versions of cache to keep in gridfs
3
4
  max_compressed_write_size: 1000000000 #~1GB
4
5
  test:
6
+ max_versions: 10 #number of versions of cache to keep in gridfs
5
7
  max_compressed_write_size: 1000000000 #~1GB
6
8
  production:
9
+ max_versions: 10 #number of versions of cache to keep in gridfs
7
10
  max_compressed_write_size: 1000000000 #~1GB
@@ -1,17 +1,17 @@
1
1
  ---
2
2
  development:
3
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
3
+ max_cells: 400000 #current google limit is 400k cells per book
4
4
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
5
5
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
6
6
  test:
7
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
7
+ max_cells: 400000 #current google limit is 400k cells per book
8
8
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
9
9
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
10
10
  staging:
11
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
11
+ max_cells: 400000 #current google limit is 400k cells per book
12
12
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
13
13
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
14
14
  production:
15
- max_cells: 50000 #current google limit is 400k cells per book but fails earlier
15
+ max_cells: 400000 #current google limit is 400k cells per book
16
16
  read_date_format: "%Y-%m-%d" #format to record when reading sheets
17
17
  sheet_date_format: "%m/%d/%Y" #format to use to parse sheets
@@ -5,15 +5,21 @@ development:
5
5
  runner_read_freq: 300 #5 min between runner reads
6
6
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
7
7
  extensions: [] #additional Mobilize modules to load workers with
8
+ admins: #emails to send notifications to
9
+ - email: admin@host.com
8
10
  test:
9
11
  cycle_freq: 10 #time between Jobtracker sweeps
10
12
  notification_freq: 3600 #1 hour between failure/timeout notifications
11
13
  runner_read_freq: 300 #5 min between runner reads
12
14
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
13
15
  extensions: [] #additional Mobilize modules to load workers with
16
+ admins: #emails to send notifications to
17
+ - email: admin@host.com
14
18
  production:
15
19
  cycle_freq: 10 #time between Jobtracker sweeps
16
20
  notification_freq: 3600 #1 hour between failure/timeout notifications
17
21
  runner_read_freq: 300 #5 min between runner reads
18
22
  max_run_time: 14400 # if a job runs for 4h+, notification will be sent
19
23
  extensions: [] #additional Mobilize modules to load workers with
24
+ admins: #emails to send notifications to
25
+ - email: admin@host.com
@@ -22,10 +22,10 @@ Gem::Specification.new do |s|
22
22
  s.require_paths = ["lib"]
23
23
 
24
24
  s.add_runtime_dependency 'rake'
25
- s.add_runtime_dependency 'bson','1.8.4'
26
- s.add_runtime_dependency 'bson_ext','1.8.4'
25
+ s.add_runtime_dependency 'bson','1.6.1'
26
+ s.add_runtime_dependency 'bson_ext','1.6.1'
27
+ s.add_runtime_dependency 'mongo', '1.6.1'
27
28
  s.add_runtime_dependency "mongoid", "~>3.0.0"
28
- s.add_runtime_dependency 'mongoid-grid_fs'
29
29
  s.add_runtime_dependency 'resque', '1.24.0'
30
30
  s.add_runtime_dependency 'google_drive','0.3.2'
31
31
  s.add_runtime_dependency 'popen4','0.1.2'
@@ -0,0 +1,11 @@
1
+ - name: base1
2
+ active: true
3
+ trigger: once
4
+ status: ""
5
+ stage1: gsheet.write source:"gfile://test_base_1.tsv", target:base1.out
6
+
7
+ - name: base2
8
+ active: true
9
+ trigger: after base1
10
+ status: ""
11
+ stage1: gsheet.write source:base1.out, target:base2.out
@@ -0,0 +1,106 @@
1
+ require 'test_helper'
2
+
3
+ describe "Mobilize" do
4
+
5
+ def before
6
+ puts 'nothing before'
7
+ end
8
+
9
+ # enqueues 4 workers on Resque
10
+ it "runs integration test" do
11
+
12
+ puts "restart test redis"
13
+ Mobilize::Jobtracker.restart_test_redis
14
+
15
+ puts "clear out test db"
16
+ Mobilize::Jobtracker.drop_test_db
17
+
18
+ puts "restart workers"
19
+ Mobilize::Jobtracker.restart_workers!
20
+
21
+ puts "build test runner"
22
+ gdrive_slot = Mobilize::Gdrive.owner_email
23
+ puts "create user 'mobilize'"
24
+ user_name = gdrive_slot.split("@").first
25
+ u = Mobilize::User.find_or_create_by_name(user_name)
26
+ assert u.email == gdrive_slot
27
+
28
+ Mobilize::Jobtracker.build_test_runner(user_name)
29
+ assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
30
+
31
+ puts "Jobtracker created runner with 'jobs' sheet?"
32
+ r = u.runner
33
+ jobs_sheet_url = "gsheet://#{r.path}"
34
+ jobs_sheet = Mobilize::Gsheet.find_by_path(r.path,gdrive_slot)
35
+ jobs_sheet_dst = Mobilize::Dataset.find_or_create_by_url(jobs_sheet_url)
36
+ jobs_sheet_tsv = jobs_sheet_dst.read(user_name,gdrive_slot)
37
+ assert jobs_sheet_tsv.tsv_header_array.join.length == 53 #total header length
38
+
39
+ #stop Jobtracker, if you're doing this by queueing runners
40
+ #Mobilize::Jobtracker.stop!
41
+
42
+ puts "add base1 input file"
43
+ test_filename = "test_base_1"
44
+ file_url = "gfile://#{test_filename}.tsv"
45
+ test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/#{test_filename}.yml")*40
46
+ test_source_tsv = test_source_ha.hash_array_to_tsv
47
+ Mobilize::Dataset.write_by_url(file_url,test_source_tsv,user_name)
48
+ rem_tsv = Mobilize::Dataset.read_by_url(file_url,user_name)
49
+ assert rem_tsv == test_source_tsv
50
+
51
+ puts "add row to jobs sheet, wait for stages"
52
+ test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
53
+ jobs_sheet.reload
54
+ jobs_sheet.add_or_update_rows(test_job_rows)
55
+ #wait for stages to complete
56
+ #r.enqueue!
57
+ wait_for_stages
58
+
59
+ puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
60
+ test_target_sheet_1_url = "gsheet://#{r.title}/base1.out"
61
+ test_target_sheet_2_url = "gsheet://#{r.title}/base2.out"
62
+ test_error_sheet_url = "gsheet://#{r.title}/base1_stage1.err"
63
+
64
+ test_1_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
65
+ test_2_tsv = Mobilize::Dataset.read_by_url(test_target_sheet_1_url,user_name,gdrive_slot)
66
+
67
+ assert test_1_tsv == test_2_tsv
68
+
69
+ puts "change first job to fail, wait for stages"
70
+ test_job_rows.first['stage1'] = %{gsheet.write source:"gfile://test_base_1.fail", target:base1.out, retries:3}
71
+ Mobilize::Dataset.write_by_url(test_error_sheet_url," ",user_name,gdrive_slot)
72
+ jobs_sheet.add_or_update_rows(test_job_rows)
73
+
74
+ #wait for stages to complete
75
+ wait_for_stages
76
+
77
+ test_error_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1_stage1.err",gdrive_slot)
78
+ puts "jobtracker posted failing test error to sheet "
79
+ error_rows = test_error_sheet.read(user_name).tsv_to_hash_array
80
+ assert error_rows.first['response'] == "Could not get gfile://test_base_1.fail with error: unable to find test_base_1.fail"
81
+ Mobilize::Jobtracker.stop!
82
+ end
83
+
84
+ def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
85
+ time = 0
86
+ time_since_stage = 0
87
+ #check for 10 min
88
+ while time < time_limit and time_since_stage < stage_limit
89
+ sleep wait_length
90
+ job_classes = Mobilize::Resque.jobs.map{|j| j['class']}
91
+ if job_classes.include?("Mobilize::Stage")
92
+ time_since_stage = 0
93
+ puts "saw stage at #{time.to_s} seconds"
94
+ else
95
+ time_since_stage += wait_length
96
+ puts "#{time_since_stage.to_s} seconds since stage seen"
97
+ end
98
+ time += wait_length
99
+ puts "total wait time #{time.to_s} seconds"
100
+ end
101
+
102
+ if time >= time_limit
103
+ raise "Timed out before stage completion"
104
+ end
105
+ end
106
+ end