mobilize-base 1.0.84 → 1.0.91

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,7 +8,7 @@ module GoogleDrive
8
8
  attempts = 0
9
9
  sleep_time = nil
10
10
  #try 5 times to make the call
11
- while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
+ while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 20
12
12
  #instantiate http object, set params
13
13
  http = @proxy.new(uri.host, uri.port)
14
14
  http.use_ssl = true
@@ -40,7 +40,7 @@ module GoogleDrive
40
40
  end
41
41
  end
42
42
  end
43
- raise "No response after 5 attempts" if response.nil?
43
+ raise "No response after 20 attempts" if response.nil?
44
44
  raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
45
45
  return response
46
46
  end
@@ -8,10 +8,10 @@ module Mobilize
8
8
  dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
9
9
  #there should only be one book with each path, otherwise we have fail
10
10
  book = nil
11
- if books.length>1 and dst.url.to_s.length>0
11
+ if books.length>1 and dst.http_url.to_s.length>0
12
12
  #some idiot process created a duplicate book.
13
13
  #Fix by renaming all but one with dst entry's key
14
- dkey = dst.url.split("key=").last
14
+ dkey = dst.http_url.split("key=").last
15
15
  books.each do |b|
16
16
  bkey = b.resource_id.split(":").last
17
17
  if bkey == dkey
@@ -30,9 +30,9 @@ module Mobilize
30
30
  book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
31
31
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
32
32
  end
33
- #always make sure book dataset URL is up to date
33
+ #always make sure book dataset http URL is up to date
34
34
  #and that book has admin acl
35
- dst.update_attributes(:url=>book.human_url)
35
+ dst.update_attributes(:http_url=>book.human_url)
36
36
  book.add_admin_acl
37
37
  return book
38
38
  end
@@ -28,7 +28,10 @@ module Mobilize
28
28
  return false unless gdrive_slot
29
29
  s = Stage.where(:path=>stage_path)
30
30
  gfile_path = s.params['file']
31
- Gfile.find_by_path(gfile_path,gdrive_slot).read
31
+ out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
32
+ #use Gridfs to cache result
33
+ out_url = "gridfs://#{s.path}/out"
34
+ Dataset.write_to_url(out_url,out_tsv)
32
35
  end
33
36
  end
34
37
  end
@@ -11,29 +11,29 @@ module Mobilize
11
11
  return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
12
12
  end
13
13
 
14
- def Gridfs.read(path)
14
+ def Gridfs.read_by_dataset_path(dst_path)
15
15
  begin
16
- zs=Gridfs.grid.open(path.gridsafe,'r').read
16
+ zs=Gridfs.grid.open(dst_path,'r').read
17
17
  return ::Zlib::Inflate.inflate(zs)
18
18
  rescue
19
19
  return nil
20
20
  end
21
21
  end
22
22
 
23
- def Gridfs.write(path,string)
23
+ def Gridfs.write_by_dataset_path(dst_path,string)
24
24
  zs = ::Zlib::Deflate.deflate(string)
25
25
  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
26
- curr_zs = Gridfs.read(path.gridsafe).to_s
26
+ curr_zs = Gridfs.read_by_dataset_path(dst_path).to_s
27
27
  #write a new version when there is a change
28
28
  if curr_zs != zs
29
- Gridfs.grid.open(path.gridsafe,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
29
+ Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
30
30
  end
31
31
  return true
32
32
  end
33
33
 
34
- def Gridfs.delete(path)
34
+ def Gridfs.delete(dst_path)
35
35
  begin
36
- Gridfs.grid.delete(path.gridsafe)
36
+ Gridfs.grid.delete(dst_path)
37
37
  return true
38
38
  rescue
39
39
  return nil
@@ -38,13 +38,16 @@ module Mobilize
38
38
  return false unless gdrive_slot
39
39
  s = Stage.where(:path=>stage_path).first
40
40
  gsheet_path = s.params['source']
41
- Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
41
+ out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
42
+ #use Gridfs to cache result
43
+ out_url = "gridfs://#{s.path}/out"
44
+ Dataset.write_to_url(out_url,out_tsv)
42
45
  end
43
46
 
44
47
  def Gsheet.write_by_stage_path(stage_path)
45
48
  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
46
- #return false if there are no emails available
47
- return false unless gdrive_slot
49
+ #return blank response if there are no slots available
50
+ return nil unless gdrive_slot
48
51
  s = Stage.where(:path=>stage_path).first
49
52
  source = s.params['source']
50
53
  target_path = s.params['target']
@@ -55,7 +58,7 @@ module Mobilize
55
58
  end
56
59
  source_stage_path = "#{s.job.runner.path}/#{source_job_name || s.job.name}/#{source_stage_name}"
57
60
  source_stage = Stage.where(:path=>source_stage_path).first
58
- tsv = source_stage.stdout_dataset.read_cache
61
+ tsv = source_stage.out_dst.read
59
62
  sheet_name = target_path.split("/").last
60
63
  temp_path = [stage_path.gridsafe,sheet_name].join("/")
61
64
  temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
@@ -65,8 +68,11 @@ module Mobilize
65
68
  target_sheet.merge(temp_sheet)
66
69
  #delete the temp sheet's book
67
70
  temp_sheet.spreadsheet.delete
68
- "Write successful for #{target_path}".oputs
69
- return true
71
+ status = "Write successful for #{target_path}"
72
+ s.update_status(status)
73
+ #use Gridfs to cache result
74
+ out_url = "gridfs://#{s.path}/out"
75
+ Dataset.write_to_url(out_url,status)
70
76
  end
71
77
  end
72
78
  end
@@ -51,12 +51,12 @@ module Mobilize
51
51
 
52
52
  def Resque.active_paths
53
53
  #first argument of the payload is the runner / stage path unless the worker is Jobtracker
54
- Resque.jobs('active').map{|j| j['args'].first unless (j.nil? or j['class']=='Jobtracker')}.compact
54
+ Resque.jobs('active').compact.map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
55
55
  end
56
56
 
57
57
  #Resque workers and methods to find
58
58
  def Resque.find_worker_by_path(path)
59
- Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
59
+ Resque.workers('working').select{|w| w.job and w.job['payload'] and w.job['payload']['args'].first == path}.first
60
60
  end
61
61
 
62
62
  def Resque.set_worker_args_by_path(path,args)
@@ -106,7 +106,9 @@ module Mobilize
106
106
  def Resque.failure_report
107
107
  fjobs = {}
108
108
  excs = Hash.new(0)
109
- Resque.failures.each do |f|
109
+ Resque.failures.each_with_index do |f,f_i|
110
+ #skip if already notified
111
+ next if f['notified']
110
112
  sname = f['payload']['args'].first
111
113
  excs = f['error']
112
114
  if fjobs[sname].nil?
@@ -116,6 +118,9 @@ module Mobilize
116
118
  else
117
119
  fjobs[sname][excs] += 1
118
120
  end
121
+ #add notified flag to redis
122
+ f['notified'] = true
123
+ ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
119
124
  end
120
125
  return fjobs
121
126
  end
@@ -134,9 +139,9 @@ module Mobilize
134
139
  if count.to_i > idle_pids.length or count == 0
135
140
  return false
136
141
  elsif count
137
- "kill #{idle_pids[0..count-1].join(" ")}".bash
142
+ "kill #{idle_pids[0..count-1].join(" ")}".bash(false)
138
143
  else
139
- "kill #{idle_pids.join(" ")}".bash
144
+ "kill #{idle_pids.join(" ")}".bash(false)
140
145
  end
141
146
  return true
142
147
  end
@@ -148,7 +153,7 @@ module Mobilize
148
153
  if idle_stale_pids.length == 0
149
154
  return false
150
155
  else
151
- "kill #{idle_stale_pids.join(" ")}".bash
156
+ "kill #{idle_stale_pids.join(" ")}".bash(false)
152
157
  end
153
158
  return true
154
159
  end
@@ -163,12 +163,14 @@ module Mobilize
163
163
  if Jobtracker.notif_due?
164
164
  notifs = []
165
165
  if Jobtracker.failures.length>0
166
- n = {}
167
166
  jfcs = Resque.failure_report
168
- n['subj'] = "#{jfcs.keys.length.to_s} failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
169
- #one row per exception type, with the job name
170
- n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
171
- notifs << n
167
+ unless jfcs=={} #no new failures
168
+ n = {}
169
+ n['subj'] = "#{jfcs.keys.length.to_s} new failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
170
+ #one row per exception type, with the job name
171
+ n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
172
+ notifs << n
173
+ end
172
174
  end
173
175
  lws = Jobtracker.max_run_time_workers
174
176
  if lws.length>0
@@ -4,7 +4,7 @@ module Mobilize
4
4
  include Mongoid::Timestamps
5
5
  field :handler, type: String
6
6
  field :path, type: String
7
- field :url, type: String
7
+ field :http_url, type: String
8
8
  field :raw_size, type: Fixnum
9
9
  field :last_cached_at, type: Time
10
10
  field :last_cache_handler, type: String
@@ -18,6 +18,16 @@ module Mobilize
18
18
  return "Mobilize::#{dst.handler.humanize}".constantize.read_by_path(dst.path)
19
19
  end
20
20
 
21
+ def Dataset.find_by_url(url)
22
+ handler,path = url.split("://")
23
+ Dataset.find_by_handler_and_path(handler,path)
24
+ end
25
+
26
+ def Dataset.find_or_create_by_url(url)
27
+ handler,path = url.split("://")
28
+ Dataset.find_or_create_by_handler_and_path(handler,path)
29
+ end
30
+
21
31
  def Dataset.find_by_handler_and_path(handler,path)
22
32
  Dataset.where(handler: handler, path: path).first
23
33
  end
@@ -28,41 +38,30 @@ module Mobilize
28
38
  return dst
29
39
  end
30
40
 
31
- def write(string)
32
- dst = self
33
- "Mobilize::#{dst.handler.humanize}".constantize.write_by_path(dst.path,string)
34
- dst.raw_size = string.length
35
- dst.save!
36
- return true
41
+ def Dataset.write_to_url(url,string)
42
+ dst = Dataset.find_or_create_by_url(url)
43
+ dst.write(string)
44
+ url
37
45
  end
38
46
 
39
- def cache_valid?
47
+ def read
40
48
  dst = self
41
- return true if dst.last_cached_at and (dst.cache_expire_at.nil? or dst.cache_expire_at > Time.now.utc)
49
+ dst.update_attributes(:last_read_at=>Time.now.utc)
50
+ "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path)
42
51
  end
43
52
 
44
- def read_cache(cache_handler="gridfs")
53
+ def write(string)
45
54
  dst = self
46
- if cache_valid?
47
- dst.update_attributes(:last_read_at=>Time.now.utc)
48
- return "Mobilize::#{cache_handler.humanize}".constantize.read([dst.handler,dst.path].join("://"))
49
- else
50
- raise "Cache invalid or not found for #{cache_handler}://#{dst.path}"
51
- end
55
+ "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string)
56
+ dst.raw_size = string.length
57
+ dst.save!
58
+ return true
52
59
  end
53
60
 
54
- def write_cache(string,expire_at=nil,cache_handler="gridfs")
61
+ def delete
55
62
  dst = self
56
- "Mobilize::#{cache_handler.humanize}".constantize.write([dst.handler,dst.path].join("://"),string)
57
- dst.update_attributes(:last_cached_at=>Time.now.utc,
58
- :last_cache_handler=>cache_handler.to_s.downcase,
59
- :cache_expire_at=>expire_at,
60
- :size=>string.length)
63
+ "Mobilize::#{dst.handler.humanize}".constantize.delete_by_dataset_path(dst.path)
61
64
  return true
62
65
  end
63
-
64
- def delete_cache(cache_handler="gridfs")
65
- return "Mobilize::#{cache_handler.humanize}".constantize.delete(dst.handler, dst.path)
66
- end
67
66
  end
68
67
  end
@@ -71,9 +71,9 @@ module Mobilize
71
71
  Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
72
72
  end
73
73
 
74
- def read_cache
74
+ def cache
75
75
  r = self
76
- r.dataset.read_cache
76
+ Dataset.find_or_create_by_url("gridfs://#{r.path}")
77
77
  end
78
78
 
79
79
  def gsheet(gdrive_slot)
@@ -88,7 +88,7 @@ module Mobilize
88
88
  r = self
89
89
  gsheet_tsv = r.gsheet(gdrive_slot).to_tsv
90
90
  #cache in DB
91
- r.dataset.write_cache(gsheet_tsv)
91
+ r.cache.write(gsheet_tsv)
92
92
  #turn it into a hash array
93
93
  gsheet_jobs = gsheet_tsv.tsv_to_hash_array
94
94
  #go through each job, update relevant job with its params
@@ -7,6 +7,7 @@ module Mobilize
7
7
  field :call, type: String
8
8
  field :param_string, type: Array
9
9
  field :status, type: String
10
+ field :out_url, type: String
10
11
  field :completed_at, type: Time
11
12
  field :started_at, type: Time
12
13
  field :failed_at, type: Time
@@ -19,19 +20,12 @@ module Mobilize
19
20
  s.path.split("/").last.gsub("stage","").to_i
20
21
  end
21
22
 
22
- def stdout_dataset
23
+ def out_dst
24
+ #this gives a dataset that points to the output
25
+ #allowing you to determine its size
26
+ #before committing to a read or write
23
27
  s = self
24
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stdout")
25
- end
26
-
27
- def stderr_dataset
28
- s = self
29
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stderr")
30
- end
31
-
32
- def log_dataset
33
- s = self
34
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/log")
28
+ Dataset.find_by_url(s.out_url) if s.out_url
35
29
  end
36
30
 
37
31
  def params
@@ -79,27 +73,21 @@ module Mobilize
79
73
  j = s.job
80
74
  s.update_attributes(:started_at=>Time.now.utc)
81
75
  s.update_status(%{Starting at #{Time.now.utc}})
82
- stdout, stderr = [nil,nil]
83
76
  begin
84
- stdout,log = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path).to_s
85
- #write to log if method returns an array w 2 members
86
- s.log_dataset.write_cache(log) if log
77
+ #get response by running method
78
+ s.out_url = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
79
+ s.save!
80
+ unless s.out_url
81
+ #re-queue self if no response
82
+ s.enqueue!
83
+ return false
84
+ end
87
85
  rescue ScriptError, StandardError => exc
88
- stderr = [exc.to_s,exc.backtrace.to_s].join("\n")
89
- #record the failure in Job so it appears on Runner, turn it off
90
- #so it doesn't run again
91
86
  j.update_attributes(:active=>false)
92
87
  s.update_attributes(:failed_at=>Time.now.utc)
93
88
  s.update_status("Failed at #{Time.now.utc.to_s}")
94
89
  raise exc
95
90
  end
96
- if stdout == false
97
- #re-queue self if output is false
98
- s.enqueue!
99
- return false
100
- end
101
- #write output to cache
102
- s.stdout_dataset.write_cache(stdout)
103
91
  s.update_attributes(:completed_at=>Time.now.utc)
104
92
  s.update_status("Completed at #{Time.now.utc.to_s}")
105
93
  if s.idx == j.stages.length
@@ -24,7 +24,7 @@ module Mobilize
24
24
 
25
25
  def jobs(jname=nil)
26
26
  u = self
27
- return u.runners.map{|r| r.jobs(jname)}.flatten
27
+ return u.runner.jobs
28
28
  end
29
29
 
30
30
  def runner_path
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.0.84"
3
+ VERSION = "1.0.91"
4
4
  end
5
5
  end
@@ -41,10 +41,10 @@ describe "Mobilize" do
41
41
  test_source_tsv = test_source_ha.hash_array_to_tsv
42
42
  test_source_sheet.write(test_source_tsv)
43
43
 
44
- puts "add row to jobs sheet, wait 120s"
44
+ puts "add row to jobs sheet, wait 150s"
45
45
  test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
46
46
  jobs_sheet.add_or_update_rows(test_job_rows)
47
- sleep 120
47
+ sleep 150
48
48
 
49
49
  puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
50
50
  test_target_sheet_1 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
@@ -52,11 +52,11 @@ describe "Mobilize" do
52
52
 
53
53
  assert test_target_sheet_1.to_tsv == test_source_sheet.to_tsv
54
54
 
55
- puts "delete both output sheets, set first job to active=true"
55
+ puts "delete both output sheets, set first job to active=true, wait 120s"
56
56
  [test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
57
57
 
58
58
  jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
59
- sleep 90
59
+ sleep 120
60
60
 
61
61
  test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
62
62
  puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.84
4
+ version: 1.0.91
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-19 00:00:00.000000000 Z
12
+ date: 2013-01-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -238,7 +238,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
238
238
  version: '0'
239
239
  segments:
240
240
  - 0
241
- hash: 1770893729203810846
241
+ hash: -136156851409089699
242
242
  required_rubygems_version: !ruby/object:Gem::Requirement
243
243
  none: false
244
244
  requirements:
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
247
247
  version: '0'
248
248
  segments:
249
249
  - 0
250
- hash: 1770893729203810846
250
+ hash: -136156851409089699
251
251
  requirements: []
252
252
  rubyforge_project: mobilize-base
253
253
  rubygems_version: 1.8.24