mobilize-base 1.0.84 → 1.0.91

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -8,7 +8,7 @@ module GoogleDrive
8
8
  attempts = 0
9
9
  sleep_time = nil
10
10
  #try 5 times to make the call
11
- while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 5
11
+ while (response.nil? or response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}) and attempts < 20
12
12
  #instantiate http object, set params
13
13
  http = @proxy.new(uri.host, uri.port)
14
14
  http.use_ssl = true
@@ -40,7 +40,7 @@ module GoogleDrive
40
40
  end
41
41
  end
42
42
  end
43
- raise "No response after 5 attempts" if response.nil?
43
+ raise "No response after 20 attempts" if response.nil?
44
44
  raise response.body if response.code.ie{|rcode| rcode.starts_with?("4") or rcode.starts_with?("5")}
45
45
  return response
46
46
  end
@@ -8,10 +8,10 @@ module Mobilize
8
8
  dst = Dataset.find_or_create_by_handler_and_path('gbook',path)
9
9
  #there should only be one book with each path, otherwise we have fail
10
10
  book = nil
11
- if books.length>1 and dst.url.to_s.length>0
11
+ if books.length>1 and dst.http_url.to_s.length>0
12
12
  #some idiot process created a duplicate book.
13
13
  #Fix by renaming all but one with dst entry's key
14
- dkey = dst.url.split("key=").last
14
+ dkey = dst.http_url.split("key=").last
15
15
  books.each do |b|
16
16
  bkey = b.resource_id.split(":").last
17
17
  if bkey == dkey
@@ -30,9 +30,9 @@ module Mobilize
30
30
  book = Gdrive.root(Gdrive.owner_email).create_spreadsheet(path)
31
31
  ("Created book #{path} at #{Time.now.utc.to_s}; Access at #{book.human_url}").oputs
32
32
  end
33
- #always make sure book dataset URL is up to date
33
+ #always make sure book dataset http URL is up to date
34
34
  #and that book has admin acl
35
- dst.update_attributes(:url=>book.human_url)
35
+ dst.update_attributes(:http_url=>book.human_url)
36
36
  book.add_admin_acl
37
37
  return book
38
38
  end
@@ -28,7 +28,10 @@ module Mobilize
28
28
  return false unless gdrive_slot
29
29
  s = Stage.where(:path=>stage_path)
30
30
  gfile_path = s.params['file']
31
- Gfile.find_by_path(gfile_path,gdrive_slot).read
31
+ out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
32
+ #use Gridfs to cache result
33
+ out_url = "gridfs://#{s.path}/out"
34
+ Dataset.write_to_url(out_url,out_tsv)
32
35
  end
33
36
  end
34
37
  end
@@ -11,29 +11,29 @@ module Mobilize
11
11
  return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
12
12
  end
13
13
 
14
- def Gridfs.read(path)
14
+ def Gridfs.read_by_dataset_path(dst_path)
15
15
  begin
16
- zs=Gridfs.grid.open(path.gridsafe,'r').read
16
+ zs=Gridfs.grid.open(dst_path,'r').read
17
17
  return ::Zlib::Inflate.inflate(zs)
18
18
  rescue
19
19
  return nil
20
20
  end
21
21
  end
22
22
 
23
- def Gridfs.write(path,string)
23
+ def Gridfs.write_by_dataset_path(dst_path,string)
24
24
  zs = ::Zlib::Deflate.deflate(string)
25
25
  raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
26
- curr_zs = Gridfs.read(path.gridsafe).to_s
26
+ curr_zs = Gridfs.read_by_dataset_path(dst_path).to_s
27
27
  #write a new version when there is a change
28
28
  if curr_zs != zs
29
- Gridfs.grid.open(path.gridsafe,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
29
+ Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}
30
30
  end
31
31
  return true
32
32
  end
33
33
 
34
- def Gridfs.delete(path)
34
+ def Gridfs.delete(dst_path)
35
35
  begin
36
- Gridfs.grid.delete(path.gridsafe)
36
+ Gridfs.grid.delete(dst_path)
37
37
  return true
38
38
  rescue
39
39
  return nil
@@ -38,13 +38,16 @@ module Mobilize
38
38
  return false unless gdrive_slot
39
39
  s = Stage.where(:path=>stage_path).first
40
40
  gsheet_path = s.params['source']
41
- Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
41
+ out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
42
+ #use Gridfs to cache result
43
+ out_url = "gridfs://#{s.path}/out"
44
+ Dataset.write_to_url(out_url,out_tsv)
42
45
  end
43
46
 
44
47
  def Gsheet.write_by_stage_path(stage_path)
45
48
  gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
46
- #return false if there are no emails available
47
- return false unless gdrive_slot
49
+ #return blank response if there are no slots available
50
+ return nil unless gdrive_slot
48
51
  s = Stage.where(:path=>stage_path).first
49
52
  source = s.params['source']
50
53
  target_path = s.params['target']
@@ -55,7 +58,7 @@ module Mobilize
55
58
  end
56
59
  source_stage_path = "#{s.job.runner.path}/#{source_job_name || s.job.name}/#{source_stage_name}"
57
60
  source_stage = Stage.where(:path=>source_stage_path).first
58
- tsv = source_stage.stdout_dataset.read_cache
61
+ tsv = source_stage.out_dst.read
59
62
  sheet_name = target_path.split("/").last
60
63
  temp_path = [stage_path.gridsafe,sheet_name].join("/")
61
64
  temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
@@ -65,8 +68,11 @@ module Mobilize
65
68
  target_sheet.merge(temp_sheet)
66
69
  #delete the temp sheet's book
67
70
  temp_sheet.spreadsheet.delete
68
- "Write successful for #{target_path}".oputs
69
- return true
71
+ status = "Write successful for #{target_path}"
72
+ s.update_status(status)
73
+ #use Gridfs to cache result
74
+ out_url = "gridfs://#{s.path}/out"
75
+ Dataset.write_to_url(out_url,status)
70
76
  end
71
77
  end
72
78
  end
@@ -51,12 +51,12 @@ module Mobilize
51
51
 
52
52
  def Resque.active_paths
53
53
  #first argument of the payload is the runner / stage path unless the worker is Jobtracker
54
- Resque.jobs('active').map{|j| j['args'].first unless (j.nil? or j['class']=='Jobtracker')}.compact
54
+ Resque.jobs('active').compact.map{|j| j['args'].first unless j['class']=='Jobtracker'}.compact
55
55
  end
56
56
 
57
57
  #Resque workers and methods to find
58
58
  def Resque.find_worker_by_path(path)
59
- Resque.workers('working').select{|w| w.job['payload'] and w.job['payload']['args'].first == path}.first
59
+ Resque.workers('working').select{|w| w.job and w.job['payload'] and w.job['payload']['args'].first == path}.first
60
60
  end
61
61
 
62
62
  def Resque.set_worker_args_by_path(path,args)
@@ -106,7 +106,9 @@ module Mobilize
106
106
  def Resque.failure_report
107
107
  fjobs = {}
108
108
  excs = Hash.new(0)
109
- Resque.failures.each do |f|
109
+ Resque.failures.each_with_index do |f,f_i|
110
+ #skip if already notified
111
+ next if f['notified']
110
112
  sname = f['payload']['args'].first
111
113
  excs = f['error']
112
114
  if fjobs[sname].nil?
@@ -116,6 +118,9 @@ module Mobilize
116
118
  else
117
119
  fjobs[sname][excs] += 1
118
120
  end
121
+ #add notified flag to redis
122
+ f['notified'] = true
123
+ ::Resque.redis.lset(:failed, f_i, ::Resque.encode(f))
119
124
  end
120
125
  return fjobs
121
126
  end
@@ -134,9 +139,9 @@ module Mobilize
134
139
  if count.to_i > idle_pids.length or count == 0
135
140
  return false
136
141
  elsif count
137
- "kill #{idle_pids[0..count-1].join(" ")}".bash
142
+ "kill #{idle_pids[0..count-1].join(" ")}".bash(false)
138
143
  else
139
- "kill #{idle_pids.join(" ")}".bash
144
+ "kill #{idle_pids.join(" ")}".bash(false)
140
145
  end
141
146
  return true
142
147
  end
@@ -148,7 +153,7 @@ module Mobilize
148
153
  if idle_stale_pids.length == 0
149
154
  return false
150
155
  else
151
- "kill #{idle_stale_pids.join(" ")}".bash
156
+ "kill #{idle_stale_pids.join(" ")}".bash(false)
152
157
  end
153
158
  return true
154
159
  end
@@ -163,12 +163,14 @@ module Mobilize
163
163
  if Jobtracker.notif_due?
164
164
  notifs = []
165
165
  if Jobtracker.failures.length>0
166
- n = {}
167
166
  jfcs = Resque.failure_report
168
- n['subj'] = "#{jfcs.keys.length.to_s} failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
169
- #one row per exception type, with the job name
170
- n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
171
- notifs << n
167
+ unless jfcs=={} #no new failures
168
+ n = {}
169
+ n['subj'] = "#{jfcs.keys.length.to_s} new failed jobs, #{jfcs.values.map{|v| v.values}.flatten.sum.to_s} failures"
170
+ #one row per exception type, with the job name
171
+ n['body'] = jfcs.map{|key,val| val.map{|b,name| [key," : ",b,", ",name," times"].join}}.flatten.join("\n\n")
172
+ notifs << n
173
+ end
172
174
  end
173
175
  lws = Jobtracker.max_run_time_workers
174
176
  if lws.length>0
@@ -4,7 +4,7 @@ module Mobilize
4
4
  include Mongoid::Timestamps
5
5
  field :handler, type: String
6
6
  field :path, type: String
7
- field :url, type: String
7
+ field :http_url, type: String
8
8
  field :raw_size, type: Fixnum
9
9
  field :last_cached_at, type: Time
10
10
  field :last_cache_handler, type: String
@@ -18,6 +18,16 @@ module Mobilize
18
18
  return "Mobilize::#{dst.handler.humanize}".constantize.read_by_path(dst.path)
19
19
  end
20
20
 
21
+ def Dataset.find_by_url(url)
22
+ handler,path = url.split("://")
23
+ Dataset.find_by_handler_and_path(handler,path)
24
+ end
25
+
26
+ def Dataset.find_or_create_by_url(url)
27
+ handler,path = url.split("://")
28
+ Dataset.find_or_create_by_handler_and_path(handler,path)
29
+ end
30
+
21
31
  def Dataset.find_by_handler_and_path(handler,path)
22
32
  Dataset.where(handler: handler, path: path).first
23
33
  end
@@ -28,41 +38,30 @@ module Mobilize
28
38
  return dst
29
39
  end
30
40
 
31
- def write(string)
32
- dst = self
33
- "Mobilize::#{dst.handler.humanize}".constantize.write_by_path(dst.path,string)
34
- dst.raw_size = string.length
35
- dst.save!
36
- return true
41
+ def Dataset.write_to_url(url,string)
42
+ dst = Dataset.find_or_create_by_url(url)
43
+ dst.write(string)
44
+ url
37
45
  end
38
46
 
39
- def cache_valid?
47
+ def read
40
48
  dst = self
41
- return true if dst.last_cached_at and (dst.cache_expire_at.nil? or dst.cache_expire_at > Time.now.utc)
49
+ dst.update_attributes(:last_read_at=>Time.now.utc)
50
+ "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path)
42
51
  end
43
52
 
44
- def read_cache(cache_handler="gridfs")
53
+ def write(string)
45
54
  dst = self
46
- if cache_valid?
47
- dst.update_attributes(:last_read_at=>Time.now.utc)
48
- return "Mobilize::#{cache_handler.humanize}".constantize.read([dst.handler,dst.path].join("://"))
49
- else
50
- raise "Cache invalid or not found for #{cache_handler}://#{dst.path}"
51
- end
55
+ "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string)
56
+ dst.raw_size = string.length
57
+ dst.save!
58
+ return true
52
59
  end
53
60
 
54
- def write_cache(string,expire_at=nil,cache_handler="gridfs")
61
+ def delete
55
62
  dst = self
56
- "Mobilize::#{cache_handler.humanize}".constantize.write([dst.handler,dst.path].join("://"),string)
57
- dst.update_attributes(:last_cached_at=>Time.now.utc,
58
- :last_cache_handler=>cache_handler.to_s.downcase,
59
- :cache_expire_at=>expire_at,
60
- :size=>string.length)
63
+ "Mobilize::#{dst.handler.humanize}".constantize.delete_by_dataset_path(dst.path)
61
64
  return true
62
65
  end
63
-
64
- def delete_cache(cache_handler="gridfs")
65
- return "Mobilize::#{cache_handler.humanize}".constantize.delete(dst.handler, dst.path)
66
- end
67
66
  end
68
67
  end
@@ -71,9 +71,9 @@ module Mobilize
71
71
  Runner.where(:path=>path).first || Runner.create(:path=>path,:active=>true)
72
72
  end
73
73
 
74
- def read_cache
74
+ def cache
75
75
  r = self
76
- r.dataset.read_cache
76
+ Dataset.find_or_create_by_url("gridfs://#{r.path}")
77
77
  end
78
78
 
79
79
  def gsheet(gdrive_slot)
@@ -88,7 +88,7 @@ module Mobilize
88
88
  r = self
89
89
  gsheet_tsv = r.gsheet(gdrive_slot).to_tsv
90
90
  #cache in DB
91
- r.dataset.write_cache(gsheet_tsv)
91
+ r.cache.write(gsheet_tsv)
92
92
  #turn it into a hash array
93
93
  gsheet_jobs = gsheet_tsv.tsv_to_hash_array
94
94
  #go through each job, update relevant job with its params
@@ -7,6 +7,7 @@ module Mobilize
7
7
  field :call, type: String
8
8
  field :param_string, type: Array
9
9
  field :status, type: String
10
+ field :out_url, type: String
10
11
  field :completed_at, type: Time
11
12
  field :started_at, type: Time
12
13
  field :failed_at, type: Time
@@ -19,19 +20,12 @@ module Mobilize
19
20
  s.path.split("/").last.gsub("stage","").to_i
20
21
  end
21
22
 
22
- def stdout_dataset
23
+ def out_dst
24
+ #this gives a dataset that points to the output
25
+ #allowing you to determine its size
26
+ #before committing to a read or write
23
27
  s = self
24
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stdout")
25
- end
26
-
27
- def stderr_dataset
28
- s = self
29
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/stderr")
30
- end
31
-
32
- def log_dataset
33
- s = self
34
- Dataset.find_or_create_by_handler_and_path("gridfs","#{s.path}/log")
28
+ Dataset.find_by_url(s.out_url) if s.out_url
35
29
  end
36
30
 
37
31
  def params
@@ -79,27 +73,21 @@ module Mobilize
79
73
  j = s.job
80
74
  s.update_attributes(:started_at=>Time.now.utc)
81
75
  s.update_status(%{Starting at #{Time.now.utc}})
82
- stdout, stderr = [nil,nil]
83
76
  begin
84
- stdout,log = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path).to_s
85
- #write to log if method returns an array w 2 members
86
- s.log_dataset.write_cache(log) if log
77
+ #get response by running method
78
+ s.out_url = "Mobilize::#{s.handler.humanize}".constantize.send("#{s.call}_by_stage_path",s.path)
79
+ s.save!
80
+ unless s.out_url
81
+ #re-queue self if no response
82
+ s.enqueue!
83
+ return false
84
+ end
87
85
  rescue ScriptError, StandardError => exc
88
- stderr = [exc.to_s,exc.backtrace.to_s].join("\n")
89
- #record the failure in Job so it appears on Runner, turn it off
90
- #so it doesn't run again
91
86
  j.update_attributes(:active=>false)
92
87
  s.update_attributes(:failed_at=>Time.now.utc)
93
88
  s.update_status("Failed at #{Time.now.utc.to_s}")
94
89
  raise exc
95
90
  end
96
- if stdout == false
97
- #re-queue self if output is false
98
- s.enqueue!
99
- return false
100
- end
101
- #write output to cache
102
- s.stdout_dataset.write_cache(stdout)
103
91
  s.update_attributes(:completed_at=>Time.now.utc)
104
92
  s.update_status("Completed at #{Time.now.utc.to_s}")
105
93
  if s.idx == j.stages.length
@@ -24,7 +24,7 @@ module Mobilize
24
24
 
25
25
  def jobs(jname=nil)
26
26
  u = self
27
- return u.runners.map{|r| r.jobs(jname)}.flatten
27
+ return u.runner.jobs
28
28
  end
29
29
 
30
30
  def runner_path
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Base
3
- VERSION = "1.0.84"
3
+ VERSION = "1.0.91"
4
4
  end
5
5
  end
@@ -41,10 +41,10 @@ describe "Mobilize" do
41
41
  test_source_tsv = test_source_ha.hash_array_to_tsv
42
42
  test_source_sheet.write(test_source_tsv)
43
43
 
44
- puts "add row to jobs sheet, wait 120s"
44
+ puts "add row to jobs sheet, wait 150s"
45
45
  test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
46
46
  jobs_sheet.add_or_update_rows(test_job_rows)
47
- sleep 120
47
+ sleep 150
48
48
 
49
49
  puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
50
50
  test_target_sheet_1 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
@@ -52,11 +52,11 @@ describe "Mobilize" do
52
52
 
53
53
  assert test_target_sheet_1.to_tsv == test_source_sheet.to_tsv
54
54
 
55
- puts "delete both output sheets, set first job to active=true"
55
+ puts "delete both output sheets, set first job to active=true, wait 120s"
56
56
  [test_target_sheet_1,test_target_sheet_2].each{|s| s.delete}
57
57
 
58
58
  jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
59
- sleep 90
59
+ sleep 120
60
60
 
61
61
  test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
62
62
  puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.84
4
+ version: 1.0.91
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-19 00:00:00.000000000 Z
12
+ date: 2013-01-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -238,7 +238,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
238
238
  version: '0'
239
239
  segments:
240
240
  - 0
241
- hash: 1770893729203810846
241
+ hash: -136156851409089699
242
242
  required_rubygems_version: !ruby/object:Gem::Requirement
243
243
  none: false
244
244
  requirements:
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
247
247
  version: '0'
248
248
  segments:
249
249
  - 0
250
- hash: 1770893729203810846
250
+ hash: -136156851409089699
251
251
  requirements: []
252
252
  rubyforge_project: mobilize-base
253
253
  rubygems_version: 1.8.24