scout-gear 7.2.0 → 7.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +37 -3
  3. data/VERSION +1 -1
  4. data/lib/scout/concurrent_stream.rb +9 -8
  5. data/lib/scout/exceptions.rb +1 -0
  6. data/lib/scout/log/color.rb +0 -1
  7. data/lib/scout/log/progress/util.rb +65 -0
  8. data/lib/scout/misc/helper.rb +31 -0
  9. data/lib/scout/misc/monitor.rb +1 -1
  10. data/lib/scout/misc.rb +1 -0
  11. data/lib/scout/open/stream.rb +21 -27
  12. data/lib/scout/persist.rb +42 -28
  13. data/lib/scout/semaphore.rb +8 -1
  14. data/lib/scout/tsv/dumper.rb +13 -8
  15. data/lib/scout/tsv/index.rb +127 -15
  16. data/lib/scout/tsv/open.rb +128 -0
  17. data/lib/scout/tsv/parser.rb +70 -43
  18. data/lib/scout/tsv/path.rb +4 -4
  19. data/lib/scout/tsv/persist/adapter.rb +52 -33
  20. data/lib/scout/tsv/persist/fix_width_table.rb +324 -0
  21. data/lib/scout/tsv/persist/serialize.rb +117 -0
  22. data/lib/scout/tsv/persist/tokyocabinet.rb +3 -3
  23. data/lib/scout/tsv/persist.rb +0 -2
  24. data/lib/scout/tsv/traverse.rb +130 -35
  25. data/lib/scout/tsv/util/filter.rb +303 -0
  26. data/lib/scout/tsv/util/process.rb +73 -0
  27. data/lib/scout/tsv/util/select.rb +220 -0
  28. data/lib/scout/tsv/util.rb +77 -19
  29. data/lib/scout/tsv.rb +2 -2
  30. data/lib/scout/work_queue/worker.rb +1 -1
  31. data/lib/scout/workflow/definition.rb +8 -0
  32. data/lib/scout/workflow/step/info.rb +4 -0
  33. data/lib/scout/workflow/step/progress.rb +14 -0
  34. data/lib/scout/workflow/step.rb +10 -5
  35. data/lib/scout/workflow/task.rb +8 -4
  36. data/lib/scout/workflow/usage.rb +2 -0
  37. data/scout-gear.gemspec +33 -10
  38. data/scout_commands/workflow/task +3 -2
  39. data/scout_commands/workflow/task_old +2 -2
  40. data/test/scout/open/test_stream.rb +1 -1
  41. data/test/scout/test_persist.rb +61 -0
  42. data/test/scout/test_tmpfile.rb +1 -1
  43. data/test/scout/test_tsv.rb +10 -1
  44. data/test/scout/test_work_queue.rb +1 -0
  45. data/test/scout/tsv/persist/test_adapter.rb +10 -0
  46. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  47. data/test/scout/tsv/test_index.rb +94 -2
  48. data/test/scout/tsv/test_open.rb +9 -0
  49. data/test/scout/tsv/test_parser.rb +28 -3
  50. data/test/scout/tsv/test_persist.rb +7 -0
  51. data/test/scout/tsv/test_traverse.rb +110 -3
  52. data/test/scout/tsv/test_util.rb +23 -0
  53. data/test/scout/tsv/util/test_filter.rb +188 -0
  54. data/test/scout/tsv/util/test_process.rb +47 -0
  55. data/test/scout/tsv/util/test_select.rb +44 -0
  56. data/test/scout/work_queue/test_worker.rb +63 -6
  57. data/test/scout/workflow/step/test_load.rb +3 -3
  58. data/test/scout/workflow/test_step.rb +10 -10
  59. data/test/test_helper.rb +3 -1
  60. metadata +19 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de499ccbdffd888298324a025b253f0dd0e359d279d6abe8331acd91fd00cc57
4
- data.tar.gz: 9643053080da23a98209ebefbcd3b3043b7031374a405ffed112460f1ddcf411
3
+ metadata.gz: 7273030ac34bd180620d9075ee85b26d8a9883831a5925fc911915f1edeeecf3
4
+ data.tar.gz: 4e445acb896844240c0024b7754cffee93af71feb62f9c4251fda70aee79121a
5
5
  SHA512:
6
- metadata.gz: 73afbf6aeb4032380fc45e2f60e2e9335d763c6837ef3bb8247c83bfaf2cd962a6fdfba767788b988e00f685d0e9bc19927a4e794fc092296cc088f23d4565ee
7
- data.tar.gz: 309e86afe466c649800fd212fd22e9e183a5e92ba558d88ec0258690a15437952ead3e968c3693b89c036dc2fa9e27f4d129f6be6c001031dc38ab071361ba99
6
+ metadata.gz: '029a8eb23eda77c7e0c11ac2966e1e8ebf3eae479bbee0c76e113f5b839c655e97e0fec5025cc3c251795afcfc92ac3395a79b68c8eb99158d1cee8e2310f002'
7
+ data.tar.gz: 04ed4bcabfa7c41b80ba520074404bfb09c723ecdb8d02a2052f73fe86759bab7fdadff31f0f66284ed4938ebe5a1cad56fb94182b3edcc0de6325decdcb84f6
data/.vimproject CHANGED
@@ -32,6 +32,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
32
32
  filesystem.rb
33
33
  monitor.rb
34
34
  system.rb
35
+ helper.rb
35
36
  }
36
37
  named_array.rb
37
38
  indiferent_hash.rb
@@ -103,6 +104,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
103
104
  config.rb
104
105
  dependencies.rb
105
106
  provenance.rb
107
+ progress.rb
106
108
  }
107
109
  task.rb
108
110
  task=task{
@@ -123,26 +125,58 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
123
125
  parser.rb
124
126
  dumper.rb
125
127
  persist.rb
126
- index.rb
127
128
  persist=persist{
128
129
  adapter.rb
130
+ serialize.rb
129
131
  tokyocabinet.rb
132
+ fix_width_table.rb
130
133
  }
134
+ index.rb
131
135
  traverse.rb
132
136
  util.rb
137
+ util=util{
138
+ process.rb
139
+ select.rb
140
+ filter.rb
141
+ }
133
142
  path.rb
143
+ open.rb
134
144
  }
135
145
  }
136
146
  }
137
147
  test=test {
138
148
  test_helper.rb
149
+ test_scout-gear.rb
139
150
  scout=scout{
151
+ test_cmd.rb
152
+ test_concurrent_stream.rb
153
+ test_config.rb
140
154
  test_indiferent_hash.rb
155
+ test_log.rb
156
+ test_meta_extension.rb
157
+ test_misc.rb
158
+ test_named_array.rb
159
+ test_open.rb
160
+ test_path.rb
161
+ test_persist.rb
162
+ test_resource.rb
163
+ test_semaphore.rb
164
+ test_tmpfile.rb
165
+ test_tsv.rb
166
+ test_work_queue.rb
167
+ test_workflow.rb
168
+ workflow=workflow{
169
+ test_definition.rb
170
+ test_documentation.rb
171
+ test_step.rb
172
+ test_task.rb
173
+ test_usage.rb
174
+ test_util.rb
175
+ }
141
176
  indiferent_hash=indiferent_hash{
177
+ test_case_insensitive.rb
142
178
  test_options.rb
143
179
  }
144
- test_log.rb
145
- test_tmpfile.rb
146
180
  }
147
181
  }
148
182
  share=share{
data/VERSION CHANGED
@@ -1 +1 @@
1
- 7.2.0
1
+ 7.3.0
@@ -9,11 +9,11 @@ module AbortedStream
9
9
  end
10
10
 
11
11
  module ConcurrentStream
12
- attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lockfile, :no_fail, :pair, :thread, :stream_exception, :log, :std_err, :next
12
+ attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lock, :no_fail, :pair, :thread, :stream_exception, :log, :std_err, :next
13
13
 
14
14
  def self.setup(stream, options = {}, &block)
15
15
 
16
- threads, pids, callback, abort_callback, filename, autojoin, lockfile, no_fail, pair, next_stream = IndiferentHash.process_options options, :threads, :pids, :callback, :abort_callback, :filename, :autojoin, :lockfile, :no_fail, :pair, :next
16
+ threads, pids, callback, abort_callback, filename, autojoin, lock, no_fail, pair, next_stream = IndiferentHash.process_options options, :threads, :pids, :callback, :abort_callback, :filename, :autojoin, :lock, :no_fail, :pair, :next
17
17
  stream.extend ConcurrentStream unless ConcurrentStream === stream
18
18
 
19
19
  stream.threads ||= []
@@ -54,7 +54,7 @@ module ConcurrentStream
54
54
 
55
55
  stream.filename = filename.nil? ? stream.inspect.split(":").last[0..-2] : filename
56
56
 
57
- stream.lockfile = lockfile unless lockfile.nil?
57
+ stream.lock = lock unless lock.nil?
58
58
 
59
59
  stream.aborted = false
60
60
 
@@ -62,7 +62,7 @@ module ConcurrentStream
62
62
  end
63
63
 
64
64
  def annotate(stream)
65
- ConcurrentStream.setup(stream, :threads => threads, :pids => pids, :callback => callback, :abort_callback => abort_callback, :filename => filename, :autojoin => autojoin, :lockfile => lockfile)
65
+ ConcurrentStream.setup(stream, :threads => threads, :pids => pids, :callback => callback, :abort_callback => abort_callback, :filename => filename, :autojoin => autojoin, :lock => lock)
66
66
  stream
67
67
  end
68
68
 
@@ -136,11 +136,12 @@ module ConcurrentStream
136
136
  begin
137
137
  join_threads
138
138
  join_pids
139
+ raise stream_exception if stream_exception
139
140
  join_callback
140
141
  close unless closed?
141
142
  ensure
142
143
  @joined = true
143
- lockfile.unlock if lockfile and lockfile.locked?
144
+ lock.unlock if lock and lock.locked?
144
145
  raise stream_exception if stream_exception
145
146
  end
146
147
  end
@@ -160,7 +161,7 @@ module ConcurrentStream
160
161
  @threads.each do |t|
161
162
  next if t == Thread.current
162
163
  begin
163
- t.join unless t == Thread.current
164
+ t.join
164
165
  rescue Aborted
165
166
  rescue Exception
166
167
  Log.debug "Thread (#{name}) exception: #{$!.message}"
@@ -205,8 +206,8 @@ module ConcurrentStream
205
206
  ensure
206
207
  close unless closed?
207
208
 
208
- if lockfile and lockfile.locked?
209
- lockfile.unlock
209
+ if lock and lock.locked?
210
+ lock.unlock
210
211
  end
211
212
  end
212
213
  end
@@ -92,6 +92,7 @@ class WorkerException < ScoutException
92
92
  end
93
93
  end
94
94
 
95
+ class SemaphoreInterrupted < TryAgain; end
95
96
 
96
97
  #class OpenGzipError < StandardError; end
97
98
  #
@@ -2,7 +2,6 @@ require_relative 'color_class'
2
2
  require_relative '../indiferent_hash'
3
3
 
4
4
  require 'term/ansicolor'
5
- require 'colorist'
6
5
 
7
6
  module Colorize
8
7
  def self.colors=(colors)
@@ -96,6 +96,71 @@ module Log
96
96
  remove_bar(bar, error) if bar && ! keep
97
97
  end
98
98
  end
99
+
100
+ def self.guess_obj_max(obj)
101
+ begin
102
+ case obj
103
+ when (defined? Step and Step)
104
+ if obj.done?
105
+ path = obj.path
106
+ path = path.find if path.respond_to? :find
107
+ if File.exist? path
108
+ CMD.cmd("wc -l '#{path}'").read.to_i
109
+ else
110
+ nil
111
+ end
112
+ else
113
+ nil
114
+ end
115
+ when TSV
116
+ obj.length
117
+ when Array, Hash
118
+ obj.size
119
+ when File
120
+ return nil if Open.gzip?(obj) or Open.bgzip?(obj)
121
+ CMD.cmd("wc -l '#{obj.path}'").read.to_i
122
+ when Path, String
123
+ obj = obj.find if Path === obj
124
+ if File.exist? obj
125
+ return nil if Open.gzip?(obj) or Open.bgzip?(obj)
126
+ CMD.cmd("wc -l '#{obj}'").read.to_i
127
+ else
128
+ nil
129
+ end
130
+ end
131
+ rescue Exception
132
+ Log.exception $!
133
+ nil
134
+ end
135
+ end
136
+
137
+ def self.get_obj_bar(bar, obj)
138
+ case bar
139
+ when String
140
+ max = guess_obj_max(obj)
141
+ Log::ProgressBar.new_bar(max, {:desc => bar})
142
+ when TrueClass
143
+ max = guess_obj_max(obj)
144
+ Log::ProgressBar.new_bar(max, nil)
145
+ when Numeric
146
+ max = guess_obj_max(obj)
147
+ Log::ProgressBar.new_bar(bar)
148
+ when Hash
149
+ max = Misc.process_options(bar, :max) || max
150
+ Log::ProgressBar.new_bar(max, bar)
151
+ when Log::ProgressBar
152
+ bar.max ||= guess_obj_max(obj)
153
+ bar
154
+ else
155
+ if (defined? Step and Step === bar)
156
+ max = guess_obj_max(obj)
157
+ Log::ProgressBar.new_bar(max, {:desc => bar.status, :file => bar.file(:progress)})
158
+ else
159
+ bar
160
+ end
161
+ end
162
+ end
99
163
  end
164
+
100
165
  end
101
166
 
@@ -0,0 +1,31 @@
1
+ module Misc
2
+ def self.intersect_sorted_arrays(a1, a2)
3
+ e1, e2 = a1.shift, a2.shift
4
+ intersect = []
5
+ while true
6
+ break if e1.nil? or e2.nil?
7
+ case e1 <=> e2
8
+ when 0
9
+ intersect << e1
10
+ e1, e2 = a1.shift, a2.shift
11
+ when -1
12
+ e1 = a1.shift while not e1.nil? and e1 < e2
13
+ when 1
14
+ e2 = a2.shift
15
+ e2 = a2.shift while not e2.nil? and e2 < e1
16
+ end
17
+ end
18
+ intersect
19
+ end
20
+
21
+ def self.counts(array)
22
+ counts = {}
23
+ array.each do |e|
24
+ counts[e] ||= 0
25
+ counts[e] += 1
26
+ end
27
+
28
+ counts
29
+ end
30
+
31
+ end
@@ -14,7 +14,7 @@ module Misc
14
14
  if message
15
15
  puts "#{message }: #{ repeats } repeats"
16
16
  else
17
- puts "Benchmark for #{ repeats } repeats"
17
+ puts "Benchmark for #{ repeats } repeats (#{caller.first})"
18
18
  end
19
19
  puts measure
20
20
  rescue Exception
data/lib/scout/misc.rb CHANGED
@@ -4,6 +4,7 @@ require_relative 'misc/digest'
4
4
  require_relative 'misc/filesystem'
5
5
  require_relative 'misc/monitor'
6
6
  require_relative 'misc/system'
7
+ require_relative 'misc/helper'
7
8
 
8
9
  module Misc
9
10
  end
@@ -54,11 +54,9 @@ module Open
54
54
 
55
55
  into_close = false unless into.respond_to? :close
56
56
 
57
- begin
58
- while c = io.readpartial(BLOCK_SIZE)
59
- into << c if into
60
- end
61
- rescue EOFError
57
+ while c = io.read(BLOCK_SIZE)
58
+ into << c if into
59
+ break if io.closed?
62
60
  end
63
61
 
64
62
  io.join if io.respond_to? :join
@@ -120,12 +118,10 @@ module Open
120
118
  when (IO === content or StringIO === content or File === content)
121
119
  Open.write(tmp_path) do |f|
122
120
  #f.sync = true
123
- begin
124
- while block = content.readpartial(BLOCK_SIZE)
125
- f.write block
126
- end
127
- rescue EOFError
128
- end
121
+ while block = content.read(BLOCK_SIZE)
122
+ f.write block
123
+ break if content.closed?
124
+ end
129
125
  end
130
126
  else
131
127
  File.open(tmp_path, 'wb') do |f| end
@@ -290,22 +286,20 @@ module Open
290
286
  Thread.current["name"] = "Splitter #{Log.fingerprint stream}"
291
287
 
292
288
  skip = [false] * num
293
- begin
294
- while block = stream.readpartial(BLOCK_SIZE)
295
-
296
- in_pipes.each_with_index do |sin,i|
297
- begin
298
- sin.write block
299
- rescue IOError
300
- Log.warn("Tee stream #{i} #{Log.fingerprint stream} IOError: #{$!.message} (#{Log.fingerprint sin})");
301
- skip[i] = true
302
- rescue
303
- Log.warn("Tee stream #{i} #{Log.fingerprint stream} Exception: #{$!.message} (#{Log.fingerprint sin})");
304
- raise $!
305
- end unless skip[i]
306
- end
289
+ while block = stream.read(BLOCK_SIZE)
290
+
291
+ in_pipes.each_with_index do |sin,i|
292
+ begin
293
+ sin.write block
294
+ rescue IOError
295
+ Log.warn("Tee stream #{i} #{Log.fingerprint stream} IOError: #{$!.message} (#{Log.fingerprint sin})");
296
+ skip[i] = true
297
+ rescue
298
+ Log.warn("Tee stream #{i} #{Log.fingerprint stream} Exception: #{$!.message} (#{Log.fingerprint sin})");
299
+ raise $!
300
+ end unless skip[i]
307
301
  end
308
- rescue IOError
302
+ break if stream.closed?
309
303
  end
310
304
 
311
305
  stream.join if stream.respond_to? :join
@@ -397,7 +391,7 @@ module Open
397
391
  end
398
392
  str
399
393
  end
400
-
394
+
401
395
  def self.sort_stream(stream, header_hash = "#", cmd_args = "-u")
402
396
  Open.open_pipe do |sin|
403
397
  line = stream.gets
data/lib/scout/persist.rb CHANGED
@@ -14,7 +14,7 @@ module Persist
14
14
 
15
15
  attr_writer :lock_dir
16
16
  def lock_dir
17
- @lock_dir ||= Path.setup("var/cache/persist_locks")
17
+ @lock_dir ||= Path.setup("tmp/persist_locks").find
18
18
  end
19
19
  end
20
20
 
@@ -24,46 +24,60 @@ module Persist
24
24
  TmpFile.tmp_for_file(name, options, other_options)
25
25
  end
26
26
 
27
+ MEMORY_CACHE = {}
28
+ CONNECTIONS = {}
27
29
  def self.persist(name, type = :serializer, options = {}, &block)
28
30
  persist_options = IndiferentHash.pull_keys options, :persist
29
31
  return yield if FalseClass === persist_options[:persist]
30
32
  file = persist_options[:path] || options[:path] || persistence_path(name, options)
31
33
 
34
+ lockfile = persist_options[:lockfile] || options[:lockfile] || Persist.persistence_path(file + '.persist', {:dir => Persist.lock_dir})
35
+
32
36
  update = options[:update] || persist_options[:update]
33
37
  update = Open.mtime(update) if Path === update
34
38
  update = Open.mtime(file) >= update ? false : true if Time === update
35
39
 
36
- if Open.exist?(file) && ! update
37
- Persist.load(file, type)
38
- else
39
- return yield(file) if block.arity == 1
40
- res = yield
41
- begin
42
- Open.rm(file)
40
+ if type == :memory
41
+ repo = options[:memory] || options[:repo] || MEMORY_CACHE
42
+ repo[file] ||= yield
43
+ return repo[file]
44
+ end
45
+
46
+ Open.lock lockfile do |lock|
47
+ if Open.exist?(file) && ! update
48
+ Persist.load(file, type)
49
+ else
50
+ return yield(file) if block.arity == 1
51
+ res = yield
52
+ begin
53
+ Open.rm(file)
43
54
 
44
- if IO === res || StringIO === res
45
- tee_copies = options[:tee_copies] || 1
46
- main, *copies = Open.tee_stream_thread_multiple res, tee_copies + 1
47
- t = Thread.new do
48
- Thread.current.report_on_exception = false
49
- Thread.current["name"] = "file saver: " + file
50
- Open.sensible_write(file, main)
51
- end
52
- Thread.pass until t["name"]
53
- copies.each_with_index do |copy,i|
54
- next_stream = copies[i+1] if copies.length > i
55
- ConcurrentStream.setup copy, :threads => t, :filename => file, :autojoin => true, :next => next_stream
55
+ if IO === res || StringIO === res
56
+ tee_copies = options[:tee_copies] || 1
57
+ main, *copies = Open.tee_stream_thread_multiple res, tee_copies + 1
58
+ main.lock = lock
59
+ t = Thread.new do
60
+ Thread.current.report_on_exception = false
61
+ Thread.current["name"] = "file saver: " + file
62
+ Open.sensible_write(file, main)
63
+ end
64
+ Thread.pass until t["name"]
65
+ copies.each_with_index do |copy,i|
66
+ next_stream = copies[i+1] if copies.length > i
67
+ ConcurrentStream.setup copy, :threads => t, :filename => file, :autojoin => true, :next => next_stream
68
+ end
69
+ res = copies.first
70
+ raise KeepLocked.new(res)
71
+ else
72
+ pres = Persist.save(res, file, type)
73
+ res = pres unless pres.nil?
56
74
  end
57
- res = copies.first
58
- else
59
- pres = Persist.save(res, file, type)
60
- res = pres unless pres.nil?
75
+ rescue
76
+ raise $! unless options[:canfail]
77
+ Log.debug "Could not persist #{type} on #{file}"
61
78
  end
62
- rescue
63
- raise $! unless options[:canfail]
64
- Log.debug "Could not persist #{type} on #{file}"
79
+ res
65
80
  end
66
- res
67
81
  end
68
82
  end
69
83
 
@@ -37,7 +37,13 @@ if continue
37
37
  int ret;
38
38
  sem_t* sem;
39
39
  sem = sem_open(name, 0);
40
+ if (sem == SEM_FAILED){
41
+ return(errno);
42
+ }
40
43
  ret = sem_wait(sem);
44
+ if (ret == -1){
45
+ return(errno);
46
+ }
41
47
  sem_close(sem);
42
48
  return(ret);
43
49
  }
@@ -51,6 +57,7 @@ if continue
51
57
  sem_close(sem);
52
58
  }
53
59
  EOF
60
+
54
61
  end
55
62
 
56
63
  SEM_MUTEX = Mutex.new
@@ -66,7 +73,7 @@ if continue
66
73
 
67
74
  def self.with_semaphore(size, file = nil)
68
75
  if file.nil?
69
- file = "/" << Misc.digest(rand(1000000000000).to_s) if file.nil?
76
+ file = "/scout-" << Misc.digest(rand(100000000000).to_s)[0..10] if file.nil?
70
77
  else
71
78
  file = file.gsub('/', '_') if file
72
79
  end
@@ -52,8 +52,8 @@ module TSV
52
52
  :sep => "\t", :type => :double
53
53
  @options = options
54
54
  @sout, @sin = Open.pipe
55
- ConcurrentStream.setup(@sin, :pair => @sout)
56
- ConcurrentStream.setup(@sout, :pair => @sin)
55
+ ConcurrentStream.setup(@sin, pair: @sout)
56
+ ConcurrentStream.setup(@sout, pair: @sin)
57
57
  end
58
58
 
59
59
  def init
@@ -88,16 +88,21 @@ module TSV
88
88
  end
89
89
 
90
90
  def stream
91
- iii self.extension_attr_hash
92
91
  dumper = TSV::Dumper.new self.extension_attr_hash
93
92
  dumper.init
94
- Thread.new do
95
- Thread.current["name"] = "Dumper thread"
96
- self.each do |k,v|
97
- dumper.add k, v
93
+ t = Thread.new do
94
+ begin
95
+ Thread.current.report_on_exception = true
96
+ Thread.current["name"] = "Dumper thread"
97
+ self.each do |k,v|
98
+ dumper.add k, v
99
+ end
100
+ dumper.close
101
+ rescue
102
+ dumper.abort($!)
98
103
  end
99
- dumper.close
100
104
  end
105
+ Thread.pass until t["name"]
101
106
  dumper.stream
102
107
  end
103
108