rbbt-util 5.7.0 → 5.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
@@ -50,6 +50,15 @@ void post_semaphore(char* name){
50
50
  EOF
51
51
  end
52
52
 
53
+ def self.synchronize(sem)
54
+ RbbtSemaphore.wait_semaphore(sem)
55
+ begin
56
+ yield
57
+ ensure
58
+ RbbtSemaphore.post_semaphore(sem)
59
+ end
60
+ end
61
+
53
62
  def self.with_semaphore(size, file = nil)
54
63
  file = Misc.digest(rand.to_s) if file.nil?
55
64
  file.gsub!('/', '_')
@@ -68,11 +77,8 @@ void post_semaphore(char* name){
68
77
  pids = elems.collect do |elem|
69
78
  Process.fork do
70
79
  begin
71
- RbbtSemaphore.wait_semaphore(file)
72
- begin
80
+ RbbtSemaphore.synchronize(file) do
73
81
  yield elem
74
- ensure
75
- RbbtSemaphore.post_semaphore(file)
76
82
  end
77
83
  rescue Interrupt
78
84
  Log.error "Process #{Process.pid} was aborted"
@@ -47,4 +47,9 @@ module SOPT
47
47
  self.input_descriptions.delete input
48
48
  end
49
49
  end
50
+
51
+ def self.usage
52
+ puts SOPT.doc
53
+ exit 0
54
+ end
50
55
  end
@@ -73,10 +73,8 @@ module SOPT
73
73
 
74
74
  def self.doc
75
75
  doc = <<-EOF
76
- #{Log.color :magenta}
77
- #{command}(1) -- #{summary}
78
- #{"=" * (command.length + summary.length + 7)}
79
- #{Log.color :reset}
76
+ #{Log.color :magenta}#{command}(1) -- #{summary}
77
+ #{"=" * (command.length + summary.length + 7)}#{Log.color :reset}
80
78
 
81
79
  #{ Log.color :magenta, "## SYNOPSYS"}
82
80
 
@@ -94,16 +94,27 @@ class Step
94
94
  set_info(:messages, (messages || []) << message)
95
95
  end
96
96
 
97
- def log(status, message = nil, do_log = true)
97
+ attr_accessor :last_log
98
+ def last_log
99
+ @last_log ||= Time.now
100
+ end
98
101
 
102
+ def log(status, message = nil)
99
103
  if message
100
- Log.medium "[#{ status }] #{ message }: #{path}"
101
- else
102
- Log.medium "[#{ status }]: #{path}"
103
- end if do_log
104
-
105
- self.status = status
106
- message(message) unless message.nil?
104
+ Log.medium do
105
+ now = Time.now
106
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }: #{ message } -- #{path}"
107
+ @last_log = now
108
+ str
109
+ end
110
+ else
111
+ Log.medium do
112
+ now = Time.now
113
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s } -- #{path}"
114
+ @last_log = now
115
+ str
116
+ end
117
+ end
107
118
  end
108
119
 
109
120
  def started?
@@ -213,11 +224,27 @@ class Step
213
224
  end
214
225
 
215
226
  module Workflow
227
+
228
+ attr_accessor :last_log
229
+ def last_log
230
+ @@last_log ||= Time.now
231
+ end
232
+
216
233
  def log(status, message = nil)
217
234
  if message
218
- Log.low "#{ status }: #{ message }"
219
- else
220
- Log.low "#{ status }"
235
+ Log.medium do
236
+ now = Time.now
237
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }: #{ message }"
238
+ @@last_log = now
239
+ str
240
+ end
241
+ else
242
+ Log.medium do
243
+ now = Time.now
244
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }"
245
+ @@last_log = now
246
+ str
247
+ end
221
248
  end
222
249
  end
223
250
 
@@ -315,7 +342,7 @@ module Workflow
315
342
  if inputs.any? or dependencies.any?
316
343
  tagged_jobname = case TAG
317
344
  when :hash
318
- jobname + '_' + Misc.digest((inputs * "\n" + ";" + dependencies.collect{|dep| dep.name} * "\n"))
345
+ jobname + '_' + Misc.digest((inputs.collect{|i| Misc.fingerprint(i)} * "," + ";" + dependencies.collect{|dep| dep.name } * "\n"))
319
346
  else
320
347
  jobname
321
348
  end
@@ -10,8 +10,6 @@ class Step
10
10
  attr_accessor :pid
11
11
  attr_accessor :exec
12
12
 
13
- class Aborted < Exception; end
14
-
15
13
  def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
16
14
  path = Path.setup(Misc.sanitize_filename(path)) if String === path
17
15
  @path = path
@@ -123,7 +121,7 @@ class Step
123
121
  end
124
122
  }
125
123
 
126
- Log.medium("Starting task #{task.name || ""} [#{Process.pid}]: #{ path }")
124
+ Log.medium{"#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ path }"}
127
125
  set_info :status, :started
128
126
 
129
127
  set_info :started, (start_time = Time.now)
@@ -132,7 +130,7 @@ class Step
132
130
 
133
131
  res = begin
134
132
  exec
135
- rescue Step::Aborted
133
+ rescue Aborted
136
134
  log(:error, "Aborted")
137
135
 
138
136
  children_pids = info[:children_pids]
@@ -165,7 +163,7 @@ class Step
165
163
  set_info :status, :done
166
164
  set_info :done, (done_time = Time.now)
167
165
  set_info :time_elapsed, done_time - start_time
168
- Log.medium("Completed task #{task.name || ""} [#{Process.pid}]: #{ path }")
166
+ Log.medium{"#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ path }"}
169
167
 
170
168
  res
171
169
  end
@@ -180,13 +178,13 @@ class Step
180
178
  def fork(semaphore = nil)
181
179
  raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil?
182
180
  @pid = Process.fork do
183
- trap(:INT) { raise Step::Aborted.new "INT signal recieved" }
181
+ trap(:INT) { raise Aborted.new "INT signal recieved" }
184
182
  begin
185
183
  RbbtSemaphore.wait_semaphore(semaphore) if semaphore
186
184
  FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
187
185
  begin
188
186
  run(true)
189
- rescue Step::Aborted
187
+ rescue Aborted
190
188
  Log.debug{"Forked process aborted: #{path}"}
191
189
  log :aborted, "Aborted"
192
190
  raise $!
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+
5
+ file, *sizes = ARGV
6
+
7
+ sizes.each do |size|
8
+ size = size.to_i
9
+
10
+ %w(--parallel=cpu --parallel=thr --noparallel).each do |parallel|
11
+ Log.info Log.color(:red, parallel) + " " + Log.color(:blue, size.to_s)
12
+
13
+ start = Time.now
14
+ `head -n #{ size } "#{file}" | drbbt task Structure mutated_isoform_neighbour_annotation -g - -cl --log 10 -pn #{parallel}`
15
+ Log.info Log.color(:red, parallel) + " " + Log.color(:blue, size.to_s) + " " + Log.color(:yellow, (Time.now - start).to_s)
16
+ end
17
+ end
18
+
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Matches text and colors it.
10
+
11
+ $ rbbt color <file|-> (<color> <pattern>)+
12
+
13
+ Use - to read from STDIN. Example: `rbbt color - red "/.*rbbt.*/"`
14
+
15
+ -h--help Print help
16
+
17
+ EOF
18
+ rbbt_usage and exit 0 if options[:help]
19
+
20
+ file, *rest = ARGV
21
+
22
+ if file == '-'
23
+ text = STDIN.read
24
+ else
25
+ text = Open.read(file)
26
+ end
27
+
28
+ while rest.any?
29
+ color = rest.shift
30
+ pattern = rest.shift
31
+ color = color.to_sym
32
+ if pattern =~ /^\/(.*)\/$/
33
+ text.gsub!(Regexp.compile($1)){|match| Log.color color, match }
34
+ else
35
+ text.gsub!(pattern){|match| Log.color color, match }
36
+ end
37
+ end
38
+
39
+ puts text
40
+
41
+
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ require 'rbbt/util/R'
7
+
8
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
9
+
10
+ options = SOPT.setup <<EOF
11
+ Calculate density
12
+
13
+ $ rbbt stat density <file>
14
+
15
+ Display summary information. Works with Tokyocabinet HDB and DBD as well.
16
+
17
+ -tch--tokyocabinet File is a TC HDB
18
+ -tcb--tokyocabinet_bd File is a TC BDB
19
+ -t--type* Type of tsv (single, list, double, flat)
20
+ -h--header_hash* Change the character used to mark the header line (defaults to #)
21
+ -k--key_field* Change the key field
22
+ -f--fields* Change the fields to load
23
+ -h--help Help
24
+ EOF
25
+
26
+ SOPT.usage if options[:help]
27
+
28
+ file = ARGV.shift
29
+
30
+ if file == '-' or file.nil?
31
+ file = STDIN
32
+ else
33
+ file = Open.open file
34
+ end
35
+
36
+
37
+ values = file.read.split("\n").collect{|v| v.to_f}
38
+
39
+ res = TmpFile.with_file do |tmp|
40
+ R.run <<-EOF
41
+ values = #{R.ruby2R values}
42
+ d = density(values)
43
+ df = data.frame(x=d$x, y=d$y)
44
+ rbbt.tsv.write(file='#{ tmp }', df)
45
+ EOF
46
+
47
+ Open.read(tmp)
48
+ end
49
+
50
+ puts res.split("\n")[1..-1].collect{|l| l.split("\t")[1,2] * "\t"} * "\n"
@@ -3,10 +3,25 @@
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
5
 
6
- options = SOPT.get("-tch--tokyocabinet:-tcb--tokyocabinet_bd:-t--type*:-h--header_hash*:-k--key_field*:-f--fields*")
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
7
 
8
- options[:fields] = options[:fields].split(/,\|/) if options[:fields]
9
- options[:header_hash] = options["header_hash"]
8
+ options = SOPT.setup <<EOF
9
+ Inspect a TSV file
10
+
11
+ $ rbbt tsv info [options] file.tsv
12
+
13
+ Display summary information. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -t--type* Type of tsv (single, list, double, flat)
18
+ -h--header_hash* Change the character used to mark the header line (defaults to #)
19
+ -k--key_field* Change the key field
20
+ -f--fields* Change the fields to load
21
+ -h--help Help
22
+ EOF
23
+
24
+ SOPT.usage if options[:help]
10
25
 
11
26
  file = ARGV.shift
12
27
 
@@ -14,6 +29,9 @@ file = STDIN if file == '-'
14
29
 
15
30
  raise ParameterException, "Please specify the tsv file as argument" if file.nil?
16
31
 
32
+ options[:fields] = options[:fields].split(/,\|/) if options[:fields]
33
+ options[:header_hash] = options["header_hash"]
34
+
17
35
  case
18
36
  when options[:tokyocabinet]
19
37
  tsv = Persist.open_tokyocabinet(file, false)
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Slice column from tsv
10
+
11
+ $ rbbt tsv slice [options] file.tsv -f "Field 1"
12
+
13
+ Display summary informations. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -hh--header_hash* Change the character used to mark the header line (defaults to #)
18
+ -f--fields* Field to slice (comma-separated)
19
+ -h--help Help
20
+ EOF
21
+
22
+ SOPT.usage if options[:help]
23
+
24
+ file = ARGV.shift
25
+
26
+ file = STDIN if file == '-'
27
+
28
+ raise ParameterException, "Please specify the tsv file as argument" if file.nil?
29
+
30
+ fields = options[:fields]
31
+ raise ParameterException, "Please specify the fields to slice" if fields.nil?
32
+
33
+ options[:header_hash] = options["header_hash"]
34
+
35
+ case
36
+ when options[:tokyocabinet]
37
+ tsv = Persist.open_tokyocabinet(file, false)
38
+ puts tsv.summary
39
+ when options[:tokyocabinet_bd]
40
+ tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
41
+ puts tsv.summary
42
+ else
43
+ tsv = TSV.open(file, options)
44
+ end
45
+
46
+ puts tsv.slice(fields.split(","))
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Subset entries in a tsv
10
+
11
+ $ rbbt tsv subset [options] file.tsv --subset key1,key2,key3
12
+
13
+ Subsets entries from a TSV file from a given list. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -hh--header_hash* Change the character used to mark the header line (defaults to #)
18
+ -s--subset* Subset of samples (Comma-separated of file)
19
+ -h--help Help
20
+ EOF
21
+
22
+ SOPT.usage if options[:help]
23
+
24
+ file = ARGV.shift
25
+
26
+ file = STDIN if file == '-'
27
+
28
+ raise ParameterException, "Please specify the tsv file as argument" if file.nil?
29
+
30
+ subset = options[:subset]
31
+ raise ParameterException, "Please specify a subset of keys" if subset.nil?
32
+
33
+ if File.exists?(subset)
34
+ subset = Open.read(subset).split("\n")
35
+ else
36
+ subset = subset.split(',')
37
+ end
38
+
39
+ options[:fields] = options[:fields].split(/,\|/) if options[:fields]
40
+ options[:header_hash] = options["header_hash"]
41
+
42
+ case
43
+ when options[:tokyocabinet]
44
+ tsv = Persist.open_tokyocabinet(file, false)
45
+ puts tsv.summary
46
+ when options[:tokyocabinet_bd]
47
+ tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
48
+ puts tsv.summary
49
+ else
50
+ tsv = TSV.open(file, options)
51
+ end
52
+
53
+ puts tsv.select(subset)
@@ -15,6 +15,7 @@ Use - to read from STDIN
15
15
  -tcb--tokyocabinet_bd File is a tokyocabinet B database
16
16
  -f--field* Limit to a particular field
17
17
  -h--help Print this help
18
+ -l--lines Separate in lines
18
19
 
19
20
  EOF
20
21
  rbbt_usage and exit 0 if options[:help]
@@ -37,6 +38,11 @@ else
37
38
  tsv = TSV.open(file)
38
39
  end
39
40
 
41
+ field = options[:field]
40
42
  tsv = tsv.slice(field) if field
41
43
 
42
- tsv.each{|k,v| puts (Array === v ? v.flatten*"\t" : v.to_s ) }
44
+ if options[:lines]
45
+ tsv.each{|k,v| puts (Array === v ? v.flatten*"\n" : v.to_s ) }
46
+ else
47
+ tsv.each{|k,v| puts (Array === v ? v.flatten*"\t" : v.to_s ) }
48
+ end