rbbt-util 5.7.0 → 5.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
@@ -50,6 +50,15 @@ void post_semaphore(char* name){
50
50
  EOF
51
51
  end
52
52
 
53
+ def self.synchronize(sem)
54
+ RbbtSemaphore.wait_semaphore(sem)
55
+ begin
56
+ yield
57
+ ensure
58
+ RbbtSemaphore.post_semaphore(sem)
59
+ end
60
+ end
61
+
53
62
  def self.with_semaphore(size, file = nil)
54
63
  file = Misc.digest(rand.to_s) if file.nil?
55
64
  file.gsub!('/', '_')
@@ -68,11 +77,8 @@ void post_semaphore(char* name){
68
77
  pids = elems.collect do |elem|
69
78
  Process.fork do
70
79
  begin
71
- RbbtSemaphore.wait_semaphore(file)
72
- begin
80
+ RbbtSemaphore.synchronize(file) do
73
81
  yield elem
74
- ensure
75
- RbbtSemaphore.post_semaphore(file)
76
82
  end
77
83
  rescue Interrupt
78
84
  Log.error "Process #{Process.pid} was aborted"
@@ -47,4 +47,9 @@ module SOPT
47
47
  self.input_descriptions.delete input
48
48
  end
49
49
  end
50
+
51
+ def self.usage
52
+ puts SOPT.doc
53
+ exit 0
54
+ end
50
55
  end
@@ -73,10 +73,8 @@ module SOPT
73
73
 
74
74
  def self.doc
75
75
  doc = <<-EOF
76
- #{Log.color :magenta}
77
- #{command}(1) -- #{summary}
78
- #{"=" * (command.length + summary.length + 7)}
79
- #{Log.color :reset}
76
+ #{Log.color :magenta}#{command}(1) -- #{summary}
77
+ #{"=" * (command.length + summary.length + 7)}#{Log.color :reset}
80
78
 
81
79
  #{ Log.color :magenta, "## SYNOPSYS"}
82
80
 
@@ -94,16 +94,27 @@ class Step
94
94
  set_info(:messages, (messages || []) << message)
95
95
  end
96
96
 
97
- def log(status, message = nil, do_log = true)
97
+ attr_accessor :last_log
98
+ def last_log
99
+ @last_log ||= Time.now
100
+ end
98
101
 
102
+ def log(status, message = nil)
99
103
  if message
100
- Log.medium "[#{ status }] #{ message }: #{path}"
101
- else
102
- Log.medium "[#{ status }]: #{path}"
103
- end if do_log
104
-
105
- self.status = status
106
- message(message) unless message.nil?
104
+ Log.medium do
105
+ now = Time.now
106
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }: #{ message } -- #{path}"
107
+ @last_log = now
108
+ str
109
+ end
110
+ else
111
+ Log.medium do
112
+ now = Time.now
113
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s } -- #{path}"
114
+ @last_log = now
115
+ str
116
+ end
117
+ end
107
118
  end
108
119
 
109
120
  def started?
@@ -213,11 +224,27 @@ class Step
213
224
  end
214
225
 
215
226
  module Workflow
227
+
228
+ attr_accessor :last_log
229
+ def last_log
230
+ @@last_log ||= Time.now
231
+ end
232
+
216
233
  def log(status, message = nil)
217
234
  if message
218
- Log.low "#{ status }: #{ message }"
219
- else
220
- Log.low "#{ status }"
235
+ Log.medium do
236
+ now = Time.now
237
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }: #{ message }"
238
+ @@last_log = now
239
+ str
240
+ end
241
+ else
242
+ Log.medium do
243
+ now = Time.now
244
+ str = "+#{(now - last_log).to_i} #{ Log.color :cyan, status.to_s }"
245
+ @@last_log = now
246
+ str
247
+ end
221
248
  end
222
249
  end
223
250
 
@@ -315,7 +342,7 @@ module Workflow
315
342
  if inputs.any? or dependencies.any?
316
343
  tagged_jobname = case TAG
317
344
  when :hash
318
- jobname + '_' + Misc.digest((inputs * "\n" + ";" + dependencies.collect{|dep| dep.name} * "\n"))
345
+ jobname + '_' + Misc.digest((inputs.collect{|i| Misc.fingerprint(i)} * "," + ";" + dependencies.collect{|dep| dep.name } * "\n"))
319
346
  else
320
347
  jobname
321
348
  end
@@ -10,8 +10,6 @@ class Step
10
10
  attr_accessor :pid
11
11
  attr_accessor :exec
12
12
 
13
- class Aborted < Exception; end
14
-
15
13
  def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
16
14
  path = Path.setup(Misc.sanitize_filename(path)) if String === path
17
15
  @path = path
@@ -123,7 +121,7 @@ class Step
123
121
  end
124
122
  }
125
123
 
126
- Log.medium("Starting task #{task.name || ""} [#{Process.pid}]: #{ path }")
124
+ Log.medium{"#{Log.color :magenta, "Starting task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ path }"}
127
125
  set_info :status, :started
128
126
 
129
127
  set_info :started, (start_time = Time.now)
@@ -132,7 +130,7 @@ class Step
132
130
 
133
131
  res = begin
134
132
  exec
135
- rescue Step::Aborted
133
+ rescue Aborted
136
134
  log(:error, "Aborted")
137
135
 
138
136
  children_pids = info[:children_pids]
@@ -165,7 +163,7 @@ class Step
165
163
  set_info :status, :done
166
164
  set_info :done, (done_time = Time.now)
167
165
  set_info :time_elapsed, done_time - start_time
168
- Log.medium("Completed task #{task.name || ""} [#{Process.pid}]: #{ path }")
166
+ Log.medium{"#{Log.color :magenta, "Completed task"} #{Log.color :yellow, task.name.to_s || ""} [#{Process.pid}]: #{ path }"}
169
167
 
170
168
  res
171
169
  end
@@ -180,13 +178,13 @@ class Step
180
178
  def fork(semaphore = nil)
181
179
  raise "Can not fork: Step is waiting for proces #{@pid} to finish" if not @pid.nil?
182
180
  @pid = Process.fork do
183
- trap(:INT) { raise Step::Aborted.new "INT signal recieved" }
181
+ trap(:INT) { raise Aborted.new "INT signal recieved" }
184
182
  begin
185
183
  RbbtSemaphore.wait_semaphore(semaphore) if semaphore
186
184
  FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
187
185
  begin
188
186
  run(true)
189
- rescue Step::Aborted
187
+ rescue Aborted
190
188
  Log.debug{"Forked process aborted: #{path}"}
191
189
  log :aborted, "Aborted"
192
190
  raise $!
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+
5
+ file, *sizes = ARGV
6
+
7
+ sizes.each do |size|
8
+ size = size.to_i
9
+
10
+ %w(--parallel=cpu --parallel=thr --noparallel).each do |parallel|
11
+ Log.info Log.color(:red, parallel) + " " + Log.color(:blue, size.to_s)
12
+
13
+ start = Time.now
14
+ `head -n #{ size } "#{file}" | drbbt task Structure mutated_isoform_neighbour_annotation -g - -cl --log 10 -pn #{parallel}`
15
+ Log.info Log.color(:red, parallel) + " " + Log.color(:blue, size.to_s) + " " + Log.color(:yellow, (Time.now - start).to_s)
16
+ end
17
+ end
18
+
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Matches text and colors it.
10
+
11
+ $ rbbt color <file|-> (<color> <pattern>)+
12
+
13
+ Use - to read from STDIN. Example: `rbbt color - red "/.*rbbt.*/"`
14
+
15
+ -h--help Print help
16
+
17
+ EOF
18
+ rbbt_usage and exit 0 if options[:help]
19
+
20
+ file, *rest = ARGV
21
+
22
+ if file == '-'
23
+ text = STDIN.read
24
+ else
25
+ text = Open.read(file)
26
+ end
27
+
28
+ while rest.any?
29
+ color = rest.shift
30
+ pattern = rest.shift
31
+ color = color.to_sym
32
+ if pattern =~ /^\/(.*)\/$/
33
+ text.gsub!(Regexp.compile($1)){|match| Log.color color, match }
34
+ else
35
+ text.gsub!(pattern){|match| Log.color color, match }
36
+ end
37
+ end
38
+
39
+ puts text
40
+
41
+
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ require 'rbbt/util/R'
7
+
8
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
9
+
10
+ options = SOPT.setup <<EOF
11
+ Calculate density
12
+
13
+ $ rbbt stat density <file>
14
+
15
+ Display summary information. Works with Tokyocabinet HDB and DBD as well.
16
+
17
+ -tch--tokyocabinet File is a TC HDB
18
+ -tcb--tokyocabinet_bd File is a TC BDB
19
+ -t--type* Type of tsv (single, list, double, flat)
20
+ -h--header_hash* Change the character used to mark the header line (defaults to #)
21
+ -k--key_field* Change the key field
22
+ -f--fields* Change the fields to load
23
+ -h--help Help
24
+ EOF
25
+
26
+ SOPT.usage if options[:help]
27
+
28
+ file = ARGV.shift
29
+
30
+ if file == '-' or file.nil?
31
+ file = STDIN
32
+ else
33
+ file = Open.open file
34
+ end
35
+
36
+
37
+ values = file.read.split("\n").collect{|v| v.to_f}
38
+
39
+ res = TmpFile.with_file do |tmp|
40
+ R.run <<-EOF
41
+ values = #{R.ruby2R values}
42
+ d = density(values)
43
+ df = data.frame(x=d$x, y=d$y)
44
+ rbbt.tsv.write(file='#{ tmp }', df)
45
+ EOF
46
+
47
+ Open.read(tmp)
48
+ end
49
+
50
+ puts res.split("\n")[1..-1].collect{|l| l.split("\t")[1,2] * "\t"} * "\n"
@@ -3,10 +3,25 @@
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
5
 
6
- options = SOPT.get("-tch--tokyocabinet:-tcb--tokyocabinet_bd:-t--type*:-h--header_hash*:-k--key_field*:-f--fields*")
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
7
 
8
- options[:fields] = options[:fields].split(/,\|/) if options[:fields]
9
- options[:header_hash] = options["header_hash"]
8
+ options = SOPT.setup <<EOF
9
+ Inspect a TSV file
10
+
11
+ $ rbbt tsv info [options] file.tsv
12
+
13
+ Display summary information. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -t--type* Type of tsv (single, list, double, flat)
18
+ -h--header_hash* Change the character used to mark the header line (defaults to #)
19
+ -k--key_field* Change the key field
20
+ -f--fields* Change the fields to load
21
+ -h--help Help
22
+ EOF
23
+
24
+ SOPT.usage if options[:help]
10
25
 
11
26
  file = ARGV.shift
12
27
 
@@ -14,6 +29,9 @@ file = STDIN if file == '-'
14
29
 
15
30
  raise ParameterException, "Please specify the tsv file as argument" if file.nil?
16
31
 
32
+ options[:fields] = options[:fields].split(/,\|/) if options[:fields]
33
+ options[:header_hash] = options["header_hash"]
34
+
17
35
  case
18
36
  when options[:tokyocabinet]
19
37
  tsv = Persist.open_tokyocabinet(file, false)
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Slice column from tsv
10
+
11
+ $ rbbt tsv slice [options] file.tsv -f "Field 1"
12
+
13
+ Display summary informations. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -hh--header_hash* Change the character used to mark the header line (defaults to #)
18
+ -f--fields* Field to slice (comma-separated)
19
+ -h--help Help
20
+ EOF
21
+
22
+ SOPT.usage if options[:help]
23
+
24
+ file = ARGV.shift
25
+
26
+ file = STDIN if file == '-'
27
+
28
+ raise ParameterException, "Please specify the tsv file as argument" if file.nil?
29
+
30
+ fields = options[:fields]
31
+ raise ParameterException, "Please specify the fields to slice" if fields.nil?
32
+
33
+ options[:header_hash] = options["header_hash"]
34
+
35
+ case
36
+ when options[:tokyocabinet]
37
+ tsv = Persist.open_tokyocabinet(file, false)
38
+ puts tsv.summary
39
+ when options[:tokyocabinet_bd]
40
+ tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
41
+ puts tsv.summary
42
+ else
43
+ tsv = TSV.open(file, options)
44
+ end
45
+
46
+ puts tsv.slice(fields.split(","))
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
7
+
8
+ options = SOPT.setup <<EOF
9
+ Subset entries in a tsv
10
+
11
+ $ rbbt tsv subset [options] file.tsv --subset key1,key2,key3
12
+
13
+ Subsets entries from a TSV file from a given list. Works with Tokyocabinet HDB and DBD as well.
14
+
15
+ -tch--tokyocabinet File is a TC HDB
16
+ -tcb--tokyocabinet_bd File is a TC BDB
17
+ -hh--header_hash* Change the character used to mark the header line (defaults to #)
18
+ -s--subset* Subset of samples (Comma-separated of file)
19
+ -h--help Help
20
+ EOF
21
+
22
+ SOPT.usage if options[:help]
23
+
24
+ file = ARGV.shift
25
+
26
+ file = STDIN if file == '-'
27
+
28
+ raise ParameterException, "Please specify the tsv file as argument" if file.nil?
29
+
30
+ subset = options[:subset]
31
+ raise ParameterException, "Please specify a subset of keys" if subset.nil?
32
+
33
+ if File.exists?(subset)
34
+ subset = Open.read(subset).split("\n")
35
+ else
36
+ subset = subset.split(',')
37
+ end
38
+
39
+ options[:fields] = options[:fields].split(/,\|/) if options[:fields]
40
+ options[:header_hash] = options["header_hash"]
41
+
42
+ case
43
+ when options[:tokyocabinet]
44
+ tsv = Persist.open_tokyocabinet(file, false)
45
+ puts tsv.summary
46
+ when options[:tokyocabinet_bd]
47
+ tsv = Persist.open_tokyocabinet(file, false, nil, TokyoCabinet::BDB)
48
+ puts tsv.summary
49
+ else
50
+ tsv = TSV.open(file, options)
51
+ end
52
+
53
+ puts tsv.select(subset)
@@ -15,6 +15,7 @@ Use - to read from STDIN
15
15
  -tcb--tokyocabinet_bd File is a tokyocabinet B database
16
16
  -f--field* Limit to a particular field
17
17
  -h--help Print this help
18
+ -l--lines Separate in lines
18
19
 
19
20
  EOF
20
21
  rbbt_usage and exit 0 if options[:help]
@@ -37,6 +38,11 @@ else
37
38
  tsv = TSV.open(file)
38
39
  end
39
40
 
41
+ field = options[:field]
40
42
  tsv = tsv.slice(field) if field
41
43
 
42
- tsv.each{|k,v| puts (Array === v ? v.flatten*"\t" : v.to_s ) }
44
+ if options[:lines]
45
+ tsv.each{|k,v| puts (Array === v ? v.flatten*"\n" : v.to_s ) }
46
+ else
47
+ tsv.each{|k,v| puts (Array === v ? v.flatten*"\t" : v.to_s ) }
48
+ end