rbbt-util 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -12,7 +12,7 @@ class TSV
12
12
  when Integer === new_fields
13
13
  [new_fields]
14
14
  when String === new_fields
15
- [identify_field new_fields]
15
+ [identify_field(new_fields)]
16
16
  when Array === new_fields
17
17
  new_fields.collect{|new_field| identify_field new_field}
18
18
  when new_fields == :key
@@ -46,7 +46,7 @@ class TSV
46
46
 
47
47
  # Cycle through
48
48
  if monitor
49
- desc = "Parsing Stream"
49
+ desc = "Iterating TSV"
50
50
  step = 100
51
51
  if Hash === monitor
52
52
  desc = monitor[:desc] if monitor.include? :desc
@@ -84,14 +84,22 @@ class TSV
84
84
  f
85
85
  else
86
86
  f = fields.dup
87
- if type == :double
87
+ case
88
+ when type == :single
89
+ f = [f,key]
90
+ when type == :double
88
91
  f.push [key]
89
92
  else
90
93
  f.push key
91
94
  end
92
95
  f.values_at *new_field_positions.collect{|pos| pos == :key ? -1 : pos }
93
96
  end
94
- new_field_values = NamedArray.name new_field_values, new_field_names
97
+
98
+ if type == :single
99
+ new_field_values = new_field_values.first
100
+ else
101
+ new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
102
+ end
95
103
 
96
104
  next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
97
105
  yield new_key_value, new_field_values
@@ -221,14 +229,17 @@ class TSV
221
229
  method.each{|item| new[item] = self[item] if self.include? item}
222
230
  when Array === method
223
231
  through :key, key do |key, values|
232
+ values = [values] if type == :single
224
233
  new[key] = self[key] if (values.flatten & method).any?
225
234
  end
226
235
  when Regexp === method
227
236
  through :key, key do |key, values|
237
+ values = [values] if type == :single
228
238
  new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
229
239
  end
230
240
  when String === method
231
241
  through :key, key do |key, values|
242
+ values = [values] if type == :single
232
243
  new[key] = self[key] if values.flatten.select{|v| v == method}.any?
233
244
  end
234
245
  end
@@ -267,7 +278,7 @@ class TSV
267
278
  end
268
279
 
269
280
  def add_field(name = nil)
270
- each do |key, values|
281
+ through do |key, values|
271
282
  new_values = yield(key, values)
272
283
  new_values = [new_values] if type == :double and not Array === new_values
273
284
 
@@ -280,7 +291,7 @@ class TSV
280
291
  end
281
292
 
282
293
  def add_fields(names = nil)
283
- each do |key, values|
294
+ through do |key, values|
284
295
  new_values = yield(key, values)
285
296
  new_values = [new_values] if type == :double and not Array == new_values
286
297
 
@@ -29,3 +29,13 @@ class TSV
29
29
  counts
30
30
  end
31
31
  end
32
+
33
+ module Open
34
+ def self.tsv(file, *args)
35
+ TSV.new file, *args
36
+ end
37
+
38
+ def self.index(file, *args)
39
+ TSV.index file, *args
40
+ end
41
+ end
@@ -142,12 +142,29 @@ class TSV
142
142
 
143
143
  #{{{ Process rest
144
144
  data = options[:persistence_data] || {}
145
+ if Persistence::TSV === data
146
+ serializer = case
147
+ when ((cast == "to_i" or cast == :to_i) and type == :single)
148
+ :integer
149
+ when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
150
+ :integer_array
151
+ when (type == :list or type == :flat)
152
+ :list
153
+ when type == :single
154
+ :single
155
+ else
156
+ :double
157
+ end
158
+ data.serializer = serializer
159
+ end
160
+
161
+
145
162
  single = type.to_sym != :double
146
163
  max_cols = 0
147
164
  while line do
148
165
  line.chomp!
149
166
 
150
- progress_monitor.tick(stream.pos) if progress_monitor
167
+ progress_monitor.tick(stream.pos) if progress_monitor
151
168
 
152
169
  if line.empty? or
153
170
  (exclude and exclude.call(line)) or
@@ -4,6 +4,15 @@ require 'rbbt/util/persistence'
4
4
  require 'rbbt/util/misc'
5
5
 
6
6
  module WorkFlow
7
+
8
+ def self.require_workflow(path)
9
+ if Rbbt.etc.workflow_dir.exists?
10
+ require Dir.glob(File.join(Rbbt.etc.workflow_dir.read.strip, '*', path + '.rb')).first
11
+ else
12
+ require Dir.glob(File.join(Rbbt.share.workflows.find, '*', path + '.rb')).first
13
+ end
14
+ end
15
+
7
16
  def self.extended(base)
8
17
  class << base
9
18
  attr_accessor :tasks, :jobdir, :dangling_options, :dangling_option_descriptions,
@@ -53,8 +62,8 @@ module WorkFlow
53
62
  @dangling_option_defaults[name] = default if default
54
63
  end
55
64
 
56
- def task_dependencies(dependencies)
57
- dependencies = [dependencies] unless Array === dependencies
65
+ def task_dependencies(*dependencies)
66
+ dependencies = dependencies.flatten
58
67
  @dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
59
68
  end
60
69
 
@@ -93,7 +102,7 @@ module WorkFlow
93
102
  @last_task = task
94
103
  end
95
104
 
96
- def job(task, jobname, *args)
105
+ def job(task, jobname = "Default", *args)
97
106
  tasks[task].job(jobname, *args)
98
107
  end
99
108
 
@@ -105,7 +105,6 @@ if __FILE__ == $0
105
105
  class SequenceWF < WorkFlowWS
106
106
  self.klass = Organism
107
107
  end
108
-
109
108
 
110
109
  wf = SequenceWF.new
111
110
  wf.export :genomic_mutations_to_genes
@@ -8,8 +8,6 @@ OPT_SCM_DIR="$SOFTWARE_DIR/scm"; [ -d $OPT_SCM_DIR ] || mkdir -p $OPT_SCM_DIR
8
8
  OPT_JAR_DIR="$OPT_DIR/jars"; [ -d $OPT_JAR_DIR ] || mkdir -p $OPT_JAR_DIR
9
9
  OPT_BUILD_DIR="$SOFTWARE_DIR/.build"; [ -d $OPT_BUILD_DIR ] || mkdir -p $OPT_BUILD_DIR
10
10
 
11
- #source "$HOME/config/bash/_utility_functions"
12
-
13
11
  function expand_path(){
14
12
  name=$(basename $1)
15
13
  dir=$(dirname $1)
data/share/lib/R/util.R CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
26
26
  }
27
27
 
28
28
  rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
29
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
30
30
  f = file(filename, 'r');
31
31
  headers = readLines(f, 1);
32
32
  if (length(grep("^#: ", headers)) > 0){
@@ -93,7 +93,7 @@ rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
93
93
 
94
94
  rbbt.add <- function(data, new){
95
95
  if (is.null(data)){
96
- return(new);
96
+ return(c(new));
97
97
  }else{
98
98
  return(c(data, new));
99
99
  }
@@ -101,7 +101,7 @@ rbbt.add <- function(data, new){
101
101
 
102
102
  rbbt.acc <- function(data, new){
103
103
  if (is.null(data)){
104
- return(new);
104
+ return(c(new));
105
105
  }else{
106
106
  return(unique(c(data, new)));
107
107
  }
@@ -22,6 +22,7 @@ class TestCmd < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  def test_pipe
25
+ assert_equal("test\n", CMD.cmd("echo test", :pipe => true).read)
25
26
  assert_equal("test\n", CMD.cmd("echo '{opt}' test", :pipe => true).read)
26
27
  assert_equal("test", CMD.cmd("echo '{opt}' test", "-n" => true, :pipe => true).read)
27
28
  assert_equal("test2\n", CMD.cmd("cut", "-f" => 2, "-d" => '" "', :in => "test1 test2", :pipe => true).read)
@@ -41,4 +42,26 @@ class TestCmd < Test::Unit::TestCase
41
42
  assert_raise CMD::CMDError do CMD.cmd('ls -fake_option', :stderr => true, :pipe => true).read end
42
43
  end
43
44
 
45
+ def test_pipes
46
+ text = <<-EOF
47
+ line1
48
+ line2
49
+ line3
50
+ line11
51
+ line22
52
+ line33
53
+ EOF
54
+
55
+ TmpFile.with_file(text * 100) do |file|
56
+ CMD.cmd("gzip #{ file }")
57
+
58
+ gz = CMD.cmd("gunzip", :in => File.open(file + '.gz'), :pipe => true)
59
+ io = CMD.cmd('tail -n 10', :in => gz, :pipe => true)
60
+ assert_equal 10, io.read.split(/\n/).length
61
+
62
+
63
+ end
64
+
65
+ end
66
+
44
67
  end
@@ -3,7 +3,7 @@ require 'rbbt/util/excel2tsv'
3
3
 
4
4
  class TestTSV < Test::Unit::TestCase
5
5
  def test_tsv2excel
6
- tsv = TSV.excel2tsv(test_datafile('Test.xls'), :header => true)
6
+ tsv = TSV.excel2tsv(datafile_test('Test.xls'), :header => true)
7
7
  assert_equal 'Id', tsv.key_field
8
8
  end
9
9
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
  class TestMisc < Test::Unit::TestCase
6
6
 
7
7
  def test_pdf2text_example
8
- assert PDF2Text.pdf2text(test_datafile('example.pdf')).read =~ /An Example Paper/i
8
+ assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
9
9
  end
10
10
 
11
11
  def test_pdf2text_EPAR
@@ -13,7 +13,7 @@ class TestMisc < Test::Unit::TestCase
13
13
  end
14
14
 
15
15
  def test_pdf2text_wrong
16
- assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
16
+ assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
17
17
  end
18
18
 
19
19
  def test_string2hash
@@ -84,32 +84,62 @@ This is an example file. Entries are separated by Entry
84
84
  a = {:a => 1, "b" => 2}
85
85
  a.extend IndiferentHash
86
86
 
87
- assert 1, a["a"]
88
- assert 1, a[:a]
89
- assert 2, a["b"]
90
- assert 2, a[:b]
87
+ assert_equal 1, a["a"]
88
+ assert_equal 1, a[:a]
89
+ assert_equal 2, a["b"]
90
+ assert_equal 2, a[:b]
91
91
  end
92
92
 
93
93
  def test_lockfile
94
+
94
95
  TmpFile.with_file do |tmpfile|
95
96
  pids = []
96
- 3.times do |i|
97
+ 4.times do |i|
97
98
  pids << Process.fork do
98
- pid = pid.to_s
99
- Misc.lock(tmpfile, pid) do |f, val|
99
+ pid = Process.pid().to_s
100
+ status = Misc.lock(tmpfile, pid) do |f, val|
100
101
  Open.write(f, val)
101
102
  sleep rand * 2
102
103
  if pid == Open.read(tmpfile)
103
- exit(0)
104
+ 0
104
105
  else
105
- exit(1)
106
+ 1
106
107
  end
107
108
  end
109
+ exit(status)
108
110
  end
111
+
109
112
  end
110
113
  pids.each do |pid| Process.waitpid pid; assert $?.success? end
111
114
  end
112
115
 
113
116
  end
114
117
 
118
+ def test_divide
119
+ assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
120
+ end
121
+
122
+ def test_process_to_hash
123
+ list = [1,2,3,4]
124
+ assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
125
+ end
126
+
127
+ def test_add_method
128
+ a = "Test"
129
+ Misc.add_method a, :invert do self.reverse end
130
+ assert_equal "Test".reverse, a.invert
131
+ end
132
+
133
+ def test_redefine_method
134
+ a = "Test"
135
+ worked = false
136
+ Misc.redefine_method a, :reverse, :old_reverse do worked = true; self.old_reverse end
137
+ assert_equal "Test".reverse, a.reverse
138
+ assert worked
139
+ end
140
+
141
+ def test_merge_sorted_arrays
142
+ assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
143
+ end
144
+
115
145
  end
@@ -49,7 +49,7 @@ class TestOpen < Test::Unit::TestCase
49
49
  end
50
50
  end
51
51
 
52
- def test_read_grep
52
+ def test_read_grep
53
53
  content =<<-EOF
54
54
  1
55
55
  2
@@ -67,7 +67,7 @@ class TestOpen < Test::Unit::TestCase
67
67
  Open.read(file, :grep => ["1","3"]) do |line| sum += line.to_i end
68
68
  assert_equal(1 + 3, sum)
69
69
  end
70
-
70
+
71
71
  end
72
72
 
73
73
  def test_gzip
@@ -153,11 +153,11 @@ row2 2 4 6 8
153
153
  end
154
154
 
155
155
  def test_non_blocking
156
- $a = TSV.new test_datafile('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
156
+ $a = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
157
157
  $a.data.read
158
158
 
159
159
  pid = Process.fork do
160
- $b = TSV.new test_datafile('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
160
+ $b = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
161
161
  $b.data.close
162
162
  end
163
163
 
@@ -13,18 +13,6 @@ file 'foo' do |t|
13
13
  end
14
14
  EOF
15
15
 
16
- tmp.work.define_as_rake tmp.Rakefile.find.produce
17
-
18
- tmp.test.install.xclip.define_as_string <<-EOF
19
- name="xclip:0.12"
20
- url="http://downloads.sourceforge.net/project/xclip/xclip/0.12/xclip-0.12.tar.gz?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fxclip%2F&ts=1286472387&use_mirror=sunet"
21
-
22
- install_src "$name" "$url"
23
- EOF
24
-
25
- FileUtils.chmod 0770, tmp.test.install.xclip.produce
26
-
27
- software.opt.xclip.define_as_install tmp.test.install.xclip.find
28
16
  end
29
17
 
30
18
  Open.cachedir = Rbbt.tmp.cache.find :user
@@ -36,10 +24,10 @@ end
36
24
 
37
25
  class TestResource < Test::Unit::TestCase
38
26
  def test_methods
39
- assert Resource.methods.include?("resources")
40
- assert ! Resource.methods.include?("pkgdir")
41
- assert ! Phgx.methods.include?("resources")
42
- assert Phgx.methods.include?("pkgdir")
27
+ assert Resource.methods.collect{|m| m.to_s}.include?("resources")
28
+ assert ! Resource.methods.collect{|m| m.to_s}.include?("pkgdir")
29
+ assert ! Phgx.methods.collect{|m| m.to_s}.include?("resources")
30
+ assert Phgx.methods.collect{|m| m.to_s}.include?("pkgdir")
43
31
 
44
32
  end
45
33
  def test_resolve
@@ -84,9 +72,5 @@ class TestResource < Test::Unit::TestCase
84
72
  FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
85
73
  end
86
74
  end
87
-
88
- def test_install
89
- assert File.exists?(Rbbt.software.opt.xclip.produce)
90
- end
91
75
  end
92
76
 
@@ -86,5 +86,43 @@ class TestTCHash < Test::Unit::TestCase
86
86
  Process.wait pid
87
87
  end
88
88
  end
89
+
90
+ def test_serializer_alias
91
+ TmpFile.with_file do |f|
92
+ t = TCHash.get f, true, :double
93
+ t["1"] = [[1],[2]]
94
+ t["2"] = [[3],[4,5]]
95
+
96
+ t = TCHash.get f
97
+ assert_equal [["3"],["4","5"]], t["2"]
98
+
99
+ t.close
100
+ TCHash::CONNECTIONS.clear
101
+
102
+ t = TCHash.get f
103
+ assert_equal [["3"],["4","5"]], t["2"]
104
+ end
105
+ end
106
+
107
+ def test_serializer_reload
108
+ TmpFile.with_file do |f|
109
+ t = TCHash.get f, true, :double
110
+ t["1"] = [[1],[2]]
111
+ t["2"] = [[3],[4,5]]
112
+
113
+ t = TCHash.get f
114
+ assert_equal TCHash::StringDoubleArraySerializer, t.serializer
115
+ assert_equal [["3"],["4","5"]], t["2"]
116
+
117
+ t.close
118
+ TCHash::CONNECTIONS.clear
119
+
120
+ t = TCHash.get f
121
+ assert_equal [["3"],["4","5"]], t["2"]
122
+ end
123
+ end
124
+
125
+
126
+
89
127
  end
90
128