rbbt-util 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -12,7 +12,7 @@ class TSV
12
12
  when Integer === new_fields
13
13
  [new_fields]
14
14
  when String === new_fields
15
- [identify_field new_fields]
15
+ [identify_field(new_fields)]
16
16
  when Array === new_fields
17
17
  new_fields.collect{|new_field| identify_field new_field}
18
18
  when new_fields == :key
@@ -46,7 +46,7 @@ class TSV
46
46
 
47
47
  # Cycle through
48
48
  if monitor
49
- desc = "Parsing Stream"
49
+ desc = "Iterating TSV"
50
50
  step = 100
51
51
  if Hash === monitor
52
52
  desc = monitor[:desc] if monitor.include? :desc
@@ -84,14 +84,22 @@ class TSV
84
84
  f
85
85
  else
86
86
  f = fields.dup
87
- if type == :double
87
+ case
88
+ when type == :single
89
+ f = [f,key]
90
+ when type == :double
88
91
  f.push [key]
89
92
  else
90
93
  f.push key
91
94
  end
92
95
  f.values_at *new_field_positions.collect{|pos| pos == :key ? -1 : pos }
93
96
  end
94
- new_field_values = NamedArray.name new_field_values, new_field_names
97
+
98
+ if type == :single
99
+ new_field_values = new_field_values.first
100
+ else
101
+ new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
102
+ end
95
103
 
96
104
  next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
97
105
  yield new_key_value, new_field_values
@@ -221,14 +229,17 @@ class TSV
221
229
  method.each{|item| new[item] = self[item] if self.include? item}
222
230
  when Array === method
223
231
  through :key, key do |key, values|
232
+ values = [values] if type == :single
224
233
  new[key] = self[key] if (values.flatten & method).any?
225
234
  end
226
235
  when Regexp === method
227
236
  through :key, key do |key, values|
237
+ values = [values] if type == :single
228
238
  new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
229
239
  end
230
240
  when String === method
231
241
  through :key, key do |key, values|
242
+ values = [values] if type == :single
232
243
  new[key] = self[key] if values.flatten.select{|v| v == method}.any?
233
244
  end
234
245
  end
@@ -267,7 +278,7 @@ class TSV
267
278
  end
268
279
 
269
280
  def add_field(name = nil)
270
- each do |key, values|
281
+ through do |key, values|
271
282
  new_values = yield(key, values)
272
283
  new_values = [new_values] if type == :double and not Array === new_values
273
284
 
@@ -280,7 +291,7 @@ class TSV
280
291
  end
281
292
 
282
293
  def add_fields(names = nil)
283
- each do |key, values|
294
+ through do |key, values|
284
295
  new_values = yield(key, values)
285
296
  new_values = [new_values] if type == :double and not Array == new_values
286
297
 
@@ -29,3 +29,13 @@ class TSV
29
29
  counts
30
30
  end
31
31
  end
32
+
33
+ module Open
34
+ def self.tsv(file, *args)
35
+ TSV.new file, *args
36
+ end
37
+
38
+ def self.index(file, *args)
39
+ TSV.index file, *args
40
+ end
41
+ end
@@ -142,12 +142,29 @@ class TSV
142
142
 
143
143
  #{{{ Process rest
144
144
  data = options[:persistence_data] || {}
145
+ if Persistence::TSV === data
146
+ serializer = case
147
+ when ((cast == "to_i" or cast == :to_i) and type == :single)
148
+ :integer
149
+ when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
150
+ :integer_array
151
+ when (type == :list or type == :flat)
152
+ :list
153
+ when type == :single
154
+ :single
155
+ else
156
+ :double
157
+ end
158
+ data.serializer = serializer
159
+ end
160
+
161
+
145
162
  single = type.to_sym != :double
146
163
  max_cols = 0
147
164
  while line do
148
165
  line.chomp!
149
166
 
150
- progress_monitor.tick(stream.pos) if progress_monitor
167
+ progress_monitor.tick(stream.pos) if progress_monitor
151
168
 
152
169
  if line.empty? or
153
170
  (exclude and exclude.call(line)) or
@@ -4,6 +4,15 @@ require 'rbbt/util/persistence'
4
4
  require 'rbbt/util/misc'
5
5
 
6
6
  module WorkFlow
7
+
8
+ def self.require_workflow(path)
9
+ if Rbbt.etc.workflow_dir.exists?
10
+ require Dir.glob(File.join(Rbbt.etc.workflow_dir.read.strip, '*', path + '.rb')).first
11
+ else
12
+ require Dir.glob(File.join(Rbbt.share.workflows.find, '*', path + '.rb')).first
13
+ end
14
+ end
15
+
7
16
  def self.extended(base)
8
17
  class << base
9
18
  attr_accessor :tasks, :jobdir, :dangling_options, :dangling_option_descriptions,
@@ -53,8 +62,8 @@ module WorkFlow
53
62
  @dangling_option_defaults[name] = default if default
54
63
  end
55
64
 
56
- def task_dependencies(dependencies)
57
- dependencies = [dependencies] unless Array === dependencies
65
+ def task_dependencies(*dependencies)
66
+ dependencies = dependencies.flatten
58
67
  @dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
59
68
  end
60
69
 
@@ -93,7 +102,7 @@ module WorkFlow
93
102
  @last_task = task
94
103
  end
95
104
 
96
- def job(task, jobname, *args)
105
+ def job(task, jobname = "Default", *args)
97
106
  tasks[task].job(jobname, *args)
98
107
  end
99
108
 
@@ -105,7 +105,6 @@ if __FILE__ == $0
105
105
  class SequenceWF < WorkFlowWS
106
106
  self.klass = Organism
107
107
  end
108
-
109
108
 
110
109
  wf = SequenceWF.new
111
110
  wf.export :genomic_mutations_to_genes
@@ -8,8 +8,6 @@ OPT_SCM_DIR="$SOFTWARE_DIR/scm"; [ -d $OPT_SCM_DIR ] || mkdir -p $OPT_SCM_DIR
8
8
  OPT_JAR_DIR="$OPT_DIR/jars"; [ -d $OPT_JAR_DIR ] || mkdir -p $OPT_JAR_DIR
9
9
  OPT_BUILD_DIR="$SOFTWARE_DIR/.build"; [ -d $OPT_BUILD_DIR ] || mkdir -p $OPT_BUILD_DIR
10
10
 
11
- #source "$HOME/config/bash/_utility_functions"
12
-
13
11
  function expand_path(){
14
12
  name=$(basename $1)
15
13
  dir=$(dirname $1)
data/share/lib/R/util.R CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
26
26
  }
27
27
 
28
28
  rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
29
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
30
30
  f = file(filename, 'r');
31
31
  headers = readLines(f, 1);
32
32
  if (length(grep("^#: ", headers)) > 0){
@@ -93,7 +93,7 @@ rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
93
93
 
94
94
  rbbt.add <- function(data, new){
95
95
  if (is.null(data)){
96
- return(new);
96
+ return(c(new));
97
97
  }else{
98
98
  return(c(data, new));
99
99
  }
@@ -101,7 +101,7 @@ rbbt.add <- function(data, new){
101
101
 
102
102
  rbbt.acc <- function(data, new){
103
103
  if (is.null(data)){
104
- return(new);
104
+ return(c(new));
105
105
  }else{
106
106
  return(unique(c(data, new)));
107
107
  }
@@ -22,6 +22,7 @@ class TestCmd < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  def test_pipe
25
+ assert_equal("test\n", CMD.cmd("echo test", :pipe => true).read)
25
26
  assert_equal("test\n", CMD.cmd("echo '{opt}' test", :pipe => true).read)
26
27
  assert_equal("test", CMD.cmd("echo '{opt}' test", "-n" => true, :pipe => true).read)
27
28
  assert_equal("test2\n", CMD.cmd("cut", "-f" => 2, "-d" => '" "', :in => "test1 test2", :pipe => true).read)
@@ -41,4 +42,26 @@ class TestCmd < Test::Unit::TestCase
41
42
  assert_raise CMD::CMDError do CMD.cmd('ls -fake_option', :stderr => true, :pipe => true).read end
42
43
  end
43
44
 
45
+ def test_pipes
46
+ text = <<-EOF
47
+ line1
48
+ line2
49
+ line3
50
+ line11
51
+ line22
52
+ line33
53
+ EOF
54
+
55
+ TmpFile.with_file(text * 100) do |file|
56
+ CMD.cmd("gzip #{ file }")
57
+
58
+ gz = CMD.cmd("gunzip", :in => File.open(file + '.gz'), :pipe => true)
59
+ io = CMD.cmd('tail -n 10', :in => gz, :pipe => true)
60
+ assert_equal 10, io.read.split(/\n/).length
61
+
62
+
63
+ end
64
+
65
+ end
66
+
44
67
  end
@@ -3,7 +3,7 @@ require 'rbbt/util/excel2tsv'
3
3
 
4
4
  class TestTSV < Test::Unit::TestCase
5
5
  def test_tsv2excel
6
- tsv = TSV.excel2tsv(test_datafile('Test.xls'), :header => true)
6
+ tsv = TSV.excel2tsv(datafile_test('Test.xls'), :header => true)
7
7
  assert_equal 'Id', tsv.key_field
8
8
  end
9
9
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
  class TestMisc < Test::Unit::TestCase
6
6
 
7
7
  def test_pdf2text_example
8
- assert PDF2Text.pdf2text(test_datafile('example.pdf')).read =~ /An Example Paper/i
8
+ assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
9
9
  end
10
10
 
11
11
  def test_pdf2text_EPAR
@@ -13,7 +13,7 @@ class TestMisc < Test::Unit::TestCase
13
13
  end
14
14
 
15
15
  def test_pdf2text_wrong
16
- assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
16
+ assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
17
17
  end
18
18
 
19
19
  def test_string2hash
@@ -84,32 +84,62 @@ This is an example file. Entries are separated by Entry
84
84
  a = {:a => 1, "b" => 2}
85
85
  a.extend IndiferentHash
86
86
 
87
- assert 1, a["a"]
88
- assert 1, a[:a]
89
- assert 2, a["b"]
90
- assert 2, a[:b]
87
+ assert_equal 1, a["a"]
88
+ assert_equal 1, a[:a]
89
+ assert_equal 2, a["b"]
90
+ assert_equal 2, a[:b]
91
91
  end
92
92
 
93
93
  def test_lockfile
94
+
94
95
  TmpFile.with_file do |tmpfile|
95
96
  pids = []
96
- 3.times do |i|
97
+ 4.times do |i|
97
98
  pids << Process.fork do
98
- pid = pid.to_s
99
- Misc.lock(tmpfile, pid) do |f, val|
99
+ pid = Process.pid().to_s
100
+ status = Misc.lock(tmpfile, pid) do |f, val|
100
101
  Open.write(f, val)
101
102
  sleep rand * 2
102
103
  if pid == Open.read(tmpfile)
103
- exit(0)
104
+ 0
104
105
  else
105
- exit(1)
106
+ 1
106
107
  end
107
108
  end
109
+ exit(status)
108
110
  end
111
+
109
112
  end
110
113
  pids.each do |pid| Process.waitpid pid; assert $?.success? end
111
114
  end
112
115
 
113
116
  end
114
117
 
118
+ def test_divide
119
+ assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
120
+ end
121
+
122
+ def test_process_to_hash
123
+ list = [1,2,3,4]
124
+ assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
125
+ end
126
+
127
+ def test_add_method
128
+ a = "Test"
129
+ Misc.add_method a, :invert do self.reverse end
130
+ assert_equal "Test".reverse, a.invert
131
+ end
132
+
133
+ def test_redefine_method
134
+ a = "Test"
135
+ worked = false
136
+ Misc.redefine_method a, :reverse, :old_reverse do worked = true; self.old_reverse end
137
+ assert_equal "Test".reverse, a.reverse
138
+ assert worked
139
+ end
140
+
141
+ def test_merge_sorted_arrays
142
+ assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
143
+ end
144
+
115
145
  end
@@ -49,7 +49,7 @@ class TestOpen < Test::Unit::TestCase
49
49
  end
50
50
  end
51
51
 
52
- def test_read_grep
52
+ def test_read_grep
53
53
  content =<<-EOF
54
54
  1
55
55
  2
@@ -67,7 +67,7 @@ class TestOpen < Test::Unit::TestCase
67
67
  Open.read(file, :grep => ["1","3"]) do |line| sum += line.to_i end
68
68
  assert_equal(1 + 3, sum)
69
69
  end
70
-
70
+
71
71
  end
72
72
 
73
73
  def test_gzip
@@ -153,11 +153,11 @@ row2 2 4 6 8
153
153
  end
154
154
 
155
155
  def test_non_blocking
156
- $a = TSV.new test_datafile('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
156
+ $a = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
157
157
  $a.data.read
158
158
 
159
159
  pid = Process.fork do
160
- $b = TSV.new test_datafile('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
160
+ $b = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
161
161
  $b.data.close
162
162
  end
163
163
 
@@ -13,18 +13,6 @@ file 'foo' do |t|
13
13
  end
14
14
  EOF
15
15
 
16
- tmp.work.define_as_rake tmp.Rakefile.find.produce
17
-
18
- tmp.test.install.xclip.define_as_string <<-EOF
19
- name="xclip:0.12"
20
- url="http://downloads.sourceforge.net/project/xclip/xclip/0.12/xclip-0.12.tar.gz?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fxclip%2F&ts=1286472387&use_mirror=sunet"
21
-
22
- install_src "$name" "$url"
23
- EOF
24
-
25
- FileUtils.chmod 0770, tmp.test.install.xclip.produce
26
-
27
- software.opt.xclip.define_as_install tmp.test.install.xclip.find
28
16
  end
29
17
 
30
18
  Open.cachedir = Rbbt.tmp.cache.find :user
@@ -36,10 +24,10 @@ end
36
24
 
37
25
  class TestResource < Test::Unit::TestCase
38
26
  def test_methods
39
- assert Resource.methods.include?("resources")
40
- assert ! Resource.methods.include?("pkgdir")
41
- assert ! Phgx.methods.include?("resources")
42
- assert Phgx.methods.include?("pkgdir")
27
+ assert Resource.methods.collect{|m| m.to_s}.include?("resources")
28
+ assert ! Resource.methods.collect{|m| m.to_s}.include?("pkgdir")
29
+ assert ! Phgx.methods.collect{|m| m.to_s}.include?("resources")
30
+ assert Phgx.methods.collect{|m| m.to_s}.include?("pkgdir")
43
31
 
44
32
  end
45
33
  def test_resolve
@@ -84,9 +72,5 @@ class TestResource < Test::Unit::TestCase
84
72
  FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
85
73
  end
86
74
  end
87
-
88
- def test_install
89
- assert File.exists?(Rbbt.software.opt.xclip.produce)
90
- end
91
75
  end
92
76
 
@@ -86,5 +86,43 @@ class TestTCHash < Test::Unit::TestCase
86
86
  Process.wait pid
87
87
  end
88
88
  end
89
+
90
+ def test_serializer_alias
91
+ TmpFile.with_file do |f|
92
+ t = TCHash.get f, true, :double
93
+ t["1"] = [[1],[2]]
94
+ t["2"] = [[3],[4,5]]
95
+
96
+ t = TCHash.get f
97
+ assert_equal [["3"],["4","5"]], t["2"]
98
+
99
+ t.close
100
+ TCHash::CONNECTIONS.clear
101
+
102
+ t = TCHash.get f
103
+ assert_equal [["3"],["4","5"]], t["2"]
104
+ end
105
+ end
106
+
107
+ def test_serializer_reload
108
+ TmpFile.with_file do |f|
109
+ t = TCHash.get f, true, :double
110
+ t["1"] = [[1],[2]]
111
+ t["2"] = [[3],[4,5]]
112
+
113
+ t = TCHash.get f
114
+ assert_equal TCHash::StringDoubleArraySerializer, t.serializer
115
+ assert_equal [["3"],["4","5"]], t["2"]
116
+
117
+ t.close
118
+ TCHash::CONNECTIONS.clear
119
+
120
+ t = TCHash.get f
121
+ assert_equal [["3"],["4","5"]], t["2"]
122
+ end
123
+ end
124
+
125
+
126
+
89
127
  end
90
128