rbbt-util 3.2.0 → 3.2.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -119,22 +119,19 @@ class TSV
119
119
  new_fields = options[:new_fields]
120
120
 
121
121
  new = {}
122
- new_key_field, new_fields = through new_key_field, new_fields do |key, values|
123
- if Array === key
124
- keys = key
125
- else
126
- keys = [key]
127
- end
128
-
129
- new_values = keys.each do |key|
130
- if new[key].nil?
131
- new[key] = values
132
- else
133
- if type == :double
134
- new[key] = new[key].zip(values).collect{|v| v.flatten}
122
+ new_key_field, new_fields = through new_key_field, new_fields do |keys, values|
123
+ if Array === keys
124
+ keys.each do |key|
125
+ if new[key].nil? or not type == :double
126
+ new[key] = values.collect{|l| l.dup}
127
+ else
128
+ new[key] = new[key].zip(values).collect{|old_list, new_list| old_list.concat new_list}
135
129
  end
136
130
  end
131
+ else
132
+ new[keys] = values
137
133
  end
134
+ nil
138
135
  end
139
136
 
140
137
  new = TSV.new new
@@ -318,7 +318,7 @@ class TSV
318
318
 
319
319
  fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
320
320
  fields ||= other_fields
321
- [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
321
+ [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
322
322
  end
323
323
 
324
324
  end
@@ -26,6 +26,14 @@ module Resource
26
26
  TSV.index self, options
27
27
  end
28
28
 
29
+ def pos_index(pos_field, options = {})
30
+ TSV.pos_index self, pos_field, options
31
+ end
32
+
33
+ def range_index(start_field, end_field, options = {})
34
+ TSV.range_index self, start_field, end_field, options
35
+ end
36
+
29
37
  def open(options = {})
30
38
  produce
31
39
  Open.open(self.find, options)
@@ -5,12 +5,29 @@ require 'rbbt/util/misc'
5
5
 
6
6
  module WorkFlow
7
7
 
8
- def self.require_workflow(path)
9
- if Rbbt.etc.workflow_dir.exists?
10
- require Dir.glob(File.join(Rbbt.etc.workflow_dir.read.strip, '*', path + '.rb')).first
11
- else
12
- require Dir.glob(File.join(Rbbt.share.workflows.find, '*', path + '.rb')).first
8
+ def self.require_workflow(wf_name, wf_dir = nil)
9
+
10
+ if File.exists? wf_name
11
+ require "./workflow.rb"
12
+ return
13
13
  end
14
+
15
+ wf_dir ||= case
16
+ when File.exists?(File.join(File.dirname(Resource.caller_lib_dir), wf_name))
17
+ File.join(File.dirname(Resource.caller_lib_dir), wf_name)
18
+ when defined? Rbbt
19
+ if Rbbt.pkg.etc.workflow_dir.exists?
20
+ File.join(pkg.etc.workflow_dir.read.strip, wf_name)
21
+ else
22
+ Rbbt.workflows[wf_name]
23
+ end
24
+ else
25
+ File.join(ENV["HOME"], '.workflows')
26
+ end
27
+
28
+ wf_dir = Resource::Path.path(wf_dir)
29
+ $LOAD_PATH.unshift(File.join(File.dirname(wf_dir["workflow.rb"].find), 'lib'))
30
+ require wf_dir["workflow.rb"].find
14
31
  end
15
32
 
16
33
  def self.extended(base)
@@ -74,9 +91,9 @@ module WorkFlow
74
91
  def process_dangling
75
92
  res = [
76
93
  @dangling_options,
77
- Hash[*@dangling_options.zip(@dangling_option_descriptions.values_at(*@dangling_options)).flatten],
78
- Hash[*@dangling_options.zip(@dangling_option_types.values_at(*@dangling_options)).flatten],
79
- Hash[*@dangling_options.zip(@dangling_option_defaults.values_at(*@dangling_options)).flatten],
94
+ Misc.merge2hash(@dangling_options, @dangling_option_descriptions.values_at(*@dangling_options)),
95
+ Misc.merge2hash(@dangling_options, @dangling_option_types.values_at(*@dangling_options)),
96
+ Misc.merge2hash(@dangling_options, @dangling_option_defaults.values_at(*@dangling_options)),
80
97
  (@dangling_dependencies || [@last_task]).compact,
81
98
  @dangling_description,
82
99
  ]
@@ -103,6 +120,7 @@ module WorkFlow
103
120
  end
104
121
 
105
122
  def job(task, jobname = "Default", *args)
123
+ raise "Task '#{ task }' unkown" if tasks[task].nil?
106
124
  tasks[task].job(jobname, *args)
107
125
  end
108
126
 
data/share/lib/R/util.R CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
26
26
  }
27
27
 
28
28
  rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
29
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
30
30
  f = file(filename, 'r');
31
31
  headers = readLines(f, 1);
32
32
  if (length(grep("^#: ", headers)) > 0){
@@ -139,3 +139,73 @@ rbbt.run <- function(filename){
139
139
  }
140
140
 
141
141
 
142
+ # UTILITIES
143
+
144
+ # Addapted from http://www.phaget4.org/R/image_matrix.html
145
+ rbbt.plot.matrix <- function(x, ...){
146
+ min <- min(x);
147
+ max <- max(x);
148
+ yLabels <- rownames(x);
149
+ xLabels <- colnames(x);
150
+ title <-c();
151
+ # check for additional function arguments
152
+ if( length(list(...)) ){
153
+ Lst <- list(...);
154
+ if( !is.null(Lst$zlim) ){
155
+ min <- Lst$zlim[1];
156
+ max <- Lst$zlim[2];
157
+ }
158
+ if( !is.null(Lst$yLabels) ){
159
+ yLabels <- c(Lst$yLabels);
160
+ }
161
+ if( !is.null(Lst$xLabels) ){
162
+ xLabels <- c(Lst$xLabels);
163
+ }
164
+ if( !is.null(Lst$title) ){
165
+ title <- Lst$title;
166
+ }
167
+ }
168
+ # check for null values
169
+ if( is.null(xLabels) ){
170
+ xLabels <- c(1:ncol(x));
171
+ }
172
+ if( is.null(yLabels) ){
173
+ yLabels <- c(1:nrow(x));
174
+ }
175
+
176
+ layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(4,1), heights=c(1,1));
177
+
178
+ # Red and green range from 0 to 1 while Blue ranges from 1 to 0
179
+ ColorRamp <- rgb( seq(0,1,length=256), # Red
180
+ seq(0,1,length=256), # Green
181
+ seq(1,0,length=256)) # Blue
182
+ ColorLevels <- seq(min, max, length=length(ColorRamp));
183
+
184
+ # Reverse Y axis
185
+ reverse <- nrow(x) : 1;
186
+ yLabels <- yLabels[reverse];
187
+ x <- x[reverse,];
188
+
189
+ # Data Map
190
+ par(mar = c(3,5,2.5,2));
191
+ image(1:length(xLabels), 1:length(yLabels), t(x), col=ColorRamp, xlab="",
192
+ ylab="", axes=FALSE, zlim=c(min,max));
193
+ if( !is.null(title) ){
194
+ title(main=title);
195
+ }
196
+ axis(BELOW<-1, at=1:length(xLabels), labels=xLabels, cex.axis=0.7);
197
+ axis(LEFT <-2, at=1:length(yLabels), labels=yLabels, las= HORIZONTAL<-1,
198
+ cex.axis=0.7);
199
+
200
+ # Color Scale
201
+ par(mar = c(3,2.5,2.5,2));
202
+ image(1, ColorLevels,
203
+ matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
204
+ col=ColorRamp,
205
+ xlab="",ylab="",
206
+ xaxt="n");
207
+
208
+ layout(1);
209
+ }
210
+
211
+
@@ -142,4 +142,21 @@ This is an example file. Entries are separated by Entry
142
142
  assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
143
143
  end
144
144
 
145
+ def test_intersect_sorted_arrays
146
+ assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
147
+ end
148
+
149
+
150
+ def test_in_dir
151
+ TmpFile.with_file do |dir|
152
+ FileUtils.mkdir_p dir
153
+ Open.write(File.join(dir, 'test_file_in_dir'), 'test_file_in_dir')
154
+ Misc.in_dir(dir) do
155
+ assert Dir.glob("*").include? 'test_file_in_dir'
156
+ end
157
+ assert Dir.glob(File.join(dir, "*")).include?(File.join(dir, 'test_file_in_dir'))
158
+ assert(! Dir.glob("*").include?('test_file_in_dir'))
159
+ end
160
+ end
161
+
145
162
  end
@@ -164,6 +164,38 @@ row2 2 4 6 8
164
164
  Process.wait pid
165
165
  end
166
166
 
167
+ def test_persist_fwt
168
+ content =<<-EOF
169
+ #: :sep=/\\s+/
170
+ #Id Start End
171
+ row1 1 10
172
+ row2 20 30
173
+ EOF
174
+
175
+ TmpFile.with_file(content) do |filename|
176
+ fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
177
+ fwt = TSV.new(filename).range_index("Start", "End", :persistence => false)
178
+ assert fwt[15].empty?
179
+ assert fwt[25].include? "row2"
180
+ fwt
181
+ end
182
+
183
+ assert fwt[5].include? "row1"
184
+ assert fwt[(5..25)].include? "row1"
185
+ assert fwt[(5..25)].include? "row2"
186
+
187
+ fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
188
+ assert false
189
+ end
190
+
191
+ assert fwt[5].include? "row1"
192
+ assert fwt[(5..25)].include? "row1"
193
+ assert fwt[(5..25)].include? "row2"
194
+
195
+ end
196
+
197
+ end
198
+
167
199
 
168
200
  end
169
201
 
@@ -67,6 +67,7 @@ class TestResource < Test::Unit::TestCase
67
67
  begin
68
68
  assert_equal "Test String", Rbbt.tmp.test_string.read
69
69
  assert_equal "Test String", Rbbt.tmp.work.foo.read
70
+ rescue
70
71
  ensure
71
72
  FileUtils.rm Rbbt.tmp.test_string.find if File.exists? Rbbt.tmp.test_string.find
72
73
  FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
@@ -37,6 +37,20 @@ class TestTask < Test::Unit::TestCase
37
37
  end
38
38
  end
39
39
 
40
+ def test_task_options_from_hash
41
+ TmpFile.with_file do |f|
42
+ task = Task.new(:test_task, nil, :name) do |name| Open.write(f, name) end
43
+ job = task.job(:job1, :name => "TestName")
44
+ assert_equal "job1" << "_" << Misc.hash2md5(:name => "TestName"), job.id
45
+ job.fork
46
+ job.join
47
+
48
+ assert File.exists? f
49
+ assert_equal "TestName", File.open(f).read
50
+ end
51
+ end
52
+
53
+
40
54
  def test_task_result
41
55
  task = Task.new(:test_task, nil, :name) do |name| name end
42
56
  job = task.job(:job1, "TestName")
@@ -54,6 +54,22 @@ class TestTCHash < Test::Unit::TestCase
54
54
  end
55
55
  end
56
56
 
57
+ def test_stringDoubleArraySerializer
58
+ TmpFile.with_file do |f|
59
+ t = TCHash.get f, true, TCHash::TSVSerializer
60
+ tsv = TSV.new({})
61
+ tsv["1"] = [[1],[2]]
62
+ tsv["2"] = [[3],[4,5]]
63
+ t["TSV"] = tsv
64
+
65
+ t = TCHash.get f
66
+ t.collect do |k,v|
67
+ assert_equal [["3"],["4","5"]], t["TSV"]["2"]
68
+ end
69
+ end
70
+ end
71
+
72
+
57
73
  def test_serializer_alias
58
74
  TmpFile.with_file do |f|
59
75
  t = TCHash.get f, true, :double
@@ -146,6 +146,5 @@ row2 A B C
146
146
  end
147
147
  end
148
148
 
149
-
150
149
  end
151
150
 
@@ -135,7 +135,33 @@ row2 A B
135
135
  end
136
136
  end
137
137
 
138
+ def test_delete
139
+ content =<<-EOF
140
+ #ID ValueA ValueB Comment
141
+ row1 a b c
142
+ row2 A B C
143
+ EOF
144
+
145
+ TmpFile.with_file(content) do |filename|
146
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
147
+ assert_equal 2, tsv.keys.length
148
+ tsv.delete "row2"
149
+ assert_equal 1, tsv.keys.length
150
+
151
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
152
+ tsv.filter
153
+ tsv.add_filter "field:ValueA", ["A"]
138
154
 
155
+ assert_equal 1, tsv.keys.length
156
+ assert_equal ["row2"], tsv.keys
157
+
158
+ tsv.delete "row2"
159
+ assert_equal 0, tsv.keys.length
160
+
161
+ tsv.pop_filter
162
+ assert_equal ["row1"], tsv.keys
163
+ end
164
+ end
139
165
 
140
166
  end
141
167
 
@@ -128,7 +128,39 @@ row2 A B
128
128
  tsv
129
129
  end
130
130
 
131
- def test_sorted_index
131
+ def test_pos_index
132
+ content =<<-EOF
133
+ #Id ValueA ValueB Pos
134
+ row1 a|aa|aaa b 0|10
135
+ row2 A B 30
136
+ EOF
137
+
138
+ TmpFile.with_file(content) do |filename|
139
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
140
+ index = tsv.pos_index("Pos")
141
+ assert_equal ["row1"], index[10]
142
+ end
143
+ end
144
+
145
+
146
+ def test_range_index
147
+ content =<<-EOF
148
+ #Id ValueA ValueB Pos1 Pos2
149
+ row1 a|aa|aaa b 0|10 10|30
150
+ row2 A B 30 35
151
+ EOF
152
+
153
+ TmpFile.with_file(content) do |filename|
154
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
155
+ index = tsv.pos_index("Pos1")
156
+ assert_equal ["row1"], index[10]
157
+
158
+ index = tsv.range_index("Pos1", "Pos2")
159
+ assert_equal ["row1"], index[20]
160
+ end
161
+ end
162
+
163
+ def test_range_index2
132
164
  data =<<-EOF
133
165
  #ID:Range
134
166
  #:012345678901234567890
@@ -142,7 +174,7 @@ g: ____
142
174
  EOF
143
175
  TmpFile.with_file(data) do |datafile|
144
176
  tsv = load_data(datafile)
145
- f = tsv.sorted_index
177
+ f = tsv.range_index("Start", "End")
146
178
 
147
179
  assert_equal %w(), f[0].sort
148
180
  assert_equal %w(b), f[1].sort
@@ -153,35 +185,55 @@ g: ____
153
185
  end
154
186
  end
155
187
 
156
- def test_pos_index
157
- content =<<-EOF
158
- #Id ValueA ValueB Pos
159
- row1 a|aa|aaa b 0|10
160
- row2 A B 30
188
+ def test_range_index_persistent
189
+ data =<<-EOF
190
+ #ID:Range
191
+ #:012345678901234567890
192
+ a: ______
193
+ b: ______
194
+ c: _______
195
+ d: ____
196
+ e: ______
197
+ f: ___
198
+ g: ____
161
199
  EOF
162
-
163
- TmpFile.with_file(content) do |filename|
164
- tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
165
- index = tsv.pos_index("Pos", :memory, true)
166
- assert_equal ["row1"], index[10]
200
+ TmpFile.with_file(data) do |datafile|
201
+ TmpFile.with_file(load_data(datafile)) do |tsvfile|
202
+ f = TSV.range_index(tsvfile, "Start", "End", :persistence => true)
203
+
204
+ assert_equal %w(), f[0].sort
205
+ assert_equal %w(b), f[1].sort
206
+ assert_equal %w(), f[20].sort
207
+ assert_equal %w(), f[(20..100)].sort
208
+ assert_equal %w(a b d), f[3].sort
209
+ assert_equal %w(a b c d e), f[(3..4)].sort
210
+ end
167
211
  end
168
212
  end
169
213
 
170
-
171
- def test_range_index
172
- content =<<-EOF
173
- #Id ValueA ValueB Pos1 Pos2
174
- row1 a|aa|aaa b 0|10 10|30
175
- row2 A B 30 35
214
+ def test_range_index_persistent_with_filter
215
+ data =<<-EOF
216
+ #ID:Range
217
+ #:012345678901234567890
218
+ a: ______
219
+ b: ______
220
+ c: _______
221
+ d: ____
222
+ e: ______
223
+ f: ___
224
+ g: ____
176
225
  EOF
177
-
178
- TmpFile.with_file(content) do |filename|
179
- tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
180
- index = tsv.pos_index("Pos1", :memory, true)
181
- assert_equal ["row1"], index[10]
182
-
183
- index = tsv.range_index("Pos1", "Pos2", :memory, true)
184
- assert_equal ["row1"], index[20]
226
+ TmpFile.with_file(data) do |datafile|
227
+ TmpFile.with_file(load_data(datafile)) do |tsvfile|
228
+ f = TSV.range_index(tsvfile, "Start", "End", :filters => [["field:Start", "3"]])
229
+
230
+ assert_equal %w(), f[0].sort
231
+ assert_equal %w(), f[1].sort
232
+ assert_equal %w(), f[20].sort
233
+ assert_equal %w(), f[(20..100)].sort
234
+ assert_equal %w(a), f[3].sort
235
+ assert_equal %w(a), f[(3..4)].sort
236
+ end
185
237
  end
186
238
  end
187
239