rbbt-util 4.1.0 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,113 @@
1
+ require 'rbbt/util/chain_methods'
2
+ require 'rbbt/util/misc'
3
+
4
# Mixin that lets an Array be addressed by field name as well as by position.
#
# An array gains the behaviour through NamedArray.setup, which attaches the
# list of field names (+fields+) plus an optional +key+ and +namespace+.
#
# The named_array_* methods are declared under the :named_array chain prefix;
# the named_array_clean_* methods they call are presumably the untouched Array
# originals exposed by ChainMethods -- confirm in rbbt/util/chain_methods.
module NamedArray
  extend ChainMethods

  self.chain_prefix = :named_array
  attr_accessor :fields
  attr_accessor :key
  attr_accessor :namespace

  # Extends +array+ with NamedArray (unless it already is one), stores the
  # given +fields+, +key+ and +namespace+ on it, and returns the same array.
  def self.setup(array, fields, key = nil, namespace = nil)
    array.extend NamedArray unless NamedArray === array
    array.fields = fields
    array.key = key
    array.namespace = namespace
    array
  end

  # Returns a +dup+ of self in which every position has been combined with
  # the same position of +array+.
  #
  # When the first element of +array+ is itself an Array, the values are
  # concatenated with +; otherwise array[i] is appended with <<.
  #
  # NOTE(review): +dup+ is shallow, so the << branch appears to also append to
  # the element objects still referenced by self -- confirm callers expect it.
  def merge(array)
    double = Array === array.first
    merged = self.dup
    (0..length - 1).each do |i|
      if double
        merged[i] = merged[i] + array[i]
      else
        merged[i] << array[i]
      end
    end
    merged
  end

  # Resolves +fields+ to positions through Misc.field_position.
  #
  # Given an Array of fields it returns an Array of positions; given a single
  # field it returns a single position.
  def positions(fields)
    # `Array === fields` tests "is fields an Array". The previous
    # `Array == fields` compared against the Array class itself, so a list of
    # fields never reached this branch.
    if Array === fields
      fields.collect{|field|
        Misc.field_position(@fields, field)
      }
    else
      Misc.field_position(@fields, fields)
    end
  end

  # Element read by field name or position.
  #
  # The key is translated with Misc.field_position and the raw element is
  # fetched. When fields are defined and Entity.formats knows the field at
  # that position, the element is passed through that format's +setup+
  # (a frozen element is dup'ed first) before being returned.
  def named_array_get_brackets(key)
    pos = Misc.field_position(fields, key)
    elem = named_array_clean_get_brackets(pos)

    return elem if @fields.nil? or @fields.empty?

    field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
    elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
    elem
  end

  # Iterates over the elements, yielding each one.
  #
  # When Entity is defined and fields are present, each element is paired
  # with its field and passed through the matching Entity format (same rule
  # as named_array_get_brackets) before being yielded. Otherwise iteration is
  # delegated to named_array_clean_each.
  def named_array_each(&block)
    if defined?(Entity) and not @fields.nil? and not @fields.empty?
      @fields.zip(self).each do |field,elem|
        elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
        yield(elem)
        elem
      end
    else
      named_array_clean_each(&block)
    end
  end

  # Collects over named_array_each: returns a new Array holding the block's
  # result for every element, or the elements themselves when no block is
  # given.
  def named_array_collect
    res = []

    named_array_each do |elem|
      if block_given?
        res << yield(elem)
      else
        res << elem
      end
    end

    res
  end

  # Element write by field name or position; the key is translated with
  # Misc.field_position before delegating to named_array_clean_set_brackets.
  def named_array_set_brackets(key,value)
    named_array_clean_set_brackets(Misc.field_position(fields, key), value)
  end

  # values_at by field name or position; every key is translated with
  # Misc.field_position before delegating to named_array_clean_values_at.
  def named_array_values_at(*keys)
    keys = keys.collect{|k| Misc.field_position(fields, k) }
    named_array_clean_values_at(*keys)
  end

  # Returns [] for an empty array; otherwise passes self to Misc.zip_fields
  # and sets up every resulting entry as a NamedArray with the same fields.
  def zip_fields
    return [] if self.empty?
    Misc.zip_fields(self).collect{|v| NamedArray.setup(v, fields)}
  end

  # Selects the positions whose field +fullname+ also appears among
  # +file.fields+, builds a NamedArray from just those fields and values, and
  # returns its zip_fields.
  def detach(file)
    file_fields = file.fields.collect{|field| field.fullname}
    detached_fields = []
    self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
    selected_fields = self.fields.values_at(*detached_fields)
    values = self.values_at(*detached_fields)
    # Module#name takes no arguments, so the former `NamedArray.name(values,
    # fields)` raised ArgumentError; NamedArray.setup is the call that names
    # the values.
    values = NamedArray.setup(values, selected_fields)
    values.zip_fields
  end

  # Renders one "field: value" line per field, joined by newlines; Array
  # values are joined with "|".
  def report
    fields.zip(self).collect do |field,value|
      "#{ field }: #{ Array === value ? value * "|" : value }"
    end * "\n"
  end

end
113
+
@@ -3,7 +3,6 @@ require 'rbbt/util/misc'
3
3
  require 'rbbt/util/tmpfile'
4
4
 
5
5
  require 'zlib'
6
- require 'digest/md5'
7
6
 
8
7
  module Open
9
8
  class OpenURLError < StandardError; end
@@ -77,12 +76,14 @@ module Open
77
76
  end
78
77
  end
79
78
 
79
# Computes the digest that identifies a request in the remote cache.
#
# The digest covers the url, the values stored under the "--post-data" and
# "--post-data=" options and, when a "--post-file" option is present, the
# lines of that file sorted and re-joined with newlines (an empty string
# otherwise). Returns whatever Misc.digest returns for the inspected list.
def self.digest_url(url, options = {})
  post_data = options.values_at("--post-data", "--post-data=")

  post_file_content =
    if options.include?("--post-file")
      Open.read(options["--post-file"]).split("\n").sort * "\n"
    else
      ""
    end

  Misc.digest([url, post_data, post_file_content].inspect)
end
80
83
  # Cache
81
84
  #
82
85
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
84
-
85
- filename = File.join(REMOTE_CACHEDIR, digest)
86
+ filename = File.join(REMOTE_CACHEDIR, digest_url(url, options))
86
87
  if File.exists? filename
87
88
  return filename
88
89
  else
@@ -91,7 +92,7 @@ module Open
91
92
  end
92
93
 
93
94
  def self.remove_from_cache(url, options = {})
94
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
+ digest = Misc.digest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
96
 
96
97
  filename = File.join(REMOTE_CACHEDIR, digest)
97
98
  if File.exists? filename
@@ -102,8 +103,10 @@ module Open
102
103
  end
103
104
 
104
105
  def self.add_cache(url, data, options = {})
105
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
106
- Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
106
+ file = File.join(REMOTE_CACHEDIR, digest_url(url, options))
107
+ Misc.lock(file) do
108
+ Misc.sensiblewrite(file, data)
109
+ end
107
110
  end
108
111
 
109
112
  # Grep
@@ -168,16 +171,20 @@ module Open
168
171
  wget_options[:nice_key] = options.delete(:nice_key)
169
172
  wget_options[:quiet] = options.delete(:quiet)
170
173
  wget_options["--post-data="] = options.delete(:post) if options.include? :post
174
+ wget_options["--post-file"] = options.delete("--post-file") if options.include? "--post-file"
175
+ wget_options["--post-file="] = options.delete("--post-file=") if options.include? "--post-file="
171
176
  wget_options[:cookies] = options.delete(:cookies)
172
177
 
173
178
  io = case
174
179
  when (not remote?(url))
175
180
  file_open(url, options[:grep])
176
- when options[:nocache]
181
+ when (options[:nocache] and options[:nocache] != :update)
177
182
  # What about grep?
178
183
  wget(url, wget_options)
179
- when in_cache(url, wget_options)
180
- file_open(in_cache(url, wget_options), options[:grep])
184
+ when (options[:nocache] != :update and in_cache(url, wget_options))
185
+ Misc.lock(in_cache(url, wget_options)) do
186
+ file_open(in_cache(url, wget_options), options[:grep])
187
+ end
181
188
  else
182
189
  io = wget(url, wget_options)
183
190
  add_cache(url, io, wget_options)
@@ -26,7 +26,7 @@ class Step
26
26
  Misc.lock(info_file) do
27
27
  i = info
28
28
  i[key] = value
29
- Open.write(info_file, i.to_yaml)
29
+ Open.write(info_file, YAML.dump(i))
30
30
  value
31
31
  end
32
32
  end
@@ -140,6 +140,14 @@ class Step
140
140
  end
141
141
 
142
142
  module Workflow
143
# Writes a line through Log.low: "status: message" when a message is given,
# or just the status otherwise.
def log(status, message = nil)
  line = message ? "#{ status }: #{ message }" : "#{ status }"
  Log.low line
end
150
+
143
151
  def task_info(name)
144
152
  task = tasks[name]
145
153
  description = task.description
@@ -23,7 +23,7 @@ class Step
23
23
  @inputs = inputs || []
24
24
  end
25
25
 
26
- def prepare_entity(value, description = nil, info = {})
26
+ def prepare_result(value, description = nil, info = {})
27
27
  return value if description.nil?
28
28
  Entity.formats[description].setup(value, info.merge(:format => description)) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? description
29
29
  value
@@ -31,7 +31,7 @@ class Step
31
31
 
32
32
  def exec
33
33
  result = @task.exec_in self, *@inputs
34
- prepare_entity result, @task.result_description
34
+ prepare_result result, @task.result_description
35
35
  end
36
36
 
37
37
  def join
@@ -41,17 +41,18 @@ class Step
41
41
  sleep 5
42
42
  end
43
43
  else
44
+ Log.debug "Waiting for pid: #{@pid}"
44
45
  Process.waitpid @pid
45
46
  @pid = nil
46
47
  end
47
48
  self
48
49
  end
49
50
 
50
- def run
51
- result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq do
51
+ def run(no_load = false)
52
+ result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq, :no_load => no_load do
52
53
  log task.name, "Starting task: #{ name }"
53
54
  set_info :dependencies, @dependencies.collect{|dep| [dep.task.name, dep.name]}
54
- @dependencies.each{|dependency| dependency.run}
55
+ @dependencies.each{|dependency| dependency.run true}
55
56
  set_info :status, :start
56
57
  set_info :inputs, Misc.zip2hash(task.inputs, @inputs) unless task.inputs.nil?
57
58
  res = exec
@@ -59,7 +60,7 @@ class Step
59
60
  res
60
61
  end
61
62
 
62
- prepare_entity result, @task.result_description, info
63
+ prepare_result result, @task.result_description, info
63
64
  end
64
65
 
65
66
  def fork
@@ -90,7 +91,7 @@ class Step
90
91
  result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path} do
91
92
  exec
92
93
  end
93
- prepare_entity result, @task.result_description, info
94
+ prepare_result result, @task.result_description, info
94
95
  end
95
96
 
96
97
  def clean
@@ -108,6 +109,7 @@ class Step
108
109
  def rec_dependencies
109
110
  @dependencies.collect{|step| step.rec_dependencies}.flatten.concat @dependencies
110
111
  end
112
+
111
113
  def step(name)
112
114
  rec_dependencies.select{|step| step.task.name.to_sym == name.to_sym}.first
113
115
  end
data/share/lib/R/util.R CHANGED
@@ -1,7 +1,11 @@
1
- rbbt.ruby <- function(code, load = TRUE){
1
+ rbbt.ruby <- function(code, load = TRUE, flat = FALSE){
2
2
  file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
3
3
  if (load){
4
- data = rbbt.tsv(file);
4
+ if(flat){
5
+ data = rbbt.flat.tsv(file);
6
+ }else{
7
+ data = rbbt.tsv(file);
8
+ }
5
9
  rm(file);
6
10
  return(data);
7
11
  }else{
@@ -25,8 +29,26 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
25
29
  return(data);
26
30
  }
27
31
 
32
# Read a "flat" TSV file into a named list.
#
# The first line is taken as the header; if it starts with "#: " (an options
# line) the following line is consumed as the header instead. Every remaining
# line is split on `sep`: the first part becomes the element name and the rest
# become its character vector of values. A row holding only a key maps to an
# empty character vector, and blank lines are skipped.
#
# `comment.char` and `...` are accepted but not used by this function.
#
# Returns a list indexed by row id.
rbbt.flat.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
  f = file(filename, 'r');
  # Release the connection even if reading fails part-way through.
  on.exit(close(f));

  headers = readLines(f, 1);
  if (length(grep("^#: ", headers)) > 0){
    headers = readLines(f, 1);
  }

  result = list();
  while( TRUE ){
    line = readLines(f, 1);
    if (length(line) == 0){ break;}

    parts = unlist(strsplit(line, sep, fixed = TRUE));
    # A blank line splits into nothing and has no id to store under.
    if (length(parts) == 0){ next;}

    id = parts[1];
    # parts[-1] drops the id; unlike parts[2:length(parts)] it yields an empty
    # vector (not c(NA, id)) when the row has no values.
    result[[id]] = parts[-1];
  }

  return(result);
}
49
+
28
50
  rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
51
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
30
52
  f = file(filename, 'r');
31
53
  headers = readLines(f, 1);
32
54
  if (length(grep("^#: ", headers)) > 0){
@@ -148,8 +170,8 @@ rbbt.run <- function(filename){
148
170
 
149
171
  # Adapted from http://www.phaget4.org/R/image_matrix.html
150
172
  rbbt.plot.matrix <- function(x, ...){
151
- min <- min(x);
152
- max <- max(x);
173
+ min <- min(x, na.rm=T);
174
+ max <- max(x, na.rm=T);
153
175
  yLabels <- rownames(x);
154
176
  xLabels <- colnames(x);
155
177
  title <-c();
@@ -184,7 +206,7 @@ rbbt.plot.matrix <- function(x, ...){
184
206
  ColorRamp <- rgb( seq(0,1,length=256), # Red
185
207
  seq(0,1,length=256), # Green
186
208
  seq(1,0,length=256)) # Blue
187
- ColorLevels <- seq(min, max, length=length(ColorRamp));
209
+ ColorLevels <- seq(min, max, length=length(ColorRamp));
188
210
 
189
211
  # Reverse Y axis
190
212
  reverse <- nrow(x) : 1;
@@ -35,7 +35,7 @@ class TestAnnotations < Test::Unit::TestCase
35
35
  def test_array
36
36
  ary = ["string"]
37
37
  annotation_str = "Annotation String"
38
- AnnotatedArray.setup_chain(ary)
38
+ ary.extend AnnotatedArray
39
39
  AnnotatedString.setup(ary, annotation_str)
40
40
  assert_equal [AnnotatedString], ary.annotation_types
41
41
  assert_equal annotation_str, ary.annotation_str
@@ -87,6 +87,20 @@ class TestAnnotations < Test::Unit::TestCase
87
87
  assert_equal str1, Annotated.load_tsv(Annotated.tsv([str1, str2], :literal, :JSON)).sort.first
88
88
  end
89
89
 
90
# An annotated array of strings should survive a round trip through
# Annotated.tsv / Annotated.load_tsv, both with all fields and with the
# :literal and :JSON fields.
def test_load_array_tsv
  expected_annotation = "Annotation String 2"
  first_string = "string1"
  second_string = "string2"

  strings = [first_string, second_string]
  AnnotatedString.setup(strings, expected_annotation)
  strings.extend AnnotatedArray

  reloaded_all = Annotated.load_tsv(Annotated.tsv(strings, :all))
  assert_equal expected_annotation, reloaded_all.annotation_str

  reloaded_literal = Annotated.load_tsv(Annotated.tsv(strings, :literal, :JSON))
  assert_equal first_string, reloaded_literal.sort.first
end
103
+
90
104
  def test_inheritance
91
105
  str = "string1"
92
106
  annotation_str1 = "Annotation String 1"
@@ -104,7 +118,7 @@ class TestAnnotations < Test::Unit::TestCase
104
118
  assert_equal str + annotation_str, str.add_annot
105
119
  end
106
120
 
107
- def test_annotation_positional2hash
121
+ def test_annotation_positional2hash
108
122
  str = "string"
109
123
  annotation_str = "Annotation String"
110
124
  AnnotatedString.setup(str, :annotation_str => annotation_str)
@@ -3,7 +3,43 @@ require 'rbbt/persist'
3
3
  require 'rbbt/util/tmpfile'
4
4
  require 'test/unit'
5
5
 
6
# Annotation module used by the persistence tests; it declares a single
# annotation named :test_annotation.
module TestAnnotation
  extend Annotation

  annotation :test_annotation
end
6
11
  class TestPersist < Test::Unit::TestCase
12
+
13
# Persists two annotated strings with the :annotations type and checks values
# and annotations twice: once on the result of the first persist call, and
# once (after sorting) on the result of a second call against the same file.
def test_annotation_persist
  TmpFile.with_file do |tmp|
    entity1 = "Entity 1"
    entity2 = "Entity 2"
    TestAnnotation.setup(entity1, :test_annotation => "1")
    TestAnnotation.setup(entity2, :test_annotation => "2")
    entities = [entity1, entity2]

    first_pass = Persist.persist("Test", :annotations, :file => tmp) { entities }

    assert_equal "Entity 1", first_pass.first
    assert_equal "Entity 2", first_pass.last
    assert_equal "1", first_pass.first.test_annotation
    assert_equal "2", first_pass.last.test_annotation

    second_pass = Persist.persist("Test", :annotations, :file => tmp) { entities }
    sorted = second_pass.sort

    assert_equal "Entity 1", sorted.first
    assert_equal "Entity 2", sorted.last
    assert_equal "1", sorted.first.test_annotation
    assert_equal "2", sorted.last.test_annotation
  end
end
42
+
7
43
  def test_array_persist
8
44
  TmpFile.with_file do |tmp|
9
45
  10.times do
@@ -12,7 +48,7 @@ class TestPersist < Test::Unit::TestCase
12
48
  end)
13
49
  end
14
50
  end
15
-
51
+
16
52
  TmpFile.with_file do |tmp|
17
53
  10.times do
18
54
  assert_equal [],(Persist.persist("Test", :array, :file => tmp) do
@@ -458,5 +458,17 @@ row2 A|AA|AAA
458
458
 
459
459
  end
460
460
 
461
# Opens a :flat file as :single keyed by the "Value" field and checks that
# "aaa" is among the resulting keys.
def test_flat2single
  content =<<-EOF
#: :type=:flat
#Id Value
row1 a aa aaa
row2 A AA AAA
  EOF

  TmpFile.with_file(content) do |filename|
    tsv = TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value")
    assert tsv.include?("aaa")
  end
end
462
474
  end
@@ -221,7 +221,7 @@ row2 E
221
221
 
222
222
  tsv1.attach tsv2, :fields => ["OtherID"], :persist_input => true
223
223
 
224
- assert_equal tsv1.fields,%w(ValueA ValueB OtherID)
224
+ assert_equal tsv1.fields, %w(ValueA ValueB OtherID)
225
225
  assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
226
226
  end
227
227