rbbt-util 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ require 'rbbt/util/chain_methods'
2
+ require 'rbbt/util/misc'
3
+
4
# Mixin that lets an Array be addressed by field name as well as by position.
#
# An extended array carries three attributes: +fields+ (the names of its
# positions), +key+ and +namespace+. Field names are translated to positions
# with Misc.field_position. When a value is read and its field is one of
# Entity.formats, the value is passed through that format's +setup+.
#
# NOTE(review): the +named_array_*+ methods call +named_array_clean_*+
# counterparts that are not defined in this module; presumably ChainMethods
# (through +chain_prefix+) generates them and hooks the prefixed methods onto
# the matching Array methods -- confirm in rbbt/util/chain_methods.
module NamedArray
  extend ChainMethods

  self.chain_prefix = :named_array
  attr_accessor :fields
  attr_accessor :key
  attr_accessor :namespace

  # Extends +array+ with NamedArray (unless it already is one), assigns its
  # +fields+, +key+ and +namespace+, and returns the array.
  def self.setup(array, fields, key = nil, namespace = nil)
    array.extend NamedArray unless NamedArray === array
    array.fields = fields
    array.key = key
    array.namespace = namespace
    array
  end

  # Returns a copy of this array in which every position has been extended
  # with the value found at the same position of +array+.
  #
  # When the first element of +array+ is itself an Array, each position is
  # concatenated with the matching element of +array+; otherwise the matching
  # element is appended as a single value. The receiver and its elements are
  # left untouched.
  def merge(array)
    double = Array === array.first
    merged = self.dup
    length.times do |i|
      if double
        merged[i] = merged[i] + array[i]
      else
        # Append to a copy of the element: the +dup+ above is shallow, so
        # appending in place would also modify the element held by the
        # receiver.
        merged[i] = merged[i].dup << array[i]
      end
    end
    merged
  end

  # Translates field names into positions using Misc.field_position.
  #
  # Returns an Array of positions when +fields+ is an Array, and a single
  # position otherwise.
  def positions(fields)
    # Case equality is required here: +Array == fields+ compares the argument
    # with the class object itself and is never true for an actual array.
    if Array === fields
      fields.collect { |field| Misc.field_position(@fields, field) }
    else
      Misc.field_position(@fields, fields)
    end
  end

  # Fetches the element stored under +key+ (resolved with
  # Misc.field_position).
  #
  # When fields are set and the field at that position is one of
  # Entity.formats (and differs from the element itself), the element is passed
  # through that format's +setup+ with the +:format+ and, if a namespace is
  # set, +:namespace+/+:organism+ options. Frozen elements are duplicated
  # before being set up.
  def named_array_get_brackets(key)
    pos = Misc.field_position(fields, key)
    elem = named_array_clean_get_brackets(pos)

    return elem if @fields.nil? or @fields.empty?

    field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
    if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
      elem = Entity.formats[field].setup(
        (elem.frozen? ? elem.dup : elem),
        (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})
      )
    end
    elem
  end

  # Yields every element. When Entity is defined and fields are set, each
  # element is first passed through the Entity format matching its field, as in
  # #named_array_get_brackets; otherwise iteration is delegated to
  # +named_array_clean_each+.
  def named_array_each(&block)
    if defined?(Entity) and not @fields.nil? and not @fields.empty?
      @fields.zip(self).each do |field,elem|
        if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
          elem = Entity.formats[field].setup(
            (elem.frozen? ? elem.dup : elem),
            (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})
          )
        end
        yield(elem)
        elem
      end
    else
      named_array_clean_each(&block)
    end
  end

  # Collects the elements produced by #named_array_each into a new Array,
  # mapping them through the block when one is given.
  def named_array_collect
    res = []

    named_array_each do |elem|
      if block_given?
        res << yield(elem)
      else
        res << elem
      end
    end

    res
  end

  # Stores +value+ at the position that Misc.field_position resolves for +key+.
  def named_array_set_brackets(key,value)
    named_array_clean_set_brackets(Misc.field_position(fields, key), value)
  end

  # Returns the values at the positions that Misc.field_position resolves for
  # each of +keys+.
  def named_array_values_at(*keys)
    keys = keys.collect{|k| Misc.field_position(fields, k) }
    named_array_clean_values_at(*keys)
  end

  # Returns [] for an empty array. Otherwise passes the array to
  # Misc.zip_fields and sets up every resulting element as a NamedArray with
  # this array's fields.
  def zip_fields
    return [] if self.empty?
    zipped = Misc.zip_fields(self)
    zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
    zipped
  end

  # Selects the positions whose field +fullname+ also appears among the
  # +fullname+s of +file.fields+, builds a NamedArray from the fields and
  # values at those positions, and returns its #zip_fields.
  def detach(file)
    file_fields = file.fields.collect{|field| field.fullname}
    detached_positions = []
    self.fields.each_with_index{|field,i| detached_positions << i if file_fields.include? field.fullname}
    detached_fields = self.fields.values_at(*detached_positions)
    values = self.values_at(*detached_positions)
    # NamedArray.setup is this module's constructor; NamedArray.name is
    # Module#name, which accepts no arguments.
    values = NamedArray.setup(values, detached_fields)
    values.zip_fields
  end

  # Returns one "field: value" line per position, joined with newlines; Array
  # values are joined with "|".
  def report
    fields.zip(self).collect do |field,value|
      "#{ field }: #{ Array === value ? value * "|" : value }"
    end * "\n"
  end

end
113
+
@@ -3,7 +3,6 @@ require 'rbbt/util/misc'
3
3
  require 'rbbt/util/tmpfile'
4
4
 
5
5
  require 'zlib'
6
- require 'digest/md5'
7
6
 
8
7
  module Open
9
8
  class OpenURLError < StandardError; end
@@ -77,12 +76,14 @@ module Open
77
76
  end
78
77
  end
79
78
 
79
# Builds the cache key for a remote request.
#
# The key is Misc.digest applied to the +inspect+ of: the +url+, the values of
# the "--post-data" and "--post-data=" options, and the contents of the posted
# file with its lines sorted (so line order does not affect the key), or ""
# when no file is posted.
#
# The posted file is looked up under "--post-file" and, failing that, under
# "--post-file=", mirroring the two spellings accepted for post data. Without
# the second lookup, requests that differ only in a "--post-file=" file would
# share one cache key.
#
# NOTE(review): code that computes the digest inline without sorting the
# post-file lines will not agree with this key for post-file requests.
def self.digest_url(url, options = {})
  post_data = options.values_at("--post-data", "--post-data=")
  post_file = options["--post-file"] || options["--post-file="]
  post_content = post_file ? Open.read(post_file).split("\n").sort * "\n" : ""

  Misc.digest([url, post_data, post_content].inspect)
end
80
83
  # Cache
81
84
  #
82
85
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
84
-
85
- filename = File.join(REMOTE_CACHEDIR, digest)
86
+ filename = File.join(REMOTE_CACHEDIR, digest_url(url, options))
86
87
  if File.exists? filename
87
88
  return filename
88
89
  else
@@ -91,7 +92,7 @@ module Open
91
92
  end
92
93
 
93
94
  def self.remove_from_cache(url, options = {})
94
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
+ digest = Misc.digest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
96
 
96
97
  filename = File.join(REMOTE_CACHEDIR, digest)
97
98
  if File.exists? filename
@@ -102,8 +103,10 @@ module Open
102
103
  end
103
104
 
104
105
  def self.add_cache(url, data, options = {})
105
- digest = Digest::MD5.hexdigest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
106
- Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
106
+ file = File.join(REMOTE_CACHEDIR, digest_url(url, options))
107
+ Misc.lock(file) do
108
+ Misc.sensiblewrite(file, data)
109
+ end
107
110
  end
108
111
 
109
112
  # Grep
@@ -168,16 +171,20 @@ module Open
168
171
  wget_options[:nice_key] = options.delete(:nice_key)
169
172
  wget_options[:quiet] = options.delete(:quiet)
170
173
  wget_options["--post-data="] = options.delete(:post) if options.include? :post
174
+ wget_options["--post-file"] = options.delete("--post-file") if options.include? "--post-file"
175
+ wget_options["--post-file="] = options.delete("--post-file=") if options.include? "--post-file="
171
176
  wget_options[:cookies] = options.delete(:cookies)
172
177
 
173
178
  io = case
174
179
  when (not remote?(url))
175
180
  file_open(url, options[:grep])
176
- when options[:nocache]
181
+ when (options[:nocache] and options[:nocache] != :update)
177
182
  # What about grep?
178
183
  wget(url, wget_options)
179
- when in_cache(url, wget_options)
180
- file_open(in_cache(url, wget_options), options[:grep])
184
+ when (options[:nocache] != :update and in_cache(url, wget_options))
185
+ Misc.lock(in_cache(url, wget_options)) do
186
+ file_open(in_cache(url, wget_options), options[:grep])
187
+ end
181
188
  else
182
189
  io = wget(url, wget_options)
183
190
  add_cache(url, io, wget_options)
@@ -26,7 +26,7 @@ class Step
26
26
  Misc.lock(info_file) do
27
27
  i = info
28
28
  i[key] = value
29
- Open.write(info_file, i.to_yaml)
29
+ Open.write(info_file, YAML.dump(i))
30
30
  value
31
31
  end
32
32
  end
@@ -140,6 +140,14 @@ class Step
140
140
  end
141
141
 
142
142
  module Workflow
143
# Sends a log entry to Log.low: "status: message" when +message+ is given,
# otherwise just the status.
def log(status, message = nil)
  entry = message ? "#{ status }: #{ message }" : "#{ status }"
  Log.low entry
end
150
+
143
151
  def task_info(name)
144
152
  task = tasks[name]
145
153
  description = task.description
@@ -23,7 +23,7 @@ class Step
23
23
  @inputs = inputs || []
24
24
  end
25
25
 
26
- def prepare_entity(value, description = nil, info = {})
26
+ def prepare_result(value, description = nil, info = {})
27
27
  return value if description.nil?
28
28
  Entity.formats[description].setup(value, info.merge(:format => description)) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? description
29
29
  value
@@ -31,7 +31,7 @@ class Step
31
31
 
32
32
  def exec
33
33
  result = @task.exec_in self, *@inputs
34
- prepare_entity result, @task.result_description
34
+ prepare_result result, @task.result_description
35
35
  end
36
36
 
37
37
  def join
@@ -41,17 +41,18 @@ class Step
41
41
  sleep 5
42
42
  end
43
43
  else
44
+ Log.debug "Waiting for pid: #{@pid}"
44
45
  Process.waitpid @pid
45
46
  @pid = nil
46
47
  end
47
48
  self
48
49
  end
49
50
 
50
- def run
51
- result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq do
51
+ def run(no_load = false)
52
+ result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq, :no_load => no_load do
52
53
  log task.name, "Starting task: #{ name }"
53
54
  set_info :dependencies, @dependencies.collect{|dep| [dep.task.name, dep.name]}
54
- @dependencies.each{|dependency| dependency.run}
55
+ @dependencies.each{|dependency| dependency.run true}
55
56
  set_info :status, :start
56
57
  set_info :inputs, Misc.zip2hash(task.inputs, @inputs) unless task.inputs.nil?
57
58
  res = exec
@@ -59,7 +60,7 @@ class Step
59
60
  res
60
61
  end
61
62
 
62
- prepare_entity result, @task.result_description, info
63
+ prepare_result result, @task.result_description, info
63
64
  end
64
65
 
65
66
  def fork
@@ -90,7 +91,7 @@ class Step
90
91
  result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path} do
91
92
  exec
92
93
  end
93
- prepare_entity result, @task.result_description, info
94
+ prepare_result result, @task.result_description, info
94
95
  end
95
96
 
96
97
  def clean
@@ -108,6 +109,7 @@ class Step
108
109
  def rec_dependencies
109
110
  @dependencies.collect{|step| step.rec_dependencies}.flatten.concat @dependencies
110
111
  end
112
+
111
113
  def step(name)
112
114
  rec_dependencies.select{|step| step.task.name.to_sym == name.to_sym}.first
113
115
  end
data/share/lib/R/util.R CHANGED
@@ -1,7 +1,11 @@
1
- rbbt.ruby <- function(code, load = TRUE){
1
+ rbbt.ruby <- function(code, load = TRUE, flat = FALSE){
2
2
  file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
3
3
  if (load){
4
- data = rbbt.tsv(file);
4
+ if(flat){
5
+ data = rbbt.flat.tsv(file);
6
+ }else{
7
+ data = rbbt.tsv(file);
8
+ }
5
9
  rm(file);
6
10
  return(data);
7
11
  }else{
@@ -25,8 +29,26 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
25
29
  return(data);
26
30
  }
27
31
 
32
# Read a "flat" TSV file into a named list.
#
# The first line is consumed as the header; when it starts with "#: " one more
# line is consumed as the actual header. Every remaining line is split on
# `sep`: the first token names a list entry and the remaining tokens become
# its character-vector value. Blank lines are skipped.
#
# Args:
#   filename:     path of the file to read.
#   sep:          literal field separator (matched with fixed = TRUE).
#   comment.char: accepted so the signature matches rbbt.tsv; not used here.
#   ...:          ignored.
#
# Returns:
#   A list keyed by row id. Each value is a character vector, which is
#   character(0) for rows that carry only an id.
rbbt.flat.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
  f = file(filename, 'r');
  # Release the connection even when parsing stops with an error.
  on.exit(close(f), add = TRUE);

  headers = readLines(f, 1);
  if (length(grep("^#: ", headers)) > 0){
    headers = readLines(f, 1);
  }

  result = list();
  while( TRUE ){
    line = readLines(f, 1);
    if (length(line) == 0){ break;}

    parts = unlist(strsplit(line, sep, fixed = TRUE));
    # A blank line has no id to store the row under.
    if (length(parts) == 0){ next;}

    id = parts[1];
    # parts[-1] is empty for an id-only row, whereas parts[2:length(parts)]
    # would count down (2:1) and yield c(NA, id).
    result[[id]] = parts[-1];
  }
  return(result);
}
49
+
28
50
  rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
51
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
30
52
  f = file(filename, 'r');
31
53
  headers = readLines(f, 1);
32
54
  if (length(grep("^#: ", headers)) > 0){
@@ -148,8 +170,8 @@ rbbt.run <- function(filename){
148
170
 
149
171
 # Adapted from http://www.phaget4.org/R/image_matrix.html
150
172
  rbbt.plot.matrix <- function(x, ...){
151
- min <- min(x);
152
- max <- max(x);
173
+ min <- min(x, na.rm=T);
174
+ max <- max(x, na.rm=T);
153
175
  yLabels <- rownames(x);
154
176
  xLabels <- colnames(x);
155
177
  title <-c();
@@ -184,7 +206,7 @@ rbbt.plot.matrix <- function(x, ...){
184
206
  ColorRamp <- rgb( seq(0,1,length=256), # Red
185
207
  seq(0,1,length=256), # Green
186
208
  seq(1,0,length=256)) # Blue
187
- ColorLevels <- seq(min, max, length=length(ColorRamp));
209
+ ColorLevels <- seq(min, max, length=length(ColorRamp));
188
210
 
189
211
  # Reverse Y axis
190
212
  reverse <- nrow(x) : 1;
@@ -35,7 +35,7 @@ class TestAnnotations < Test::Unit::TestCase
35
35
  def test_array
36
36
  ary = ["string"]
37
37
  annotation_str = "Annotation String"
38
- AnnotatedArray.setup_chain(ary)
38
+ ary.extend AnnotatedArray
39
39
  AnnotatedString.setup(ary, annotation_str)
40
40
  assert_equal [AnnotatedString], ary.annotation_types
41
41
  assert_equal annotation_str, ary.annotation_str
@@ -87,6 +87,20 @@ class TestAnnotations < Test::Unit::TestCase
87
87
  assert_equal str1, Annotated.load_tsv(Annotated.tsv([str1, str2], :literal, :JSON)).sort.first
88
88
  end
89
89
 
90
# Round-trips an annotated array of strings through Annotated.tsv and
# Annotated.load_tsv, once with :all and once with :literal, :JSON, checking
# the annotation and the first sorted element respectively.
def test_load_array_tsv
  first = "string1"
  second = "string2"
  strings = [first, second]
  expected_annotation = "Annotation String 2"
  AnnotatedString.setup(strings, expected_annotation)
  strings.extend(AnnotatedArray)

  full_round_trip = Annotated.load_tsv(Annotated.tsv(strings, :all))
  assert_equal expected_annotation, full_round_trip.annotation_str

  json_round_trip = Annotated.load_tsv(Annotated.tsv(strings, :literal, :JSON))
  assert_equal first, json_round_trip.sort.first
end
103
+
90
104
  def test_inheritance
91
105
  str = "string1"
92
106
  annotation_str1 = "Annotation String 1"
@@ -104,7 +118,7 @@ class TestAnnotations < Test::Unit::TestCase
104
118
  assert_equal str + annotation_str, str.add_annot
105
119
  end
106
120
 
107
- def test_annotation_positional2hash
121
+ def test_annotation_positional2hash
108
122
  str = "string"
109
123
  annotation_str = "Annotation String"
110
124
  AnnotatedString.setup(str, :annotation_str => annotation_str)
@@ -3,7 +3,43 @@ require 'rbbt/persist'
3
3
  require 'rbbt/util/tmpfile'
4
4
  require 'test/unit'
5
5
 
6
# Annotation module declaring a single annotation, :test_annotation.
module TestAnnotation
  extend(Annotation)

  annotation(:test_annotation)
end
6
11
  class TestPersist < Test::Unit::TestCase
12
+
13
# Persists two strings annotated with TestAnnotation through Persist.persist
# (type :annotations) twice against the same temporary file, checking the
# strings and their :test_annotation values after each call. The second round
# of assertions sorts the result before comparing.
def test_annotation_persist
  TmpFile.with_file do |tmp|
    first_entity = "Entity 1"
    second_entity = "Entity 2"

    TestAnnotation.setup(first_entity, :test_annotation => "1")
    TestAnnotation.setup(second_entity, :test_annotation => "2")

    annotations = [first_entity, second_entity]

    stored = Persist.persist("Test", :annotations, :file => tmp) { annotations }

    assert_equal "Entity 1", stored.first
    assert_equal "Entity 2", stored.last
    assert_equal "1", stored.first.test_annotation
    assert_equal "2", stored.last.test_annotation

    reloaded = Persist.persist("Test", :annotations, :file => tmp) { annotations }

    assert_equal "Entity 1", reloaded.sort.first
    assert_equal "Entity 2", reloaded.sort.last
    assert_equal "1", reloaded.sort.first.test_annotation
    assert_equal "2", reloaded.sort.last.test_annotation
  end
end
42
+
7
43
  def test_array_persist
8
44
  TmpFile.with_file do |tmp|
9
45
  10.times do
@@ -12,7 +48,7 @@ class TestPersist < Test::Unit::TestCase
12
48
  end)
13
49
  end
14
50
  end
15
-
51
+
16
52
  TmpFile.with_file do |tmp|
17
53
  10.times do
18
54
  assert_equal [],(Persist.persist("Test", :array, :file => tmp) do
@@ -458,5 +458,17 @@ row2 A|AA|AAA
458
458
 
459
459
  end
460
460
 
461
# Writes a TSV whose preamble declares :type=:flat to a temporary file, opens
# it with :sep => /\s+/, :type => :single and :key_field => "Value", and
# asserts that the result includes "aaa".
+ def test_flat2single
462
+ content =<<-EOF
463
+ #: :type=:flat
464
+ #Id Value
465
+ row1 a aa aaa
466
+ row2 A AA AAA
467
+ EOF
461
468
 
469
+ TmpFile.with_file(content) do |filename|
470
+ assert TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value").include? "aaa"
471
+ end
472
+
473
+ end
462
474
  end
@@ -221,7 +221,7 @@ row2 E
221
221
 
222
222
  tsv1.attach tsv2, :fields => ["OtherID"], :persist_input => true
223
223
 
224
- assert_equal tsv1.fields,%w(ValueA ValueB OtherID)
224
+ assert_equal tsv1.fields, %w(ValueA ValueB OtherID)
225
225
  assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
226
226
  end
227
227