rbbt-util 3.0.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  class TSV
2
2
 
3
+ attr_accessor :monitor
4
+
3
5
  def through(new_key_field = :key, new_fields = nil, &block)
4
6
 
5
7
  # Get positions
@@ -43,11 +45,23 @@ class TSV
43
45
  end if fields
44
46
 
45
47
  # Cycle through
48
+ if monitor
49
+ desc = "Parsing Stream"
50
+ step = 100
51
+ if Hash === monitor
52
+ desc = monitor[:desc] if monitor.include? :desc
53
+ step = monitor[:step] if monitor.include? :step
54
+ end
55
+ progress_monitor = Progress::Bar.new(size, 0, step, desc)
56
+ else
57
+ progress_monitor = nil
58
+ end
46
59
 
47
60
  if new_key_position == :key and ( new_fields.nil? or new_fields == fields)
48
- each do |key, fields| yield key, fields end
61
+ each do |key, fields| progress_monitor.tick if progress_monitor; yield key, fields end
49
62
  else
50
63
  each do |key, fields|
64
+ progress_monitor.tick if progress_monitor;
51
65
  new_key_value = case
52
66
  when (new_key_position.nil? or new_key_position == :key)
53
67
  key
@@ -190,8 +204,14 @@ class TSV
190
204
  new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
191
205
  end
192
206
  when String === method
193
- through do |key, values|
194
- new[key] = values if [key,values].flatten.select{|v| v == method}.any?
207
+ if block_given?
208
+ through do |key, values|
209
+ new[key] = values if yield((method == key_field or method == :key)? key : values[method])
210
+ end
211
+ else
212
+ through do |key, values|
213
+ new[key] = values if [key,values].flatten.select{|v| v == method}.any?
214
+ end
195
215
  end
196
216
  when Hash === method
197
217
  key = method.keys.first
@@ -255,6 +275,8 @@ class TSV
255
275
  end
256
276
 
257
277
  self.fields = self.fields + [name] if fields != nil and name != nil
278
+
279
+ self
258
280
  end
259
281
 
260
282
  def add_fields(names = nil)
@@ -0,0 +1,31 @@
1
+ require 'rbbt/util/tsv'
2
+
3
require 'shellwords'

# Shell-pipeline helpers that inspect a TSV file without loading it.
#
# Every value interpolated into a command line is escaped with
# Shellwords.escape, so paths containing spaces, quotes or shell
# metacharacters are passed through verbatim instead of being re-parsed
# by the shell.
class TSV
  # Lists the first column of every line of +file+ that does not start
  # with '#'.
  #
  # file - path of the file to read.
  # sep  - delimiter handed to `cut -d` (a tab by default).
  #
  # Returns an Array with one String per line produced by the pipeline.
  def self.keys(file, sep = "\t")
    pipeline = "cut -f 1 -d#{Shellwords.escape(sep)} #{Shellwords.escape(file.to_s)} | grep -v '^#'"
    CMD.cmd(pipeline).read.split("\n")
  end

  # Counts, for every column of +file+ (key column first), how many of its
  # distinct '|'-separated entries match one of +values+ as a whole word.
  #
  # file   - path of the TSV file.
  # values - either an Array of literal strings, or the path of a file that
  #          holds one literal string per line (it is symlinked, not copied).
  #
  # Returns a Hash mapping each field name to its match count; a field whose
  # pipeline fails is reported as 0.
  def self.field_match_counts(file, values)
    header = Open.open(file)
    begin
      key_field, fields = TSV.parse_header(header)
    ensure
      # The stream is only needed for the header; release it right away
      # unless the parser already did.
      if header.respond_to?(:close) && !(header.respond_to?(:closed?) && header.closed?)
        header.close
      end
    end
    all_fields = [key_field] + fields

    counts = {}
    TmpFile.with_file do |tmpfile|
      if Array === values
        Open.write(tmpfile, values * "\n")
      else
        FileUtils.ln_s values, tmpfile
      end

      source   = Shellwords.escape(file.to_s)
      patterns = Shellwords.escape(tmpfile.to_s)

      all_fields.each_with_index do |field, i|
        # Patterns are quoted so the shell cannot glob-expand them.
        pipeline = "cat #{source} | grep -v '^#' | cut -f #{i + 1} | tr '|' '\\n' | sort -u | " \
                   "grep '[[:alpha:]]' | grep -f #{patterns} -F -w"

        counts[field] = begin
                          CMD.cmd(pipeline).read.count("\n")
                        rescue StandardError
                          # Best effort, as in the original: any failure
                          # (presumably including grep exiting non-zero on
                          # no match -- confirm against CMD.cmd) counts as 0.
                          0
                        end
      end
    end

    counts
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/misc'
2
+ require 'progress-bar'
2
3
  class TSV
3
4
 
4
5
  def self.parse_fields(io, delimiter = "\t")
@@ -7,7 +8,6 @@ class TSV
7
8
  ## split with delimiter, do not remove empty
8
9
  fields = io.split(delimiter, -1)
9
10
 
10
-
11
11
  fields
12
12
  end
13
13
 
@@ -58,6 +58,7 @@ class TSV
58
58
  options = Misc.add_defaults options, more_options
59
59
 
60
60
  options = Misc.add_defaults options,
61
+ :monitor => false,
61
62
  :case_insensitive => false,
62
63
  :type => :double,
63
64
  :namespace => nil,
@@ -78,6 +79,8 @@ class TSV
78
79
  :exclude => nil,
79
80
  :select => nil,
80
81
  :grep => nil
82
+
83
+ monitor = Misc.process_options options, :monitor
81
84
 
82
85
  header_hash, sep, sep2 =
83
86
  Misc.process_options options, :header_hash, :sep, :sep2
@@ -119,13 +122,33 @@ class TSV
119
122
 
120
123
  exclude ||= Misc.process_options options, :reject if options.include? :reject
121
124
 
125
+ if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
126
+ size = case
127
+ when stream.respond_to?(:size)
128
+ stream.size
129
+ else
130
+ stream.stat.size
131
+ end
132
+ desc = "Parsing Stream"
133
+ step = 100
134
+ if Hash === monitor
135
+ desc = monitor[:desc] if monitor.include? :desc
136
+ step = monitor[:step] if monitor.include? :step
137
+ end
138
+ progress_monitor = Progress::Bar.new(size, 0, step, desc)
139
+ else
140
+ progress_monitor = nil
141
+ end
142
+
122
143
  #{{{ Process rest
123
- data = {}
144
+ data = options[:persistence_data] || {}
124
145
  single = type.to_sym != :double
125
146
  max_cols = 0
126
147
  while line do
127
148
  line.chomp!
128
149
 
150
+ progress_monitor.tick(stream.pos) if progress_monitor
151
+
129
152
  if line.empty? or
130
153
  (exclude and exclude.call(line)) or
131
154
  (select and not select.call(line))
@@ -187,7 +210,7 @@ class TSV
187
210
  data[id] = extra.first
188
211
  when type == :flat
189
212
  if data.include? id
190
- data[id].concat extra
213
+ data[id] = data[id] + extra
191
214
  else
192
215
  data[id] = extra
193
216
  end
@@ -261,7 +284,7 @@ class TSV
261
284
  end
262
285
  end
263
286
 
264
- if keep_empty and max_cols > 0
287
+ if keep_empty and max_cols > 0 and not Persistence::TSV === data
265
288
  data.each do |key, values|
266
289
  next if values =~ /__Ref:/
267
290
  new_values = values
@@ -16,6 +16,12 @@ module Resource
16
16
  namespace || File.basename(File.dirname(self))
17
17
  end
18
18
 
19
# Emits this object through YAML.quick_emit as a single plain scalar
# tagged with +taguri+.
#
# opts - options forwarded unchanged to YAML.quick_emit.
#
# NOTE(review): quick_emit looks like the legacy Syck emitter API --
# confirm it is available on the Ruby versions this code must support.
def to_yaml(opts = {})
  YAML.quick_emit(nil, opts) do |emitter|
    emitter.scalar(taguri, self, :plain)
  end
end
24
+
19
25
  def index(options = {})
20
26
  TSV.index self, options
21
27
  end
@@ -55,7 +55,7 @@ module WorkFlow
55
55
 
56
56
  def task_dependencies(dependencies)
57
57
  dependencies = [dependencies] unless Array === dependencies
58
- @dangling_dependencies = dependencies
58
+ @dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
59
59
  end
60
60
 
61
61
  def task_description(description)
@@ -0,0 +1,117 @@
1
+ require 'simplews'
2
+ require 'rbbt/util/workflow'
3
+ require 'base64'
4
+
5
# Web-service wrapper, built on SimpleWS, around the tasks of a workflow
# class. Subclasses pick the workflow with `self.klass = SomeWorkflow` and
# publish individual tasks with #export. A fixed set of job-inspection
# methods (abort, info, step, done, error, messages, load, files, file) is
# served by every instance.
#
# NOTE(review): `desc` / `param_desc` presumably document the next method
# registered with `serve` -- confirm against SimpleWS.
class WorkFlowWS < SimpleWS

  # Sets the class whose `tasks` are looked up by this service.
  def self.klass=(klass)
    @klass = klass
  end

  # Class whose `tasks` are looked up; falls back to the service class
  # itself when none has been assigned.
  def self.klass
    @klass || self
  end

  # Returns the task registered under +name+ in the workflow class.
  def task(name)
    self.class.klass.tasks[name]
  end

  # Publishes the task called +name+ as a web-service method.
  #
  # The served method takes a job name followed by the options listed in the
  # task's option summary, starts the job with `fork`, and returns its id.
  def export(name)
    exported = self.class.klass.tasks[name]

    # Only the first element of the summary is used here.
    options, _optional_options = exported.option_summary

    desc exported.description
    options.each do |option|
      param_desc option[:name] => option[:description] if option[:description]
    end
    param_desc :return => "Job Identifier"

    option_names = [:name] + options.collect{|option| option[:name]}
    # Options without a declared type are exposed as strings.
    declared_types = [:string] + options.collect{|option| option[:type] || :string}
    option_types = Hash[*option_names.zip(declared_types).flatten]

    serve name, option_names, option_types do |*args|
      task(name).job(*args).fork.id
    end
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  serve :abort, %w(task id), {:return => false, :task => :string, :id => :string} do |task, id|
    task(task).load(id).abort
    nil
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "Info hash in yaml"
  serve :info, %w(task id), {:task => :string, :id => :string} do |task, id|
    Open.read(task(task).load(id).info_file)
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "Step"
  serve :step, %w(task id), {:task => :string, :id => :string} do |task, id|
    task(task).load(id).step.to_s
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "True if job is done (error or not)"
  serve :done, %w(task id), {:return => :boolean, :task => :string, :id => :string} do |task, id|
    task(task).load(id).done?
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "True if job finished with error. Error message is the last of the messages (see 'messages' method)."
  serve :error, %w(task id), {:return => :boolean, :task => :string, :id => :string} do |task, id|
    task(task).load(id).error?
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "Messages"
  serve :messages, %w(task id), {:return => :array, :task => :string, :id => :string} do |task, id|
    task(task).load(id).messages
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "Job result in Base64"
  serve :load, %w(task id), {:return => :binary, :task => :string, :id => :string} do |task, id|
    Base64.encode64(task(task).load(id).read)
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :return => "File names"
  serve :files, %w(task id), {:return => :array, :task => :string, :id => :string} do |task, id|
    task(task).load(id).files
  end

  param_desc :task => "Task name"
  param_desc :id => "Job id"
  param_desc :file => "File name"
  param_desc :return => "File contents in Base64"
  # The block must receive all three declared parameters; without `file` the
  # body raised NameError. The payload is Base64 text, so it is declared
  # :binary like the :load method above rather than :array.
  serve :file, %w(task id file), {:return => :binary, :task => :string, :id => :string, :file => :string} do |task, id, file|
    Base64.encode64(task(task).load(id).files(file).read)
  end
end
100
+
101
+
102
# Manual smoke run, executed only when this file is launched directly:
# serves one task of the Organism workflow, writes the generated WSDL to
# /tmp/foo.wsdl and starts the server.
if $0 == __FILE__
  require 'rbbt/sources/organism/sequence'

  class SequenceWF < WorkFlowWS
    self.klass = Organism
  end

  service = SequenceWF.new
  service.export :genomic_mutations_to_genes
  Open.write('/tmp/foo.wsdl', service.wsdl)
  service.start
end
115
+
116
+
117
+
data/share/lib/R/util.R CHANGED
@@ -1,3 +1,14 @@
1
# Pipes `code` into the external command `rbbt_exec.rb - file` and captures
# what the command prints. With load = TRUE that output is handed to
# rbbt.tsv and the loaded data is returned; otherwise the raw output is
# returned (presumably a file path -- confirm against rbbt_exec.rb).
rbbt.ruby <- function(code, load = TRUE){
    result.file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
    if (!load){
        return(result.file);
    }
    data = rbbt.tsv(result.file);
    # Drops the local binding only; nothing on disk is deleted.
    rm(result.file);
    return(data);
}
11
+
1
12
  rbbt.glob <- function(d, pattern){
2
13
  d=sub("/$", '', d);
3
14
  sapply(dir(d, pattern), function(file){paste(d,file,sep="/")});
@@ -14,8 +25,8 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
14
25
  return(data);
15
26
  }
16
27
 
17
- rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
18
- data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=1, comment.char = comment.char, ...);
28
+ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
29
+ data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
19
30
  f = file(filename, 'r');
20
31
  headers = readLines(f, 1);
21
32
  if (length(grep("^#: ", headers)) > 0){
@@ -30,6 +41,13 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
30
41
  return(data);
31
42
  }
32
43
 
44
# Converts `data` to a numeric matrix with data.matrix and re-applies the
# row and column names of the input to the result.
rbbt.tsv2matrix <- function(data){
    converted <- data.matrix(data);
    colnames(converted) <- colnames(data);
    rownames(converted) <- rownames(data);
    return(converted);
}
50
+
33
51
  rbbt.tsv.write <- function(filename, data, key.field = NULL){
34
52
  if (is.null(key.field)){ key.field = "ID";}
35
53
 
@@ -55,6 +73,14 @@ rbbt.percent <- function(values){
55
73
  values=values/sum(values);
56
74
  }
57
75
 
76
# Splits `string` on every literal '|' and returns the pieces as one flat
# character vector.
rbbt.split <- function(string){
    pieces <- strsplit(string, "\\|");
    return(unlist(pieces));
}
79
+
80
# Returns `data` subset at its final position, i.e. data[length(data)].
rbbt.last <- function(data){
    last.position <- length(data);
    data[last.position];
}
83
+
58
84
  rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
59
85
  if (is.numeric){
60
86
  field.data=as.numeric(data[,field]);
@@ -89,3 +115,27 @@ rbbt.init <- function(data, new){
89
115
  }
90
116
  }
91
117
 
118
# Output of the external command `rbbt_Rutil.rb`, captured once when this
# file is sourced. rbbt.reload treats it as the path of the script to
# source. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
rbbt.this.script = system("rbbt_Rutil.rb", intern = TRUE)

# Re-sources the script named by rbbt.this.script.
rbbt.reload <- function (){
    source(rbbt.this.script)
}
123
+
124
# Parses a region of an R source file and returns it as an expression.
#
# The region runs from the first line matching '#[[:space:]]*START' to the
# first line matching '#[[:space:]]*END' (both case-insensitive, marker
# lines included). A missing START defaults to the first line, a missing
# END to the last line. An empty file yields an empty expression.
rbbt.parse <- function(filename){
    f <- file(filename, open='r');
    # Close the connection even when readLines fails.
    on.exit(close(f));
    lines <- readLines(f);

    # Without this guard lines[1:0] would yield NA, which then gets parsed.
    if (length(lines) == 0){ return(expression()); }

    # grep over the whole vector returns matching indices; [1] is the first
    # match, or NA when there is none.
    from = grep('#[[:space:]]*START', lines, ignore.case=TRUE)[1];
    to = grep('#[[:space:]]*END', lines, ignore.case=TRUE)[1];
    if (is.na(from)){from = 1}
    if (is.na(to)){to = length(lines)}

    # parse() treats each element of `text` as one line of input.
    return(parse(text=lines[from:to]));
}
135
+
136
# Calls rbbt.reload, then parses `filename` with rbbt.parse and evaluates
# the result in the global environment.
rbbt.run <- function(filename){
    rbbt.reload();
    parsed <- rbbt.parse(filename);
    eval(parsed, envir=globalenv());
}
140
+
141
+
@@ -4,19 +4,19 @@ require 'test/unit'
4
4
 
5
5
  class TestMisc < Test::Unit::TestCase
6
6
 
7
- def _test_pdf2text_example
7
+ def test_pdf2text_example
8
8
  assert PDF2Text.pdf2text(test_datafile('example.pdf')).read =~ /An Example Paper/i
9
9
  end
10
10
 
11
- def _test_pdf2text_EPAR
11
+ def test_pdf2text_EPAR
12
12
  assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
13
13
  end
14
14
 
15
- def _test_pdf2text_wrong
15
+ def test_pdf2text_wrong
16
16
  assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
17
17
  end
18
18
 
19
- def _test_string2hash
19
+ def test_string2hash
20
20
  assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
21
21
  assert(Misc.string2hash(":true")[:true] == true)
22
22
  assert(Misc.string2hash("true")["true"] == true)
@@ -27,17 +27,17 @@ class TestMisc < Test::Unit::TestCase
27
27
  assert(Misc.string2hash("a=b#c=d#:h=:j")[:h] == :j)
28
28
  end
29
29
 
30
- def _test_named_array
30
+ def test_named_array
31
31
  a = NamedArray.name([1,2,3,4], %w(a b c d))
32
32
  assert_equal(1, a['a'])
33
33
  end
34
34
 
35
- def _test_path_relative_to
35
+ def test_path_relative_to
36
36
  assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
37
37
  end
38
38
 
39
- def _test_chunk
40
- _test =<<-EOF
39
+ def test_chunk
40
+ test =<<-EOF
41
41
  This is an example file. Entries are separated by Entry
42
42
  -- Entry
43
43
  1
@@ -52,7 +52,7 @@ This is an example file. Entries are separated by Entry
52
52
  assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
53
53
  end
54
54
 
55
- def _test_hash2string
55
+ def test_hash2string
56
56
  hash = {}
57
57
  assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
58
58
 
@@ -73,14 +73,14 @@ This is an example file. Entries are separated by Entry
73
73
 
74
74
  end
75
75
 
76
- def _test_merge
76
+ def test_merge
77
77
  a = [[1],[2]]
78
78
  a = NamedArray.name a, %w(1 2)
79
79
  a.merge [3,4]
80
80
  assert_equal [1,3], a[0]
81
81
  end
82
82
 
83
- def _test_indiferent_hash
83
+ def test_indiferent_hash
84
84
  a = {:a => 1, "b" => 2}
85
85
  a.extend IndiferentHash
86
86