rbbt-util 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/tsv.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'rbbt/util/simpleopt'
4
4
 
5
- options = SOPT.parse "-h--help:-to--tsv-options*:-p--persistence"
5
+ options = SOPT.get "-h--help:-to--tsv-options*:-p--persistence"
6
6
 
7
7
  command = ARGV.shift
8
8
  file = ARGV.shift
@@ -10,5 +10,5 @@ file = ARGV.shift
10
10
  case command
11
11
  when 'cat'
12
12
  puts TSV.new(file, options["tsv-options"].merge(options["persistence"]))
13
- when '
13
+ when
14
14
 
data/bin/workflow.rb ADDED
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/util/workflow'
6
+
7
+ options = SOPT.get "-h--help:-t--target*:-n--name*"
8
+
9
+ raise "No target" unless options[:target]
10
+
11
+ file = ARGV.shift
12
+
13
+ if ARGV.empty?
14
+ data = STDIN.read
15
+ else
16
+ data = ARGV.shift
17
+ end
18
+
19
+ job = options[:name] || "Job"
20
+
21
+ puts "Runing WorkFlow in #{file} for target #{options[:target]}. Job: #{job}"
22
+ WorkFlow.load file, File.join(options[:target], job), data
23
+ puts
24
+ puts "WorkFlow done. Please find results in: #{File.join(options[:target], job)}"
@@ -101,7 +101,7 @@ module Open
101
101
  case
102
102
  when Array === grep
103
103
  TmpFile.with_file(grep * "\n", false) do |f|
104
- CMD.cmd("grep", "-F" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
104
+ CMD.cmd("grep", "-E" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
105
105
  end
106
106
  else
107
107
  CMD.cmd("grep '#{grep}' -", :in => stream, :pipe => true)
@@ -201,6 +201,7 @@ module Open
201
201
  end
202
202
 
203
203
  def self.write(file, content)
204
+ FileUtils.mkdir_p File.dirname(file)
204
205
  if String === content
205
206
  File.open(file, 'w') do |f| f.write content end
206
207
  else
@@ -123,7 +123,6 @@ module Path
123
123
  key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}.select{|f| f.namespace == namespace}
124
124
  end
125
125
 
126
-
127
126
  def filename
128
127
  self.to_s
129
128
  end
@@ -138,7 +137,9 @@ module Path
138
137
  end
139
138
 
140
139
  def produce
141
- return if File.exists? self
140
+ return self if File.exists? self
141
+
142
+ raise "No especified pkg_data for file #{ self }, cannot produce" if pkg_data.nil?
142
143
 
143
144
  Log.debug("Trying to produce '#{ self }'")
144
145
  file, producer = pkg_module.reclaim self
@@ -146,6 +147,8 @@ module Path
146
147
  raise "File #{self} has not been claimed, cannot produce" if file.nil? or producer.nil?
147
148
 
148
149
  pkg_module.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
150
+
151
+ self
149
152
  end
150
153
  end
151
154
 
@@ -21,7 +21,16 @@ module Persistence
21
21
 
22
22
  def self.get_persistence_file(file, prefix, options = {})
23
23
  name = prefix.to_s << ":" << file.to_s << ":"
24
- File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
24
+ o = {}
25
+ options.each do |k,v|
26
+ if v.inspect =~ /:0x0/
27
+ o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
28
+ else
29
+ o[k] = v
30
+ end
31
+ end
32
+
33
+ File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, o].inspect))
25
34
  end
26
35
 
27
36
  def self.get_filename(file)
@@ -12,9 +12,12 @@ module RakeHelper
12
12
  FileUtils.chdir chdir if chdir
13
13
 
14
14
  Rake::FileTask.module_eval do
15
- class << self
16
- alias_method :old_define_task, :define_task
15
+ if not self.respond_to? :old_define_task
16
+ class << self
17
+ alias_method :old_define_task, :define_task
18
+ end
17
19
  end
20
+
18
21
  def self.define_task(file, *args, &block)
19
22
  @@files ||= []
20
23
  if Hash === file
@@ -64,7 +67,11 @@ module RakeHelper
64
67
  end
65
68
  end
66
69
 
67
- load rakefile
70
+ if block_given?
71
+ yield
72
+ else
73
+ load rakefile
74
+ end
68
75
 
69
76
  task(:default) do |t|
70
77
  Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
@@ -35,7 +35,7 @@ module TmpFile
35
35
 
36
36
  result = yield(tmpfile)
37
37
 
38
- FileUtils.rm tmpfile if File.exists?(tmpfile) and erase
38
+ FileUtils.rm_rf tmpfile if File.exists?(tmpfile) and erase
39
39
 
40
40
  result
41
41
  end
@@ -139,14 +139,14 @@ class TSV
139
139
  # both have fields => list of names
140
140
  # not both have fields => nil
141
141
 
142
- # fields2add = case
143
- # when (fields2add.nil? and (other.fields.nil? or self.fields.nil?))
144
- # nil
145
- # when fields2add.nil?
146
- # other.all_fields
147
- # else
148
- # fields2add
149
- # end
142
+ # fields2add = case
143
+ # when (fields2add.nil? and (other.fields.nil? or self.fields.nil?))
144
+ # nil
145
+ # when fields2add.nil?
146
+ # other.all_fields
147
+ # else
148
+ # fields2add
149
+ # end
150
150
 
151
151
  # Determine common fields
152
152
 
@@ -171,6 +171,8 @@ class TSV
171
171
  match_source = (all_fields & match.all_fields).first
172
172
  index = match.index :target => other.key_field, :fields => match_source
173
173
  [match_source, index]
174
+ when (String === match and match == key_field)
175
+ [:key, other.index]
174
176
  when String === match
175
177
  [match, other.index]
176
178
  when Array === match
@@ -182,32 +184,42 @@ class TSV
182
184
  # through
183
185
  new = {}
184
186
  each do |key,values|
185
- source_keys = match_source == :key ? key : values[match_source]
187
+ source_keys = match_source == :key ? key : values[match_source_position]
186
188
  source_keys = [source_keys] unless Array === source_keys
187
189
  other_keys = case
188
- when index.nil?
189
- source_keys
190
- else
191
- index.values_at(*source_keys).flatten.compact
192
- end
190
+ when index.nil?
191
+ source_keys
192
+ else
193
+ index.values_at(*source_keys).flatten.compact
194
+ end
195
+
193
196
  other_keys = other_keys.collect do |other_key| match_index[other_key] end.flatten unless match_index.nil?
194
197
 
198
+
195
199
  other_values = other_keys.collect do |other_key|
196
200
  next unless other.include? other_key
197
201
  new_fields.collect do |field|
198
202
  if field == other.key_field
199
- other_key
203
+ if type == :double
204
+ [other_key]
205
+ else
206
+ other_key
207
+ end
200
208
  else
201
209
  other[other_key][field]
202
210
  end
203
211
  end
204
212
  end.compact
205
213
 
206
- if type == :double
207
- new_values = values + TSV.zip_fields(other_values)
208
- else
209
- new_values = values + TSV.zip_fields(other_values).collect{|v| v.first}
210
- end
214
+ other_values = case
215
+ when type == :double
216
+ TSV.zip_fields(other_values).collect{|v| v.flatten.uniq}
217
+ else
218
+ TSV.zip_fields(other_values).collect{|v| v.flatten.first}
219
+ end
220
+
221
+ new_values = values + other_values
222
+
211
223
  new[key] = new_values
212
224
  end
213
225
 
@@ -232,7 +244,7 @@ class TSV
232
244
  field_values[field] = []
233
245
  }
234
246
 
235
- if type == :double
247
+ if tsv.type == :double
236
248
  tsv.through do |key,entry_values|
237
249
  fields.zip(entry_values).each do |field,entry_field_values|
238
250
  field_values[field].concat entry_field_values
@@ -86,7 +86,7 @@ class TSV
86
86
 
87
87
  if key_field.nil?
88
88
  key_pos = key
89
- key_field, fields = nil
89
+ other_pos = fields
90
90
  else
91
91
  all_fields = [key_field].concat other_fields
92
92
 
@@ -158,7 +158,7 @@ class TSV
158
158
 
159
159
  next if data.include?(id) and type != :flat
160
160
 
161
- if key_field.nil?
161
+ if other_pos.nil? or (fields == nil and type == :flat)
162
162
  other_pos = (0..(parts.length - 1)).to_a
163
163
  other_pos.delete key_pos
164
164
  end
@@ -199,7 +199,7 @@ class TSV
199
199
  id = ids.shift
200
200
  ids.each do |id2| data[id2] = "__Ref:#{id}" end
201
201
 
202
- if key_field.nil?
202
+ if other_pos.nil? or (fields == nil and type == :flat)
203
203
  other_pos = (0..(parts.length - 1)).to_a
204
204
  other_pos.delete key_pos
205
205
  end
@@ -252,6 +252,8 @@ class TSV
252
252
  end
253
253
  end
254
254
 
255
+ fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
256
+ fields ||= other_fields
255
257
  [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
256
258
  end
257
259
 
data/lib/rbbt/util/tsv.rb CHANGED
@@ -102,7 +102,7 @@ class TSV
102
102
  @data = file.data
103
103
  when Persistence::TSV === file
104
104
  @data = file
105
- %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
105
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
106
106
  if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
107
107
  self.send "#{key}=".to_sym, @data.send(key.to_sym)
108
108
  end
@@ -145,7 +145,7 @@ class TSV
145
145
  end
146
146
 
147
147
  if not extra.nil?
148
- %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
148
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
149
149
  if extra.include? key.to_sym
150
150
  self.send("#{key}=".to_sym, extra[key.to_sym])
151
151
  if @data.respond_to? "#{key}=".to_sym
@@ -1,3 +1,90 @@
1
+ require 'rake'
2
+ require 'rbbt/util/rake'
3
+
1
4
  module WorkFlow
5
+ module Runner
6
+ def data
7
+ $_workflow_prereq
8
+ end
9
+
10
+ def input
11
+ $_workflow_input
12
+ end
13
+
14
+ $_workflow_default_persistence = :string
15
+ def default_persistence
16
+ $_workflow_default_persistence
17
+ end
18
+
19
+ def default_persistence=(type)
20
+ $_workflow_default_persistence = type
21
+ end
22
+
23
+ def step(step_name, options = nil)
24
+ dependencies, options = case
25
+ when ((String === options or Symbol === options) and %w(string marshal tsv tsv_string).include? options.to_s)
26
+ [nil, {:persistence_type => options}]
27
+ when Hash === options
28
+ [nil, options]
29
+ else
30
+ [options, {}]
31
+ end
32
+
33
+ options = Misc.add_defaults options, :persistence_type => default_persistence
34
+ persistence_type = Misc.process_options options, :persistence_type
35
+ dependencies = Misc.process_options options, :dependencies if options.include? :dependencies
36
+
37
+ re = Regexp.new(/(?:^|\/)#{Regexp.quote step_name.to_s}\/.*$/)
2
38
 
39
+ @last_step = nil unless defined? @last_step
40
+ @last_persistence_type = nil unless defined? @last_persistence_type
41
+
42
+ if dependencies.nil? && ! @last_step.nil?
43
+ dependencies = @last_step
44
+ end
45
+ @last_step = step_name
46
+
47
+ # Generate the Hash definition
48
+ rule_def = case
49
+ when dependencies.nil?
50
+ re
51
+ when String === dependencies || Symbol === dependencies
52
+ {re => lambda{|filename| filename.sub(step_name.to_s, dependencies.to_s) }}
53
+ when Array === dependencies
54
+ {re => lambda{|filename| dependencies.collect{|dep| filename.sub(step_name.to_s, dep.to_s) } }}
55
+ when Proc === dependencies
56
+ {re => dependencies}
57
+ end
58
+
59
+ @last_step = step_name
60
+ last_persistence_type, @last_persistence_type = @last_persistence_type, persistence_type
61
+
62
+ rule rule_def do |t|
63
+ Persistence.persist(t.name, "", persistence_type, :persistence_file => t.name) do
64
+ $_workflow_prereq = case
65
+ when (t.prerequisites.nil? or (Array === t.prerequisites and t.prerequisites.empty?))
66
+ nil
67
+ else
68
+ Persistence.persist(t.prerequisites.first, "", last_persistence_type, :persistence_file => t.prerequisites.first) do
69
+ raise "Error, this file should be produced already"
70
+ end
71
+ end
72
+ yield
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ def self.run(file = :default, workflow_input = nil, &block)
79
+ $_workflow_input = workflow_input
80
+ RakeHelper.run("Runtime", file) do
81
+ yield
82
+ end
83
+ end
84
+
85
+ def self.load(wf_file, file = :default, workflow_input = nil)
86
+ $_workflow_input = workflow_input
87
+ RakeHelper.run(wf_file, file)
88
+ end
3
89
  end
90
+
@@ -11,7 +11,7 @@ class TestR < Test::Unit::TestCase
11
11
  tsv2 = tsv.R <<-EOF
12
12
  data = data + 1
13
13
  EOF
14
- puts tsv2.to_s
14
+ assert_equal "2", tsv2["a"].first
15
15
  end
16
16
  end
17
17
 
@@ -13,11 +13,6 @@ class TestPKGData < Test::Unit::TestCase
13
13
  end
14
14
  end
15
15
 
16
- def test_path
17
- assert_equal File.join(Rbbt.datadir, 'Organism/Hsa'), Rbbt.files.Organism.Hsa
18
- Rbbt.files.Organism.Hsa.identifiers.produce
19
- end
20
-
21
16
  def test_claim_proc
22
17
  begin
23
18
  assert_nil Rbbt.reclaim(Rbbt.files.foo)
@@ -6,9 +6,73 @@ require 'rbbt/util/workflow'
6
6
 
7
7
  class TestWorkflow < Test::Unit::TestCase
8
8
 
9
- def test_true
10
- assert true
9
+ def workflow
10
+ TmpFile.with_file do |dir|
11
+ old_pdw = FileUtils.pwd
12
+ begin
13
+ FileUtils.mkdir dir
14
+ cd dir
15
+ yield dir
16
+ ensure
17
+ cd old_pdw
18
+ end
19
+ end
11
20
  end
12
21
 
22
+ def _test_run
23
+ workflow do |dir|
24
+ WorkFlow.run do
25
+ file :foo do |t|
26
+ touch t.name
27
+ end
28
+ end
29
+ assert File.exists? File.join(dir, 'foo')
30
+ end
31
+ end
32
+
33
+ def _test_step
34
+ jobid = 'jobid'
35
+ target_step = 'last'
36
+ workflow do |dir|
37
+
38
+ WorkFlow.run(File.join(target_step, jobid)) do
39
+ self.extend WorkFlow::Runner
40
+
41
+ step :first do
42
+ "Test"
43
+ end
44
+
45
+ step :last do
46
+ data.reverse
47
+ end
48
+ end
49
+ assert File.exists? File.join(dir, target_step, 'jobid')
50
+ assert_equal 'tseT', Open.read(File.join(dir, target_step, 'jobid'))
51
+ end
52
+ end
53
+
54
+ def test_input
55
+ jobid = 'jobid'
56
+ target_step = 'last'
57
+ message = "Message"
58
+
59
+ workflow do |dir|
60
+
61
+ WorkFlow.run(File.join(target_step, jobid), message) do
62
+ self.extend WorkFlow::Runner
63
+
64
+ step :first, :marshal do
65
+ input
66
+ end
67
+
68
+ step :last do
69
+ data.reverse
70
+ end
71
+ end
72
+
73
+ assert File.exists? File.join(dir, target_step, 'jobid')
74
+ assert_equal message.reverse, Open.read(File.join(dir, target_step, 'jobid'))
75
+ end
76
+ end
13
77
  end
14
78
 
@@ -206,9 +206,6 @@ row1 e
206
206
  row2 E
207
207
  EOF
208
208
 
209
-
210
- require 'rbbt/sources/organism'
211
-
212
209
  Rbbt.claim "data", StringIO.new(content1), "Test1"
213
210
  Rbbt.claim "data", StringIO.new(content2), "Test2"
214
211
  Rbbt.claim "identifiers", StringIO.new(content_index), "Test2"
@@ -218,7 +215,7 @@ row2 E
218
215
  tsv1 = Rbbt.files.Test1.data.tsv :double, :sep => /\s+/
219
216
  tsv2 = Rbbt.files.Test2.data.tsv :double, :sep => /\s+/
220
217
 
221
- tsv2.identifiers = Rbbt.files.Test2.identifiers
218
+ tsv2.identifiers = Rbbt.files.Test2.identifiers.produce
222
219
 
223
220
  tsv1.attach tsv2, "OtherID", :in_namespace => false
224
221
 
@@ -51,7 +51,6 @@ row3 A a|B Id4
51
51
  TmpFile.with_file(content) do |filename|
52
52
  tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true)
53
53
  index = tsv.index(:case_insensitive => false, :order => true)
54
- ddd index
55
54
  assert_equal "Id1", index['a'].first
56
55
  assert_equal "Id3", index['A'].first
57
56
  assert_equal "OtherID", index.fields.first
@@ -67,14 +66,16 @@ row3 A a|B Id4
67
66
 
68
67
  #{{{ Test Attach
69
68
 
70
- def ___test_smart_merge_single
69
+ def test_smart_merge_single
71
70
  content1 =<<-EOF
71
+ #: :case_insensitive=false
72
72
  #Id ValueA ValueB
73
73
  row1 a|aa|aaa b
74
74
  row2 A B
75
75
  EOF
76
76
 
77
77
  content2 =<<-EOF
78
+ #: :case_insensitive=false
78
79
  #ValueC ValueB OtherID
79
80
  c|cc|ccc b Id1|Id2
80
81
  C B Id3
@@ -91,9 +92,10 @@ C B Id3
91
92
 
92
93
  tsv1 = tsv1.smart_merge tsv2, "ValueB"
93
94
 
94
- assert_equal "C", tsv1["row2"]["ValueC"]
95
- assert %w(c cc ccc).include? tsv1["row1"]["ValueC"]
96
- assert_equal "Id1", tsv1["row1"]["OtherID"]
95
+ assert_equal "C", tsv1["row2"]["ValueC"].first
96
+ assert %w(c cc ccc).include? tsv1["row1"]["ValueC"].first
97
+ ddd tsv1
98
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"].sort
97
99
  end
98
100
 
99
101
  def test_index_to_key
@@ -49,6 +49,21 @@ row2 A B
49
49
  end
50
50
  end
51
51
 
52
+ def test_unven_flat
53
+ content =<<-EOF
54
+ row1 a b
55
+ row2 A B C
56
+ EOF
57
+
58
+ TmpFile.with_file(content) do |filename|
59
+ data = {}
60
+ data, extra = TSV.parse(File.open(filename), :type => :flat, :sep => /\s+/)
61
+ assert data["row2"].include? "C"
62
+ end
63
+ end
64
+
65
+
66
+
52
67
  def test_options_line
53
68
  content =<<-EOF
54
69
  #: :sep=/\\s+/#:case_insensitive=true
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 2
8
- - 0
9
8
  - 1
10
- version: 2.0.1
9
+ - 0
10
+ version: 2.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-02-09 00:00:00 +01:00
18
+ date: 2011-02-10 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ email: miguel.vazquez@fdi.ucm.es
93
93
  executables:
94
94
  - tsv.rb
95
95
  - tchash.rb
96
+ - workflow.rb
96
97
  extensions: []
97
98
 
98
99
  extra_rdoc_files:
@@ -160,6 +161,7 @@ files:
160
161
  - test/test_rbbt.rb
161
162
  - bin/tsv.rb
162
163
  - bin/tchash.rb
164
+ - bin/workflow.rb
163
165
  has_rdoc: true
164
166
  homepage: http://github.com/mikisvaz/rbbt-util
165
167
  licenses: []