rbbt-util 2.0.1 → 2.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/bin/tsv.rb CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'rbbt/util/simpleopt'
4
4
 
5
- options = SOPT.parse "-h--help:-to--tsv-options*:-p--persistence"
5
+ options = SOPT.get "-h--help:-to--tsv-options*:-p--persistence"
6
6
 
7
7
  command = ARGV.shift
8
8
  file = ARGV.shift
@@ -10,5 +10,5 @@ file = ARGV.shift
10
10
  case command
11
11
  when 'cat'
12
12
  puts TSV.new(file, options["tsv-options"].merge(options["persistence"]))
13
- when '
13
+ when
14
14
 
data/bin/workflow.rb ADDED
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/util/workflow'
6
+
7
+ options = SOPT.get "-h--help:-t--target*:-n--name*"
8
+
9
+ raise "No target" unless options[:target]
10
+
11
+ file = ARGV.shift
12
+
13
+ if ARGV.empty?
14
+ data = STDIN.read
15
+ else
16
+ data = ARGV.shift
17
+ end
18
+
19
+ job = options[:name] || "Job"
20
+
21
+ puts "Runing WorkFlow in #{file} for target #{options[:target]}. Job: #{job}"
22
+ WorkFlow.load file, File.join(options[:target], job), data
23
+ puts
24
+ puts "WorkFlow done. Please find results in: #{File.join(options[:target], job)}"
@@ -101,7 +101,7 @@ module Open
101
101
  case
102
102
  when Array === grep
103
103
  TmpFile.with_file(grep * "\n", false) do |f|
104
- CMD.cmd("grep", "-F" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
104
+ CMD.cmd("grep", "-E" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
105
105
  end
106
106
  else
107
107
  CMD.cmd("grep '#{grep}' -", :in => stream, :pipe => true)
@@ -201,6 +201,7 @@ module Open
201
201
  end
202
202
 
203
203
  def self.write(file, content)
204
+ FileUtils.mkdir_p File.dirname(file)
204
205
  if String === content
205
206
  File.open(file, 'w') do |f| f.write content end
206
207
  else
@@ -123,7 +123,6 @@ module Path
123
123
  key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}.select{|f| f.namespace == namespace}
124
124
  end
125
125
 
126
-
127
126
  def filename
128
127
  self.to_s
129
128
  end
@@ -138,7 +137,9 @@ module Path
138
137
  end
139
138
 
140
139
  def produce
141
- return if File.exists? self
140
+ return self if File.exists? self
141
+
142
+ raise "No especified pkg_data for file #{ self }, cannot produce" if pkg_data.nil?
142
143
 
143
144
  Log.debug("Trying to produce '#{ self }'")
144
145
  file, producer = pkg_module.reclaim self
@@ -146,6 +147,8 @@ module Path
146
147
  raise "File #{self} has not been claimed, cannot produce" if file.nil? or producer.nil?
147
148
 
148
149
  pkg_module.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
150
+
151
+ self
149
152
  end
150
153
  end
151
154
 
@@ -21,7 +21,16 @@ module Persistence
21
21
 
22
22
  def self.get_persistence_file(file, prefix, options = {})
23
23
  name = prefix.to_s << ":" << file.to_s << ":"
24
- File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
24
+ o = {}
25
+ options.each do |k,v|
26
+ if v.inspect =~ /:0x0/
27
+ o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
28
+ else
29
+ o[k] = v
30
+ end
31
+ end
32
+
33
+ File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, o].inspect))
25
34
  end
26
35
 
27
36
  def self.get_filename(file)
@@ -12,9 +12,12 @@ module RakeHelper
12
12
  FileUtils.chdir chdir if chdir
13
13
 
14
14
  Rake::FileTask.module_eval do
15
- class << self
16
- alias_method :old_define_task, :define_task
15
+ if not self.respond_to? :old_define_task
16
+ class << self
17
+ alias_method :old_define_task, :define_task
18
+ end
17
19
  end
20
+
18
21
  def self.define_task(file, *args, &block)
19
22
  @@files ||= []
20
23
  if Hash === file
@@ -64,7 +67,11 @@ module RakeHelper
64
67
  end
65
68
  end
66
69
 
67
- load rakefile
70
+ if block_given?
71
+ yield
72
+ else
73
+ load rakefile
74
+ end
68
75
 
69
76
  task(:default) do |t|
70
77
  Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
@@ -35,7 +35,7 @@ module TmpFile
35
35
 
36
36
  result = yield(tmpfile)
37
37
 
38
- FileUtils.rm tmpfile if File.exists?(tmpfile) and erase
38
+ FileUtils.rm_rf tmpfile if File.exists?(tmpfile) and erase
39
39
 
40
40
  result
41
41
  end
@@ -139,14 +139,14 @@ class TSV
139
139
  # both have fields => list of names
140
140
  # not both have fields => nil
141
141
 
142
- # fields2add = case
143
- # when (fields2add.nil? and (other.fields.nil? or self.fields.nil?))
144
- # nil
145
- # when fields2add.nil?
146
- # other.all_fields
147
- # else
148
- # fields2add
149
- # end
142
+ # fields2add = case
143
+ # when (fields2add.nil? and (other.fields.nil? or self.fields.nil?))
144
+ # nil
145
+ # when fields2add.nil?
146
+ # other.all_fields
147
+ # else
148
+ # fields2add
149
+ # end
150
150
 
151
151
  # Determine common fields
152
152
 
@@ -171,6 +171,8 @@ class TSV
171
171
  match_source = (all_fields & match.all_fields).first
172
172
  index = match.index :target => other.key_field, :fields => match_source
173
173
  [match_source, index]
174
+ when (String === match and match == key_field)
175
+ [:key, other.index]
174
176
  when String === match
175
177
  [match, other.index]
176
178
  when Array === match
@@ -182,32 +184,42 @@ class TSV
182
184
  # through
183
185
  new = {}
184
186
  each do |key,values|
185
- source_keys = match_source == :key ? key : values[match_source]
187
+ source_keys = match_source == :key ? key : values[match_source_position]
186
188
  source_keys = [source_keys] unless Array === source_keys
187
189
  other_keys = case
188
- when index.nil?
189
- source_keys
190
- else
191
- index.values_at(*source_keys).flatten.compact
192
- end
190
+ when index.nil?
191
+ source_keys
192
+ else
193
+ index.values_at(*source_keys).flatten.compact
194
+ end
195
+
193
196
  other_keys = other_keys.collect do |other_key| match_index[other_key] end.flatten unless match_index.nil?
194
197
 
198
+
195
199
  other_values = other_keys.collect do |other_key|
196
200
  next unless other.include? other_key
197
201
  new_fields.collect do |field|
198
202
  if field == other.key_field
199
- other_key
203
+ if type == :double
204
+ [other_key]
205
+ else
206
+ other_key
207
+ end
200
208
  else
201
209
  other[other_key][field]
202
210
  end
203
211
  end
204
212
  end.compact
205
213
 
206
- if type == :double
207
- new_values = values + TSV.zip_fields(other_values)
208
- else
209
- new_values = values + TSV.zip_fields(other_values).collect{|v| v.first}
210
- end
214
+ other_values = case
215
+ when type == :double
216
+ TSV.zip_fields(other_values).collect{|v| v.flatten.uniq}
217
+ else
218
+ TSV.zip_fields(other_values).collect{|v| v.flatten.first}
219
+ end
220
+
221
+ new_values = values + other_values
222
+
211
223
  new[key] = new_values
212
224
  end
213
225
 
@@ -232,7 +244,7 @@ class TSV
232
244
  field_values[field] = []
233
245
  }
234
246
 
235
- if type == :double
247
+ if tsv.type == :double
236
248
  tsv.through do |key,entry_values|
237
249
  fields.zip(entry_values).each do |field,entry_field_values|
238
250
  field_values[field].concat entry_field_values
@@ -86,7 +86,7 @@ class TSV
86
86
 
87
87
  if key_field.nil?
88
88
  key_pos = key
89
- key_field, fields = nil
89
+ other_pos = fields
90
90
  else
91
91
  all_fields = [key_field].concat other_fields
92
92
 
@@ -158,7 +158,7 @@ class TSV
158
158
 
159
159
  next if data.include?(id) and type != :flat
160
160
 
161
- if key_field.nil?
161
+ if other_pos.nil? or (fields == nil and type == :flat)
162
162
  other_pos = (0..(parts.length - 1)).to_a
163
163
  other_pos.delete key_pos
164
164
  end
@@ -199,7 +199,7 @@ class TSV
199
199
  id = ids.shift
200
200
  ids.each do |id2| data[id2] = "__Ref:#{id}" end
201
201
 
202
- if key_field.nil?
202
+ if other_pos.nil? or (fields == nil and type == :flat)
203
203
  other_pos = (0..(parts.length - 1)).to_a
204
204
  other_pos.delete key_pos
205
205
  end
@@ -252,6 +252,8 @@ class TSV
252
252
  end
253
253
  end
254
254
 
255
+ fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
256
+ fields ||= other_fields
255
257
  [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
256
258
  end
257
259
 
data/lib/rbbt/util/tsv.rb CHANGED
@@ -102,7 +102,7 @@ class TSV
102
102
  @data = file.data
103
103
  when Persistence::TSV === file
104
104
  @data = file
105
- %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
105
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
106
106
  if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
107
107
  self.send "#{key}=".to_sym, @data.send(key.to_sym)
108
108
  end
@@ -145,7 +145,7 @@ class TSV
145
145
  end
146
146
 
147
147
  if not extra.nil?
148
- %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
148
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
149
149
  if extra.include? key.to_sym
150
150
  self.send("#{key}=".to_sym, extra[key.to_sym])
151
151
  if @data.respond_to? "#{key}=".to_sym
@@ -1,3 +1,90 @@
1
+ require 'rake'
2
+ require 'rbbt/util/rake'
3
+
1
4
  module WorkFlow
5
+ module Runner
6
+ def data
7
+ $_workflow_prereq
8
+ end
9
+
10
+ def input
11
+ $_workflow_input
12
+ end
13
+
14
+ $_workflow_default_persistence = :string
15
+ def default_persistence
16
+ $_workflow_default_persistence
17
+ end
18
+
19
+ def default_persistence=(type)
20
+ $_workflow_default_persistence = type
21
+ end
22
+
23
+ def step(step_name, options = nil)
24
+ dependencies, options = case
25
+ when ((String === options or Symbol === options) and %w(string marshal tsv tsv_string).include? options.to_s)
26
+ [nil, {:persistence_type => options}]
27
+ when Hash === options
28
+ [nil, options]
29
+ else
30
+ [options, {}]
31
+ end
32
+
33
+ options = Misc.add_defaults options, :persistence_type => default_persistence
34
+ persistence_type = Misc.process_options options, :persistence_type
35
+ dependencies = Misc.process_options options, :dependencies if options.include? :dependencies
36
+
37
+ re = Regexp.new(/(?:^|\/)#{Regexp.quote step_name.to_s}\/.*$/)
2
38
 
39
+ @last_step = nil unless defined? @last_step
40
+ @last_persistence_type = nil unless defined? @last_persistence_type
41
+
42
+ if dependencies.nil? && ! @last_step.nil?
43
+ dependencies = @last_step
44
+ end
45
+ @last_step = step_name
46
+
47
+ # Generate the Hash definition
48
+ rule_def = case
49
+ when dependencies.nil?
50
+ re
51
+ when String === dependencies || Symbol === dependencies
52
+ {re => lambda{|filename| filename.sub(step_name.to_s, dependencies.to_s) }}
53
+ when Array === dependencies
54
+ {re => lambda{|filename| dependencies.collect{|dep| filename.sub(step_name.to_s, dep.to_s) } }}
55
+ when Proc === dependencies
56
+ {re => dependencies}
57
+ end
58
+
59
+ @last_step = step_name
60
+ last_persistence_type, @last_persistence_type = @last_persistence_type, persistence_type
61
+
62
+ rule rule_def do |t|
63
+ Persistence.persist(t.name, "", persistence_type, :persistence_file => t.name) do
64
+ $_workflow_prereq = case
65
+ when (t.prerequisites.nil? or (Array === t.prerequisites and t.prerequisites.empty?))
66
+ nil
67
+ else
68
+ Persistence.persist(t.prerequisites.first, "", last_persistence_type, :persistence_file => t.prerequisites.first) do
69
+ raise "Error, this file should be produced already"
70
+ end
71
+ end
72
+ yield
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ def self.run(file = :default, workflow_input = nil, &block)
79
+ $_workflow_input = workflow_input
80
+ RakeHelper.run("Runtime", file) do
81
+ yield
82
+ end
83
+ end
84
+
85
+ def self.load(wf_file, file = :default, workflow_input = nil)
86
+ $_workflow_input = workflow_input
87
+ RakeHelper.run(wf_file, file)
88
+ end
3
89
  end
90
+
@@ -11,7 +11,7 @@ class TestR < Test::Unit::TestCase
11
11
  tsv2 = tsv.R <<-EOF
12
12
  data = data + 1
13
13
  EOF
14
- puts tsv2.to_s
14
+ assert_equal "2", tsv2["a"].first
15
15
  end
16
16
  end
17
17
 
@@ -13,11 +13,6 @@ class TestPKGData < Test::Unit::TestCase
13
13
  end
14
14
  end
15
15
 
16
- def test_path
17
- assert_equal File.join(Rbbt.datadir, 'Organism/Hsa'), Rbbt.files.Organism.Hsa
18
- Rbbt.files.Organism.Hsa.identifiers.produce
19
- end
20
-
21
16
  def test_claim_proc
22
17
  begin
23
18
  assert_nil Rbbt.reclaim(Rbbt.files.foo)
@@ -6,9 +6,73 @@ require 'rbbt/util/workflow'
6
6
 
7
7
  class TestWorkflow < Test::Unit::TestCase
8
8
 
9
- def test_true
10
- assert true
9
+ def workflow
10
+ TmpFile.with_file do |dir|
11
+ old_pdw = FileUtils.pwd
12
+ begin
13
+ FileUtils.mkdir dir
14
+ cd dir
15
+ yield dir
16
+ ensure
17
+ cd old_pdw
18
+ end
19
+ end
11
20
  end
12
21
 
22
+ def _test_run
23
+ workflow do |dir|
24
+ WorkFlow.run do
25
+ file :foo do |t|
26
+ touch t.name
27
+ end
28
+ end
29
+ assert File.exists? File.join(dir, 'foo')
30
+ end
31
+ end
32
+
33
+ def _test_step
34
+ jobid = 'jobid'
35
+ target_step = 'last'
36
+ workflow do |dir|
37
+
38
+ WorkFlow.run(File.join(target_step, jobid)) do
39
+ self.extend WorkFlow::Runner
40
+
41
+ step :first do
42
+ "Test"
43
+ end
44
+
45
+ step :last do
46
+ data.reverse
47
+ end
48
+ end
49
+ assert File.exists? File.join(dir, target_step, 'jobid')
50
+ assert_equal 'tseT', Open.read(File.join(dir, target_step, 'jobid'))
51
+ end
52
+ end
53
+
54
+ def test_input
55
+ jobid = 'jobid'
56
+ target_step = 'last'
57
+ message = "Message"
58
+
59
+ workflow do |dir|
60
+
61
+ WorkFlow.run(File.join(target_step, jobid), message) do
62
+ self.extend WorkFlow::Runner
63
+
64
+ step :first, :marshal do
65
+ input
66
+ end
67
+
68
+ step :last do
69
+ data.reverse
70
+ end
71
+ end
72
+
73
+ assert File.exists? File.join(dir, target_step, 'jobid')
74
+ assert_equal message.reverse, Open.read(File.join(dir, target_step, 'jobid'))
75
+ end
76
+ end
13
77
  end
14
78
 
@@ -206,9 +206,6 @@ row1 e
206
206
  row2 E
207
207
  EOF
208
208
 
209
-
210
- require 'rbbt/sources/organism'
211
-
212
209
  Rbbt.claim "data", StringIO.new(content1), "Test1"
213
210
  Rbbt.claim "data", StringIO.new(content2), "Test2"
214
211
  Rbbt.claim "identifiers", StringIO.new(content_index), "Test2"
@@ -218,7 +215,7 @@ row2 E
218
215
  tsv1 = Rbbt.files.Test1.data.tsv :double, :sep => /\s+/
219
216
  tsv2 = Rbbt.files.Test2.data.tsv :double, :sep => /\s+/
220
217
 
221
- tsv2.identifiers = Rbbt.files.Test2.identifiers
218
+ tsv2.identifiers = Rbbt.files.Test2.identifiers.produce
222
219
 
223
220
  tsv1.attach tsv2, "OtherID", :in_namespace => false
224
221
 
@@ -51,7 +51,6 @@ row3 A a|B Id4
51
51
  TmpFile.with_file(content) do |filename|
52
52
  tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true)
53
53
  index = tsv.index(:case_insensitive => false, :order => true)
54
- ddd index
55
54
  assert_equal "Id1", index['a'].first
56
55
  assert_equal "Id3", index['A'].first
57
56
  assert_equal "OtherID", index.fields.first
@@ -67,14 +66,16 @@ row3 A a|B Id4
67
66
 
68
67
  #{{{ Test Attach
69
68
 
70
- def ___test_smart_merge_single
69
+ def test_smart_merge_single
71
70
  content1 =<<-EOF
71
+ #: :case_insensitive=false
72
72
  #Id ValueA ValueB
73
73
  row1 a|aa|aaa b
74
74
  row2 A B
75
75
  EOF
76
76
 
77
77
  content2 =<<-EOF
78
+ #: :case_insensitive=false
78
79
  #ValueC ValueB OtherID
79
80
  c|cc|ccc b Id1|Id2
80
81
  C B Id3
@@ -91,9 +92,10 @@ C B Id3
91
92
 
92
93
  tsv1 = tsv1.smart_merge tsv2, "ValueB"
93
94
 
94
- assert_equal "C", tsv1["row2"]["ValueC"]
95
- assert %w(c cc ccc).include? tsv1["row1"]["ValueC"]
96
- assert_equal "Id1", tsv1["row1"]["OtherID"]
95
+ assert_equal "C", tsv1["row2"]["ValueC"].first
96
+ assert %w(c cc ccc).include? tsv1["row1"]["ValueC"].first
97
+ ddd tsv1
98
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"].sort
97
99
  end
98
100
 
99
101
  def test_index_to_key
@@ -49,6 +49,21 @@ row2 A B
49
49
  end
50
50
  end
51
51
 
52
+ def test_unven_flat
53
+ content =<<-EOF
54
+ row1 a b
55
+ row2 A B C
56
+ EOF
57
+
58
+ TmpFile.with_file(content) do |filename|
59
+ data = {}
60
+ data, extra = TSV.parse(File.open(filename), :type => :flat, :sep => /\s+/)
61
+ assert data["row2"].include? "C"
62
+ end
63
+ end
64
+
65
+
66
+
52
67
  def test_options_line
53
68
  content =<<-EOF
54
69
  #: :sep=/\\s+/#:case_insensitive=true
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 2
8
- - 0
9
8
  - 1
10
- version: 2.0.1
9
+ - 0
10
+ version: 2.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-02-09 00:00:00 +01:00
18
+ date: 2011-02-10 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -93,6 +93,7 @@ email: miguel.vazquez@fdi.ucm.es
93
93
  executables:
94
94
  - tsv.rb
95
95
  - tchash.rb
96
+ - workflow.rb
96
97
  extensions: []
97
98
 
98
99
  extra_rdoc_files:
@@ -160,6 +161,7 @@ files:
160
161
  - test/test_rbbt.rb
161
162
  - bin/tsv.rb
162
163
  - bin/tchash.rb
164
+ - bin/workflow.rb
163
165
  has_rdoc: true
164
166
  homepage: http://github.com/mikisvaz/rbbt-util
165
167
  licenses: []