rbbt-util 5.2.4 → 5.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +8 -8
  2. data/bin/rbbt +23 -10
  3. data/bin/rbbt_monitor.rb +8 -8
  4. data/lib/rbbt/annotations.rb +22 -1
  5. data/lib/rbbt/annotations/util.rb +1 -1
  6. data/lib/rbbt/entity.rb +162 -0
  7. data/lib/rbbt/fix_width_table.rb +7 -0
  8. data/lib/rbbt/persist.rb +16 -9
  9. data/lib/rbbt/persist/tsv.rb +14 -8
  10. data/lib/rbbt/resource.rb +1 -6
  11. data/lib/rbbt/resource/path.rb +23 -27
  12. data/lib/rbbt/tsv.rb +33 -4
  13. data/lib/rbbt/tsv/accessor.rb +100 -57
  14. data/lib/rbbt/tsv/attach.rb +3 -1
  15. data/lib/rbbt/tsv/attach/util.rb +34 -10
  16. data/lib/rbbt/tsv/index.rb +12 -3
  17. data/lib/rbbt/tsv/manipulate.rb +25 -1
  18. data/lib/rbbt/tsv/parser.rb +1 -0
  19. data/lib/rbbt/util/R.rb +36 -6
  20. data/lib/rbbt/util/cmd.rb +2 -1
  21. data/lib/rbbt/util/color.rb +250 -0
  22. data/lib/rbbt/util/colorize.rb +57 -0
  23. data/lib/rbbt/util/misc.rb +57 -19
  24. data/lib/rbbt/util/named_array.rb +66 -14
  25. data/lib/rbbt/util/open.rb +134 -10
  26. data/lib/rbbt/util/semaphore.rb +71 -0
  27. data/lib/rbbt/workflow.rb +34 -7
  28. data/lib/rbbt/workflow/accessor.rb +12 -8
  29. data/lib/rbbt/workflow/step.rb +44 -28
  30. data/lib/rbbt/workflow/usage.rb +3 -0
  31. data/share/lib/R/util.R +31 -0
  32. data/share/rbbt_commands/app/start +5 -4
  33. data/share/rbbt_commands/study/task +222 -0
  34. data/share/rbbt_commands/tsv/attach +13 -0
  35. data/share/rbbt_commands/tsv/change_id +15 -0
  36. data/share/rbbt_commands/tsv/info +3 -1
  37. data/share/rbbt_commands/workflow/task +14 -15
  38. data/test/rbbt/test_entity.rb +221 -0
  39. data/test/rbbt/test_tsv.rb +2 -1
  40. data/test/rbbt/test_workflow.rb +0 -2
  41. data/test/rbbt/tsv/test_accessor.rb +2 -2
  42. data/test/rbbt/util/test_R.rb +9 -2
  43. data/test/rbbt/util/test_colorize.rb +12 -0
  44. data/test/rbbt/util/test_misc.rb +0 -5
  45. data/test/rbbt/util/test_open.rb +31 -0
  46. data/test/rbbt/workflow/test_step.rb +32 -0
  47. metadata +13 -2
@@ -0,0 +1,57 @@
1
+ require 'rbbt/util/color'
2
+
3
# Helpers that map values to colors: a two-color gradient for numeric data
# and a repeating palette for categorical data.
module Colorize
  # Resolves a color specification into a hex string.
  #
  # color - a hex string (optionally prefixed with '#'), or one of the
  #         names "white" / "black" given as a String or Symbol.
  #
  # Returns the hex string, or nil when the name is not recognized.
  def self.from_name(color)
    return color if color =~ /^#?[0-9A-F]+$/i
    case color.to_s
    when "white"
      '#fff'
    when "black"
      '#000'
    end
  end

  # Assigns each value a color between +start+ and +eend+ according to its
  # position between the minimum and maximum of +array+.
  #
  # array - values; each one is converted with to_f.
  # start - color (see from_name) used for the minimum value.
  # eend  - color (see from_name) used for the maximum value.
  #
  # Returns one blended color per input value, in input order. An empty
  # input gives an empty result; when all values are equal every value gets
  # the start color (ratio 0.0) instead of dividing by a zero range.
  def self.continuous(array, start = :white, eend = :black)
    start_color = Color.new from_name(start)
    end_color = Color.new from_name(eend)

    array = array.collect{|v| v.to_f}
    return [] if array.empty?

    min = array.min
    range = array.max - min

    array.collect do |v|
      ratio = range == 0 ? 0.0 : (v - min) / range
      start_color.blend end_color, ratio
    end
  end

  # Assigns one palette color to each distinct value of +array+.
  #
  # The palette is read from Rbbt.share.color["diverging_colors.hex"]. When
  # there are more distinct values than palette entries, copies of the
  # palette darkened by 0.2 are appended; the number of copies is derived
  # from the actual palette length.
  #
  # Returns the colors for +array+, in input order (equal values share a
  # color).
  def self.distinct(array)
    colors = Rbbt.share.color["diverging_colors.hex"].list.collect{|c| Color.new c}

    values = array.uniq
    times = colors.empty? ? 0 : values.length / colors.length

    all_colors = colors.dup
    times.times do
      all_colors.concat colors.collect{|n| n.darken(0.2) }
    end

    # Build the mapping from pairs directly so that values which are
    # themselves arrays are not flattened away.
    value_color = Hash[values.zip(all_colors)]

    value_color.values_at(*array)
  end

  # Builds a key => color mapping for a TSV-like object (responding to
  # values, keys and type).
  #
  # Values are colored with +continuous+ when the first value is an Integer
  # or converts to a non-zero float, and with +distinct+ otherwise.
  def self.tsv(tsv)
    values = tsv.values.flatten
    # Integer covers what Fixnum used to match; Fixnum no longer exists in
    # current Ruby versions.
    if Integer === values.first or (values.first.to_f != 0 and values[0] != "0")
      value_colors = Misc.process_to_hash(values){continuous(values)}
    else
      value_colors = Misc.process_to_hash(values){distinct(values)}
    end

    if tsv.type == :single
      Hash[*tsv.keys.zip(value_colors.values_at(*values)).flatten]
    else
      # NOTE(review): +values+ is already flattened here, so keys are paired
      # with individual values rather than with rows - confirm the intended
      # result for multi-valued TSV types.
      Hash[*tsv.keys.zip(values.collect{|vs| value_colors.values_at(*vs)}).flatten]
    end
  end
end
@@ -22,6 +22,7 @@ module Misc
22
22
  def self.correct_icgc_mutation(pos, ref, mut_str)
23
23
  mut = mut_str
24
24
  mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
25
+ mut = "+" << mut if ref == '-'
25
26
  [pos, [mut]]
26
27
  end
27
28
 
@@ -143,9 +144,13 @@ module Misc
143
144
  end
144
145
 
145
146
  def self.sample(ary, size, replacement = false)
146
- total = ary.length
147
- p = random_sample_in_range(total, size)
148
- ary.values_at *p
147
+ if ary.respond_to? :sample
148
+ ary.sample size
149
+ else
150
+ total = ary.length
151
+ p = random_sample_in_range(total, size)
152
+ ary.values_at *p
153
+ end
149
154
  end
150
155
 
151
156
  Log2Multiplier = 1.0 / Math.log(2.0)
@@ -160,13 +165,14 @@ module Misc
160
165
 
161
166
  dup_array = options.delete :dup_array
162
167
 
163
- if Entity.respond_to?(:formats) and Entity.formats.include? field
168
+ if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
164
169
  params = options.dup
165
170
 
166
171
  params[:format] ||= params.delete "format"
167
172
  params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
168
173
 
169
- entity = Entity.formats[field].setup(
174
+ mod = Entity === field ? field : Entity.formats[field]
175
+ entity = mod.setup(
170
176
  ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
171
177
  params
172
178
  )
@@ -414,15 +420,6 @@ end
414
420
  for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
415
421
  for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
416
422
 
417
- #for spos in 1..cols-1 do
418
- # for rpos in 1..rows-1 do
419
- # match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
420
- # skip_sequence = a[spos-1,rpos] + gap
421
- # skip_reference = a[spos,rpos-1] + gap
422
- # a[spos,rpos] = [match, skip_sequence, skip_reference].max
423
- # end
424
- #end
425
-
426
423
  spos = 1
427
424
  while spos < cols do
428
425
  rpos = 1
@@ -760,7 +757,6 @@ end
760
757
  html
761
758
  end
762
759
 
763
-
764
760
  def self.path_relative_to(basedir, path)
765
761
  path = File.expand_path(path)
766
762
  basedir = File.expand_path(basedir)
@@ -775,6 +771,10 @@ end
775
771
  end
776
772
  end
777
773
 
774
# Name of the local machine as reported by the `hostname` command, with
# surrounding whitespace stripped. The command is run only on the first
# call; the result is memoized.
def self.hostname
  @hostname ||= `hostname`.strip
end
777
+
778
778
  def self.lock(file, *args)
779
779
  return yield file, *args if file.nil?
780
780
  FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
@@ -785,7 +785,7 @@ end
785
785
 
786
786
  begin
787
787
  if File.exists? lockfile and
788
- `hostname`.strip == (info = YAML.load_file(lockfile))["host"] and
788
+ Misc.hostname == (info = YAML.load_file(lockfile))["host"] and
789
789
  info["pid"] and not Misc.pid_exists?(info["pid"])
790
790
 
791
791
  Log.info("Removing lockfile: #{lockfile}. This pid #{Process.pid}. Content: #{info.inspect}")
@@ -803,6 +803,39 @@ end
803
803
  res
804
804
  end
805
805
 
806
+ LOCK_REPO_SERIALIZER=Marshal
807
+
808
# Runs the given block while holding a lock stored as a record in +repo+.
#
# repo - key/value store responding to [], []=, include?, out and delete.
# key  - name of the resource to lock; the record is stored under
#        "lock-<key>".
# args - extra arguments passed on to the block.
#
# The lock record holds the serialized host name and pid of the owner. A
# lock left behind by a process of this host that no longer exists is
# removed; otherwise the call polls once per second until the record
# disappears. The check and the write are separate operations, so two
# processes may still acquire the lock at the same time.
#
# When repo or key is nil no locking is done and the block is simply
# yielded +key+ and +args+.
#
# Returns the result of the block. The lock record is removed even when
# the block raises.
def self.lock_in_repo(repo, key, *args)
  return yield key, *args if repo.nil? or key.nil?

  lock_key = "lock-#{key}"

  begin
    if repo[lock_key]
      # NOTE(review): the record is deserialized with LOCK_REPO_SERIALIZER;
      # this is only safe as long as the repo contents are trusted.
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])

      # Records are written with "host"/"pid"; :hostname/:pid are also
      # accepted so that locks written with the earlier keys are understood.
      host = info["host"] || info[:hostname]
      pid = info["pid"] || info[:pid]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.out lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.out lock_key if repo.include? lock_key
  end

  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({"host" => Misc.hostname, "pid" => Process.pid})

  begin
    yield lock_key, *args
  ensure
    repo.delete lock_key
  end
end
838
+
806
839
  def self.common_path(dir, file)
807
840
  file = File.expand_path file
808
841
  dir = File.expand_path dir
@@ -913,6 +946,10 @@ end
913
946
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
914
947
  keys = keys.sort_by{|k| k.to_s}
915
948
 
949
+ if hash.respond_to? :unnamed
950
+ unnamed = hash.unnamed
951
+ hash.unnamed = true
952
+ end
916
953
  keys.each do |k|
917
954
  next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
918
955
  v = hash[k]
@@ -926,7 +963,7 @@ end
926
963
  when Symbol === v
927
964
  str << k.to_s << "=>" << v.to_s
928
965
  when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
929
- str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH]
966
+ str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
930
967
  when String === v
931
968
  str << k.to_s << "=>" << v
932
969
  when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
@@ -947,6 +984,7 @@ end
947
984
 
948
985
  str << "_" << hash2md5(v.info) if Annotated === v
949
986
  end
987
+ hash.unnamed = unnamed if hash.respond_to? :unnamed
950
988
 
951
989
  if str.empty?
952
990
  ""
@@ -1044,8 +1082,8 @@ end
1044
1082
  return field if Integer === field or Range === field
1045
1083
  raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
1046
1084
  fields.each_with_index{|f,i| return i if f == field}
1047
- field_re = Regexp.new /#{field}/i
1048
- fields.each_with_index{|f,i| return i if f =~ field_re}
1085
+ field_re = Regexp.new /^#{field}$/i
1086
+ fields.each_with_index{|f,i| return i if f =~ field_re}
1049
1087
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
1050
1088
  end
1051
1089
 
@@ -1,10 +1,10 @@
1
- require 'rbbt/util/chain_methods'
1
+ #require 'rbbt/util/chain_methods'
2
2
  require 'rbbt/util/misc'
3
3
 
4
4
  module NamedArray
5
- extend ChainMethods
5
+ #extend ChainMethods
6
+ #self.chain_prefix = :named_array
6
7
 
7
- self.chain_prefix = :named_array
8
8
  attr_accessor :fields
9
9
  attr_accessor :key
10
10
  attr_accessor :entity_options
@@ -36,7 +36,7 @@ module NamedArray
36
36
  if entity_templates.include? field
37
37
  entity
38
38
  else
39
- template = Misc.prepare_entity("TEMPLATE", field, options)
39
+ template = Misc.prepare_entity("ENTITY_TEMPLATE", field, options)
40
40
  if Annotated === template
41
41
  entity_templates[field] = template
42
42
  entity = template.annotate(entity.frozen? ? entity.dup : entity)
@@ -73,9 +73,20 @@ module NamedArray
73
73
  end
74
74
  end
75
75
 
76
- def named_array_get_brackets(key)
76
+ #def named_array_get_brackets(key)
77
+ # pos = Misc.field_position(fields, key)
78
+ # elem = named_array_clean_get_brackets(pos)
79
+
80
+ # return elem if @fields.nil? or @fields.empty?
81
+
82
+ # field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
83
+ # elem = prepare_entity(elem, field, entity_options)
84
+ # elem
85
+ #end
86
+
87
+ def [](key, clean = false)
77
88
  pos = Misc.field_position(fields, key)
78
- elem = named_array_clean_get_brackets(pos)
89
+ elem = super(pos)
79
90
 
80
91
  return elem if @fields.nil? or @fields.empty?
81
92
 
@@ -84,7 +95,19 @@ module NamedArray
84
95
  elem
85
96
  end
86
97
 
87
- def named_array_each(&block)
98
+ #def named_array_each(&block)
99
+ # if defined?(Entity) and not @fields.nil? and not @fields.empty?
100
+ # @fields.zip(self).each do |field,elem|
101
+ # elem = prepare_entity(elem, field, entity_options)
102
+ # yield(elem)
103
+ # elem
104
+ # end
105
+ # else
106
+ # named_array_clean_each &block
107
+ # end
108
+ #end
109
+
110
+ def each(&block)
88
111
  if defined?(Entity) and not @fields.nil? and not @fields.empty?
89
112
  @fields.zip(self).each do |field,elem|
90
113
  elem = prepare_entity(elem, field, entity_options)
@@ -92,14 +115,30 @@ module NamedArray
92
115
  elem
93
116
  end
94
117
  else
95
- named_array_clean_each &block
118
+ super &block
96
119
  end
120
+
97
121
  end
98
122
 
99
- def named_array_collect
123
+ #def named_array_collect
124
+ # res = []
125
+
126
+ # each do |elem|
127
+ # if block_given?
128
+ # res << yield(elem)
129
+ # else
130
+ # res << elem
131
+ # end
132
+ # end
133
+
134
+ # res
135
+ #end
136
+
137
+
138
+ def collect
100
139
  res = []
101
140
 
102
- named_array_each do |elem|
141
+ each do |elem|
103
142
  if block_given?
104
143
  res << yield(elem)
105
144
  else
@@ -110,14 +149,27 @@ module NamedArray
110
149
  res
111
150
  end
112
151
 
113
- def named_array_set_brackets(key,value)
114
- named_array_clean_set_brackets(Misc.field_position(fields, key), value)
152
+ #def named_array_set_brackets(key,value)
153
+ # named_array_clean_set_brackets(Misc.field_position(fields, key), value)
154
+ #end
155
+
156
+ def []=(key, value)
157
+ super(Misc.field_position(fields, key), value)
115
158
  end
116
159
 
117
- def named_array_values_at(*keys)
160
+ #def named_array_values_at(*keys)
161
+ # keys = keys.collect{|k| Misc.field_position(fields, k, true) }
162
+ # keys.collect{|k|
163
+ # named_array_get_brackets(k) unless k.nil?
164
+ # }
165
+ #end
166
+
167
+
168
+
169
+ def values_at(*keys)
118
170
  keys = keys.collect{|k| Misc.field_position(fields, k, true) }
119
171
  keys.collect{|k|
120
- named_array_get_brackets(k) unless k.nil?
172
+ self[k] unless k.nil?
121
173
  }
122
174
  end
123
175
 
@@ -11,6 +11,19 @@ module Open
11
11
  REMOTE_CACHEDIR = "/tmp/open_cache"
12
12
  FileUtils.mkdir REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
13
13
 
14
class << self
  # Allows the list of repository directories to be set explicitly.
  attr_writer :repository_dirs

  # Directories whose files are kept in a file repository.
  #
  # Unless set explicitly, the list is read on first use from the file
  # located by Rbbt.etc.repository_dirs.find, one directory per line. When
  # that file does not exist the list is empty.
  def repository_dirs
    @repository_dirs ||= begin
                           list_file = Rbbt.etc.repository_dirs.find
                           File.exist?(list_file) ? File.read(list_file).split("\n") : []
                         end
  end

end
26
+
14
27
  def self.cachedir=(cachedir)
15
28
  REMOTE_CACHEDIR.replace cachedir
16
29
  FileUtils.mkdir REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
@@ -119,15 +132,115 @@ module Open
119
132
  CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to? :force_close})
120
133
  end
121
134
  end
122
-
135
+
136
# Returns the file repository of +dir+: the database at "<dir>/.file_repo",
# opened through Persist.open_tokyocabinet as a TokyoCabinet::BDB. Each
# directory's repository is opened once and then reused.
def self.get_repo_from_dir(dir)
  @repos ||= {}

  cached = @repos[dir]
  return cached if cached

  @repos[dir] = Persist.open_tokyocabinet(File.join(dir, '.file_repo'), false, :clean, TokyoCabinet::BDB)
end
143
+
144
# Returns a StringIO over the content stored under +sub_path+ in the file
# repository of +dir+. The repository's +read+ method is called before the
# record is fetched.
def self.get_stream_from_repo(dir, sub_path)
  store = get_repo_from_dir(dir)
  store.read

  stored_content = store[sub_path]
  StringIO.new(stored_content)
end
149
+
150
# Stores +content+ under +sub_path+ in the file repository of +dir+. The
# repository's +write+ method is called before the record is assigned.
#
# Returns +content+.
def self.save_content_in_repo(dir, sub_path, content)
  store = get_repo_from_dir(dir)
  store.write
  store[sub_path] = content
end
155
+
156
# Removes entries from the file repository of +dir+.
#
# dir       - directory that owns the repository.
# sub_path  - key of the entry, or key prefix when +recursive+ is set.
# recursive - when true, every key in the range starting at +sub_path+
#             (inclusive) and ending at +sub_path+ with its last character
#             replaced by the next code point (exclusive) is selected, i.e.
#             every key that starts with +sub_path+.
#
# The repository's +write+ method is called first.
def self.remove_from_repo(dir, sub_path, recursive = false)
  repo = get_repo_from_dir(dir)
  repo.write
  if recursive
    # Smallest string greater than every key prefixed by sub_path: bump the
    # last character by one.
    upper_bound = sub_path.sub(/.\z/m) { |last| [last.ord + 1].pack("U") }
    # NOTE(review): the list of keys is handed to outlist as a single
    # argument, as before - confirm that the repo's outlist accepts a list.
    repo.outlist repo.range(sub_path, true, upper_bound, false)
  else
    repo.outlist sub_path
  end
end
165
+
166
# Tells whether the file repository of +dir+ has an entry for +sub_path+.
#
# content - not used; kept (now optional) so existing three-argument calls
#           keep working.
def self.exists_in_repo(dir, sub_path, content = nil)
  repo = get_repo_from_dir(dir)
  repo.include? sub_path
end
170
+
171
# Finds the repository directory that contains +file+.
#
# file - path, converted with to_s.
#
# A directory from repository_dirs matches when +file+ is that directory
# itself or lies below it; a path that merely shares the prefix (for
# example "/data/repo_old" against "/data/repo") does not match. Blank
# entries in repository_dirs are ignored.
#
# Returns [dir, sub_path] for the first match, where sub_path is the rest
# of +file+ after +dir+, or nil when no directory matches.
def self.find_repo_dir(file)
  path = file.to_s

  self.repository_dirs.each do |dir|
    next if dir.nil? || dir.empty?
    next unless path.start_with? dir

    sub_path = path[dir.length..-1]
    at_boundary = dir.end_with?("/") || sub_path.empty? || sub_path.start_with?("/")
    return [dir, sub_path] if at_boundary
  end

  nil
end
180
+
181
# Removes +file+: from its file repository when find_repo_dir locates one
# for it, from the file system (FileUtils.rm) otherwise.
def self.rm(file)
  location = find_repo_dir(file)
  return FileUtils.rm(file) unless location

  repo_dir, repo_key = location
  remove_from_repo(repo_dir, repo_key)
end
188
+
189
# Removes +file+ and everything below it: from its file repository (as a
# recursive removal) when find_repo_dir locates one for it, from the file
# system (FileUtils.rm_rf) otherwise.
def self.rm_rf(file)
  location = find_repo_dir(file)
  return FileUtils.rm_rf(file) unless location

  repo_dir, repo_key = location
  remove_from_repo(repo_dir, repo_key, true)
end
196
+
123
197
  def self.file_open(file, grep, mode = 'r', invert_grep = false)
198
+ if (dir_sub_path = find_repo_dir(file))
199
+ stream = get_stream_from_repo(*dir_sub_path)
200
+ else
201
+ stream = File.open(file, mode)
202
+ end
203
+
124
204
  if grep
125
- grep(File.open(file, mode), grep, invert_grep)
205
+ grep(stream, grep, invert_grep)
126
206
  else
127
- File.open(file, mode)
207
+ stream
128
208
  end
129
209
  end
130
210
 
211
# Writes +content+ to +file+.
#
# file    - destination path.
# content - data to write.
# mode    - mode used to open the file; not used when the file belongs to
#           a file repository.
#
# When find_repo_dir locates a repository for +file+ the content is saved
# there. Otherwise the file is opened with +mode+ and written while holding
# an exclusive flock; the data is flushed before the lock is released so
# that it is on the file while the lock is still held.
def self.file_write(file, content, mode = 'w')
  if (dir_sub_path = find_repo_dir(file))
    dir, sub_path = dir_sub_path
    save_content_in_repo(dir, sub_path, content)
  else
    File.open(file, mode) do |f|
      f.flock(File::LOCK_EX)
      f.write content
      f.flush
      f.flock(File::LOCK_UN)
    end
  end
end
223
+
224
# Tells whether +file+ exists: in its file repository when find_repo_dir
# locates one for it, on the file system otherwise.
def self.exists?(file)
  if (dir_sub_path = find_repo_dir(file))
    dir, sub_path = dir_sub_path
    # The third argument is not used by exists_in_repo; it is passed only
    # to satisfy its signature.
    exists_in_repo(dir, sub_path, file)
  else
    File.exist? file
  end
end
232
+
233
# Runs +block+ while holding a lock for +file+: a lock record in the file
# repository (Misc.lock_in_repo) when find_repo_dir locates one for the
# file, a regular lock through Misc.lock otherwise.
def self.lock(file, &block)
  location = find_repo_dir(file)
  return Misc.lock(file, &block) unless location

  repo_dir, repo_key = location
  Misc.lock_in_repo(get_repo_from_dir(repo_dir), repo_key, &block)
end
242
+
243
+
131
244
  # Decompression
132
245
 
133
246
  def self.gunzip(stream)
@@ -180,7 +293,15 @@ module Open
180
293
  end
181
294
 
182
295
  def self.open(url, options = {})
183
- return url if IO === url
296
+ if IO === url
297
+ if block_given?
298
+ res = yield url
299
+ url.close
300
+ return res
301
+ else
302
+ return url
303
+ end
304
+ end
184
305
  options = Misc.add_defaults options, :noz => false, :mode => 'r'
185
306
 
186
307
  mode = Misc.process_options options, :mode
@@ -216,7 +337,9 @@ module Open
216
337
  io = gunzip(io) if ((String === url and gzip?(url)) and not options[:noz]) or options[:gzip]
217
338
 
218
339
  if block_given?
219
- yield io
340
+ res = yield(io)
341
+ io.close
342
+ return res
220
343
  else
221
344
  io
222
345
  end
@@ -269,11 +392,12 @@ module Open
269
392
  raise $!
270
393
  end
271
394
  when String === content
272
- File.open(file, mode) do |f|
273
- f.flock(File::LOCK_EX)
274
- f.write content
275
- f.flock(File::LOCK_UN)
276
- end
395
+ file_write(file, content, mode)
396
+ #File.open(file, mode) do |f|
397
+ # f.flock(File::LOCK_EX)
398
+ # f.write content
399
+ # f.flock(File::LOCK_UN)
400
+ #end
277
401
  else
278
402
  begin
279
403
  File.open(file, mode) do |f|