rbbt-util 5.2.4 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +8 -8
  2. data/bin/rbbt +23 -10
  3. data/bin/rbbt_monitor.rb +8 -8
  4. data/lib/rbbt/annotations.rb +22 -1
  5. data/lib/rbbt/annotations/util.rb +1 -1
  6. data/lib/rbbt/entity.rb +162 -0
  7. data/lib/rbbt/fix_width_table.rb +7 -0
  8. data/lib/rbbt/persist.rb +16 -9
  9. data/lib/rbbt/persist/tsv.rb +14 -8
  10. data/lib/rbbt/resource.rb +1 -6
  11. data/lib/rbbt/resource/path.rb +23 -27
  12. data/lib/rbbt/tsv.rb +33 -4
  13. data/lib/rbbt/tsv/accessor.rb +100 -57
  14. data/lib/rbbt/tsv/attach.rb +3 -1
  15. data/lib/rbbt/tsv/attach/util.rb +34 -10
  16. data/lib/rbbt/tsv/index.rb +12 -3
  17. data/lib/rbbt/tsv/manipulate.rb +25 -1
  18. data/lib/rbbt/tsv/parser.rb +1 -0
  19. data/lib/rbbt/util/R.rb +36 -6
  20. data/lib/rbbt/util/cmd.rb +2 -1
  21. data/lib/rbbt/util/color.rb +250 -0
  22. data/lib/rbbt/util/colorize.rb +57 -0
  23. data/lib/rbbt/util/misc.rb +57 -19
  24. data/lib/rbbt/util/named_array.rb +66 -14
  25. data/lib/rbbt/util/open.rb +134 -10
  26. data/lib/rbbt/util/semaphore.rb +71 -0
  27. data/lib/rbbt/workflow.rb +34 -7
  28. data/lib/rbbt/workflow/accessor.rb +12 -8
  29. data/lib/rbbt/workflow/step.rb +44 -28
  30. data/lib/rbbt/workflow/usage.rb +3 -0
  31. data/share/lib/R/util.R +31 -0
  32. data/share/rbbt_commands/app/start +5 -4
  33. data/share/rbbt_commands/study/task +222 -0
  34. data/share/rbbt_commands/tsv/attach +13 -0
  35. data/share/rbbt_commands/tsv/change_id +15 -0
  36. data/share/rbbt_commands/tsv/info +3 -1
  37. data/share/rbbt_commands/workflow/task +14 -15
  38. data/test/rbbt/test_entity.rb +221 -0
  39. data/test/rbbt/test_tsv.rb +2 -1
  40. data/test/rbbt/test_workflow.rb +0 -2
  41. data/test/rbbt/tsv/test_accessor.rb +2 -2
  42. data/test/rbbt/util/test_R.rb +9 -2
  43. data/test/rbbt/util/test_colorize.rb +12 -0
  44. data/test/rbbt/util/test_misc.rb +0 -5
  45. data/test/rbbt/util/test_open.rb +31 -0
  46. data/test/rbbt/workflow/test_step.rb +32 -0
  47. metadata +13 -2
@@ -0,0 +1,57 @@
1
+ require 'rbbt/util/color'
2
+
3
# Helpers that turn values into colors, built on the Color class (it must
# provide `Color.new(hex)`, `#blend(other, ratio)` and `#darken(amount)`).
module Colorize
  # Resolves a color specification into a hex string.
  #
  # color - a hex string (optionally prefixed with '#') or one of the names
  #         "white" / "black", given as a String or Symbol.
  #
  # Returns the argument itself when it already looks like a hex code, the
  # hex code of a known name, or nil when the name is not recognized.
  def self.from_name(color)
    return color if color.to_s =~ /\A#?[0-9A-F]+\z/i

    case color.to_s
    when "white"
      '#fff'
    when "black"
      '#000'
    end
  end

  # Maps each value of +array+ to a color on the gradient between +start+
  # and +eend+; the minimum value gets the start color, the maximum value the
  # end color.
  #
  # array - values responding to #to_f.
  # start - color specification for the low end (see from_name).
  # eend  - color specification for the high end (see from_name).
  #
  # Returns an Array with one blended color per input value, in input order.
  # An empty input gives an empty Array; when all values are equal every
  # value is blended with ratio 0.0 instead of dividing by zero.
  def self.continuous(array, start = :white, eend = :black)
    start_color = Color.new from_name(start)
    end_color   = Color.new from_name(eend)

    numbers = array.collect { |v| v.to_f }
    return [] if numbers.empty?

    min   = numbers.min
    range = numbers.max - min

    numbers.collect do |v|
      ratio = range.zero? ? 0.0 : (v - min) / range
      start_color.blend end_color, ratio
    end
  end

  # Assigns one palette color to each distinct value of +array+.
  #
  # The palette is read from the "diverging_colors.hex" share file. When
  # there are at least as many distinct values as palette colors, darkened
  # copies of the palette are appended, one round per full multiple of the
  # palette size.
  # NOTE(review): every extra round darkens by the same 0.2, so rounds after
  # the first repeat colors -- confirm whether that is intended.
  #
  # Returns an Array of colors aligned with +array+.
  def self.distinct(array)
    colors = Rbbt.share.color["diverging_colors.hex"].list.collect { |c| Color.new c }

    uniq_values = array.uniq
    # Number of extra darkened rounds, derived from the actual palette size
    # rather than assuming it has 12 entries.
    times = colors.empty? ? 0 : uniq_values.length / colors.length

    all_colors = colors.dup
    times.times do
      all_colors.concat colors.collect { |n| n.darken(0.2) }
    end

    value_color = Hash[uniq_values.zip(all_colors)]

    value_color.values_at(*array)
  end

  # Builds a key => color(s) Hash for the values of +tsv+.
  #
  # A continuous gradient is used when the first value is an Integer or
  # converts to a non-zero float; otherwise each distinct value gets its own
  # color.
  # NOTE(review): a leading "0" (or 0.0) therefore selects the distinct
  # palette even for numeric data -- confirm that this is the intent.
  #
  # tsv - object responding to #values, #keys and #type.
  def self.tsv(tsv)
    values = tsv.values.flatten
    first  = values.first

    # Integer replaces Fixnum, which no longer exists in Ruby >= 3.2.
    numeric = Integer === first ||
      (first.respond_to?(:to_f) && first.to_f != 0 && first != "0")

    value_colors = if numeric
                     Misc.process_to_hash(values) { continuous(values) }
                   else
                     Misc.process_to_hash(values) { distinct(values) }
                   end

    if tsv.type == :single
      Hash[*tsv.keys.zip(value_colors.values_at(*values)).flatten]
    else
      # NOTE(review): +values+ is flattened, so keys are paired with values
      # positionally; rows holding several values look misaligned -- verify
      # against callers before changing.
      Hash[*tsv.keys.zip(values.collect { |vs| value_colors.values_at(*vs) }).flatten]
    end
  end
end
@@ -22,6 +22,7 @@ module Misc
22
22
  def self.correct_icgc_mutation(pos, ref, mut_str)
23
23
  mut = mut_str
24
24
  mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
25
+ mut = "+" << mut if ref == '-'
25
26
  [pos, [mut]]
26
27
  end
27
28
 
@@ -143,9 +144,13 @@ module Misc
143
144
  end
144
145
 
145
146
  def self.sample(ary, size, replacement = false)
146
- total = ary.length
147
- p = random_sample_in_range(total, size)
148
- ary.values_at *p
147
+ if ary.respond_to? :sample
148
+ ary.sample size
149
+ else
150
+ total = ary.length
151
+ p = random_sample_in_range(total, size)
152
+ ary.values_at *p
153
+ end
149
154
  end
150
155
 
151
156
  Log2Multiplier = 1.0 / Math.log(2.0)
@@ -160,13 +165,14 @@ module Misc
160
165
 
161
166
  dup_array = options.delete :dup_array
162
167
 
163
- if Entity.respond_to?(:formats) and Entity.formats.include? field
168
+ if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
164
169
  params = options.dup
165
170
 
166
171
  params[:format] ||= params.delete "format"
167
172
  params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
168
173
 
169
- entity = Entity.formats[field].setup(
174
+ mod = Entity === field ? field : Entity.formats[field]
175
+ entity = mod.setup(
170
176
  ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
171
177
  params
172
178
  )
@@ -414,15 +420,6 @@ end
414
420
  for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
415
421
  for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
416
422
 
417
- #for spos in 1..cols-1 do
418
- # for rpos in 1..rows-1 do
419
- # match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
420
- # skip_sequence = a[spos-1,rpos] + gap
421
- # skip_reference = a[spos,rpos-1] + gap
422
- # a[spos,rpos] = [match, skip_sequence, skip_reference].max
423
- # end
424
- #end
425
-
426
423
  spos = 1
427
424
  while spos < cols do
428
425
  rpos = 1
@@ -760,7 +757,6 @@ end
760
757
  html
761
758
  end
762
759
 
763
-
764
760
  def self.path_relative_to(basedir, path)
765
761
  path = File.expand_path(path)
766
762
  basedir = File.expand_path(basedir)
@@ -775,6 +771,10 @@ end
775
771
  end
776
772
  end
777
773
 
774
# Name of the local host, obtained by running the `hostname` command.
#
# The command is executed only on the first call; the stripped output is
# memoized in @hostname (the instance variable was previously misspelled
# as @hostanem).
def self.hostname
  @hostname ||= `hostname`.strip
end
777
+
778
778
  def self.lock(file, *args)
779
779
  return yield file, *args if file.nil?
780
780
  FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
@@ -785,7 +785,7 @@ end
785
785
 
786
786
  begin
787
787
  if File.exists? lockfile and
788
- `hostname`.strip == (info = YAML.load_file(lockfile))["host"] and
788
+ Misc.hostname == (info = YAML.load_file(lockfile))["host"] and
789
789
  info["pid"] and not Misc.pid_exists?(info["pid"])
790
790
 
791
791
  Log.info("Removing lockfile: #{lockfile}. This pid #{Process.pid}. Content: #{info.inspect}")
@@ -803,6 +803,39 @@ end
803
803
  res
804
804
  end
805
805
 
806
# Serializer used for the lock record stored in the repository.
# NOTE(review): Marshal.load must only ever see data written by this code;
# do not point it at repositories that untrusted parties can write to.
LOCK_REPO_SERIALIZER=Marshal

# Runs the block while holding a lock record stored in +repo+ under
# "lock-<key>".
#
# repo - key/value store responding to #[], #[]=, #include? and #delete.
#        When nil (or when +key+ is nil) no locking is done and the block
#        is simply yielded +key+ and +args+.
# key  - name of the resource to lock.
# args - extra arguments passed through to the block.
#
# A lock left behind by a process on this host that no longer exists is
# removed before waiting. While another lock record is present the method
# polls once per second. The lock is released even if the block raises.
# NOTE(review): checking for the record and writing it are separate steps,
# so two processes can still race between them.
#
# Returns the value of the block.
def self.lock_in_repo(repo, key, *args)
  # The guard used to yield an undefined local (`file`), raising NameError.
  return yield key, *args if repo.nil? or key.nil?

  lock_key = "lock-#{key}"

  begin
    if repo[lock_key]
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])
      # Records are written with "host"/"pid"; symbol keys are still read
      # so that locks written by the previous format are recognized.
      host = info["host"] || info[:hostname]
      pid  = info["pid"] || info[:pid]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.delete lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.delete lock_key if repo.include? lock_key
  end

  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({"host" => Misc.hostname, "pid" => Process.pid})

  begin
    yield lock_key, *args
  ensure
    repo.delete lock_key
  end
end
838
+
806
839
  def self.common_path(dir, file)
807
840
  file = File.expand_path file
808
841
  dir = File.expand_path dir
@@ -913,6 +946,10 @@ end
913
946
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
914
947
  keys = keys.sort_by{|k| k.to_s}
915
948
 
949
+ if hash.respond_to? :unnamed
950
+ unnamed = hash.unnamed
951
+ hash.unnamed = true
952
+ end
916
953
  keys.each do |k|
917
954
  next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
918
955
  v = hash[k]
@@ -926,7 +963,7 @@ end
926
963
  when Symbol === v
927
964
  str << k.to_s << "=>" << v.to_s
928
965
  when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
929
- str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH]
966
+ str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
930
967
  when String === v
931
968
  str << k.to_s << "=>" << v
932
969
  when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
@@ -947,6 +984,7 @@ end
947
984
 
948
985
  str << "_" << hash2md5(v.info) if Annotated === v
949
986
  end
987
+ hash.unnamed = unnamed if hash.respond_to? :unnamed
950
988
 
951
989
  if str.empty?
952
990
  ""
@@ -1044,8 +1082,8 @@ end
1044
1082
  return field if Integer === field or Range === field
1045
1083
  raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
1046
1084
  fields.each_with_index{|f,i| return i if f == field}
1047
- field_re = Regexp.new /#{field}/i
1048
- fields.each_with_index{|f,i| return i if f =~ field_re}
1085
+ field_re = Regexp.new /^#{field}$/i
1086
+ fields.each_with_index{|f,i| return i if f =~ field_re}
1049
1087
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
1050
1088
  end
1051
1089
 
@@ -1,10 +1,10 @@
1
- require 'rbbt/util/chain_methods'
1
+ #require 'rbbt/util/chain_methods'
2
2
  require 'rbbt/util/misc'
3
3
 
4
4
  module NamedArray
5
- extend ChainMethods
5
+ #extend ChainMethods
6
+ #self.chain_prefix = :named_array
6
7
 
7
- self.chain_prefix = :named_array
8
8
  attr_accessor :fields
9
9
  attr_accessor :key
10
10
  attr_accessor :entity_options
@@ -36,7 +36,7 @@ module NamedArray
36
36
  if entity_templates.include? field
37
37
  entity
38
38
  else
39
- template = Misc.prepare_entity("TEMPLATE", field, options)
39
+ template = Misc.prepare_entity("ENTITY_TEMPLATE", field, options)
40
40
  if Annotated === template
41
41
  entity_templates[field] = template
42
42
  entity = template.annotate(entity.frozen? ? entity.dup : entity)
@@ -73,9 +73,20 @@ module NamedArray
73
73
  end
74
74
  end
75
75
 
76
- def named_array_get_brackets(key)
76
+ #def named_array_get_brackets(key)
77
+ # pos = Misc.field_position(fields, key)
78
+ # elem = named_array_clean_get_brackets(pos)
79
+
80
+ # return elem if @fields.nil? or @fields.empty?
81
+
82
+ # field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
83
+ # elem = prepare_entity(elem, field, entity_options)
84
+ # elem
85
+ #end
86
+
87
+ def [](key, clean = false)
77
88
  pos = Misc.field_position(fields, key)
78
- elem = named_array_clean_get_brackets(pos)
89
+ elem = super(pos)
79
90
 
80
91
  return elem if @fields.nil? or @fields.empty?
81
92
 
@@ -84,7 +95,19 @@ module NamedArray
84
95
  elem
85
96
  end
86
97
 
87
- def named_array_each(&block)
98
+ #def named_array_each(&block)
99
+ # if defined?(Entity) and not @fields.nil? and not @fields.empty?
100
+ # @fields.zip(self).each do |field,elem|
101
+ # elem = prepare_entity(elem, field, entity_options)
102
+ # yield(elem)
103
+ # elem
104
+ # end
105
+ # else
106
+ # named_array_clean_each &block
107
+ # end
108
+ #end
109
+
110
+ def each(&block)
88
111
  if defined?(Entity) and not @fields.nil? and not @fields.empty?
89
112
  @fields.zip(self).each do |field,elem|
90
113
  elem = prepare_entity(elem, field, entity_options)
@@ -92,14 +115,30 @@ module NamedArray
92
115
  elem
93
116
  end
94
117
  else
95
- named_array_clean_each &block
118
+ super &block
96
119
  end
120
+
97
121
  end
98
122
 
99
- def named_array_collect
123
+ #def named_array_collect
124
+ # res = []
125
+
126
+ # each do |elem|
127
+ # if block_given?
128
+ # res << yield(elem)
129
+ # else
130
+ # res << elem
131
+ # end
132
+ # end
133
+
134
+ # res
135
+ #end
136
+
137
+
138
+ def collect
100
139
  res = []
101
140
 
102
- named_array_each do |elem|
141
+ each do |elem|
103
142
  if block_given?
104
143
  res << yield(elem)
105
144
  else
@@ -110,14 +149,27 @@ module NamedArray
110
149
  res
111
150
  end
112
151
 
113
- def named_array_set_brackets(key,value)
114
- named_array_clean_set_brackets(Misc.field_position(fields, key), value)
152
+ #def named_array_set_brackets(key,value)
153
+ # named_array_clean_set_brackets(Misc.field_position(fields, key), value)
154
+ #end
155
+
156
# Stores +value+ at the position that +key+ resolves to through
# Misc.field_position on this array's fields, delegating the actual
# assignment to the underlying implementation.
def []=(key, value)
  position = Misc.field_position(fields, key)
  super(position, value)
end
116
159
 
117
- def named_array_values_at(*keys)
160
+ #def named_array_values_at(*keys)
161
+ # keys = keys.collect{|k| Misc.field_position(fields, k, true) }
162
+ # keys.collect{|k|
163
+ # named_array_get_brackets(k) unless k.nil?
164
+ # }
165
+ #end
166
+
167
+
168
+
169
# Fetches the elements for several +keys+ at once.
#
# Every key is first resolved to a position with Misc.field_position in
# quiet mode; a key that resolves to nil yields nil in the result, any
# other position is read through #[].
def values_at(*keys)
  positions = keys.collect do |key|
    Misc.field_position(fields, key, true)
  end

  positions.collect do |position|
    position.nil? ? nil : self[position]
  end
end
123
175
 
@@ -11,6 +11,19 @@ module Open
11
11
  REMOTE_CACHEDIR = "/tmp/open_cache"
12
12
  FileUtils.mkdir REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
13
13
 
14
class << self
  # Only the writer is generated; the reader below loads the list lazily.
  attr_writer :repository_dirs

  # Directories whose files are kept in a repository instead of on disk.
  #
  # On first use the list is read, one directory per line, from the file
  # located by Rbbt.etc.repository_dirs.find; a missing file gives an empty
  # list. The result is memoized and can be replaced through the writer.
  def repository_dirs
    @repository_dirs ||= begin
                           dirs_file = Rbbt.etc.repository_dirs.find
                           # File.exist? also works on Ruby >= 3.2, where
                           # File.exists? is gone.
                           File.exist?(dirs_file) ? File.read(dirs_file).split("\n") : []
                         end
  end
end
26
+
14
27
  def self.cachedir=(cachedir)
15
28
  REMOTE_CACHEDIR.replace cachedir
16
29
  FileUtils.mkdir REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
@@ -119,15 +132,115 @@ module Open
119
132
  CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to? :force_close})
120
133
  end
121
134
  end
122
-
135
+
136
# Returns the repository handle for +dir+, opening it on first use.
#
# The repository lives in the '.file_repo' entry inside +dir+ and is opened
# through Persist.open_tokyocabinet as a TokyoCabinet::BDB database. Handles
# are cached per directory in @repos.
def self.get_repo_from_dir(dir)
  @repos ||= {}

  cached = @repos[dir]
  return cached if cached

  repo_path = File.join(dir, '.file_repo')
  @repos[dir] = Persist.open_tokyocabinet(repo_path, false, :clean,TokyoCabinet::BDB )
end
143
+
144
# Returns a StringIO over the content stored under +sub_path+ in the
# repository of +dir+. The repository's #read is called before the lookup.
def self.get_stream_from_repo(dir, sub_path)
  database = get_repo_from_dir(dir)
  database.read

  content = database[sub_path]
  StringIO.new(content)
end
149
+
150
# Stores +content+ under +sub_path+ in the repository of +dir+. The
# repository's #write is called before the assignment.
#
# Returns +content+.
def self.save_content_in_repo(dir, sub_path, content)
  database = get_repo_from_dir(dir)
  database.write

  database[sub_path] = content
end
155
+
156
# Removes entries from the repository of +dir+.
#
# dir       - repository directory.
# sub_path  - key of the entry to remove.
# recursive - when true, every key in the range [sub_path, upper) is
#             removed, where upper is +sub_path+ with its last character
#             replaced by the next code point (so all keys that have
#             +sub_path+ as a prefix fall inside the range).
#
# The previous upper bound was built from '\1'.ord + 1, which is always
# the character ']' regardless of +sub_path+, so the range was wrong.
# NOTE(review): the recursive branch hands the key list returned by #range
# to #outlist in a single call, exactly as before -- confirm the repository
# accepts a list there.
def self.remove_from_repo(dir, sub_path, recursive = false)
  repo = get_repo_from_dir(dir)
  repo.write

  return repo.outlist(sub_path) unless recursive

  upper_bound = if sub_path.empty?
                  sub_path
                else
                  sub_path[0...-1] + (sub_path[-1, 1].ord + 1).chr(sub_path.encoding)
                end

  repo.outlist repo.range(sub_path, true, upper_bound, false)
end
165
+
166
# Tells whether the repository of +dir+ holds an entry for +sub_path+.
#
# content - ignored; kept (now optional) only so that existing callers that
#           pass a third argument keep working.
def self.exists_in_repo(dir, sub_path, content = nil)
  repo = get_repo_from_dir(dir)
  repo.include? sub_path
end
170
+
171
# Finds the repository directory that +file+ belongs to.
#
# file - path of the file; it is converted with #to_s before matching, so
#        path-like objects that are not Strings are accepted too.
#
# Returns [dir, sub_path] for the first entry of repository_dirs that is a
# prefix of the path (sub_path being the remainder after the prefix), or nil
# when none matches. Blank entries are skipped, because an empty prefix
# would otherwise match every path.
def self.find_repo_dir(file)
  path = file.to_s

  self.repository_dirs.each do |dir|
    next if dir.nil? or dir.empty?
    return [dir, path[dir.length..-1]] if path.start_with? dir
  end

  nil
end
180
+
181
# Removes +file+: from its repository when find_repo_dir locates one for
# it, from the file system (FileUtils.rm) otherwise.
def self.rm(file)
  location = find_repo_dir(file)
  return remove_from_repo(*location) if location

  FileUtils.rm(file)
end
188
+
189
# Recursively removes +file+: from its repository (remove_from_repo in
# recursive mode) when find_repo_dir locates one for it, from the file
# system (FileUtils.rm_rf) otherwise.
def self.rm_rf(file)
  location = find_repo_dir(file)
  return remove_from_repo(*location, true) if location

  FileUtils.rm_rf(file)
end
196
+
123
197
  def self.file_open(file, grep, mode = 'r', invert_grep = false)
198
+ if (dir_sub_path = find_repo_dir(file))
199
+ stream = get_stream_from_repo(*dir_sub_path)
200
+ else
201
+ stream = File.open(file, mode)
202
+ end
203
+
124
204
  if grep
125
- grep(File.open(file, mode), grep, invert_grep)
205
+ grep(stream, grep, invert_grep)
126
206
  else
127
- File.open(file, mode)
207
+ stream
128
208
  end
129
209
  end
130
210
 
211
# Writes +content+ to +file+.
#
# When find_repo_dir locates a repository for the file the content is
# stored there with save_content_in_repo (+mode+ is not used). Otherwise
# the file is opened with +mode+ and written while holding an exclusive
# flock, which is released afterwards.
def self.file_write(file, content, mode = 'w')
  location = find_repo_dir(file)

  if location
    dir, sub_path = location
    return save_content_in_repo(dir, sub_path, content)
  end

  File.open(file, mode) do |handle|
    handle.flock(File::LOCK_EX)
    handle.write content
    handle.flock(File::LOCK_UN)
  end
end
223
+
224
# Tells whether +file+ exists, looking in its repository when
# find_repo_dir locates one for it and on the file system otherwise.
#
# Uses File.exist?, since File.exists? is no longer available on
# Ruby >= 3.2.
def self.exists?(file)
  location = find_repo_dir(file)
  return File.exist?(file) unless location

  dir, sub_path = location
  # The third argument is not used by the lookup; it is passed only to
  # satisfy exists_in_repo's signature.
  exists_in_repo(dir, sub_path, file)
end
231
+ end
232
+
233
# Runs +block+ while holding a lock for +file+.
#
# Files that find_repo_dir places in a repository are locked with
# Misc.lock_in_repo on that repository, using the sub path as key; all
# other files are locked with Misc.lock.
def self.lock(file, &block)
  location = find_repo_dir(file)
  return Misc.lock(file, &block) unless location

  dir, sub_path = location
  repo = get_repo_from_dir(dir)
  Misc.lock_in_repo(repo, sub_path, &block)
end
241
+ end
242
+
243
+
131
244
  # Decompression
132
245
 
133
246
  def self.gunzip(stream)
@@ -180,7 +293,15 @@ module Open
180
293
  end
181
294
 
182
295
  def self.open(url, options = {})
183
- return url if IO === url
296
+ if IO === url
297
+ if block_given?
298
+ res = yield url
299
+ url.close
300
+ return res
301
+ else
302
+ return url
303
+ end
304
+ end
184
305
  options = Misc.add_defaults options, :noz => false, :mode => 'r'
185
306
 
186
307
  mode = Misc.process_options options, :mode
@@ -216,7 +337,9 @@ module Open
216
337
  io = gunzip(io) if ((String === url and gzip?(url)) and not options[:noz]) or options[:gzip]
217
338
 
218
339
  if block_given?
219
- yield io
340
+ res = yield(io)
341
+ io.close
342
+ return res
220
343
  else
221
344
  io
222
345
  end
@@ -269,11 +392,12 @@ module Open
269
392
  raise $!
270
393
  end
271
394
  when String === content
272
- File.open(file, mode) do |f|
273
- f.flock(File::LOCK_EX)
274
- f.write content
275
- f.flock(File::LOCK_UN)
276
- end
395
+ file_write(file, content, mode)
396
+ #File.open(file, mode) do |f|
397
+ # f.flock(File::LOCK_EX)
398
+ # f.write content
399
+ # f.flock(File::LOCK_UN)
400
+ #end
277
401
  else
278
402
  begin
279
403
  File.open(file, mode) do |f|