rbbt-util 5.4.1 → 5.5.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +8 -8
  2. data/bin/rbbt_monitor.rb +8 -4
  3. data/lib/rbbt.rb +4 -11
  4. data/lib/rbbt/annotations.rb +4 -1
  5. data/lib/rbbt/association.rb +218 -157
  6. data/lib/rbbt/association/index.rb +92 -0
  7. data/lib/rbbt/association/item.rb +44 -0
  8. data/lib/rbbt/entity.rb +4 -0
  9. data/lib/rbbt/fix_width_table.rb +14 -9
  10. data/lib/rbbt/knowledge_base.rb +269 -0
  11. data/lib/rbbt/persist.rb +1 -1
  12. data/lib/rbbt/persist/tsv.rb +22 -2
  13. data/lib/rbbt/resource.rb +0 -1
  14. data/lib/rbbt/resource/path.rb +1 -1
  15. data/lib/rbbt/resource/util.rb +0 -1
  16. data/lib/rbbt/tsv.rb +15 -14
  17. data/lib/rbbt/tsv/accessor.rb +21 -16
  18. data/lib/rbbt/tsv/attach.rb +5 -5
  19. data/lib/rbbt/tsv/attach/util.rb +4 -2
  20. data/lib/rbbt/tsv/change_id.rb +67 -0
  21. data/lib/rbbt/tsv/index.rb +5 -3
  22. data/lib/rbbt/tsv/manipulate.rb +83 -37
  23. data/lib/rbbt/tsv/parser.rb +2 -1
  24. data/lib/rbbt/tsv/util.rb +2 -0
  25. data/lib/rbbt/util/cmd.rb +1 -2
  26. data/lib/rbbt/util/log.rb +42 -38
  27. data/lib/rbbt/util/misc.rb +134 -46
  28. data/lib/rbbt/util/open.rb +3 -17
  29. data/lib/rbbt/util/semaphore.rb +8 -2
  30. data/lib/rbbt/workflow.rb +31 -46
  31. data/lib/rbbt/workflow/accessor.rb +1 -1
  32. data/lib/rbbt/workflow/step.rb +5 -3
  33. data/share/rbbt_commands/workflow/server +1 -0
  34. data/share/rbbt_commands/workflow/task +12 -2
  35. data/test/rbbt/association/test_index.rb +36 -0
  36. data/test/rbbt/test_annotations.rb +5 -4
  37. data/test/rbbt/test_association.rb +40 -13
  38. data/test/rbbt/test_knowledge_base.rb +103 -0
  39. data/test/rbbt/test_workflow.rb +4 -2
  40. data/test/rbbt/tsv/test_change_id.rb +43 -0
  41. data/test/rbbt/tsv/test_index.rb +2 -1
  42. data/test/rbbt/tsv/test_manipulate.rb +51 -0
  43. data/test/rbbt/util/test_misc.rb +21 -1
  44. data/test/test_helper.rb +8 -4
  45. metadata +12 -86
data/lib/rbbt/tsv/parser.rb CHANGED
@@ -18,6 +18,7 @@ module TSV
18
18
 
19
19
  line = stream.gets
20
20
  raise "Empty content" if line.nil?
21
+ line = Misc.fixutf8 line
21
22
  line.chomp!
22
23
 
23
24
  # Process options line
@@ -33,7 +34,7 @@ module TSV
33
34
 
34
35
  # Process fields line
35
36
 
36
- if line and line =~ /^#{@header_hash}/
37
+ if line and Misc.fixutf8(line) =~ /^#{@header_hash}/
37
38
  line.chomp!
38
39
  @fields = line.split(@sep)
39
40
  @key_field = @fields.shift
data/lib/rbbt/tsv/util.rb CHANGED
@@ -69,6 +69,8 @@ module TSV
69
69
  pos ||= :key if key_field == field
70
70
  Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
71
71
  pos
72
+ when key_field == field
73
+ :key
72
74
  else
73
75
  raise "No fields specified in TSV.identify_field" if fields.nil?
74
76
  Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
data/lib/rbbt/util/cmd.rb CHANGED
@@ -1,4 +1,3 @@
1
- require 'rbbt/util/misc'
2
1
  require 'rbbt/util/log'
3
2
  require 'stringio'
4
3
 
@@ -99,7 +98,7 @@ module CMD
99
98
  end
100
99
 
101
100
  def self.cmd(cmd, options = {}, &block)
102
- options = Misc.add_defaults options, :stderr => Log::DEBUG
101
+ options[:stderr] ||= Log::DEBUG
103
102
  in_content = options.delete(:in)
104
103
  stderr = options.delete(:stderr)
105
104
  pipe = options.delete(:pipe)
data/lib/rbbt/util/log.rb CHANGED
@@ -17,54 +17,48 @@ module Log
17
17
  @logfile = nil
18
18
  end
19
19
 
20
- #def self.severity=(severity)
21
- # @severity = severity
22
- #end
23
-
24
- #def self.severity
25
- # @severity
26
- #end
27
-
28
20
  SEVERITY_COLOR = ["0;37m", "0;32m", "0;33m", "0;31m","0;37m", "0;32m", "0;33m"].collect{|e| "\033[#{e}"}
29
21
 
30
- def self.log(message, severity = MEDIUM)
31
- message ||= ""
22
+ def self.log(message = nil, severity = MEDIUM, &block)
23
+ message ||= block
32
24
  severity_color = SEVERITY_COLOR[severity]
33
25
  font_color = {false => "\033[0;37m", true => "\033[0m"}[severity >= INFO]
34
26
 
35
- if severity >= self.severity and not message.empty?
36
- str = "\033[0;37m#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}#{severity_color}[#{severity.to_s}]\033[0m:#{font_color} " << message.strip << "\033[0m"
37
- STDERR.puts str
38
- logfile.puts str unless logfile.nil?
39
- end
27
+ return if severity < self.severity
28
+ message = message.call if Proc === message
29
+ return if message.nil? or message.empty?
30
+
31
+ str = "\033[0;37m#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}#{severity_color}[#{severity.to_s}]\033[0m:#{font_color} " << message.strip << "\033[0m"
32
+ STDERR.puts str
33
+ logfile.puts str unless logfile.nil?
40
34
  end
41
35
 
42
- def self.debug(message)
43
- log(message, DEBUG)
36
+ def self.debug(message = nil, &block)
37
+ log(message, DEBUG, &block)
44
38
  end
45
39
 
46
- def self.low(message)
47
- log(message, LOW)
40
+ def self.low(message = nil, &block)
41
+ log(message, LOW, &block)
48
42
  end
49
43
 
50
- def self.medium(message)
51
- log(message, MEDIUM)
44
+ def self.medium(message = nil, &block)
45
+ log(message, MEDIUM, &block)
52
46
  end
53
47
 
54
- def self.high(message)
55
- log(message, HIGH)
48
+ def self.high(message = nil, &block)
49
+ log(message, HIGH, &block)
56
50
  end
57
51
 
58
- def self.info(message)
59
- log(message, INFO)
52
+ def self.info(message = nil, &block)
53
+ log(message, INFO, &block)
60
54
  end
61
55
 
62
- def self.warn(message)
63
- log(message, WARN)
56
+ def self.warn(message = nil, &block)
57
+ log(message, WARN, &block)
64
58
  end
65
59
 
66
- def self.error(message)
67
- log(message, ERROR)
60
+ def self.error(message = nil, &block)
61
+ log(message, ERROR, &block)
68
62
  end
69
63
 
70
64
 
@@ -84,16 +78,26 @@ module Log
84
78
  end
85
79
  end
86
80
 
87
- def ddd(message, file = $stdout)
88
- Log.debug "DEVEL: " << caller.first
89
- Log.debug ""
90
- Log.debug "=> " << message.inspect
91
- Log.debug ""
92
- end
93
-
94
81
  def ppp(message)
95
- puts "PRINT: " << caller.first
82
+ stack = caller
83
+ puts "#{Log::SEVERITY_COLOR[1]}PRINT:#{Log::SEVERITY_COLOR[0]} " << stack.first
96
84
  puts ""
97
- puts "=> " << message.inspect
85
+ puts "=> " << message
98
86
  puts ""
99
87
  end
88
+
89
+ def ddd(message, file = $stdout)
90
+ stack = caller
91
+ Log.debug{"#{Log::SEVERITY_COLOR[1]}DEVEL:#{Log::SEVERITY_COLOR[0]} " << stack.first}
92
+ Log.debug{""}
93
+ Log.debug{"=> " << message.inspect}
94
+ Log.debug{""}
95
+ end
96
+
97
+ def fff(object)
98
+ stack = caller
99
+ Log.debug{"#{Log::SEVERITY_COLOR[1]}FINGERPRINT:#{Log::SEVERITY_COLOR[0]} " << stack.first}
100
+ Log.debug{""}
101
+ Log.debug{require 'rbbt/util/misc'; "=> " << Misc.fingerprint(object) }
102
+ Log.debug{""}
103
+ end
data/lib/rbbt/util/misc.rb CHANGED
@@ -1,9 +1,5 @@
1
1
  require 'lockfile'
2
- require 'rbbt/util/chain_methods'
3
- require 'rbbt/resource/path'
4
- require 'rbbt/annotations'
5
2
  require 'net/smtp'
6
- require 'narray'
7
3
  require 'digest/md5'
8
4
 
9
5
  class Hash
@@ -16,6 +12,12 @@ class Hash
16
12
  end
17
13
  end
18
14
 
15
+ module LaterString
16
+ def to_s
17
+ yield
18
+ end
19
+ end
20
+
19
21
  module Misc
20
22
  class FieldNotFoundError < StandardError;end
21
23
 
@@ -201,10 +203,48 @@ module Misc
201
203
  filename
202
204
  end
203
205
 
206
+ def self.fingerprint(obj)
207
+ case obj
208
+ when nil
209
+ "nil"
210
+ when Symbol
211
+ ":" << obj.to_s
212
+ when String
213
+ if obj.length > 100
214
+ "'" << obj[0..20-1] << "<...#{obj.length}...>" << obj[-10..-1] << " " << "'"
215
+ else
216
+ "'" << obj << "'"
217
+ end
218
+ when Array
219
+ if (length = obj.length) > 10
220
+ "[#{length}-" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
221
+ else
222
+ "[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
223
+ end
224
+ when TSV
225
+ obj.with_unnamed do
226
+ "TSV:{"<< fingerprint(obj.all_fields|| []).inspect << "," << fingerprint(obj.keys).inspect << "}"
227
+ end
228
+ when Hash
229
+ if obj.length > 10
230
+ "H:{"<< fingerprint(obj.keys) << "," << fingerprint(obj.values) << "}"
231
+ else
232
+ new = "{"
233
+ obj.each do |k,v|
234
+ new << k.to_s << '=>' << fingerprint(v) << ' '
235
+ end
236
+ new << "}"
237
+ end
238
+ else
239
+ obj.to_s
240
+ end
241
+ end
242
+
243
+
204
244
  def self.remove_long_items(obj)
205
245
  case
206
246
  when TSV === obj
207
- remove_long_items(obj.fields + obj.keys.sort)
247
+ remove_long_items((obj.all_fields || []) + obj.keys.sort)
208
248
  when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
209
249
  remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
210
250
  when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
@@ -310,7 +350,6 @@ module Misc
310
350
  def self.send_email(from, to, subject, message, options = {})
311
351
  IndiferentHash.setup(options)
312
352
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
313
- IndiferentHash.setup(options)
314
353
 
315
354
  server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
316
355
 
@@ -406,6 +445,7 @@ end
406
445
  }
407
446
 
408
447
  def self.fast_align(reference, sequence)
448
+ require 'narray'
409
449
  init_gap = -1
410
450
  gap = -2
411
451
  diff = -2
@@ -486,11 +526,35 @@ end
486
526
  end
487
527
 
488
528
  def self.is_filename?(string)
489
- return true if Path === string
529
+ return true if string.respond_to? :exists
490
530
  return true if String === string and string.length < 265 and File.exists? string
491
531
  return false
492
532
  end
493
533
 
534
+ def self.sorted_array_hits(a1, a2)
535
+ e1, e2 = a1.shift, a2.shift
536
+ counter = 0
537
+ match = []
538
+ while true
539
+ break if e1.nil? or e2.nil?
540
+ case e1 <=> e2
541
+ when 0
542
+ match << counter
543
+ e1, e2 = a1.shift, a2.shift
544
+ counter += 1
545
+ when -1
546
+ while not e1.nil? and e1 < e2
547
+ e1 = a1.shift
548
+ counter += 1
549
+ end
550
+ when 1
551
+ e2 = a2.shift
552
+ e2 = a2.shift while not e2.nil? and e2 < e1
553
+ end
554
+ end
555
+ match
556
+ end
557
+
494
558
  def self.intersect_sorted_arrays(a1, a2)
495
559
  e1, e2 = a1.shift, a2.shift
496
560
  intersect = []
@@ -542,6 +606,33 @@ end
542
606
  new
543
607
  end
544
608
 
609
+ def self.binary_include?(array, elem)
610
+ upper = array.size - 1
611
+ lower = 0
612
+
613
+ return -1 if upper < lower
614
+
615
+ while(upper >= lower) do
616
+ idx = lower + (upper - lower) / 2
617
+ value = array[idx]
618
+
619
+ case elem <=> value
620
+ when 0
621
+ return true
622
+ when -1
623
+ upper = idx - 1
624
+ when 1
625
+ lower = idx + 1
626
+ else
627
+ raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
628
+ end
629
+ end
630
+
631
+ return false
632
+ end
633
+
634
+
635
+
545
636
  def self.array2hash(array, default = nil)
546
637
  hash = {}
547
638
  array.each do |key, value|
@@ -748,10 +839,11 @@ end
748
839
 
749
840
  def self.html_tag(tag, content = nil, params = {})
750
841
  attr_str = hash_to_html_tag_attributes(params)
842
+ attr_str = " " << attr_str if String === attr_str and attr_str != ""
751
843
  html = if content.nil?
752
- "<#{ tag } #{attr_str} />"
844
+ "<#{ tag }#{attr_str}/>"
753
845
  else
754
- "<#{ tag } #{attr_str} >#{ content }</#{ tag }>"
846
+ "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
755
847
  end
756
848
 
757
849
  html
@@ -982,7 +1074,7 @@ end
982
1074
 
983
1075
  end
984
1076
 
985
- str << "_" << hash2md5(v.info) if Annotated === v
1077
+ str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
986
1078
  end
987
1079
  hash.unnamed = unnamed if hash.respond_to? :unnamed
988
1080
 
@@ -1099,7 +1191,7 @@ end
1099
1191
  chunks
1100
1192
  end
1101
1193
 
1102
- # Divides the array into +num+ chunks of the same size by placing one
1194
+ # Divides the array into chunks of +num+ same size by placing one
1103
1195
  # element in each chunk iteratively.
1104
1196
  def self.ordered_divide(array, num)
1105
1197
  last = array.length - 1
@@ -1120,6 +1212,7 @@ end
1120
1212
 
1121
1213
  def self.snake_case(string)
1122
1214
  return nil if string.nil?
1215
+ string = string.to_s if Symbol === string
1123
1216
  string.
1124
1217
  gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2').
1125
1218
  gsub(/([a-z])([A-Z])/,'\1_\2').
@@ -1187,54 +1280,49 @@ class RBBTError < StandardError
1187
1280
  end
1188
1281
 
1189
1282
  module IndiferentHash
1190
- extend ChainMethods
1191
- self.chain_prefix = :indiferent
1192
-
1193
- def indiferent_get_brackets(key)
1194
- case
1195
- when (Symbol === key and indiferent_clean_include? key)
1196
- indiferent_clean_get_brackets(key)
1197
- when (Symbol === key and indiferent_clean_include? key.to_s)
1198
- indiferent_clean_get_brackets(key.to_s)
1199
- when (String === key and indiferent_clean_include? key)
1200
- indiferent_clean_get_brackets(key)
1201
- when (String === key and indiferent_clean_include? key.to_sym)
1202
- indiferent_clean_get_brackets(key.to_sym)
1283
+
1284
+ def self.setup(hash)
1285
+ hash.extend IndiferentHash
1286
+ end
1287
+
1288
+ def [](key)
1289
+ res = super(key) and return res
1290
+
1291
+ case key
1292
+ when Symbol, Module
1293
+ super(key.to_s)
1294
+ when String
1295
+ super(key.to_sym)
1203
1296
  else
1204
- indiferent_clean_get_brackets(key)
1297
+ super(key)
1205
1298
  end
1206
1299
  end
1207
1300
 
1208
- def indiferent_values_at(*key_list)
1209
- res = []
1210
- key_list.each{|key| res << indiferent_get_brackets(key)}
1211
- res
1301
+ def values_at(*key_list)
1302
+ key_list.inject([]){|acc,key| acc << self[key]}
1212
1303
  end
1213
1304
 
1214
- def indiferent_include?(key)
1215
- case
1216
- when Symbol === key
1217
- indiferent_clean_include?(key) or indiferent_clean_include?(key.to_s)
1218
- when String === key
1219
- indiferent_clean_include?(key) or indiferent_clean_include?(key.to_sym)
1305
+ def include?(key)
1306
+ case key
1307
+ when Symbol, Module
1308
+ super(key) || super(key.to_s)
1309
+ when String
1310
+ super(key) || super(key.to_sym)
1220
1311
  else
1221
- indiferent_clean_include?(key)
1312
+ super(key)
1222
1313
  end
1223
1314
  end
1224
1315
 
1225
- def indiferent_delete(value)
1226
- if indiferent_clean_include? value.to_s
1227
- indiferent_clean_delete(value.to_s)
1316
+ def delete(key)
1317
+ case key
1318
+ when Symbol, Module
1319
+ super(key) || super(key.to_s)
1320
+ when String
1321
+ super(key) || super(key.to_sym)
1228
1322
  else
1229
- indiferent_clean_delete(value.to_sym)
1323
+ super(key)
1230
1324
  end
1231
1325
  end
1232
-
1233
- def self.setup(hash)
1234
- return hash if IndiferentHash === hash
1235
- hash.extend IndiferentHash unless IndiferentHash === hash
1236
- hash
1237
- end
1238
1326
  end
1239
1327
 
1240
1328
  module PDF2Text
data/lib/rbbt/util/open.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'rbbt/util/cmd'
2
- require 'rbbt/util/misc'
3
2
  require 'rbbt/util/tmpfile'
3
+ require 'rbbt/util/misc'
4
4
 
5
5
  require 'zlib'
6
6
 
@@ -19,6 +19,8 @@ module Open
19
19
  File.exists?(Rbbt.etc.repository_dirs.find) ?
20
20
  File.read(Rbbt.etc.repository_dirs.find).split("\n") :
21
21
  []
22
+ rescue
23
+ []
22
24
  end
23
25
  end
24
26
 
@@ -393,11 +395,6 @@ module Open
393
395
  end
394
396
  when String === content
395
397
  file_write(file, content, mode)
396
- #File.open(file, mode) do |f|
397
- # f.flock(File::LOCK_EX)
398
- # f.write content
399
- # f.flock(File::LOCK_UN)
400
- #end
401
398
  else
402
399
  begin
403
400
  File.open(file, mode) do |f|
@@ -415,14 +412,3 @@ module Open
415
412
  end
416
413
  end
417
414
  end
418
-
419
- if __FILE__ == $0
420
- require 'benchmark'
421
- require 'progress-monitor'
422
-
423
- file = '/home/mvazquezg/rbbt/data/dbs/entrez/gene_info'
424
- puts Benchmark.measure {
425
- #Open.open(file).read.split(/\n/).each do |l| l end
426
- Open.read(file) do |l| l end
427
- }
428
- end