rbbt-util 4.3.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ require 'rbbt/resource/rake'
2
2
 
3
3
  module Path
4
4
 
5
- def self.caller_lib_dir(file = nil)
5
+ def self.caller_lib_dir(file = nil, relative_to = 'lib')
6
6
  file = caller.reject{|l|
7
7
  l =~ /rbbt\/(?:resource\.rb|workflow\.rb)/ or
8
8
  l =~ /rbbt\/resource\/path\.rb/ or
@@ -11,11 +11,11 @@ module Path
11
11
  }.first.sub(/\.rb.*/,'.rb') if file.nil?
12
12
 
13
13
  file = File.expand_path file
14
- return Path.setup(file) if File.exists? File.join(file, 'lib')
14
+ return Path.setup(file) if File.exists? File.join(file, relative_to)
15
15
 
16
16
  while file != '/'
17
17
  dir = File.dirname file
18
- return Path.setup(dir) if File.exists? File.join(dir, 'lib')
18
+ return Path.setup(dir) if File.exists? File.join(dir, relative_to)
19
19
  file = File.dirname file
20
20
  end
21
21
 
data/lib/rbbt/tsv.rb CHANGED
@@ -49,7 +49,9 @@ module TSV
49
49
  data.serializer = serializer
50
50
  end
51
51
 
52
- stream = get_stream source
52
+ open_options = Misc.pull_keys options, :open
53
+
54
+ stream = get_stream source, open_options
53
55
  parse stream, data, options
54
56
 
55
57
  data.filename = filename.to_s unless filename.nil?
@@ -71,11 +73,15 @@ module TSV
71
73
 
72
74
  def self.parse(stream, data, options = {})
73
75
  monitor, grep = Misc.process_options options, :monitor, :grep
76
+
77
+ parser = Parser.new stream, options
78
+
74
79
  if grep
80
+ stream.rewind
75
81
  stream = Open.grep(stream, grep)
82
+ parser.first_line = stream.gets
76
83
  end
77
84
 
78
- parser = Parser.new stream, options
79
85
  line = parser.rescue_first_line
80
86
 
81
87
  if TokyoCabinet::HDB === data and parser.straight
@@ -1,10 +1,18 @@
1
1
  require 'rbbt/util/chain_methods'
2
-
3
2
  module TSV
4
3
  extend ChainMethods
5
4
  self.chain_prefix = :tsv
6
5
 
7
- attr_accessor :unnamed, :serializer_module
6
+ attr_accessor :unnamed, :serializer_module, :entity_options
7
+
8
# Options used when keys and values of this TSV are annotated (they are the
# last argument of the NamedArray.setup and Misc.prepare_entity calls in
# this module).
#
# When +namespace+ is set it is exposed under both :namespace and :organism.
# Any explicitly assigned @entity_options are merged on top, so they take
# precedence over the namespace-derived defaults.
#
# Returns a Hash; it is empty when neither a namespace nor explicit options
# are present.
def entity_options
  defaults = namespace ? {:namespace => namespace, :organism => namespace} : {}
  return defaults unless @entity_options

  defaults.merge(@entity_options)
end
8
16
 
9
17
  def with_unnamed
10
18
  saved_unnamed = @unnamed
@@ -72,10 +80,11 @@ module TSV
72
80
 
73
81
  case type
74
82
  when :double, :list
75
- NamedArray.setup value, fields, key, namespace
83
+ NamedArray.setup value, fields, key, entity_options
76
84
  when :flat, :single
77
85
  value = value.dup if value.frozen?
78
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
86
+
87
+ value = Misc.prepare_entity(value, fields.first, entity_options)
79
88
  end
80
89
  value
81
90
  end
@@ -88,11 +97,7 @@ module TSV
88
97
  keys = tsv_clean_keys - ENTRY_KEYS
89
98
  return keys if @unnamed or key_field.nil?
90
99
 
91
- if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
92
- Entity.formats[key_field].setup(keys.collect{|k| k.dup}, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}) )
93
- else
94
- keys
95
- end
100
+ Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
96
101
  end
97
102
 
98
103
  def tsv_values
@@ -101,11 +106,9 @@ module TSV
101
106
 
102
107
  case type
103
108
  when :double, :list
104
- values.each{|value| NamedArray.setup value, fields, nil, namespace }
109
+ values.each{|value| NamedArray.setup value, fields, nil, entity_options}
105
110
  when :flat, :single
106
- values.each{|value|
107
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first}))
108
- } if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
111
+ values = values.collect{|v| Misc.prepare_entity(v, fields.first, entity_options)}
109
112
  end
110
113
 
111
114
  values
@@ -127,14 +130,12 @@ module TSV
127
130
  if not fields.nil?
128
131
  case type
129
132
  when :double, :list
130
- NamedArray.setup value, fields, key, namespace if Array === value
133
+ NamedArray.setup value, fields, key, entity_options if Array === value
131
134
  when :flat, :single
132
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
135
+ Misc.prepare_entity(value, fields.first, entity_options)
133
136
  end
134
137
  end
135
- if defined?(Entity) and not key_field.nil? and Entity.respond_to?(:formats) and Entity.formats.include? key_field
136
- key = Entity.formats[key_field].setup(key.dup, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}))
137
- end
138
+ key = Misc.prepare_entity(key, key_field, entity_options)
138
139
  end
139
140
 
140
141
  yield key, value if block_given?
@@ -156,14 +157,12 @@ module TSV
156
157
  if not fields.nil?
157
158
  case type
158
159
  when :double, :list
159
- NamedArray.setup value, fields, key, namespace if Array === value
160
+ NamedArray.setup value, fields, key, entity_options if Array === value
160
161
  when :flat, :single
161
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
162
+ value = Misc.prepare_entity(value, fields.first, entity_options)
162
163
  end
163
164
  end
164
- if defined?(Entity) and not key_field.nil? and Entity.respond_to?(:formats) and Entity.formats.include? key_field
165
- key = Entity.formats[key_field].setup(key.dup, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}))
166
- end
165
+ key = Misc.prepare_entity(key, key_field, entity_options)
167
166
  end
168
167
 
169
168
 
@@ -217,14 +216,14 @@ module TSV
217
216
  if fields == :all
218
217
  if just_keys
219
218
  keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
220
- Entity.formats[key_field].setup(keys, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
219
+ keys = Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
221
220
  else
222
221
  elems.sort_by{|key, value| key }
223
222
  end
224
223
  else
225
224
  if just_keys
226
225
  keys = elems.sort_by{|key, value| value }.collect{|key, value| key}
227
- Entity.formats[key_field].setup(keys, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
226
+ keys = Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
228
227
  keys
229
228
  else
230
229
  elems.sort_by{|key, value| value }.collect{|key, value| [key, self[key]]}
@@ -346,7 +345,7 @@ end
346
345
  if @fields.nil? or @unnamed
347
346
  @fields
348
347
  else
349
- NamedArray.setup @fields, @fields, nil, namespace
348
+ NamedArray.setup @fields, @fields, nil, entity_options
350
349
  end
351
350
  end
352
351
 
@@ -366,7 +365,7 @@ end
366
365
  case
367
366
  when (TSV === identifiers.first or identifiers.empty?)
368
367
  identifiers
369
- when
368
+ else
370
369
  identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
371
370
  end
372
371
  when identifiers
@@ -413,7 +412,9 @@ end
413
412
  end
414
413
 
415
414
  if keys == :sort
416
- keys = self.keys.sort
415
+ with_unnamed do
416
+ keys = self.keys.sort
417
+ end
417
418
  end
418
419
 
419
420
  str = ""
@@ -50,7 +50,7 @@ module TSV
50
50
  # Merge two files with the same keys and different fields
51
51
  def self.merge_different_fields(file1, file2, output, sep = "\t")
52
52
  case
53
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
53
+ when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
54
54
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
55
55
  when (String === file1 or StringIO === file1)
56
56
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
@@ -59,7 +59,7 @@ module TSV
59
59
  end
60
60
 
61
61
  case
62
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
62
+ when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exists?(file2))
63
63
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
64
64
  when (String === file2 or StringIO === file2)
65
65
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
@@ -146,7 +146,12 @@ module TSV
146
146
 
147
147
  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
148
148
 
149
- other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other
149
+ unless TSV === other
150
+ other_identifier_files = other.identifier_files
151
+ other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other
152
+ other.identifiers = other_identifier_files
153
+ end
154
+
150
155
  case
151
156
  when key_field == other.key_field
152
157
  attach_same_key other, fields
@@ -3,6 +3,8 @@ module TSV
3
3
  def attach_same_key(other, fields = nil)
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
+ fields = [fields].compact unless Array === fields
7
+
6
8
  field_positions = fields.collect{|field| other.identify_field field}
7
9
  other.with_unnamed do
8
10
  with_unnamed do
@@ -114,6 +116,7 @@ module TSV
114
116
  with_unnamed do
115
117
  through do |key, values|
116
118
  source_keys = index[key]
119
+ source_keys = [source_keys] unless Array === source_keys
117
120
  if source_keys.nil? or source_keys.empty?
118
121
  all_new_values = []
119
122
  else
@@ -0,0 +1,91 @@
1
+ require 'spreadsheet'
2
# Excel export for TSV objects, built on the spreadsheet gem.
module TSV
  # Shared implementation behind TSV.excel and TSV#excel.
  #
  # tsv::      object to export; must respond to +each+ yielding key/values
  #            pairs and, when sorting is requested, to +sort_by+.
  # fields::   header cells written to row 0.
  # filename:: destination handed to Spreadsheet::Workbook#write.
  # options::  read through Misc.process_options:
  #            :name         - show +name+ of keys/values that respond to it
  #                            (falling back to the object when name is nil);
  #            :sort_by      - argument passed to tsv.sort_by;
  #            :sort_by_cast - method sent to the sort value (to its first
  #                            element when it is an Array) before comparing.
  #
  # Returns whatever Spreadsheet::Workbook#write returns.
  def self.write_excel_workbook(tsv, fields, filename, options)
    name         = Misc.process_options options, :name
    sort_by      = Misc.process_options options, :sort_by
    sort_by_cast = Misc.process_options options, :sort_by_cast

    book  = Spreadsheet::Workbook.new
    sheet = book.create_worksheet
    sheet.row(0).concat fields

    data =
      if sort_by and sort_by_cast
        tsv.sort_by(sort_by) do |k, v|
          (Array === v ? v.first : v).send(sort_by_cast)
        end
      elsif sort_by
        tsv.sort_by sort_by
      else
        tsv
      end

    # What is shown in a cell: the object's name when :name was requested and
    # the object has one, otherwise the object itself.
    label = lambda do |object|
      (name and object.respond_to?(:name)) ? (object.name || object) : object
    end

    row_number = 1
    data.each do |key, values|
      cells = [label.call(key)]

      values.each do |value|
        shown = label.call(value)
        # Multi-valued cells are flattened into one comma-separated string.
        cells << (Array === shown ? shown * ", " : shown)
      end

      sheet.row(row_number).concat cells
      row_number += 1
    end

    book.write filename
  end

  # Write +tsv+ to +filename+ as a single-sheet Excel workbook.
  #
  # Accepts the options documented on TSV.write_excel_workbook plus :fields,
  # the header row. The header defaults to tsv.all_fields; the receiver is
  # explicit because inside this module-level method a bare +all_fields+
  # would be looked up on the TSV module itself, not on +tsv+.
  def self.excel(tsv, filename, options = {})
    fields = Misc.process_options(options, :fields) || tsv.all_fields
    write_excel_workbook(tsv, fields, filename, options)
  end

  # Write this TSV to +filename+ as a single-sheet Excel workbook.
  #
  # The header row is always +all_fields+; a :fields entry in +options+ is
  # not consulted here. Other options are as for TSV.write_excel_workbook.
  def excel(filename, options = {})
    TSV.write_excel_workbook(self, all_fields, filename, options)
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/misc'
2
+ require 'set'
2
3
  module Filtered
3
4
 
4
5
  class FilterArray
@@ -37,12 +38,21 @@ module Filtered
37
38
 
38
39
  @list = nil
39
40
  case
41
+ when @match == :key
42
+ @value = Set.new(@value)
43
+ class << self
44
+ self
45
+ end.class_eval <<-EOC
46
+ def match_entry(key, entry)
47
+ key == @value or (Set === @value and @value.include? key)
48
+ end
49
+ EOC
40
50
  when @match.match(/field:(.*)/)
41
51
  @fieldnum = data.identify_field $1
42
52
  class << self
43
53
  self
44
54
  end.class_eval <<-EOC
45
- def match_entry(entry)
55
+ def match_entry(key, entry)
46
56
  value = entry[@fieldnum]
47
57
  value == @value or (Array === value and value.include? @value)
48
58
  end
@@ -78,7 +88,7 @@ module Filtered
78
88
 
79
89
  data.with_unnamed do
80
90
  data.unfiltered_each do |key, entry|
81
- ids << key if match_entry(entry)
91
+ ids << key if match_entry(key, entry)
82
92
  end
83
93
  end
84
94
 
@@ -173,7 +183,7 @@ module Filtered
173
183
  if filters.empty?
174
184
  unfiltered_filename
175
185
  else
176
- unfiltered_filename + ":Filtered[#{filters.collect{|f| [f.match, f.value] * "="} * ", "}]"
186
+ unfiltered_filename + ":Filtered[#{filters.collect{|f| [f.match, Array === f.value ? Misc.hash2md5(:values => f.value) : f.value] * "="} * ", "}]"
177
187
  end
178
188
  end
179
189
 
@@ -182,7 +192,7 @@ module Filtered
182
192
  self.send(:unfiltered_set, key, value)
183
193
  else
184
194
  filters.each do |filter|
185
- filter.add key if filter.match_entry value
195
+ filter.add key if filter.match_entry key, value
186
196
  end
187
197
  self.send(:unfiltered_set, key, value)
188
198
  end
@@ -269,12 +279,12 @@ module TSV
269
279
  end
270
280
 
271
281
  def reset_filters
272
- if filter_dir.nil? or filter_dir.empty?
273
- filters.each do |filter| filter.reset end
282
+ if @filter_dir.nil? or @filter_dir.empty?
283
+ @filters.each do |filter| filter.reset end if Array === @filters
274
284
  return
275
285
  end
276
286
 
277
- Dir.glob(File.join(filter_dir, '*.filter')).each do |f|
287
+ Dir.glob(File.join(@filter_dir, '*.filter')).each do |f|
278
288
  FileUtils.rm f
279
289
  end
280
290
  end
@@ -174,14 +174,16 @@ module TSV
174
174
 
175
175
  keys, value = traverser.process(key, value)
176
176
 
177
+ keys = [keys].compact unless Array === keys
178
+
177
179
  # Annotated with Entity and NamedArray
178
180
  if not @unnamed
179
181
  if not traverser.new_field_names.nil?
180
182
  case type
181
183
  when :double, :list
182
- NamedArray.setup value, traverser.new_field_names
184
+ NamedArray.setup value, traverser.new_field_names, key, entity_options
183
185
  when :flat, :single
184
- Entity.formats[traverser.new_field_names.first].setup(value, :format => traverser.new_field_names.first) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_field_names
186
+ Misc.prepare_entity(value, traverser.new_field_names.first, entity_options)
185
187
  end
186
188
  end
187
189
  end
@@ -197,8 +199,8 @@ module TSV
197
199
  r
198
200
  }
199
201
 
200
- if not @unnamed and defined?(Entity) and not traverser.new_key_field_name.nil? and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_key_field_name
201
- k = Entity.formats[traverser.new_key_field_name].setup(k.dup, :format => traverser.new_key_field_name)
202
+ if not @unnamed
203
+ k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
202
204
  end
203
205
  v.key = k if NamedArray === v
204
206
  yield k, v
@@ -207,8 +209,8 @@ module TSV
207
209
 
208
210
  else
209
211
  keys.each do |key|
210
# Annotate the key as an entity unless the TSV is in unnamed mode. The result
# has to be written back to +key+, the block variable of the surrounding
# keys.each, because +key+ is what gets assigned to value.key and yielded
# right after this statement; assigning to a different local would leave the
# yielded key unannotated.
unless @unnamed
  key = Misc.prepare_entity(key, traverser.new_key_field_name, entity_options)
end
213
215
  value.key = key if NamedArray === value
214
216
  yield key, value
@@ -318,11 +320,23 @@ module TSV
318
320
  end
319
321
  when String === method
320
322
  if block_given?
321
- pos = identify_field method
322
323
  with_unnamed do
323
- through do |key, values|
324
- new[key] = values if yield((method == key_field or method == :key)? key : values[pos])
324
+ case
325
+ when (method == key_field or method == :key)
326
+ through do |key, values|
327
+ new[key] = values if yield(key)
328
+ end
329
+ when (type == :single or type == :flat)
330
+ through do |key, value|
331
+ new[key] = value if yield(value)
332
+ end
333
+ else
334
+ pos = identify_field method
335
+ through do |key, values|
336
+ new[key] = values if yield(values[pos])
337
+ end
325
338
  end
339
+
326
340
  end
327
341
  else
328
342
  with_unnamed do
@@ -351,7 +365,7 @@ module TSV
351
365
  end
352
366
  when :list
353
367
  through :key, key do |key, values|
354
- new[key] = self[key] if method.include? value.first
368
+ new[key] = self[key] if method.include? values.first
355
369
  end
356
370
  when :flat #untested
357
371
  through :key, key do |key, values|
@@ -359,7 +373,7 @@ module TSV
359
373
  end
360
374
  else
361
375
  through :key, key do |key, values|
362
- new[key] = self[key] if (method & values.first).any?
376
+ new[key] = self[key] if (method & values.flatten).any?
363
377
  end
364
378
  end
365
379
  end
@@ -448,6 +462,7 @@ module TSV
448
462
  else
449
463
  values << new_values
450
464
  end
465
+
451
466
  self[key] = values
452
467
  end
453
468
  @monitor = old_monitor