rbbt-util 4.3.0 → 4.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@ require 'rbbt/resource/rake'
2
2
 
3
3
  module Path
4
4
 
5
- def self.caller_lib_dir(file = nil)
5
+ def self.caller_lib_dir(file = nil, relative_to = 'lib')
6
6
  file = caller.reject{|l|
7
7
  l =~ /rbbt\/(?:resource\.rb|workflow\.rb)/ or
8
8
  l =~ /rbbt\/resource\/path\.rb/ or
@@ -11,11 +11,11 @@ module Path
11
11
  }.first.sub(/\.rb.*/,'.rb') if file.nil?
12
12
 
13
13
  file = File.expand_path file
14
- return Path.setup(file) if File.exists? File.join(file, 'lib')
14
+ return Path.setup(file) if File.exists? File.join(file, relative_to)
15
15
 
16
16
  while file != '/'
17
17
  dir = File.dirname file
18
- return Path.setup(dir) if File.exists? File.join(dir, 'lib')
18
+ return Path.setup(dir) if File.exists? File.join(dir, relative_to)
19
19
  file = File.dirname file
20
20
  end
21
21
 
data/lib/rbbt/tsv.rb CHANGED
@@ -49,7 +49,9 @@ module TSV
49
49
  data.serializer = serializer
50
50
  end
51
51
 
52
- stream = get_stream source
52
+ open_options = Misc.pull_keys options, :open
53
+
54
+ stream = get_stream source, open_options
53
55
  parse stream, data, options
54
56
 
55
57
  data.filename = filename.to_s unless filename.nil?
@@ -71,11 +73,15 @@ module TSV
71
73
 
72
74
  def self.parse(stream, data, options = {})
73
75
  monitor, grep = Misc.process_options options, :monitor, :grep
76
+
77
+ parser = Parser.new stream, options
78
+
74
79
  if grep
80
+ stream.rewind
75
81
  stream = Open.grep(stream, grep)
82
+ parser.first_line = stream.gets
76
83
  end
77
84
 
78
- parser = Parser.new stream, options
79
85
  line = parser.rescue_first_line
80
86
 
81
87
  if TokyoCabinet::HDB === data and parser.straight
@@ -1,10 +1,18 @@
1
1
  require 'rbbt/util/chain_methods'
2
-
3
2
  module TSV
4
3
  extend ChainMethods
5
4
  self.chain_prefix = :tsv
6
5
 
7
- attr_accessor :unnamed, :serializer_module
6
+ attr_accessor :unnamed, :serializer_module, :entity_options
7
+
8
+ def entity_options
9
+ options = namespace ? {:namespace => namespace, :organism => namespace} : {}
10
+ if @entity_options
11
+ options.merge(@entity_options)
12
+ else
13
+ options
14
+ end
15
+ end
8
16
 
9
17
  def with_unnamed
10
18
  saved_unnamed = @unnamed
@@ -72,10 +80,11 @@ module TSV
72
80
 
73
81
  case type
74
82
  when :double, :list
75
- NamedArray.setup value, fields, key, namespace
83
+ NamedArray.setup value, fields, key, entity_options
76
84
  when :flat, :single
77
85
  value = value.dup if value.frozen?
78
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
86
+
87
+ value = Misc.prepare_entity(value, fields.first, entity_options)
79
88
  end
80
89
  value
81
90
  end
@@ -88,11 +97,7 @@ module TSV
88
97
  keys = tsv_clean_keys - ENTRY_KEYS
89
98
  return keys if @unnamed or key_field.nil?
90
99
 
91
- if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
92
- Entity.formats[key_field].setup(keys.collect{|k| k.dup}, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}) )
93
- else
94
- keys
95
- end
100
+ Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
96
101
  end
97
102
 
98
103
  def tsv_values
@@ -101,11 +106,9 @@ module TSV
101
106
 
102
107
  case type
103
108
  when :double, :list
104
- values.each{|value| NamedArray.setup value, fields, nil, namespace }
109
+ values.each{|value| NamedArray.setup value, fields, nil, entity_options}
105
110
  when :flat, :single
106
- values.each{|value|
107
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first}))
108
- } if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
111
+ values = values.collect{|v| Misc.prepare_entity(v, fields.first, entity_options)}
109
112
  end
110
113
 
111
114
  values
@@ -127,14 +130,12 @@ module TSV
127
130
  if not fields.nil?
128
131
  case type
129
132
  when :double, :list
130
- NamedArray.setup value, fields, key, namespace if Array === value
133
+ NamedArray.setup value, fields, key, entity_options if Array === value
131
134
  when :flat, :single
132
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
135
+ Misc.prepare_entity(value, fields.first, entity_options)
133
136
  end
134
137
  end
135
- if defined?(Entity) and not key_field.nil? and Entity.respond_to?(:formats) and Entity.formats.include? key_field
136
- key = Entity.formats[key_field].setup(key.dup, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}))
137
- end
138
+ key = Misc.prepare_entity(key, key_field, entity_options)
138
139
  end
139
140
 
140
141
  yield key, value if block_given?
@@ -156,14 +157,12 @@ module TSV
156
157
  if not fields.nil?
157
158
  case type
158
159
  when :double, :list
159
- NamedArray.setup value, fields, key, namespace if Array === value
160
+ NamedArray.setup value, fields, key, entity_options if Array === value
160
161
  when :flat, :single
161
- Entity.formats[fields.first].setup(value, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? fields.first
162
+ value = Misc.prepare_entity(value, fields.first, entity_options)
162
163
  end
163
164
  end
164
- if defined?(Entity) and not key_field.nil? and Entity.respond_to?(:formats) and Entity.formats.include? key_field
165
- key = Entity.formats[key_field].setup(key.dup, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field}))
166
- end
165
+ key = Misc.prepare_entity(key, key_field, entity_options)
167
166
  end
168
167
 
169
168
 
@@ -217,14 +216,14 @@ module TSV
217
216
  if fields == :all
218
217
  if just_keys
219
218
  keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
220
- Entity.formats[key_field].setup(keys, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => key_field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
219
+ keys = Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
221
220
  else
222
221
  elems.sort_by{|key, value| key }
223
222
  end
224
223
  else
225
224
  if just_keys
226
225
  keys = elems.sort_by{|key, value| value }.collect{|key, value| key}
227
- Entity.formats[key_field].setup(keys, (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => fields.first})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? key_field
226
+ keys = Misc.prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
228
227
  keys
229
228
  else
230
229
  elems.sort_by{|key, value| value }.collect{|key, value| [key, self[key]]}
@@ -346,7 +345,7 @@ end
346
345
  if @fields.nil? or @unnamed
347
346
  @fields
348
347
  else
349
- NamedArray.setup @fields, @fields, nil, namespace
348
+ NamedArray.setup @fields, @fields, nil, entity_options
350
349
  end
351
350
  end
352
351
 
@@ -366,7 +365,7 @@ end
366
365
  case
367
366
  when (TSV === identifiers.first or identifiers.empty?)
368
367
  identifiers
369
- when
368
+ else
370
369
  identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
371
370
  end
372
371
  when identifiers
@@ -413,7 +412,9 @@ end
413
412
  end
414
413
 
415
414
  if keys == :sort
416
- keys = self.keys.sort
415
+ with_unnamed do
416
+ keys = self.keys.sort
417
+ end
417
418
  end
418
419
 
419
420
  str = ""
@@ -50,7 +50,7 @@ module TSV
50
50
  # Merge two files with the same keys and different fields
51
51
  def self.merge_different_fields(file1, file2, output, sep = "\t")
52
52
  case
53
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
53
+ when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
54
54
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
55
55
  when (String === file1 or StringIO === file1)
56
56
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
@@ -59,7 +59,7 @@ module TSV
59
59
  end
60
60
 
61
61
  case
62
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
62
+ when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exists?(file2))
63
63
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
64
64
  when (String === file2 or StringIO === file2)
65
65
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
@@ -146,7 +146,12 @@ module TSV
146
146
 
147
147
  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
148
148
 
149
- other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other
149
+ unless TSV === other
150
+ other_identifier_files = other.identifier_files
151
+ other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other
152
+ other.identifiers = other_identifier_files
153
+ end
154
+
150
155
  case
151
156
  when key_field == other.key_field
152
157
  attach_same_key other, fields
@@ -3,6 +3,8 @@ module TSV
3
3
  def attach_same_key(other, fields = nil)
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
+ fields = [fields].compact unless Array === fields
7
+
6
8
  field_positions = fields.collect{|field| other.identify_field field}
7
9
  other.with_unnamed do
8
10
  with_unnamed do
@@ -114,6 +116,7 @@ module TSV
114
116
  with_unnamed do
115
117
  through do |key, values|
116
118
  source_keys = index[key]
119
+ source_keys = [source_keys] unless Array === source_keys
117
120
  if source_keys.nil? or source_keys.empty?
118
121
  all_new_values = []
119
122
  else
@@ -0,0 +1,91 @@
1
+ require 'spreadsheet'
2
+ module TSV
3
+ def self.excel(tsv, filename, options ={})
4
+ name = Misc.process_options options, :name
5
+ sort_by = Misc.process_options options, :sort_by
6
+ sort_by_cast = Misc.process_options options, :sort_by_cast
7
+ fields = Misc.process_options(options, :fields) || all_fields
8
+
9
+ book = Spreadsheet::Workbook.new
10
+ sheet1 = book.create_worksheet
11
+ sheet1.row(0).concat fields
12
+ i = 1
13
+ if sort_by
14
+ if sort_by_cast
15
+ data = tsv.sort_by sort_by do |k, v|
16
+ if Array === v
17
+ v.first.send(sort_by_cast)
18
+ else
19
+ v.send(sort_by_cast)
20
+ end
21
+ end
22
+ else
23
+ data = tsv.sort_by sort_by
24
+ end
25
+ else
26
+ data = tsv
27
+ end
28
+
29
+ data.each do |key, values|
30
+ cells = []
31
+ cells.push((name and key.respond_to?(:name)) ? key.name || key : key )
32
+
33
+ values.each do |value|
34
+ v = (name and value.respond_to?(:name)) ? value.name || value : value
35
+ if Array === v
36
+ cells.push v * ", "
37
+ else
38
+ cells.push v
39
+ end
40
+ end
41
+
42
+ sheet1.row(i).concat cells
43
+ i += 1
44
+ end
45
+ book.write filename
46
+ end
47
+
48
+ def excel(filename, options ={})
49
+ name = Misc.process_options options, :name
50
+ sort_by = Misc.process_options options, :sort_by
51
+ sort_by_cast = Misc.process_options options, :sort_by_cast
52
+
53
+ book = Spreadsheet::Workbook.new
54
+ sheet1 = book.create_worksheet
55
+ sheet1.row(0).concat all_fields
56
+ i = 1
57
+ if sort_by
58
+ if sort_by_cast
59
+ data = self.sort_by sort_by do |k, v|
60
+ if Array === v
61
+ v.first.send(sort_by_cast)
62
+ else
63
+ v.send(sort_by_cast)
64
+ end
65
+ end
66
+ else
67
+ data = self.sort_by sort_by
68
+ end
69
+ else
70
+ data = self
71
+ end
72
+
73
+ data.each do |key, values|
74
+ cells = []
75
+ cells.push((name and key.respond_to?(:name)) ? key.name || key : key )
76
+
77
+ values.each do |value|
78
+ v = (name and value.respond_to?(:name)) ? value.name || value : value
79
+ if Array === v
80
+ cells.push v * ", "
81
+ else
82
+ cells.push v
83
+ end
84
+ end
85
+
86
+ sheet1.row(i).concat cells
87
+ i += 1
88
+ end
89
+ book.write filename
90
+ end
91
+ end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/misc'
2
+ require 'set'
2
3
  module Filtered
3
4
 
4
5
  class FilterArray
@@ -37,12 +38,21 @@ module Filtered
37
38
 
38
39
  @list = nil
39
40
  case
41
+ when @match == :key
42
+ @value = Set.new(@value)
43
+ class << self
44
+ self
45
+ end.class_eval <<-EOC
46
+ def match_entry(key, entry)
47
+ key == @value or (Set === @value and @value.include? key)
48
+ end
49
+ EOC
40
50
  when @match.match(/field:(.*)/)
41
51
  @fieldnum = data.identify_field $1
42
52
  class << self
43
53
  self
44
54
  end.class_eval <<-EOC
45
- def match_entry(entry)
55
+ def match_entry(key, entry)
46
56
  value = entry[@fieldnum]
47
57
  value == @value or (Array === value and value.include? @value)
48
58
  end
@@ -78,7 +88,7 @@ module Filtered
78
88
 
79
89
  data.with_unnamed do
80
90
  data.unfiltered_each do |key, entry|
81
- ids << key if match_entry(entry)
91
+ ids << key if match_entry(key, entry)
82
92
  end
83
93
  end
84
94
 
@@ -173,7 +183,7 @@ module Filtered
173
183
  if filters.empty?
174
184
  unfiltered_filename
175
185
  else
176
- unfiltered_filename + ":Filtered[#{filters.collect{|f| [f.match, f.value] * "="} * ", "}]"
186
+ unfiltered_filename + ":Filtered[#{filters.collect{|f| [f.match, Array === f.value ? Misc.hash2md5(:values => f.value) : f.value] * "="} * ", "}]"
177
187
  end
178
188
  end
179
189
 
@@ -182,7 +192,7 @@ module Filtered
182
192
  self.send(:unfiltered_set, key, value)
183
193
  else
184
194
  filters.each do |filter|
185
- filter.add key if filter.match_entry value
195
+ filter.add key if filter.match_entry key, value
186
196
  end
187
197
  self.send(:unfiltered_set, key, value)
188
198
  end
@@ -269,12 +279,12 @@ module TSV
269
279
  end
270
280
 
271
281
  def reset_filters
272
- if filter_dir.nil? or filter_dir.empty?
273
- filters.each do |filter| filter.reset end
282
+ if @filter_dir.nil? or @filter_dir.empty?
283
+ @filters.each do |filter| filter.reset end if Array === @filters
274
284
  return
275
285
  end
276
286
 
277
- Dir.glob(File.join(filter_dir, '*.filter')).each do |f|
287
+ Dir.glob(File.join(@filter_dir, '*.filter')).each do |f|
278
288
  FileUtils.rm f
279
289
  end
280
290
  end
@@ -174,14 +174,16 @@ module TSV
174
174
 
175
175
  keys, value = traverser.process(key, value)
176
176
 
177
+ keys = [keys].compact unless Array === keys
178
+
177
179
  # Annotated with Entity and NamedArray
178
180
  if not @unnamed
179
181
  if not traverser.new_field_names.nil?
180
182
  case type
181
183
  when :double, :list
182
- NamedArray.setup value, traverser.new_field_names
184
+ NamedArray.setup value, traverser.new_field_names, key, entity_options
183
185
  when :flat, :single
184
- Entity.formats[traverser.new_field_names.first].setup(value, :format => traverser.new_field_names.first) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_field_names
186
+ Misc.prepare_entity(value, traverser.new_field_names.first, entity_options)
185
187
  end
186
188
  end
187
189
  end
@@ -197,8 +199,8 @@ module TSV
197
199
  r
198
200
  }
199
201
 
200
- if not @unnamed and defined?(Entity) and not traverser.new_key_field_name.nil? and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_key_field_name
201
- k = Entity.formats[traverser.new_key_field_name].setup(k.dup, :format => traverser.new_key_field_name)
202
+ if not @unnamed
203
+ k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
202
204
  end
203
205
  v.key = k if NamedArray === v
204
206
  yield k, v
@@ -207,8 +209,8 @@ module TSV
207
209
 
208
210
  else
209
211
  keys.each do |key|
210
- if not @unnamed and defined?(Entity) and not traverser.new_key_field_name.nil? and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_key_field_name
211
- key = Entity.formats[traverser.new_key_field_name].setup(key.dup, :format => traverser.new_key_field_name)
212
+ if not @unnamed
213
+ k = Misc.prepare_entity(k, traverser.new_key_field_name, entity_options)
212
214
  end
213
215
  value.key = key if NamedArray === value
214
216
  yield key, value
@@ -318,11 +320,23 @@ module TSV
318
320
  end
319
321
  when String === method
320
322
  if block_given?
321
- pos = identify_field method
322
323
  with_unnamed do
323
- through do |key, values|
324
- new[key] = values if yield((method == key_field or method == :key)? key : values[pos])
324
+ case
325
+ when (method == key_field or method == :key)
326
+ through do |key, values|
327
+ new[key] = values if yield(key)
328
+ end
329
+ when (type == :single or type == :flat)
330
+ through do |key, value|
331
+ new[key] = value if yield(value)
332
+ end
333
+ else
334
+ pos = identify_field method
335
+ through do |key, values|
336
+ new[key] = values if yield(values[pos])
337
+ end
325
338
  end
339
+
326
340
  end
327
341
  else
328
342
  with_unnamed do
@@ -351,7 +365,7 @@ module TSV
351
365
  end
352
366
  when :list
353
367
  through :key, key do |key, values|
354
- new[key] = self[key] if method.include? value.first
368
+ new[key] = self[key] if method.include? values.first
355
369
  end
356
370
  when :flat #untested
357
371
  through :key, key do |key, values|
@@ -359,7 +373,7 @@ module TSV
359
373
  end
360
374
  else
361
375
  through :key, key do |key, values|
362
- new[key] = self[key] if (method & values.first).any?
376
+ new[key] = self[key] if (method & values.flatten).any?
363
377
  end
364
378
  end
365
379
  end
@@ -448,6 +462,7 @@ module TSV
448
462
  else
449
463
  values << new_values
450
464
  end
465
+
451
466
  self[key] = values
452
467
  end
453
468
  @monitor = old_monitor