rbbt-util 5.14.25 → 5.14.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88885c1c0256441286016e2dcce5064c4bd62c75
4
- data.tar.gz: 73044f407bdd1fb7c936841074211291cc13a327
3
+ metadata.gz: e2e5f1318aa0869985d9387dc3ed40c68ea0f6f6
4
+ data.tar.gz: 521b91ab2b83d9665c18e02c79e5685b6e4290ad
5
5
  SHA512:
6
- metadata.gz: c61172f11902cc5eb070d82a764a469741f9899efbf7fda38c011a18cde6306bbfcfc6e6251372af29f8a5565dd7ffaccf7d091b98cc5ce3715bc6508ef41131
7
- data.tar.gz: f34f9a02776870e4dcc253877f953ea48d278a5a60bec8481b04eb084c97d53aa65034133c336ea53e8edfa2c65b20e3cb8de9cb0af3a42d4861ae09771839f5
6
+ metadata.gz: 7599d8f628c59747e0aead2b7bd2583d2a3800cf73d27b7b1bea7811726713a4dc77c878280814889ef87ce616f4e1dff4725bd0e0590394483c306d45c1274a
7
+ data.tar.gz: 0457ae9ef9d490080271f1749b1e97eb68fc74cb0762a8e4944b01fd5f52725f6b16ef1d1f4c44487720d9491c9ae0cab8382fd6b96f0ba94fac8e296897cd4a
@@ -76,7 +76,7 @@ module Association
76
76
  :fields => fields.collect{|f| String === f ? all_fields.index(f): f },
77
77
  #:type => (options[:type] and options[:type].to_sym == :flat) ? :flat : nil,
78
78
  :unnamed => true,
79
- :merge => (options[:type] and options[:type].to_sym == :flat) ? false : true
79
+ :merge => (options[:merge] or (options[:type] and options[:type].to_sym == :flat) ? false : true)
80
80
  })
81
81
 
82
82
  open_options["header_hash"] = "#" if options["header_hash"] == ""
@@ -259,10 +259,10 @@ module Association
259
259
  # Persist.open_tokyocabinet(index_file, write, :list, TokyoCabinet::BDB).tap{|r| r.unnamed = true; Association::Index.setup r }
260
260
  #end
261
261
 
262
- def self.index(file, options = {}, persist_options = {})
262
+ def self.index(file, options = {}, persist_options = nil)
263
263
  options = {} if options.nil?
264
264
  options = Misc.add_defaults options, :persist => true
265
- persist_options = {} if persist_options.nil?
265
+ persist_options = Misc.pull_keys options, persist_options if persist_options.nil?
266
266
 
267
267
  Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association Index"}.merge(persist_options).merge(:engine => TokyoCabinet::BDB, :serializer => :clean)) do |assocs|
268
268
  undirected = options[:undirected]
@@ -163,7 +163,7 @@ class KnowledgeBase
163
163
  file, registered_options = registry[name]
164
164
 
165
165
  options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format
166
- options = Misc.add_defaults options, registered_options
166
+ options = Misc.add_defaults options, registered_options if registered_options
167
167
 
168
168
  persist_options = Misc.pull_keys options, :persist
169
169
 
@@ -16,6 +16,7 @@ require 'rbbt/tsv/filter'
16
16
  require 'rbbt/tsv/field_index'
17
17
  require 'rbbt/tsv/parallel'
18
18
  require 'rbbt/tsv/stream'
19
+ require 'rbbt/tsv/melt'
19
20
 
20
21
  module TSV
21
22
  class << self
@@ -58,7 +59,7 @@ module TSV
58
59
  unnamed = Misc.process_options options, :unnamed
59
60
  entity_options = Misc.process_options options, :entity_options
60
61
 
61
- Log.debug "TSV open: #{ filename } - #{options.inspect}.#{unnamed ? " [unnamed]" : "[not unnamed]"}"
62
+ Log.debug "TSV open: #{ filename } - #{Misc.fingerprint options }.#{unnamed ? " [unnamed]" : "[not unnamed]"}"
62
63
 
63
64
  data = nil
64
65
 
@@ -118,6 +119,7 @@ module TSV
118
119
  Parser.new stream, options
119
120
  end
120
121
  end
122
+
121
123
  def self.parse(stream, data, options = {})
122
124
 
123
125
  parser = TSV::Parser.new stream, options
@@ -200,10 +200,10 @@ module TSV
200
200
  Log.debug "Attachment with same key: #{other.key_field}"
201
201
  attach_same_key other, fields
202
202
  when (not in_namespace and self.fields.include?(other.key_field))
203
- Log.debug "Found other's key field: #{other.key_field}"
203
+ Log.debug "Found other key field: #{other.key_field}"
204
204
  attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
205
205
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
206
- Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
206
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
207
207
  attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
208
208
  else
209
209
  index = TSV.find_traversal(self, other, options)
@@ -0,0 +1,24 @@
1
+ module TSV
2
+ def self.melt(tsv, key_field, header_field, fields, *info_fields, &block)
3
+ info_fields.unshift header_field
4
+ TSV.traverse tsv, :into => :dumper, :key_field => key_field, :fields => info_fields do |k,values|
5
+ res = fields.zip(values).collect do |field, value|
6
+ info_values = if block_given?
7
+ new = block.call value
8
+ next if new.nil?
9
+ new
10
+ else
11
+ [value]
12
+ end
13
+ info_values.unshift field
14
+ [field, info_values]
15
+ end
16
+ res.extend MultipleResult
17
+ res
18
+ end
19
+ end
20
+
21
+ def melt(header_field, *info_fields, &block)
22
+ TSV.melt self, key_field, header_field, fields, *info_fields, &block
23
+ end
24
+ end
@@ -128,16 +128,18 @@ module TSV
128
128
  [keys, [value]]
129
129
  end
130
130
 
131
- def get_values_flat(parts)
131
+
132
+ def get_values_flat_merge(parts)
132
133
  begin
133
134
  orig = parts
134
- if key_position and key_position != 0 and field_positions.nil?
135
- value = parts.shift
136
- keys = parts.dup
137
- return [keys, [value]]
138
- end
139
135
 
140
- return parts.shift.split(@sep2, -1).first, parts.collect{|value| value.split(@sep2, -1)}.flatten if
136
+ if key_position and key_position != 0 and field_positions.nil?
137
+ value = parts.shift.split(@sep2, -1)
138
+ keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
139
+ return [keys, value]
140
+ end
141
+
142
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
141
143
  field_positions.nil? and (key_position.nil? or key_position == 0)
142
144
  rescue
143
145
  raise $!
@@ -159,18 +161,22 @@ module TSV
159
161
  values = values.split(@sep2, -1)
160
162
  end
161
163
 
164
+ [keys, values]
165
+ end
166
+
167
+ def get_values_flat(parts)
168
+ keys, values = get_values_flat_merge(parts)
162
169
  [keys.first, values]
163
170
  end
164
171
 
172
+
165
173
  def add_to_data_no_merge_list(data, key, values)
166
174
  data[key] = values unless data.include? key
167
175
  nil
168
176
  end
169
177
 
170
- def add_to_data_flat_keys(data, keys, values)
171
- keys.each do |key|
172
- data[key] = values unless data.include? key
173
- end
178
+ def add_to_data_flat_keys(data, key, values)
179
+ data[key] = values unless data.include? key
174
180
  nil
175
181
  end
176
182
 
@@ -188,6 +194,17 @@ module TSV
188
194
  nil
189
195
  end
190
196
 
197
+ def add_to_data_flat_merge_double(data, keys, values)
198
+ keys.each do |key|
199
+ if data.include? key
200
+ data[key] = data[key].concat values
201
+ else
202
+ data[key] = values
203
+ end
204
+ end
205
+ nil
206
+ end
207
+
191
208
  def add_to_data_flat_merge_keys(data, keys, values)
192
209
  keys.each do |key|
193
210
  if data.include? key
@@ -346,7 +363,7 @@ module TSV
346
363
  raise "Field not identified: #{ field }" if pos.nil?
347
364
  pos
348
365
  else
349
- raise "Format of fields not understood: #{fields.inspect}"
366
+ raise "Format of fields not understood: #{field.inspect}"
350
367
  end
351
368
  }
352
369
  end
@@ -423,15 +440,17 @@ module TSV
423
440
  self.instance_eval do alias add_to_data add_to_data_no_merge_list end
424
441
  when :flat
425
442
  @take_all = true if field_positions.nil?
426
- self.instance_eval do alias get_values get_values_flat end
427
443
  self.instance_eval do alias cast_values cast_values_flat end
428
- if merge
444
+ merge = true if key_position and key_position != 0 and field_positions.nil?
445
+ if merge
446
+ self.instance_eval do alias get_values get_values_flat_merge end
429
447
  if key_position and key_position != 0 and field_positions.nil?
430
448
  self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
431
449
  else
432
- self.instance_eval do alias add_to_data add_to_data_flat_merge end
450
+ self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
433
451
  end
434
452
  else
453
+ self.instance_eval do alias get_values get_values_flat end
435
454
  if key_position and key_position != 0 and field_positions.nil?
436
455
  self.instance_eval do alias add_to_data add_to_data_flat_keys end
437
456
  else
@@ -86,6 +86,10 @@ module TSV
86
86
  file
87
87
  when (defined? Bgzf and Bgzf)
88
88
  file
89
+ when TSV
90
+ file
91
+ when TSV::Dumper
92
+ file.stream
89
93
  when TSV::Parser
90
94
  file.stream
91
95
  when Path
@@ -117,8 +121,6 @@ module TSV
117
121
  file.join
118
122
  get_stream(file.path)
119
123
  end
120
- when TSV::Dumper
121
- file.stream
122
124
  when Array
123
125
  Misc.open_pipe do |sin|
124
126
  file.each do |l|
@@ -33,7 +33,7 @@ module Bgzf
33
33
 
34
34
  def _index
35
35
  @_index ||= begin
36
- index = Persist.persist("BGZF index" + filename.sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
36
+ index = Persist.persist("BGZF index" + (filename || "").sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
37
37
  index = []
38
38
  pos = 0
39
39
  while true do
@@ -1,5 +1,9 @@
1
1
  module Misc
2
2
 
3
+ def self.add_libdir(dir)
4
+ $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include? dir
5
+ end
6
+
3
7
  def self.pre_fork
4
8
  Persist::CONNECTIONS.values.each do |db| db.close if db.write? end
5
9
  ObjectSpace.each_object(Mutex) do |m|
@@ -33,7 +33,7 @@ module Misc
33
33
  ":" << obj.to_s
34
34
  when String
35
35
  if obj.length > 100
36
- "'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
36
+ "'" << obj.slice(0,30) << "<...#{obj.length}...>" << obj.slice(-10,30) << " " << "'"
37
37
  else
38
38
  "'" << obj << "'"
39
39
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
 
6
6
  class TestTSV < Test::Unit::TestCase
7
7
 
8
- def test_extend
8
+ def _test_extend
9
9
  a = {
10
10
  "one" => "1",
11
11
  "two" => "2"
@@ -18,7 +18,7 @@ class TestTSV < Test::Unit::TestCase
18
18
  assert_equal "1", a["one"]
19
19
  end
20
20
 
21
- def test_tsv
21
+ def _test_tsv
22
22
  content =<<-EOF
23
23
  #Id ValueA ValueB OtherID
24
24
  row1 a|aa|aaa b Id1|Id2
@@ -41,7 +41,7 @@ row2 A B Id3
41
41
  end
42
42
  end
43
43
 
44
- def test_headerless
44
+ def _test_headerless
45
45
  content =<<-EOF
46
46
  row1 a|aa|aaa b Id1|Id2
47
47
  row2 A B Id3
@@ -54,7 +54,7 @@ row2 A B Id3
54
54
  end
55
55
  end
56
56
 
57
- def test_headerless_fields
57
+ def _test_headerless_fields
58
58
  content =<<-EOF
59
59
  row1 a|aa|aaa b Id1|Id2
60
60
  row2 A B Id3
@@ -68,7 +68,7 @@ row2 A B Id3
68
68
  end
69
69
  end
70
70
 
71
- def test_tsv_persistence
71
+ def _test_tsv_persistence
72
72
  content =<<-EOF
73
73
  #Id ValueA ValueB OtherID
74
74
  row1 a|aa|aaa b Id1|Id2
@@ -94,7 +94,7 @@ row2 A B Id3
94
94
  end
95
95
  end
96
96
 
97
- def test_tsv_field_selection
97
+ def _test_tsv_field_selection
98
98
  content =<<-EOF
99
99
  #Id ValueA ValueB OtherID
100
100
  row1 a|aa|aaa b Id1|Id2
@@ -136,7 +136,7 @@ row2 A B Id3
136
136
  end
137
137
  end
138
138
 
139
- def test_tsv_cast
139
+ def _test_tsv_cast
140
140
  content =<<-EOF
141
141
  #Id Value
142
142
  row1 1|2|3
@@ -151,7 +151,7 @@ row2 4
151
151
  end
152
152
  end
153
153
 
154
- def test_tsv_single
154
+ def _test_tsv_single
155
155
  content =<<-EOF
156
156
  #Id Value
157
157
  row1 1
@@ -168,7 +168,7 @@ row2 4
168
168
  end
169
169
  end
170
170
 
171
- def test_tsv_single_from_flat
171
+ def _test_tsv_single_from_flat
172
172
  content =<<-EOF
173
173
  #: :type=:flat
174
174
  #Id Value
@@ -183,7 +183,7 @@ row2 4
183
183
  end
184
184
 
185
185
 
186
- def test_tsv_serializer
186
+ def _test_tsv_serializer
187
187
  content =<<-EOF
188
188
  #Id Value
189
189
  row1 1
@@ -197,7 +197,7 @@ row2 4
197
197
  end
198
198
  end
199
199
 
200
- def test_tsv_header_options
200
+ def _test_tsv_header_options
201
201
  content =<<-EOF
202
202
  #: :sep=/\\s+/
203
203
  #Id Value
@@ -211,7 +211,7 @@ row2 4
211
211
  end
212
212
  end
213
213
 
214
- def test_tsv_fastimport
214
+ def _test_tsv_fastimport
215
215
  content =<<-EOF
216
216
  #Id ValueA ValueB OtherID
217
217
  row1 a|aa|aaa b Id1|Id2
@@ -228,7 +228,7 @@ row3 AA B Id3
228
228
  end
229
229
  end
230
230
 
231
- def test_header_type
231
+ def _test_header_type
232
232
  content =<<-EOF
233
233
  #: :sep=/\\s+/#:type=:single
234
234
  #Id Value
@@ -244,7 +244,7 @@ row3 AA B Id3
244
244
 
245
245
  end
246
246
 
247
- def test_single_cast
247
+ def _test_single_cast
248
248
  content =<<-EOF
249
249
  #: :sep=/\\s+/#:type=:single#:cast=:to_i
250
250
  #Id Value
@@ -259,7 +259,7 @@ b 2
259
259
  end
260
260
  end
261
261
 
262
- def test_key_field
262
+ def _test_key_field
263
263
  content =<<-EOF
264
264
  #: :sep=/\\s+/#:type=:single
265
265
  #Id Value
@@ -275,7 +275,7 @@ b 2
275
275
  end
276
276
  end
277
277
 
278
- def test_fix
278
+ def _test_fix
279
279
  content =<<-EOF
280
280
  #: :sep=/\\s+/#:type=:single
281
281
  #Id Value
@@ -289,7 +289,7 @@ b 2
289
289
  end
290
290
  end
291
291
 
292
- def test_select
292
+ def _test_select
293
293
  content =<<-EOF
294
294
  #: :sep=/\\s+/#:type=:single
295
295
  #Id Value
@@ -303,7 +303,7 @@ b 2
303
303
  end
304
304
  end
305
305
 
306
- def test_grep
306
+ def _test_grep
307
307
  content =<<-EOF
308
308
  #: :sep=/\\s+/#:type=:single
309
309
  #Id Value
@@ -317,7 +317,7 @@ b 2
317
317
  end
318
318
  end
319
319
 
320
- def test_grep_invert
320
+ def _test_grep_invert
321
321
  content =<<-EOF
322
322
  #: :sep=/\\s+/#:type=:single
323
323
  #Id Value
@@ -331,7 +331,7 @@ b 2
331
331
  end
332
332
  end
333
333
 
334
- def test_grep_header
334
+ def _test_grep_header
335
335
  content =<<-EOF
336
336
  #: :sep=/\\s+/#:type=:single#:namespace=Test
337
337
  #Id Value
@@ -346,7 +346,7 @@ b 2
346
346
  end
347
347
  end
348
348
 
349
- def test_json
349
+ def _test_json
350
350
  content =<<-EOF
351
351
  #: :sep=/\\s+/#:type=:single
352
352
  #Id Value
@@ -361,7 +361,7 @@ b 2
361
361
 
362
362
  end
363
363
 
364
- def test_flat_no_merge
364
+ def _test_flat_no_merge
365
365
  content =<<-EOF
366
366
  #Id ValueA ValueB OtherID
367
367
  row1 a|aa|aaa b Id1|Id2
@@ -377,7 +377,7 @@ row2 A B Id3
377
377
  end
378
378
  end
379
379
 
380
- def test_flat_merge
380
+ def _test_flat_merge
381
381
  content =<<-EOF
382
382
  #Id ValueA ValueB OtherID
383
383
  row1 a|aa|aaa b Id1|Id2
@@ -399,12 +399,12 @@ row2 b bbb bbbb bb
399
399
  EOF
400
400
 
401
401
  TmpFile.with_file(content) do |filename|
402
- tsv = TSV.open(filename, :sep => /\s+/, :merge => true, :type => :flat, :fields => ["ValueA"])
402
+ tsv = TSV.open(filename, :sep => /\s+/, :merge => false, :type => :flat, :fields => ["ValueA"])
403
403
  assert_equal ["a", "aa", "aaa"], tsv["row1"]
404
404
  end
405
405
  end
406
406
 
407
- def test_flat_key
407
+ def _test_flat_key
408
408
  content =<<-EOF
409
409
  #Id ValueA
410
410
  row1 a aa aaa
@@ -420,7 +420,7 @@ row2 b bbb bbbb bb aa
420
420
 
421
421
 
422
422
 
423
- def test_zipped
423
+ def _test_zipped
424
424
  content =<<-EOF
425
425
  #Id ValueA ValueB
426
426
  row1 a|aa|aaa b|bb|bbb
@@ -433,7 +433,7 @@ row2 a|aa|aaa c|cc|ccc
433
433
  end
434
434
  end
435
435
 
436
- def test_named_array_key
436
+ def _test_named_array_key
437
437
  content =<<-EOF
438
438
  #Id ValueA ValueB OtherID
439
439
  row1 a|aa|aaa b Id1|Id2
@@ -448,7 +448,7 @@ row2 A B Id3
448
448
 
449
449
  end
450
450
 
451
- def test_unnamed_key
451
+ def _test_unnamed_key
452
452
  content =<<-EOF
453
453
  row1 a|aa|aaa b Id1|Id2
454
454
  row2 A B Id3
@@ -461,7 +461,7 @@ row2 A B Id3
461
461
 
462
462
  end
463
463
 
464
- def test_float_array
464
+ def _test_float_array
465
465
  content =<<-EOF
466
466
  #Id ValueA ValueB OtherID
467
467
  row1 0.2 0.3 0
@@ -476,7 +476,7 @@ row2 0.1 4.5 0
476
476
 
477
477
  end
478
478
 
479
- def test_flat_field_select
479
+ def _test_flat_field_select
480
480
  content =<<-EOF
481
481
  #: :type=:flat
482
482
  #Id Value
@@ -485,11 +485,12 @@ row2 b bb bbb
485
485
  EOF
486
486
 
487
487
  TmpFile.with_file(content) do |filename|
488
+ puts TSV.open(filename, :sep => /\s+/, :key_field => "Value").to_s
488
489
  assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
489
490
  end
490
491
  end
491
492
 
492
- def test_flat2
493
+ def _test_flat2
493
494
  content =<<-EOF
494
495
  #: :type=:flat
495
496
  #Id Value
@@ -503,10 +504,25 @@ row2 A|AA|AAA
503
504
  assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id")["row1"].include? "a"
504
505
  assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id", :fields => ["Value"])["row1"].include? "a"
505
506
  end
507
+ end
508
+
509
+
510
+ def _test_tsv_flat_double
511
+ content =<<-EOF
512
+ #Id ValueA ValueB OtherID
513
+ row1 a|aa|aaa b Id1|Id2
514
+ row2 A B Id3
515
+ EOF
516
+
517
+ TmpFile.with_file(content) do |filename|
518
+ tsv = TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "ValueA", :fields => ["OtherID"], :merge => true)
519
+ assert tsv["aaa"].include? "Id1"
520
+ assert tsv["aaa"].include? "Id2"
506
521
 
522
+ end
507
523
  end
508
524
 
509
- def test_flat2single
525
+ def _test_flat2single
510
526
  content =<<-EOF
511
527
  #: :type=:flat
512
528
  #Id Value
@@ -520,7 +536,7 @@ row2 A AA AAA
520
536
 
521
537
  end
522
538
 
523
- def test_shard
539
+ def _test_shard
524
540
  shard_function = Proc.new do |key|
525
541
  key[-1]
526
542
  end
@@ -451,13 +451,4 @@ eum fugiat quo voluptas nulla pariatur?"
451
451
  puts `ls -l /proc/#{ Process.pid }/fd`
452
452
  end
453
453
 
454
- def test_bootstrap
455
- Misc.bootstrap (0..10).to_a do
456
- Misc.bootstrap (0..10).to_a do
457
- Misc.bootstrap (0..10).to_a do
458
- sleep 0.1
459
- end
460
- end
461
- end
462
- end
463
454
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.14.25
4
+ version: 5.14.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-07 00:00:00.000000000 Z
11
+ date: 2014-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -176,6 +176,7 @@ files:
176
176
  - lib/rbbt/tsv/filter.rb
177
177
  - lib/rbbt/tsv/index.rb
178
178
  - lib/rbbt/tsv/manipulate.rb
179
+ - lib/rbbt/tsv/melt.rb
179
180
  - lib/rbbt/tsv/parallel.rb
180
181
  - lib/rbbt/tsv/parallel/through.rb
181
182
  - lib/rbbt/tsv/parallel/traverse.rb