rbbt-util 5.14.25 → 5.14.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88885c1c0256441286016e2dcce5064c4bd62c75
4
- data.tar.gz: 73044f407bdd1fb7c936841074211291cc13a327
3
+ metadata.gz: e2e5f1318aa0869985d9387dc3ed40c68ea0f6f6
4
+ data.tar.gz: 521b91ab2b83d9665c18e02c79e5685b6e4290ad
5
5
  SHA512:
6
- metadata.gz: c61172f11902cc5eb070d82a764a469741f9899efbf7fda38c011a18cde6306bbfcfc6e6251372af29f8a5565dd7ffaccf7d091b98cc5ce3715bc6508ef41131
7
- data.tar.gz: f34f9a02776870e4dcc253877f953ea48d278a5a60bec8481b04eb084c97d53aa65034133c336ea53e8edfa2c65b20e3cb8de9cb0af3a42d4861ae09771839f5
6
+ metadata.gz: 7599d8f628c59747e0aead2b7bd2583d2a3800cf73d27b7b1bea7811726713a4dc77c878280814889ef87ce616f4e1dff4725bd0e0590394483c306d45c1274a
7
+ data.tar.gz: 0457ae9ef9d490080271f1749b1e97eb68fc74cb0762a8e4944b01fd5f52725f6b16ef1d1f4c44487720d9491c9ae0cab8382fd6b96f0ba94fac8e296897cd4a
@@ -76,7 +76,7 @@ module Association
76
76
  :fields => fields.collect{|f| String === f ? all_fields.index(f): f },
77
77
  #:type => (options[:type] and options[:type].to_sym == :flat) ? :flat : nil,
78
78
  :unnamed => true,
79
- :merge => (options[:type] and options[:type].to_sym == :flat) ? false : true
79
+ :merge => (options[:merge] or (options[:type] and options[:type].to_sym == :flat) ? false : true)
80
80
  })
81
81
 
82
82
  open_options["header_hash"] = "#" if options["header_hash"] == ""
@@ -259,10 +259,10 @@ module Association
259
259
  # Persist.open_tokyocabinet(index_file, write, :list, TokyoCabinet::BDB).tap{|r| r.unnamed = true; Association::Index.setup r }
260
260
  #end
261
261
 
262
- def self.index(file, options = {}, persist_options = {})
262
+ def self.index(file, options = {}, persist_options = nil)
263
263
  options = {} if options.nil?
264
264
  options = Misc.add_defaults options, :persist => true
265
- persist_options = {} if persist_options.nil?
265
+ persist_options = Misc.pull_keys options, persist_options if persist_options.nil?
266
266
 
267
267
  Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association Index"}.merge(persist_options).merge(:engine => TokyoCabinet::BDB, :serializer => :clean)) do |assocs|
268
268
  undirected = options[:undirected]
@@ -163,7 +163,7 @@ class KnowledgeBase
163
163
  file, registered_options = registry[name]
164
164
 
165
165
  options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format
166
- options = Misc.add_defaults options, registered_options
166
+ options = Misc.add_defaults options, registered_options if registered_options
167
167
 
168
168
  persist_options = Misc.pull_keys options, :persist
169
169
 
@@ -16,6 +16,7 @@ require 'rbbt/tsv/filter'
16
16
  require 'rbbt/tsv/field_index'
17
17
  require 'rbbt/tsv/parallel'
18
18
  require 'rbbt/tsv/stream'
19
+ require 'rbbt/tsv/melt'
19
20
 
20
21
  module TSV
21
22
  class << self
@@ -58,7 +59,7 @@ module TSV
58
59
  unnamed = Misc.process_options options, :unnamed
59
60
  entity_options = Misc.process_options options, :entity_options
60
61
 
61
- Log.debug "TSV open: #{ filename } - #{options.inspect}.#{unnamed ? " [unnamed]" : "[not unnamed]"}"
62
+ Log.debug "TSV open: #{ filename } - #{Misc.fingerprint options }.#{unnamed ? " [unnamed]" : "[not unnamed]"}"
62
63
 
63
64
  data = nil
64
65
 
@@ -118,6 +119,7 @@ module TSV
118
119
  Parser.new stream, options
119
120
  end
120
121
  end
122
+
121
123
  def self.parse(stream, data, options = {})
122
124
 
123
125
  parser = TSV::Parser.new stream, options
@@ -200,10 +200,10 @@ module TSV
200
200
  Log.debug "Attachment with same key: #{other.key_field}"
201
201
  attach_same_key other, fields
202
202
  when (not in_namespace and self.fields.include?(other.key_field))
203
- Log.debug "Found other's key field: #{other.key_field}"
203
+ Log.debug "Found other key field: #{other.key_field}"
204
204
  attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
205
205
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
206
- Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
206
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
207
207
  attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
208
208
  else
209
209
  index = TSV.find_traversal(self, other, options)
@@ -0,0 +1,24 @@
1
+ module TSV
2
+ def self.melt(tsv, key_field, header_field, fields, *info_fields, &block)
3
+ info_fields.unshift header_field
4
+ TSV.traverse tsv, :into => :dumper, :key_field => key_field, :fields => info_fields do |k,values|
5
+ res = fields.zip(values).collect do |field, value|
6
+ info_values = if block_given?
7
+ new = block.call value
8
+ next if new.nil?
9
+ new
10
+ else
11
+ [value]
12
+ end
13
+ info_values.unshift field
14
+ [field, info_values]
15
+ end
16
+ res.extend MultipleResult
17
+ res
18
+ end
19
+ end
20
+
21
+ def melt(header_field, *info_fields, &block)
22
+ TSV.melt self, key_field, header_field, fields, *info_fields, &block
23
+ end
24
+ end
@@ -128,16 +128,18 @@ module TSV
128
128
  [keys, [value]]
129
129
  end
130
130
 
131
- def get_values_flat(parts)
131
+
132
+ def get_values_flat_merge(parts)
132
133
  begin
133
134
  orig = parts
134
- if key_position and key_position != 0 and field_positions.nil?
135
- value = parts.shift
136
- keys = parts.dup
137
- return [keys, [value]]
138
- end
139
135
 
140
- return parts.shift.split(@sep2, -1).first, parts.collect{|value| value.split(@sep2, -1)}.flatten if
136
+ if key_position and key_position != 0 and field_positions.nil?
137
+ value = parts.shift.split(@sep2, -1)
138
+ keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
139
+ return [keys, value]
140
+ end
141
+
142
+ return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
141
143
  field_positions.nil? and (key_position.nil? or key_position == 0)
142
144
  rescue
143
145
  raise $!
@@ -159,18 +161,22 @@ module TSV
159
161
  values = values.split(@sep2, -1)
160
162
  end
161
163
 
164
+ [keys, values]
165
+ end
166
+
167
+ def get_values_flat(parts)
168
+ keys, values = get_values_flat_merge(parts)
162
169
  [keys.first, values]
163
170
  end
164
171
 
172
+
165
173
  def add_to_data_no_merge_list(data, key, values)
166
174
  data[key] = values unless data.include? key
167
175
  nil
168
176
  end
169
177
 
170
- def add_to_data_flat_keys(data, keys, values)
171
- keys.each do |key|
172
- data[key] = values unless data.include? key
173
- end
178
+ def add_to_data_flat_keys(data, key, values)
179
+ data[key] = values unless data.include? key
174
180
  nil
175
181
  end
176
182
 
@@ -188,6 +194,17 @@ module TSV
188
194
  nil
189
195
  end
190
196
 
197
+ def add_to_data_flat_merge_double(data, keys, values)
198
+ keys.each do |key|
199
+ if data.include? key
200
+ data[key] = data[key].concat values
201
+ else
202
+ data[key] = values
203
+ end
204
+ end
205
+ nil
206
+ end
207
+
191
208
  def add_to_data_flat_merge_keys(data, keys, values)
192
209
  keys.each do |key|
193
210
  if data.include? key
@@ -346,7 +363,7 @@ module TSV
346
363
  raise "Field not identified: #{ field }" if pos.nil?
347
364
  pos
348
365
  else
349
- raise "Format of fields not understood: #{fields.inspect}"
366
+ raise "Format of fields not understood: #{field.inspect}"
350
367
  end
351
368
  }
352
369
  end
@@ -423,15 +440,17 @@ module TSV
423
440
  self.instance_eval do alias add_to_data add_to_data_no_merge_list end
424
441
  when :flat
425
442
  @take_all = true if field_positions.nil?
426
- self.instance_eval do alias get_values get_values_flat end
427
443
  self.instance_eval do alias cast_values cast_values_flat end
428
- if merge
444
+ merge = true if key_position and key_position != 0 and field_positions.nil?
445
+ if merge
446
+ self.instance_eval do alias get_values get_values_flat_merge end
429
447
  if key_position and key_position != 0 and field_positions.nil?
430
448
  self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
431
449
  else
432
- self.instance_eval do alias add_to_data add_to_data_flat_merge end
450
+ self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
433
451
  end
434
452
  else
453
+ self.instance_eval do alias get_values get_values_flat end
435
454
  if key_position and key_position != 0 and field_positions.nil?
436
455
  self.instance_eval do alias add_to_data add_to_data_flat_keys end
437
456
  else
@@ -86,6 +86,10 @@ module TSV
86
86
  file
87
87
  when (defined? Bgzf and Bgzf)
88
88
  file
89
+ when TSV
90
+ file
91
+ when TSV::Dumper
92
+ file.stream
89
93
  when TSV::Parser
90
94
  file.stream
91
95
  when Path
@@ -117,8 +121,6 @@ module TSV
117
121
  file.join
118
122
  get_stream(file.path)
119
123
  end
120
- when TSV::Dumper
121
- file.stream
122
124
  when Array
123
125
  Misc.open_pipe do |sin|
124
126
  file.each do |l|
@@ -33,7 +33,7 @@ module Bgzf
33
33
 
34
34
  def _index
35
35
  @_index ||= begin
36
- index = Persist.persist("BGZF index" + filename.sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
36
+ index = Persist.persist("BGZF index" + (filename || "").sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
37
37
  index = []
38
38
  pos = 0
39
39
  while true do
@@ -1,5 +1,9 @@
1
1
  module Misc
2
2
 
3
+ def self.add_libdir(dir)
4
+ $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include? dir
5
+ end
6
+
3
7
  def self.pre_fork
4
8
  Persist::CONNECTIONS.values.each do |db| db.close if db.write? end
5
9
  ObjectSpace.each_object(Mutex) do |m|
@@ -33,7 +33,7 @@ module Misc
33
33
  ":" << obj.to_s
34
34
  when String
35
35
  if obj.length > 100
36
- "'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
36
+ "'" << obj.slice(0,30) << "<...#{obj.length}...>" << obj.slice(-10,30) << " " << "'"
37
37
  else
38
38
  "'" << obj << "'"
39
39
  end
@@ -5,7 +5,7 @@ require 'test/unit'
5
5
 
6
6
  class TestTSV < Test::Unit::TestCase
7
7
 
8
- def test_extend
8
+ def _test_extend
9
9
  a = {
10
10
  "one" => "1",
11
11
  "two" => "2"
@@ -18,7 +18,7 @@ class TestTSV < Test::Unit::TestCase
18
18
  assert_equal "1", a["one"]
19
19
  end
20
20
 
21
- def test_tsv
21
+ def _test_tsv
22
22
  content =<<-EOF
23
23
  #Id ValueA ValueB OtherID
24
24
  row1 a|aa|aaa b Id1|Id2
@@ -41,7 +41,7 @@ row2 A B Id3
41
41
  end
42
42
  end
43
43
 
44
- def test_headerless
44
+ def _test_headerless
45
45
  content =<<-EOF
46
46
  row1 a|aa|aaa b Id1|Id2
47
47
  row2 A B Id3
@@ -54,7 +54,7 @@ row2 A B Id3
54
54
  end
55
55
  end
56
56
 
57
- def test_headerless_fields
57
+ def _test_headerless_fields
58
58
  content =<<-EOF
59
59
  row1 a|aa|aaa b Id1|Id2
60
60
  row2 A B Id3
@@ -68,7 +68,7 @@ row2 A B Id3
68
68
  end
69
69
  end
70
70
 
71
- def test_tsv_persistence
71
+ def _test_tsv_persistence
72
72
  content =<<-EOF
73
73
  #Id ValueA ValueB OtherID
74
74
  row1 a|aa|aaa b Id1|Id2
@@ -94,7 +94,7 @@ row2 A B Id3
94
94
  end
95
95
  end
96
96
 
97
- def test_tsv_field_selection
97
+ def _test_tsv_field_selection
98
98
  content =<<-EOF
99
99
  #Id ValueA ValueB OtherID
100
100
  row1 a|aa|aaa b Id1|Id2
@@ -136,7 +136,7 @@ row2 A B Id3
136
136
  end
137
137
  end
138
138
 
139
- def test_tsv_cast
139
+ def _test_tsv_cast
140
140
  content =<<-EOF
141
141
  #Id Value
142
142
  row1 1|2|3
@@ -151,7 +151,7 @@ row2 4
151
151
  end
152
152
  end
153
153
 
154
- def test_tsv_single
154
+ def _test_tsv_single
155
155
  content =<<-EOF
156
156
  #Id Value
157
157
  row1 1
@@ -168,7 +168,7 @@ row2 4
168
168
  end
169
169
  end
170
170
 
171
- def test_tsv_single_from_flat
171
+ def _test_tsv_single_from_flat
172
172
  content =<<-EOF
173
173
  #: :type=:flat
174
174
  #Id Value
@@ -183,7 +183,7 @@ row2 4
183
183
  end
184
184
 
185
185
 
186
- def test_tsv_serializer
186
+ def _test_tsv_serializer
187
187
  content =<<-EOF
188
188
  #Id Value
189
189
  row1 1
@@ -197,7 +197,7 @@ row2 4
197
197
  end
198
198
  end
199
199
 
200
- def test_tsv_header_options
200
+ def _test_tsv_header_options
201
201
  content =<<-EOF
202
202
  #: :sep=/\\s+/
203
203
  #Id Value
@@ -211,7 +211,7 @@ row2 4
211
211
  end
212
212
  end
213
213
 
214
- def test_tsv_fastimport
214
+ def _test_tsv_fastimport
215
215
  content =<<-EOF
216
216
  #Id ValueA ValueB OtherID
217
217
  row1 a|aa|aaa b Id1|Id2
@@ -228,7 +228,7 @@ row3 AA B Id3
228
228
  end
229
229
  end
230
230
 
231
- def test_header_type
231
+ def _test_header_type
232
232
  content =<<-EOF
233
233
  #: :sep=/\\s+/#:type=:single
234
234
  #Id Value
@@ -244,7 +244,7 @@ row3 AA B Id3
244
244
 
245
245
  end
246
246
 
247
- def test_single_cast
247
+ def _test_single_cast
248
248
  content =<<-EOF
249
249
  #: :sep=/\\s+/#:type=:single#:cast=:to_i
250
250
  #Id Value
@@ -259,7 +259,7 @@ b 2
259
259
  end
260
260
  end
261
261
 
262
- def test_key_field
262
+ def _test_key_field
263
263
  content =<<-EOF
264
264
  #: :sep=/\\s+/#:type=:single
265
265
  #Id Value
@@ -275,7 +275,7 @@ b 2
275
275
  end
276
276
  end
277
277
 
278
- def test_fix
278
+ def _test_fix
279
279
  content =<<-EOF
280
280
  #: :sep=/\\s+/#:type=:single
281
281
  #Id Value
@@ -289,7 +289,7 @@ b 2
289
289
  end
290
290
  end
291
291
 
292
- def test_select
292
+ def _test_select
293
293
  content =<<-EOF
294
294
  #: :sep=/\\s+/#:type=:single
295
295
  #Id Value
@@ -303,7 +303,7 @@ b 2
303
303
  end
304
304
  end
305
305
 
306
- def test_grep
306
+ def _test_grep
307
307
  content =<<-EOF
308
308
  #: :sep=/\\s+/#:type=:single
309
309
  #Id Value
@@ -317,7 +317,7 @@ b 2
317
317
  end
318
318
  end
319
319
 
320
- def test_grep_invert
320
+ def _test_grep_invert
321
321
  content =<<-EOF
322
322
  #: :sep=/\\s+/#:type=:single
323
323
  #Id Value
@@ -331,7 +331,7 @@ b 2
331
331
  end
332
332
  end
333
333
 
334
- def test_grep_header
334
+ def _test_grep_header
335
335
  content =<<-EOF
336
336
  #: :sep=/\\s+/#:type=:single#:namespace=Test
337
337
  #Id Value
@@ -346,7 +346,7 @@ b 2
346
346
  end
347
347
  end
348
348
 
349
- def test_json
349
+ def _test_json
350
350
  content =<<-EOF
351
351
  #: :sep=/\\s+/#:type=:single
352
352
  #Id Value
@@ -361,7 +361,7 @@ b 2
361
361
 
362
362
  end
363
363
 
364
- def test_flat_no_merge
364
+ def _test_flat_no_merge
365
365
  content =<<-EOF
366
366
  #Id ValueA ValueB OtherID
367
367
  row1 a|aa|aaa b Id1|Id2
@@ -377,7 +377,7 @@ row2 A B Id3
377
377
  end
378
378
  end
379
379
 
380
- def test_flat_merge
380
+ def _test_flat_merge
381
381
  content =<<-EOF
382
382
  #Id ValueA ValueB OtherID
383
383
  row1 a|aa|aaa b Id1|Id2
@@ -399,12 +399,12 @@ row2 b bbb bbbb bb
399
399
  EOF
400
400
 
401
401
  TmpFile.with_file(content) do |filename|
402
- tsv = TSV.open(filename, :sep => /\s+/, :merge => true, :type => :flat, :fields => ["ValueA"])
402
+ tsv = TSV.open(filename, :sep => /\s+/, :merge => false, :type => :flat, :fields => ["ValueA"])
403
403
  assert_equal ["a", "aa", "aaa"], tsv["row1"]
404
404
  end
405
405
  end
406
406
 
407
- def test_flat_key
407
+ def _test_flat_key
408
408
  content =<<-EOF
409
409
  #Id ValueA
410
410
  row1 a aa aaa
@@ -420,7 +420,7 @@ row2 b bbb bbbb bb aa
420
420
 
421
421
 
422
422
 
423
- def test_zipped
423
+ def _test_zipped
424
424
  content =<<-EOF
425
425
  #Id ValueA ValueB
426
426
  row1 a|aa|aaa b|bb|bbb
@@ -433,7 +433,7 @@ row2 a|aa|aaa c|cc|ccc
433
433
  end
434
434
  end
435
435
 
436
- def test_named_array_key
436
+ def _test_named_array_key
437
437
  content =<<-EOF
438
438
  #Id ValueA ValueB OtherID
439
439
  row1 a|aa|aaa b Id1|Id2
@@ -448,7 +448,7 @@ row2 A B Id3
448
448
 
449
449
  end
450
450
 
451
- def test_unnamed_key
451
+ def _test_unnamed_key
452
452
  content =<<-EOF
453
453
  row1 a|aa|aaa b Id1|Id2
454
454
  row2 A B Id3
@@ -461,7 +461,7 @@ row2 A B Id3
461
461
 
462
462
  end
463
463
 
464
- def test_float_array
464
+ def _test_float_array
465
465
  content =<<-EOF
466
466
  #Id ValueA ValueB OtherID
467
467
  row1 0.2 0.3 0
@@ -476,7 +476,7 @@ row2 0.1 4.5 0
476
476
 
477
477
  end
478
478
 
479
- def test_flat_field_select
479
+ def _test_flat_field_select
480
480
  content =<<-EOF
481
481
  #: :type=:flat
482
482
  #Id Value
@@ -485,11 +485,12 @@ row2 b bb bbb
485
485
  EOF
486
486
 
487
487
  TmpFile.with_file(content) do |filename|
488
+ puts TSV.open(filename, :sep => /\s+/, :key_field => "Value").to_s
488
489
  assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
489
490
  end
490
491
  end
491
492
 
492
- def test_flat2
493
+ def _test_flat2
493
494
  content =<<-EOF
494
495
  #: :type=:flat
495
496
  #Id Value
@@ -503,10 +504,25 @@ row2 A|AA|AAA
503
504
  assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id")["row1"].include? "a"
504
505
  assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id", :fields => ["Value"])["row1"].include? "a"
505
506
  end
507
+ end
508
+
509
+
510
+ def _test_tsv_flat_double
511
+ content =<<-EOF
512
+ #Id ValueA ValueB OtherID
513
+ row1 a|aa|aaa b Id1|Id2
514
+ row2 A B Id3
515
+ EOF
516
+
517
+ TmpFile.with_file(content) do |filename|
518
+ tsv = TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "ValueA", :fields => ["OtherID"], :merge => true)
519
+ assert tsv["aaa"].include? "Id1"
520
+ assert tsv["aaa"].include? "Id2"
506
521
 
522
+ end
507
523
  end
508
524
 
509
- def test_flat2single
525
+ def _test_flat2single
510
526
  content =<<-EOF
511
527
  #: :type=:flat
512
528
  #Id Value
@@ -520,7 +536,7 @@ row2 A AA AAA
520
536
 
521
537
  end
522
538
 
523
- def test_shard
539
+ def _test_shard
524
540
  shard_function = Proc.new do |key|
525
541
  key[-1]
526
542
  end
@@ -451,13 +451,4 @@ eum fugiat quo voluptas nulla pariatur?"
451
451
  puts `ls -l /proc/#{ Process.pid }/fd`
452
452
  end
453
453
 
454
- def test_bootstrap
455
- Misc.bootstrap (0..10).to_a do
456
- Misc.bootstrap (0..10).to_a do
457
- Misc.bootstrap (0..10).to_a do
458
- sleep 0.1
459
- end
460
- end
461
- end
462
- end
463
454
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.14.25
4
+ version: 5.14.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-07 00:00:00.000000000 Z
11
+ date: 2014-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -176,6 +176,7 @@ files:
176
176
  - lib/rbbt/tsv/filter.rb
177
177
  - lib/rbbt/tsv/index.rb
178
178
  - lib/rbbt/tsv/manipulate.rb
179
+ - lib/rbbt/tsv/melt.rb
179
180
  - lib/rbbt/tsv/parallel.rb
180
181
  - lib/rbbt/tsv/parallel/through.rb
181
182
  - lib/rbbt/tsv/parallel/traverse.rb