scout-gear 7.3.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/.vimproject +20 -9
  3. data/VERSION +1 -1
  4. data/bin/scout +6 -3
  5. data/lib/rbbt-scout.rb +1 -0
  6. data/lib/scout/cmd.rb +1 -1
  7. data/lib/scout/concurrent_stream.rb +26 -23
  8. data/lib/scout/config.rb +1 -1
  9. data/lib/scout/log/color.rb +4 -1
  10. data/lib/scout/log/progress/report.rb +1 -1
  11. data/lib/scout/log/progress/util.rb +58 -54
  12. data/lib/scout/log/progress.rb +1 -1
  13. data/lib/scout/log/trap.rb +107 -0
  14. data/lib/scout/log.rb +56 -21
  15. data/lib/scout/meta_extension.rb +13 -6
  16. data/lib/scout/misc/digest.rb +1 -1
  17. data/lib/scout/misc/format.rb +12 -0
  18. data/lib/scout/misc/insist.rb +1 -1
  19. data/lib/scout/misc/monitor.rb +11 -0
  20. data/lib/scout/misc/system.rb +10 -0
  21. data/lib/scout/named_array.rb +65 -3
  22. data/lib/scout/open/lock/lockfile.rb +587 -0
  23. data/lib/scout/open/lock.rb +28 -2
  24. data/lib/scout/open/remote.rb +4 -0
  25. data/lib/scout/open/stream.rb +90 -15
  26. data/lib/scout/open/util.rb +13 -3
  27. data/lib/scout/path/find.rb +9 -1
  28. data/lib/scout/path/util.rb +35 -0
  29. data/lib/scout/persist/serialize.rb +18 -5
  30. data/lib/scout/persist.rb +28 -12
  31. data/lib/scout/resource/path.rb +53 -0
  32. data/lib/scout/resource/produce.rb +0 -8
  33. data/lib/scout/resource/util.rb +2 -1
  34. data/lib/scout/tmpfile.rb +7 -8
  35. data/lib/scout/tsv/attach.rb +177 -0
  36. data/lib/scout/tsv/change_id.rb +40 -0
  37. data/lib/scout/tsv/dumper.rb +72 -46
  38. data/lib/scout/tsv/index.rb +69 -13
  39. data/lib/scout/tsv/open.rb +138 -84
  40. data/lib/scout/tsv/parser.rb +135 -80
  41. data/lib/scout/tsv/path.rb +1 -2
  42. data/lib/scout/tsv/persist/adapter.rb +15 -45
  43. data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
  44. data/lib/scout/tsv/persist/tokyocabinet.rb +4 -1
  45. data/lib/scout/tsv/persist.rb +4 -0
  46. data/lib/scout/tsv/transformer.rb +141 -0
  47. data/lib/scout/tsv/traverse.rb +96 -92
  48. data/lib/scout/tsv/util/filter.rb +9 -0
  49. data/lib/scout/tsv/util/reorder.rb +81 -0
  50. data/lib/scout/tsv/util/select.rb +78 -33
  51. data/lib/scout/tsv/util/unzip.rb +86 -0
  52. data/lib/scout/tsv/util.rb +60 -11
  53. data/lib/scout/tsv.rb +26 -3
  54. data/lib/scout/work_queue/socket.rb +6 -1
  55. data/lib/scout/work_queue/worker.rb +5 -2
  56. data/lib/scout/work_queue.rb +15 -8
  57. data/lib/scout/workflow/definition.rb +21 -2
  58. data/lib/scout/workflow/step/dependencies.rb +24 -4
  59. data/lib/scout/workflow/step/info.rb +36 -5
  60. data/lib/scout/workflow/step/provenance.rb +8 -7
  61. data/lib/scout/workflow/step/status.rb +45 -0
  62. data/lib/scout/workflow/step.rb +100 -34
  63. data/lib/scout/workflow/task/inputs.rb +14 -20
  64. data/lib/scout/workflow/task.rb +81 -46
  65. data/lib/scout/workflow/usage.rb +8 -6
  66. data/scout-gear.gemspec +24 -20
  67. data/scout_commands/workflow/task +34 -7
  68. data/test/scout/open/test_stream.rb +60 -58
  69. data/test/scout/path/test_find.rb +10 -1
  70. data/test/scout/resource/test_produce.rb +15 -0
  71. data/test/scout/test_meta_extension.rb +25 -0
  72. data/test/scout/test_named_array.rb +18 -0
  73. data/test/scout/test_persist.rb +6 -0
  74. data/test/scout/test_tsv.rb +212 -2
  75. data/test/scout/test_work_queue.rb +21 -19
  76. data/test/scout/tsv/persist/test_adapter.rb +1 -1
  77. data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
  78. data/test/scout/tsv/test_attach.rb +227 -0
  79. data/test/scout/tsv/test_change_id.rb +98 -0
  80. data/test/scout/tsv/test_dumper.rb +1 -1
  81. data/test/scout/tsv/test_index.rb +35 -3
  82. data/test/scout/tsv/test_open.rb +160 -2
  83. data/test/scout/tsv/test_parser.rb +19 -2
  84. data/test/scout/tsv/test_persist.rb +2 -0
  85. data/test/scout/tsv/test_transformer.rb +108 -0
  86. data/test/scout/tsv/test_traverse.rb +88 -3
  87. data/test/scout/tsv/test_util.rb +1 -0
  88. data/test/scout/tsv/util/test_reorder.rb +94 -0
  89. data/test/scout/tsv/util/test_select.rb +25 -11
  90. data/test/scout/tsv/util/test_unzip.rb +112 -0
  91. data/test/scout/work_queue/test_socket.rb +0 -1
  92. data/test/scout/workflow/step/test_status.rb +31 -0
  93. data/test/scout/workflow/task/test_inputs.rb +14 -14
  94. data/test/scout/workflow/test_step.rb +3 -3
  95. data/test/scout/workflow/test_task.rb +168 -32
  96. data/test/scout/workflow/test_usage.rb +33 -6
  97. metadata +20 -6
@@ -0,0 +1,177 @@
module TSV

  # Decide which field of +source+ is matched against which field of +other+.
  #
  # When +match_key+ is not given it is resolved in this order:
  # 1. the first name present in both +source.all_fields+ and +other.all_fields+;
  # 2. the first field of +source+ for which +other.identify_field+ returns a
  #    truthy value (that value also becomes +other_key+);
  # 3. the first field of +other+ for which +source.identify_field+ returns a
  #    truthy value (that value becomes +match_key+, the field +other_key+);
  # 4. the key field of +source+.
  #
  # When +other_key+ is still unknown it is looked up with
  # +other.identify_field(match_key)+, falling back to +other.key_field+.
  # A result that equals the respective key field is reported as +:key+.
  #
  # @param source [#all_fields, #identify_field, #key_field]
  # @param other [#all_fields, #identify_field, #key_field]
  # @param match_key [Object, nil] field of +source+ to match on, if known
  # @param other_key [Object, nil] field of +other+ to match on, if known
  # @return [Array] the pair +[match_key, other_key]+
  def self.match_keys(source, other, match_key: nil, other_key: nil)
    match_key = (source.all_fields & other.all_fields).first if match_key.nil?

    if match_key.nil?
      source.all_fields.each do |f|
        # Probe into a separate local so that a miss does not wipe out an
        # +other_key+ supplied by the caller.
        found = other.identify_field(f)
        next unless found

        other_key = found == :key ? other.key_field : found
        match_key = f
        break
      end
    end

    if match_key.nil?
      other.all_fields.each do |f|
        match_key = source.identify_field(f)
        next unless match_key

        other_key = f
        break
      end
    end

    match_key = source.key_field if match_key.nil?

    other_key = other.identify_field(match_key) if other_key.nil?
    other_key = other.key_field if other_key.nil?

    match_key = :key if match_key == source.key_field
    other_key = :key if other_key == other.key_field

    [match_key, other_key]
  end

  # Add fields from +other+ to the entries of +source+, pairing entries through
  # the fields chosen by {TSV.match_keys}.
  #
  # For a +:list+ source a slot is only filled when it is currently nil or an
  # empty String; for other source types the values of +other+ that are not
  # already present are appended to the slot.
  #
  # @param source a TSV or TSV::Parser; anything else is wrapped in a
  #   TSV::Transformer
  # @param other a TSV; anything else is opened with +TSV.open+
  # @param target only consulted when +source+ is a TSV::Transformer:
  #   +:stream+ makes the transformer dump into a new TSV::Dumper, +nil+ into a
  #   new in-memory TSV, any other value is used as the dumper itself
  # @param fields [Array, String, nil] fields of +other+ to attach; a String is
  #   wrapped in an Array; defaults to all fields of +other+ except its
  #   matching key and the key field of +source+
  # @param match_key field of +source+ to match on (see {TSV.match_keys})
  # @param other_key field of +other+ to match on (see {TSV.match_keys})
  # @param one2one forwarded to +other.reorder+ when +other+ has to be re-keyed
  # @param complete when true and the match is on the key of +source+, entries
  #   of +other+ whose key is missing from +source+ are added as well
  # @param insitu when false the values are duplicated before being modified
  #   and written back with +source[key] = values+; defaults to true when
  #   +source+ is a TSV and false otherwise
  # @param persist_input forwarded as +persist:+ to +TSV.open+ for +other+
  # @param bar forwarded to +source.traverse+; +true+ is replaced by the log
  #   message describing the attachment
  # @return +source+ (possibly the wrapping TSV::Transformer)
  def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
    source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
    other = TSV.open other, persist: persist_input unless TSV === other

    fields = [fields] if String === fields

    match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

    if TSV::Transformer === source
      source.dumper = case target
                      when :stream
                        TSV::Dumper.new(source.options.merge(sep: "\t"))
                      when nil
                        TSV.setup({}, **source.options.dup)
                      else
                        target
                      end
    end

    other.with_unnamed do
      source.with_unnamed do

        other_key_name = other_key == :key ? other.key_field : other_key
        other_key_name = other.fields[other_key_name] if Integer === other_key
        fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

        # Re-key +other+ so that it can be looked up by the matching values.
        other = other.reorder other_key, fields, one2one: one2one if other_key != :key

        other_field_positions = other.identify_field(fields)

        log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
        Log.debug log_message
        bar = log_message if TrueClass === bar

        source.fields = (source.fields + fields).uniq

        # Position in +source+ of every attached field, new or pre-existing.
        overlaps = source.identify_field(fields)

        # Stand-in used when +other+ has no entry for a key.
        empty_other_values = case source.type
                             when :list
                               [nil] * other.fields.length
                             when :flat
                               []
                             when :double
                               [[]] * other.fields.length
                             end

        insitu = TSV === source if insitu.nil?

        match_key_pos = source.identify_field(match_key)
        source.traverse bar: bar, unnamed: true do |orig_key, current_values|
          keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
          keys = [keys] unless Array === keys

          current_values = current_values.dup unless insitu
          keys.each do |current_key|
            other_values = other[current_key]

            # Bring the values of +other+ into the shape used by +source+.
            if other_values.nil?
              other_values = empty_other_values
            elsif other.type == :flat
              other_values = [other_values]
            elsif other.type == :list && source.type == :double
              other_values = other_values.collect { |v| [v] }
            elsif other.type == :double && source.type == :list
              other_values = other_values.collect { |v| v.first }
            end

            other_values = other_values.values_at(*other_field_positions)

            other_values.zip(overlaps).each do |v, overlap|
              if source.type == :list
                current = current_values[overlap]
                current_values[overlap] = v if current.nil? || (String === current && current.empty?)
              else
                current_values[overlap] ||= []
                current_values[overlap].concat(v - current_values[overlap])
              end
            end
          end
          source[orig_key] = current_values unless insitu
          nil
        end

        if complete && match_key == :key
          other.each do |extra_key, other_values|
            next if source.include?(extra_key)

            if other.type == :flat
              other_values = [other_values]
            elsif other.type == :list && source.type == :double
              other_values = other_values.collect { |v| [v] }
            elsif other.type == :double && source.type == :list
              other_values = other_values.collect { |v| v.first }
            end

            # Keep only the attached fields, in the same order as +overlaps+,
            # exactly as done for the entries already present in +source+.
            other_values = other_values.values_at(*other_field_positions)

            new_values = case source.type
                         when :list
                           [nil] * source.fields.length
                         when :flat
                           []
                         when :double
                           Array.new(source.fields.length) { [] }
                         end

            other_values.zip(overlaps).each do |v, overlap|
              if source.type == :list
                # Test the slot being filled, not the incoming value.
                current = new_values[overlap]
                new_values[overlap] = v if current.nil? || (String === current && current.empty?)
              else
                new_values[overlap].concat v
              end
            end
            source[extra_key] = new_values
          end
        end
      end
    end

    source
  end

  # Instance-level shortcut for {TSV.attach} with the receiver as +source+.
  def attach(*args, **kwargs)
    TSV.attach(self, *args, **kwargs)
  end
end
@@ -0,0 +1,40 @@
module TSV
  # Re-key +source+ by +new_key_field+.
  #
  # When +identifiers+ is given and +source.identify_field(new_key_field,
  # strict: true)+ finds nothing, the field is first attached from
  # +identifiers+ and the re-keying is then delegated to the attached result.
  # Otherwise the entries are traversed through a TSV::Transformer with
  # +new_key_field+ as key.
  #
  # @param source a String is wrapped in a TSV::Parser; anything else is used
  #   as given
  # @param new_key_field field that becomes the key
  # @param identifiers source of the new key field when +source+ lacks it
  # @param one2one forwarded to the traversal
  # @param stream when true the TSV::Transformer is returned instead of
  #   +transformer.tsv+
  # @param keep when true the former key field is kept as the first field
  # @param persist_identifiers forwarded as +persist_input:+ to +attach+
  def self.change_key(source, new_key_field, identifiers: nil, one2one: false, stream: false, keep: false, persist_identifiers: nil)
    source = TSV::Parser.new source if String === source

    # +identifiers+ is necessarily set inside this branch, so no fallback to
    # +source.identifiers+ can apply here.
    if identifiers && source.identify_field(new_key_field, strict: true).nil?
      attached = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
      return attached.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one)
    end

    fields = source.fields.dup - [new_key_field]
    fields.unshift source.key_field if keep

    transformer = TSV::Transformer.new source
    transformer.key_field = new_key_field
    transformer.fields = fields
    transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k, v|
      [k, v]
    end

    stream ? transformer : transformer.tsv
  end

  # Instance-level shortcut for {TSV.change_key} with the receiver as +source+.
  def change_key(*args, **kwargs)
    TSV.change_key(self, *args, **kwargs)
  end

  # Replace the field +source_id+ by +new_id+: +new_id+ is attached from
  # +identifiers+ and the result is sliced to the original field list with
  # +source_id+ swapped for +new_id+.
  #
  # @param source a String is wrapped in a TSV::Parser; anything else is used
  #   as given
  # @param source_id name of the field to replace; must be one of
  #   +source.fields+
  # @param new_id name of the field that takes its place
  # @param identifiers defaults to +source.identifiers+
  # @param one2one accepted but not used by this method
  # @param insitu forwarded to +attach+
  # @raise [ArgumentError] when +source_id+ is not one of +source.fields+
  def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
    source = TSV::Parser.new source if String === source

    identifiers = source.identifiers if identifiers.nil?

    new_fields = source.fields.dup
    position = new_fields.index(source_id)
    # Fail with a readable message instead of the TypeError that
    # +new_fields[nil] = ...+ would raise.
    raise ArgumentError, "Field #{source_id.inspect} not found among #{new_fields.inspect}" if position.nil?

    new_fields[position] = new_id

    source.attach(identifiers, fields: [new_id], insitu: insitu).slice(new_fields)
  end

  # Instance-level shortcut for {TSV.change_id} with the receiver as +source+.
  def change_id(*args, **kwargs)
    TSV.change_id(self, *args, **kwargs)
  end
end
@@ -1,38 +1,14 @@
1
1
  module TSV
2
2
  class Dumper
3
- def self.header_lines(key_field, fields, entry_hash = nil)
4
- if Hash === entry_hash
5
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
6
- preamble = entry_hash[:preamble]
7
- header_hash = entry_hash[:header_hash]
8
- end
9
-
10
- header_hash = "#" if header_hash.nil?
11
-
12
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
13
-
14
- str = ""
15
- str << preamble.strip << "\n" if preamble and not preamble.empty?
16
- if fields
17
- if fields.empty?
18
- str << header_hash << (key_field || "ID").to_s << "\n"
19
- else
20
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
21
- end
22
- end
23
-
24
- str
25
- end
26
-
27
3
  def self.header(options={})
28
- key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
29
- :key_field, :fields, :sep, :header_hash, :preamble,
4
+ key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
5
+ :key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
30
6
  :sep => "\t", :header_hash => "#", :preamble => true
31
7
 
32
- if fields.nil? || key_field.nil?
8
+ if fields.nil?
33
9
  fields_str = nil
34
10
  else
35
- fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
11
+ fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
36
12
  end
37
13
 
38
14
  if preamble && options.values.compact.any?
@@ -45,31 +21,62 @@ module TSV
45
21
  end
46
22
 
47
23
 
48
- attr_accessor :options
24
+ attr_accessor :options, :initialized, :type, :sep
49
25
  def initialize(options = {})
26
+ options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
50
27
  @sep, @type = IndiferentHash.process_options options,
51
28
  :sep, :type,
52
29
  :sep => "\t", :type => :double
53
30
  @options = options
54
31
  @sout, @sin = Open.pipe
32
+ @initialized = false
33
+ @mutex = Mutex.new
55
34
  ConcurrentStream.setup(@sin, pair: @sout)
56
35
  ConcurrentStream.setup(@sout, pair: @sin)
57
36
  end
58
37
 
59
- def init
60
- header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
61
- @sin.puts header if header and ! header.empty?
38
+ def key_field
39
+ @options[:key_field]
40
+ end
41
+
42
+ def fields
43
+ @options[:fields]
62
44
  end
63
45
 
64
- def add(key, value)
46
+ def key_field=(key_field)
47
+ @options[:key_field] = key_field
48
+ end
49
+
50
+ def fields=(fields)
51
+ @options[:fields] = fields
52
+ end
53
+
54
+ def all_fields
55
+ return nil if fields.nil?
56
+ [key_field] + fields
57
+ end
65
58
 
66
- case @type
67
- when :single
68
- @sin.puts key + @sep + value
69
- when :list, :flat
70
- @sin.puts key + @sep + value * @sep
71
- when :double
72
- @sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
59
+
60
+ def init(preamble: true)
61
+ header = Dumper.header(@options.merge(type: @type, sep: @sep, preamble: preamble))
62
+ @mutex.synchronize do
63
+ @initialized = true
64
+ @sin.puts header if header and ! header.empty?
65
+ end
66
+ end
67
+
68
+ def add(key, value)
69
+ @mutex.synchronize do
70
+
71
+ key = key.to_s unless String === key
72
+ case @type
73
+ when :single
74
+ @sin.puts key + @sep + value.to_s
75
+ when :list, :flat
76
+ @sin.puts key + @sep + value * @sep
77
+ when :double
78
+ @sin.puts key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep
79
+ end
73
80
  end
74
81
  end
75
82
 
@@ -85,15 +92,32 @@ module TSV
85
92
  def abort(exception=nil)
86
93
  @sin.abort(exception)
87
94
  end
95
+
96
+ def tsv(*args)
97
+ TSV.open(stream, *args)
98
+ end
99
+
100
+ def fingerprint
101
+ "Dumper:{"<< Log.fingerprint(self.all_fields|| []) << "}"
102
+ end
103
+
104
+ def digest_str
105
+ fingerprint
106
+ end
107
+
108
+ def inspect
109
+ fingerprint
110
+ end
88
111
  end
89
112
 
90
- def stream
91
- dumper = TSV::Dumper.new self.extension_attr_hash
92
- dumper.init
113
+ def dumper_stream(options = {})
114
+ preamble = IndiferentHash.process_options options, :preamble, :preamble => true
115
+ dumper = TSV::Dumper.new self.extension_attr_hash.merge(options)
93
116
  t = Thread.new do
94
117
  begin
95
118
  Thread.current.report_on_exception = true
96
119
  Thread.current["name"] = "Dumper thread"
120
+ dumper.init(preamble: preamble)
97
121
  self.each do |k,v|
98
122
  dumper.add k, v
99
123
  end
@@ -103,10 +127,12 @@ module TSV
103
127
  end
104
128
  end
105
129
  Thread.pass until t["name"]
106
- dumper.stream
130
+ s = dumper.stream
131
+ ConcurrentStream.setup(s, :threads => [t])
132
+ s
107
133
  end
108
134
 
109
- def to_s
110
- stream.read
135
+ def to_s(options = {})
136
+ dumper_stream(options).read
111
137
  end
112
138
  end
@@ -1,13 +1,16 @@
1
1
  require_relative 'parser'
2
+ require_relative 'transformer'
2
3
  require_relative 'persist/fix_width_table'
3
4
  module TSV
4
- def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
5
- persist, type = IndiferentHash.process_options kwargs,
6
- :persist, :persist_type,
5
+ def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
6
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
7
+ :persist, :persist_type, :persist_update, :data_persist,
7
8
  :persist => false, :persist_type => "HDB"
8
9
  kwargs.delete :type
9
10
 
10
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
11
+ fields = :all if fields.nil?
12
+
13
+ Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
11
14
  if filename
12
15
  index = ScoutCabinet.open(filename, true, type)
13
16
  TSV.setup(index, :type => :single)
@@ -16,11 +19,17 @@ module TSV
16
19
  index = TSV.setup({}, :type => :single)
17
20
  end
18
21
 
19
- dummy_data = TSV.setup({}, :key_field => "Key", :fields => ["Target"])
22
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
23
+
24
+ bar = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" if TrueClass === bar
25
+
20
26
  if order
21
27
  tmp_index = {}
22
- key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :double, into: dummy_data, unnamed: true, **kwargs do |k,values|
28
+ include_self = fields == :all || (Array === fields) && fields.include?(target)
29
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :double, unnamed: true, bar: bar, **kwargs do |k,values|
30
+ tmp_index[k] ||= [[k]] if include_self
23
31
  values.each_with_index do |list,i|
32
+ i += 1 if include_self
24
33
  list.each do |e|
25
34
  tmp_index[e] ||= []
26
35
  tmp_index[e][i] ||= []
@@ -31,16 +40,24 @@ module TSV
31
40
  tmp_index.each do |e,list|
32
41
  index[e] = list.flatten.compact.uniq.first
33
42
  end
43
+
44
+ index.key_field = source_field_names * ","
45
+ index.fields = [target_key_field]
46
+
47
+ tmp_index = {}
48
+
34
49
  else
35
- key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :flat, into: dummy_data, unnamed: true, **kwargs do |k,values|
50
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
36
51
  values.each do |e|
37
52
  index[e] = k unless index.include?(e)
38
53
  end
39
54
  end
55
+
56
+ index.key_field = source_field_names * ","
57
+ index.fields = [target_key_field]
40
58
  end
41
59
 
42
- index.key_field = dummy_data.fields * ", "
43
- index.fields = [dummy_data.key_field]
60
+
44
61
  index
45
62
  end
46
63
  end
@@ -50,16 +67,18 @@ module TSV
50
67
  end
51
68
 
52
69
  def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
53
- persist, type = IndiferentHash.process_options kwargs,
54
- :persist, :persist_type,
70
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
71
+ :persist, :persist_type, :persist_update, :data_persist,
55
72
  :persist => false, :persist_type => :fwt
56
73
  kwargs.delete :type
57
74
 
58
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
75
+ Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :prefix => "RangeIndex", :other_options => kwargs, update: persist_update)) do |filename|
76
+
77
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
59
78
 
60
79
  max_key_size = 0
61
80
  index_data = []
62
- TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
81
+ TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
63
82
  key_size = key.length
64
83
  max_key_size = key_size if key_size > max_key_size
65
84
 
@@ -85,6 +104,43 @@ module TSV
85
104
  TSV.range_index(self, *args, **kwargs, &block)
86
105
  end
87
106
 
# Build a point index over +pos_field+ using a FixWidthTable.
#
# Every entry of +tsv_file+ is traversed as +:single+ with the value cast
# through +:to_i+, and one +[key, position]+ pair is collected per value.
# The pairs are loaded with +add_point+ into a table whose key width is the
# longest key seen.
#
# Options taken from +kwargs+: +:persist+ (default false), +:persist_type+
# (default +:fwt+), +:persist_update+ and +:data_persist+; +:type+ is
# discarded. The remaining options are forwarded to +TSV.traverse+.
#
# NOTE(review): the persistence prefix is "RangeIndex", the same one used by
# range_index — confirm whether a distinct prefix was intended.
#
# @param tsv_file data to index; opened with +TSV.open(..., persist: true)+
#   first when +:data_persist+ is set and it is not already a TSV
# @param pos_field field holding the position
# @param key_field field used as key of the index entries
# @return the FixWidthTable, after calling +read+ on it
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, update: persist_update, :prefix => "RangeIndex", :other_options => kwargs)) do |filename|

    # Parenthesised on purpose: `! TSV === x` parses as `(!TSV) === x`,
    # which is false for any real input.
    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        # One point per position when several are given for the same key.
        pos.each { |p| index_data << [key, p] }
      else
        index_data << [key, pos]
      end
    end

    # Without a persistence file the table is kept in memory.
    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end

# Instance-level shortcut for {TSV.pos_index} with the receiver as +tsv_file+.
def pos_index(*args, **kwargs, &block)
  TSV.pos_index(self, *args, **kwargs, &block)
end
143
+
88
144
 
89
145
  #def range_index(start_field = nil, end_field = nil, options = {})
90
146
  # start_field ||= "Start"