scout-gear 7.2.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +51 -6
  3. data/VERSION +1 -1
  4. data/bin/scout +6 -3
  5. data/lib/rbbt-scout.rb +1 -0
  6. data/lib/scout/cmd.rb +1 -1
  7. data/lib/scout/concurrent_stream.rb +33 -29
  8. data/lib/scout/config.rb +1 -1
  9. data/lib/scout/exceptions.rb +1 -0
  10. data/lib/scout/log/color.rb +4 -2
  11. data/lib/scout/log/progress/report.rb +1 -1
  12. data/lib/scout/log/progress/util.rb +71 -2
  13. data/lib/scout/log/progress.rb +1 -1
  14. data/lib/scout/log/trap.rb +107 -0
  15. data/lib/scout/log.rb +56 -21
  16. data/lib/scout/meta_extension.rb +13 -6
  17. data/lib/scout/misc/digest.rb +1 -1
  18. data/lib/scout/misc/format.rb +12 -0
  19. data/lib/scout/misc/helper.rb +31 -0
  20. data/lib/scout/misc/insist.rb +1 -1
  21. data/lib/scout/misc/monitor.rb +12 -1
  22. data/lib/scout/misc/system.rb +10 -0
  23. data/lib/scout/misc.rb +1 -0
  24. data/lib/scout/named_array.rb +65 -3
  25. data/lib/scout/open/lock/lockfile.rb +587 -0
  26. data/lib/scout/open/lock.rb +28 -2
  27. data/lib/scout/open/remote.rb +4 -0
  28. data/lib/scout/open/stream.rb +111 -42
  29. data/lib/scout/open/util.rb +13 -3
  30. data/lib/scout/path/find.rb +9 -1
  31. data/lib/scout/path/util.rb +35 -0
  32. data/lib/scout/persist/serialize.rb +18 -5
  33. data/lib/scout/persist.rb +60 -30
  34. data/lib/scout/resource/path.rb +53 -0
  35. data/lib/scout/resource/produce.rb +0 -8
  36. data/lib/scout/resource/util.rb +2 -1
  37. data/lib/scout/semaphore.rb +8 -1
  38. data/lib/scout/tmpfile.rb +7 -8
  39. data/lib/scout/tsv/attach.rb +177 -0
  40. data/lib/scout/tsv/change_id.rb +40 -0
  41. data/lib/scout/tsv/dumper.rb +85 -54
  42. data/lib/scout/tsv/index.rb +188 -20
  43. data/lib/scout/tsv/open.rb +182 -0
  44. data/lib/scout/tsv/parser.rb +200 -118
  45. data/lib/scout/tsv/path.rb +5 -6
  46. data/lib/scout/tsv/persist/adapter.rb +26 -37
  47. data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
  48. data/lib/scout/tsv/persist/serialize.rb +117 -0
  49. data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
  50. data/lib/scout/tsv/persist.rb +4 -2
  51. data/lib/scout/tsv/transformer.rb +141 -0
  52. data/lib/scout/tsv/traverse.rb +136 -37
  53. data/lib/scout/tsv/util/filter.rb +312 -0
  54. data/lib/scout/tsv/util/process.rb +73 -0
  55. data/lib/scout/tsv/util/reorder.rb +81 -0
  56. data/lib/scout/tsv/util/select.rb +265 -0
  57. data/lib/scout/tsv/util/unzip.rb +86 -0
  58. data/lib/scout/tsv/util.rb +126 -19
  59. data/lib/scout/tsv.rb +28 -5
  60. data/lib/scout/work_queue/socket.rb +6 -1
  61. data/lib/scout/work_queue/worker.rb +5 -2
  62. data/lib/scout/work_queue.rb +15 -8
  63. data/lib/scout/workflow/definition.rb +29 -2
  64. data/lib/scout/workflow/step/dependencies.rb +24 -4
  65. data/lib/scout/workflow/step/info.rb +40 -5
  66. data/lib/scout/workflow/step/progress.rb +14 -0
  67. data/lib/scout/workflow/step/provenance.rb +8 -7
  68. data/lib/scout/workflow/step/status.rb +45 -0
  69. data/lib/scout/workflow/step.rb +104 -33
  70. data/lib/scout/workflow/task/inputs.rb +14 -20
  71. data/lib/scout/workflow/task.rb +86 -47
  72. data/lib/scout/workflow/usage.rb +10 -6
  73. data/scout-gear.gemspec +30 -3
  74. data/scout_commands/workflow/task +37 -9
  75. data/scout_commands/workflow/task_old +2 -2
  76. data/test/scout/open/test_stream.rb +61 -59
  77. data/test/scout/path/test_find.rb +10 -1
  78. data/test/scout/resource/test_produce.rb +15 -0
  79. data/test/scout/test_meta_extension.rb +25 -0
  80. data/test/scout/test_named_array.rb +18 -0
  81. data/test/scout/test_persist.rb +67 -0
  82. data/test/scout/test_tmpfile.rb +1 -1
  83. data/test/scout/test_tsv.rb +222 -3
  84. data/test/scout/test_work_queue.rb +21 -18
  85. data/test/scout/tsv/persist/test_adapter.rb +11 -1
  86. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  87. data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
  88. data/test/scout/tsv/test_attach.rb +227 -0
  89. data/test/scout/tsv/test_change_id.rb +98 -0
  90. data/test/scout/tsv/test_dumper.rb +1 -1
  91. data/test/scout/tsv/test_index.rb +127 -3
  92. data/test/scout/tsv/test_open.rb +167 -0
  93. data/test/scout/tsv/test_parser.rb +45 -3
  94. data/test/scout/tsv/test_persist.rb +9 -0
  95. data/test/scout/tsv/test_transformer.rb +108 -0
  96. data/test/scout/tsv/test_traverse.rb +195 -3
  97. data/test/scout/tsv/test_util.rb +24 -0
  98. data/test/scout/tsv/util/test_filter.rb +188 -0
  99. data/test/scout/tsv/util/test_process.rb +47 -0
  100. data/test/scout/tsv/util/test_reorder.rb +94 -0
  101. data/test/scout/tsv/util/test_select.rb +58 -0
  102. data/test/scout/tsv/util/test_unzip.rb +112 -0
  103. data/test/scout/work_queue/test_socket.rb +0 -1
  104. data/test/scout/work_queue/test_worker.rb +63 -6
  105. data/test/scout/workflow/step/test_load.rb +3 -3
  106. data/test/scout/workflow/step/test_status.rb +31 -0
  107. data/test/scout/workflow/task/test_inputs.rb +14 -14
  108. data/test/scout/workflow/test_step.rb +13 -13
  109. data/test/scout/workflow/test_task.rb +168 -32
  110. data/test/scout/workflow/test_usage.rb +33 -6
  111. data/test/test_helper.rb +3 -1
  112. metadata +29 -2
data/lib/scout/tmpfile.rb CHANGED
@@ -93,19 +93,18 @@ module TmpFile
93
93
  end
94
94
  end
95
95
 
96
+ SLASH_REPLACE = '·'
96
97
  def self.tmp_for_file(file, tmp_options = {}, other_options = {})
97
- tmp_for_file = IndiferentHash.process_options tmp_options, :file
98
+ tmp_for_file, prefix, key, persistence_dir = IndiferentHash.process_options tmp_options, :file, :prefix, :key, :dir
98
99
  return tmp_for_file unless tmp_for_file.nil?
99
100
 
100
- prefix = IndiferentHash.process_options tmp_options, :prefix
101
-
102
101
  if prefix.nil?
103
- perfile = file.to_s.gsub(/\//, '>')
102
+ perfile = file.to_s.sub(/\.b?gz$/,'')
104
103
  else
105
- perfile = prefix.to_s + ":" + file.to_s.gsub(/\//, '>')
104
+ perfile = prefix.to_s + ":" + file.to_s.sub(/\.b?gz$/,'')
106
105
  end
107
106
 
108
- perfile.sub!(/\.b?gz$/,'')
107
+ perfile += "[#{ key }]" if key
109
108
 
110
109
  if other_options.include? :filters
111
110
  other_options[:filters].each do |match,value|
@@ -113,10 +112,10 @@ module TmpFile
113
112
  end
114
113
  end
115
114
 
116
- persistence_dir = IndiferentHash.process_options(tmp_options, :dir) || TmpFile.tmpdir
115
+ persistence_dir = TmpFile.tmpdir if persistence_dir.nil?
117
116
  Path.setup(persistence_dir) unless Path === persistence_dir
118
117
 
119
- filename = perfile.gsub(/\s/,'_').gsub(/\//,'>')
118
+ filename = perfile.gsub(/\s/,'_').gsub('/', SLASH_REPLACE)
120
119
  clean_options = other_options.dup
121
120
  clean_options.delete :unnamed
122
121
  clean_options.delete "unnamed"
@@ -0,0 +1,177 @@
1
+ module TSV
2
+
3
# Decide which field of +source+ and which field of +other+ are used to pair
# their entries.
#
# When +match_key+ is not given it is resolved, in order, as:
# 1. the first field name found in both +all_fields+ lists;
# 2. the first field of +source+ that +other.identify_field+ resolves
#    (+other_key+ then becomes that resolved field);
# 3. the value +source.identify_field+ returns for the first field of +other+
#    it resolves (+other_key+ then becomes that field of +other+);
# 4. the key field of +source+.
#
# When +other_key+ is still unset it is resolved from +match_key+ through
# +other.identify_field+, falling back to the key field of +other+.
#
# Returns <tt>[match_key, other_key]</tt>; an element equal to the
# corresponding key field is replaced by the symbol +:key+.
def self.match_keys(source, other, match_key: nil, other_key: nil)
  match_key = (source.all_fields & other.all_fields).first if match_key.nil?

  if match_key.nil?
    source.all_fields.each do |field|
      # Probe into a local: a miss must not wipe a caller-supplied other_key
      found = other.identify_field(field)
      next unless found
      other_key = found == :key ? other.key_field : found
      match_key = field
      break
    end
  end

  if match_key.nil?
    other.all_fields.each do |field|
      found = source.identify_field(field)
      next unless found
      match_key = found
      other_key = field
      break
    end
  end

  match_key = source.key_field if match_key.nil?

  other_key = other.identify_field(match_key) if other_key.nil?
  other_key = other.key_field if other_key.nil?

  match_key = :key if match_key == source.key_field
  other_key = :key if other_key == other.key_field

  [match_key, other_key]
end
40
+
41
# Add fields taken from +other+ to the entries of +source+, pairing entries
# through the keys chosen by TSV.match_keys.
#
# source::        a TSV or TSV::Parser; anything else is wrapped in a
#                 TSV::Transformer.
# other::         a TSV; anything else is opened with TSV.open
#                 (+persist: persist_input+).
# target::        only used when +source+ is a TSV::Transformer, to choose its
#                 dumper: +:stream+ builds a TSV::Dumper, +nil+ builds an empty
#                 TSV with the source options, any other value is used as is.
# fields::        field names to bring over (a single String is accepted).
#                 Defaults to every field of +other+ except its matching key
#                 and the key field of +source+.
# match_key::     see TSV.match_keys.
# other_key::     see TSV.match_keys.
# one2one::       forwarded to +other.reorder+ when +other+ has to be re-keyed.
# complete::      when true and the match is on the source key, entries of
#                 +other+ that are missing from +source+ are added as well.
# insitu::        when false, rows are duplicated before being modified and are
#                 written back with <tt>source[key] =</tt>. Defaults to true
#                 for a TSV source and false otherwise.
# persist_input:: forwarded to TSV.open as +persist+.
# bar::           forwarded to +source.traverse+; +true+ is replaced by the
#                 log message describing the attach.
#
# Returns +source+ (the transformer when one was created).
def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
  other = TSV.open other, persist: persist_input unless TSV === other

  fields = [fields] if String === fields

  match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

  if TSV::Transformer === source
    source.dumper = case target
                    when :stream
                      TSV::Dumper.new(source.options.merge(sep: "\t"))
                    when nil
                      TSV.setup({}, **source.options.dup)
                    else
                      target
                    end
  end

  other.with_unnamed do
    source.with_unnamed do

      other_key_name = other_key == :key ? other.key_field : other_key
      other_key_name = other.fields[other_key_name] if Integer === other_key
      fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

      if other_key != :key
        other = other.reorder other_key, fields, one2one: one2one
      end

      # Positions of the requested fields inside a row of +other+
      other_field_positions = other.identify_field(fields)

      log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
      Log.debug log_message
      bar = log_message if TrueClass === bar

      source.fields = (source.fields + fields).uniq

      # Positions of the requested fields inside a row of +source+
      overlaps = source.identify_field(fields)

      empty_other_values = case source.type
                           when :list
                             [nil] * other.fields.length
                           when :flat
                             []
                           when :double
                             [[]] * other.fields.length
                           end

      # Reshape a row of +other+ so its values look like values of +source+
      adapt_values = lambda do |values|
        if other.type == :flat
          [values]
        elsif other.type == :list && source.type == :double
          values.collect{|v| [v] }
        elsif other.type == :double && source.type == :list
          values.collect{|v| v.first }
        else
          values
        end
      end

      insitu = TSV === source ? true : false if insitu.nil?

      match_key_pos = source.identify_field(match_key)
      source.traverse bar: bar, unnamed: true do |orig_key,current_values|
        keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
        keys = [keys] unless Array === keys

        current_values = current_values.dup unless insitu
        keys.each do |current_key|
          other_values = other[current_key]
          other_values = other_values.nil? ? empty_other_values : adapt_values.call(other_values)

          other_values = other_values.values_at(*other_field_positions)

          other_values.zip(overlaps).each do |v,overlap|
            if source.type == :list
              # Only fill in values that are missing or empty
              current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
            else
              current_values[overlap] ||= []
              current_values[overlap].concat(v - current_values[overlap])
            end
          end
        end
        source[orig_key] = current_values unless insitu
        nil
      end

      if complete && match_key == :key
        other.each do |new_key,other_values|
          next if source.include?(new_key)

          # Same reshaping and field selection as for the entries matched above,
          # so every value lines up with its position in +overlaps+
          other_values = adapt_values.call(other_values)
          other_values = other_values.values_at(*other_field_positions)

          new_values = case source.type
                       when :list
                         [nil] * source.fields.length
                       when :flat
                         []
                       when :double
                         source.fields.length.times.collect{ [] }
                       end

          other_values.zip(overlaps).each do |v,overlap|
            if source.type == :list
              new_values[overlap] = v if new_values[overlap].nil? || String === new_values[overlap] && new_values[overlap].empty?
            else
              new_values[overlap] ||= []
              new_values[overlap].concat v
            end
          end
          source[new_key] = new_values
        end
      end
    end
  end

  source
end
173
+
174
# Instance-level form of TSV.attach: the receiver is passed as the source and
# every other argument is forwarded untouched.
def attach(*args, **kwargs)
  TSV.attach(self, *args, **kwargs)
end
177
+ end
@@ -0,0 +1,40 @@
1
+ module TSV
2
# Re-key +source+ so that +new_key_field+ becomes its key field.
#
# source::              a TSV-like object; a String is wrapped in a TSV::Parser.
# new_key_field::       name of the field to use as the new key.
# identifiers::         table used to translate into +new_key_field+ when
#                       +source+ does not have that field. Defaults to
#                       +source.identifiers+ when the source offers it.
# one2one::             forwarded to the traversal.
# stream::              when true the TSV::Transformer is returned instead of
#                       the TSV built from it.
# keep::                when true the old key field is kept as the first field.
# persist_identifiers:: forwarded to +attach+ as +persist_input+.
#
# When the new key has to come from +identifiers+, it is first attached to the
# source (without modifying it) and the result is re-keyed.
def self.change_key(source, new_key_field, identifiers: nil, one2one: false, stream: false, keep: false, persist_identifiers: nil)
  source = TSV::Parser.new source if String === source

  # Resolve the default identifiers before testing them; doing it inside the
  # conditional below would make the fallback unreachable
  identifiers = source.identifiers if identifiers.nil? && source.respond_to?(:identifiers)

  if identifiers && source.identify_field(new_key_field, strict: true).nil?
    translated = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
    return translated.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one)
  end

  fields = source.fields.dup - [new_key_field]
  fields.unshift source.key_field if keep
  transformer = TSV::Transformer.new source
  transformer.key_field = new_key_field
  transformer.fields = fields
  transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k,v|
    [k, v]
  end

  stream ? transformer : transformer.tsv
end
22
+
23
# Instance-level form of TSV.change_key: the receiver is passed as the source
# and every other argument is forwarded untouched.
def change_key(*args, **kwargs)
  TSV.change_key(self, *args, **kwargs)
end
26
+
27
# Replace the field +source_id+ of +source+ by +new_id+, taking the new values
# from +identifiers+.
#
# source::      a TSV-like object; a String is wrapped in a TSV::Parser.
# source_id::   name of the field to replace; must be one of +source.fields+.
# new_id::      name of the field that takes its place.
# identifiers:: table providing +new_id+; defaults to +source.identifiers+.
# one2one::     accepted for interface compatibility; not used by this method.
# insitu::      forwarded to +attach+.
#
# The new field is attached to the source and the result is sliced so that
# +new_id+ sits where +source_id+ used to be.
#
# Raises ArgumentError when +source_id+ is not a field of +source+.
def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
  source = TSV::Parser.new source if String === source

  identifiers = source.identifiers if identifiers.nil?

  new_fields = source.fields.dup
  position = new_fields.index(source_id)
  # Without this check a missing field surfaces as an opaque TypeError on nil
  raise ArgumentError, "Field #{source_id.inspect} not found in #{new_fields.inspect}" if position.nil?
  new_fields[position] = new_id

  source.attach(identifiers, fields: [new_id], insitu: insitu).slice(new_fields)
end
36
+
37
# Instance-level form of TSV.change_id: the receiver is passed as the source
# and every other argument is forwarded untouched.
def change_id(*args, **kwargs)
  TSV.change_id(self, *args, **kwargs)
end
40
+ end
@@ -1,38 +1,14 @@
1
1
  module TSV
2
2
  class Dumper
3
- def self.header_lines(key_field, fields, entry_hash = nil)
4
- if Hash === entry_hash
5
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
6
- preamble = entry_hash[:preamble]
7
- header_hash = entry_hash[:header_hash]
8
- end
9
-
10
- header_hash = "#" if header_hash.nil?
11
-
12
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
13
-
14
- str = ""
15
- str << preamble.strip << "\n" if preamble and not preamble.empty?
16
- if fields
17
- if fields.empty?
18
- str << header_hash << (key_field || "ID").to_s << "\n"
19
- else
20
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
21
- end
22
- end
23
-
24
- str
25
- end
26
-
27
3
  def self.header(options={})
28
- key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
29
- :key_field, :fields, :sep, :header_hash, :preamble,
4
+ key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
5
+ :key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
30
6
  :sep => "\t", :header_hash => "#", :preamble => true
31
7
 
32
- if fields.nil? || key_field.nil?
8
+ if fields.nil?
33
9
  fields_str = nil
34
10
  else
35
- fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
11
+ fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
36
12
  end
37
13
 
38
14
  if preamble && options.values.compact.any?
@@ -45,31 +21,62 @@ module TSV
45
21
  end
46
22
 
47
23
 
48
- attr_accessor :options
24
+ attr_accessor :options, :initialized, :type, :sep
49
25
  def initialize(options = {})
26
+ options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
50
27
  @sep, @type = IndiferentHash.process_options options,
51
28
  :sep, :type,
52
29
  :sep => "\t", :type => :double
53
30
  @options = options
54
31
  @sout, @sin = Open.pipe
55
- ConcurrentStream.setup(@sin, :pair => @sout)
56
- ConcurrentStream.setup(@sout, :pair => @sin)
32
+ @initialized = false
33
+ @mutex = Mutex.new
34
+ ConcurrentStream.setup(@sin, pair: @sout)
35
+ ConcurrentStream.setup(@sout, pair: @sin)
57
36
  end
58
37
 
59
- def init
60
- header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
61
- @sin.puts header if header and ! header.empty?
38
+ def key_field
39
+ @options[:key_field]
40
+ end
41
+
42
+ def fields
43
+ @options[:fields]
62
44
  end
63
45
 
64
- def add(key, value)
46
+ def key_field=(key_field)
47
+ @options[:key_field] = key_field
48
+ end
49
+
50
+ def fields=(fields)
51
+ @options[:fields] = fields
52
+ end
53
+
54
+ def all_fields
55
+ return nil if fields.nil?
56
+ [key_field] + fields
57
+ end
65
58
 
66
- case @type
67
- when :single
68
- @sin.puts key + @sep + value
69
- when :list, :flat
70
- @sin.puts key + @sep + value * @sep
71
- when :double
72
- @sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
59
+
60
+ def init(preamble: true)
61
+ header = Dumper.header(@options.merge(type: @type, sep: @sep, preamble: preamble))
62
+ @mutex.synchronize do
63
+ @initialized = true
64
+ @sin.puts header if header and ! header.empty?
65
+ end
66
+ end
67
+
68
+ def add(key, value)
69
+ @mutex.synchronize do
70
+
71
+ key = key.to_s unless String === key
72
+ case @type
73
+ when :single
74
+ @sin.puts key + @sep + value.to_s
75
+ when :list, :flat
76
+ @sin.puts key + @sep + value * @sep
77
+ when :double
78
+ @sin.puts key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep
79
+ end
73
80
  end
74
81
  end
75
82
 
@@ -85,23 +92,47 @@ module TSV
85
92
  def abort(exception=nil)
86
93
  @sin.abort(exception)
87
94
  end
95
+
96
# Open the dumper's output stream with TSV.open, forwarding any extra
# arguments.
def tsv(*args)
  TSV.open(stream, *args)
end
99
+
100
# Short textual identification of the dumper built from the fingerprint of
# its fields (an empty list when the fields are not set).
def fingerprint
  # Interpolate rather than appending to a string literal with <<, which
  # fails when string literals are frozen
  "Dumper:{#{Log.fingerprint(all_fields || [])}}"
end
103
+
104
# String used when digesting the dumper: the same text as #fingerprint.
def digest_str
  fingerprint
end
107
+
108
# Inspecting a dumper shows its #fingerprint.
def inspect
  fingerprint
end
88
111
  end
89
112
 
90
- def stream
91
- iii self.extension_attr_hash
92
- dumper = TSV::Dumper.new self.extension_attr_hash
93
- dumper.init
94
- Thread.new do
95
- Thread.current["name"] = "Dumper thread"
96
- self.each do |k,v|
97
- dumper.add k, v
113
+ def dumper_stream(options = {})
114
+ preamble = IndiferentHash.process_options options, :preamble, :preamble => true
115
+ dumper = TSV::Dumper.new self.extension_attr_hash.merge(options)
116
+ t = Thread.new do
117
+ begin
118
+ Thread.current.report_on_exception = true
119
+ Thread.current["name"] = "Dumper thread"
120
+ dumper.init(preamble: preamble)
121
+ self.each do |k,v|
122
+ dumper.add k, v
123
+ end
124
+ dumper.close
125
+ rescue
126
+ dumper.abort($!)
98
127
  end
99
- dumper.close
100
128
  end
101
- dumper.stream
129
+ Thread.pass until t["name"]
130
+ s = dumper.stream
131
+ ConcurrentStream.setup(s, :threads => [t])
132
+ s
102
133
  end
103
134
 
104
- def to_s
105
- stream.read
135
+ def to_s(options = {})
136
+ dumper_stream(options).read
106
137
  end
107
138
  end
@@ -1,12 +1,16 @@
1
1
  require_relative 'parser'
2
+ require_relative 'transformer'
3
+ require_relative 'persist/fix_width_table'
2
4
  module TSV
3
- def self.index(tsv_file, target: 0, order: true, **kwargs)
4
- persist, type = IndiferentHash.process_options kwargs,
5
- :persist, :persist_type,
5
+ def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
6
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
7
+ :persist, :persist_type, :persist_update, :data_persist,
6
8
  :persist => false, :persist_type => "HDB"
7
9
  kwargs.delete :type
8
10
 
9
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
11
+ fields = :all if fields.nil?
12
+
13
+ Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
10
14
  if filename
11
15
  index = ScoutCabinet.open(filename, true, type)
12
16
  TSV.setup(index, :type => :single)
@@ -15,35 +19,199 @@ module TSV
15
19
  index = TSV.setup({}, :type => :single)
16
20
  end
17
21
 
18
- dummy_data = nil
22
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
23
+
24
+ bar = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" if TrueClass === bar
25
+
19
26
  if order
20
27
  tmp_index = {}
21
- dummy_data = Open.open(tsv_file) do |file|
22
- TSV.parse file, key_field: target, type: :double, **kwargs do |k,values|
23
- values.each_with_index do |list,i|
24
- list.each do |e|
25
- tmp_index[e] ||= []
26
- tmp_index[e][i] ||= []
27
- tmp_index[e][i] << k
28
- end
28
+ include_self = fields == :all || (Array === fields) && fields.include?(target)
29
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :double, unnamed: true, bar: bar, **kwargs do |k,values|
30
+ tmp_index[k] ||= [[k]] if include_self
31
+ values.each_with_index do |list,i|
32
+ i += 1 if include_self
33
+ list.each do |e|
34
+ tmp_index[e] ||= []
35
+ tmp_index[e][i] ||= []
36
+ tmp_index[e][i] << k
29
37
  end
30
38
  end
31
39
  end
32
40
  tmp_index.each do |e,list|
33
41
  index[e] = list.flatten.compact.uniq.first
34
42
  end
43
+
44
+ index.key_field = source_field_names * ","
45
+ index.fields = [target_key_field]
46
+
47
+ tmp_index = {}
48
+
35
49
  else
36
- dummy_data = Open.open(tsv_file) do |file|
37
- TSV.parse file, key_field: target, type: :flat, **kwargs do |k,values|
38
- values.each do |e|
39
- index[e] = k unless index.include?(e)
40
- end
50
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
51
+ values.each do |e|
52
+ index[e] = k unless index.include?(e)
53
+ end
54
+ end
55
+
56
+ index.key_field = source_field_names * ","
57
+ index.fields = [target_key_field]
58
+ end
59
+
60
+
61
+ index
62
+ end
63
+ end
64
+
65
# Instance-level form of TSV.index: the receiver is passed as the table to
# index; arguments and block are forwarded untouched.
def index(*args, **kwargs, &block)
  TSV.index(self, *args, **kwargs, &block)
end
68
+
69
# Build a FixWidthTable range index over +tsv_file+ from a start and an end
# field.
#
# tsv_file::    anything TSV.traverse accepts.
# start_field:: field holding the start of each range.
# end_field::   field holding the end of each range.
# key_field::   field used as key of the index entries (defaults to +:key+).
#
# Options consumed from +kwargs+: +:persist+ (default false), +:persist_type+
# (default +:fwt+), +:persist_update+ and +:data_persist+; +:type+ is dropped.
# The remaining options are forwarded to Persist.persist and TSV.traverse.
#
# When start and end values are arrays they are paired position by position
# and every pair gets its own index entry. Values are converted with +to_i+.
#
# Returns the FixWidthTable, stored in +:memory+ when no persistence file is
# provided by Persist.persist.
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :prefix => "RangeIndex", :other_options => kwargs, update: persist_update)) do |filename|

    # Parenthesised on purpose: `! TSV === x` parses as `(!TSV) === x`
    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      start_pos, end_pos = values
      if Array === start_pos
        start_pos.zip(end_pos).each do |s,e|
          index_data << [key, [s.to_i, e.to_i]]
        end
      else
        index_data << [key, [start_pos.to_i, end_pos.to_i]]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, true)
    index.add_range index_data
    index.read
    index
  end
end
102
+
103
# Instance-level form of TSV.range_index: the receiver is passed as the table
# to index; arguments and block are forwarded untouched.
def range_index(*args, **kwargs, &block)
  TSV.range_index(self, *args, **kwargs, &block)
end
106
+
107
# Build a FixWidthTable point index over +tsv_file+ from a position field.
#
# tsv_file::  anything TSV.traverse accepts.
# pos_field:: field holding the position of each entry.
# key_field:: field used as key of the index entries (defaults to +:key+).
#
# Options consumed from +kwargs+: +:persist+ (default false), +:persist_type+
# (default +:fwt+), +:persist_update+ and +:data_persist+; +:type+ is dropped.
# The remaining options are forwarded to Persist.persist and TSV.traverse.
#
# The traversal asks for +:single+ values cast with +to_i+; should a value
# arrive as an array, every position in it gets its own index entry.
#
# Returns the FixWidthTable, stored in +:memory+ when no persistence file is
# provided by Persist.persist.
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  # NOTE(review): the prefix is the same "RangeIndex" used by TSV.range_index
  # and the indexed fields are not part of the persist options here; confirm
  # that both indices cannot end up sharing a persistence file
  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, update: persist_update, :prefix => "RangeIndex", :other_options => kwargs)) do |filename|

    # Parenthesised on purpose: `! TSV === x` parses as `(!TSV) === x`
    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        pos.each do |p|
          index_data << [key, p]
        end
      else
        index_data << [key, pos]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end
139
+
140
# Instance-level form of TSV.pos_index: the receiver is passed as the table
# to index; arguments and block are forwarded untouched.
def pos_index(*args, **kwargs, &block)
  TSV.pos_index(self, *args, **kwargs, &block)
end
143
+
144
+
145
+ #def range_index(start_field = nil, end_field = nil, options = {})
146
+ # start_field ||= "Start"
147
+ # end_field ||= "End"
148
+
149
+ # options = Misc.add_defaults options,
150
+ # :persist => false, :persist_file => nil, :persist_update => false
151
+
152
+ # persist_options = Misc.pull_keys options, :persist
153
+ # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
154
+
155
+ # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
156
+ # max_key_size = 0
157
+ # index_data = []
158
+ # with_unnamed do
159
+ # with_monitor :desc => "Creating Index Data", :step => 10000 do
160
+ # through :key, [start_field, end_field] do |key, values|
161
+ # key_size = key.length
162
+ # max_key_size = key_size if key_size > max_key_size
163
+
164
+ # start_pos, end_pos = values
165
+ # if Array === start_pos
166
+ # start_pos.zip(end_pos).each do |s,e|
167
+ # index_data << [key, [s.to_i, e.to_i]]
168
+ # end
169
+ # else
170
+ # index_data << [key, [start_pos.to_i, end_pos.to_i]]
171
+ # end
172
+ # end
173
+ # end
174
+ # end
175
+
176
+ # index = FixWidthTable.get(:memory, max_key_size, true)
177
+ # index.add_range index_data
178
+ # index.read
179
+ # index
180
+ # end
181
+ #end
182
+
183
+ #def self.range_index(file, start_field = nil, end_field = nil, options = {})
184
+ # start_field ||= "Start"
185
+ # end_field ||= "End"
186
+
187
+ # data_options = Misc.pull_keys options, :data
188
+ # filename = case
189
+ # when (String === file or Path === file)
190
+ # file
191
+ # when file.respond_to?(:filename)
192
+ # file.filename
193
+ # else
194
+ # file.object_id.to_s
195
+ # end
196
+ # persist_options = Misc.pull_keys options, :persist
197
+ # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
198
+
199
+ # filters = Misc.process_options options, :filters
200
+
201
+ # if filters
202
+ # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
203
+ # end
204
+
205
+ # Persist.persist(filename, :fwt, persist_options) do
206
+ # tsv = TSV.open(file, data_options)
207
+ # if filters
208
+ # tsv.filter
209
+ # filters.each do |match, value|
210
+ # tsv.add_filter match, value
211
+ # end
212
+ # end
213
+
214
+ # tsv.range_index(start_field, end_field, options)
215
+ # end
216
+ #end
49
217
  end