scout-gear 7.2.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/.vimproject +51 -6
  3. data/VERSION +1 -1
  4. data/bin/scout +6 -3
  5. data/lib/rbbt-scout.rb +1 -0
  6. data/lib/scout/cmd.rb +1 -1
  7. data/lib/scout/concurrent_stream.rb +33 -29
  8. data/lib/scout/config.rb +1 -1
  9. data/lib/scout/exceptions.rb +1 -0
  10. data/lib/scout/log/color.rb +4 -2
  11. data/lib/scout/log/progress/report.rb +1 -1
  12. data/lib/scout/log/progress/util.rb +71 -2
  13. data/lib/scout/log/progress.rb +1 -1
  14. data/lib/scout/log/trap.rb +107 -0
  15. data/lib/scout/log.rb +56 -21
  16. data/lib/scout/meta_extension.rb +13 -6
  17. data/lib/scout/misc/digest.rb +1 -1
  18. data/lib/scout/misc/format.rb +12 -0
  19. data/lib/scout/misc/helper.rb +31 -0
  20. data/lib/scout/misc/insist.rb +1 -1
  21. data/lib/scout/misc/monitor.rb +12 -1
  22. data/lib/scout/misc/system.rb +10 -0
  23. data/lib/scout/misc.rb +1 -0
  24. data/lib/scout/named_array.rb +65 -3
  25. data/lib/scout/open/lock/lockfile.rb +587 -0
  26. data/lib/scout/open/lock.rb +28 -2
  27. data/lib/scout/open/remote.rb +4 -0
  28. data/lib/scout/open/stream.rb +111 -42
  29. data/lib/scout/open/util.rb +13 -3
  30. data/lib/scout/path/find.rb +9 -1
  31. data/lib/scout/path/util.rb +35 -0
  32. data/lib/scout/persist/serialize.rb +18 -5
  33. data/lib/scout/persist.rb +60 -30
  34. data/lib/scout/resource/path.rb +53 -0
  35. data/lib/scout/resource/produce.rb +0 -8
  36. data/lib/scout/resource/util.rb +2 -1
  37. data/lib/scout/semaphore.rb +8 -1
  38. data/lib/scout/tmpfile.rb +7 -8
  39. data/lib/scout/tsv/attach.rb +177 -0
  40. data/lib/scout/tsv/change_id.rb +40 -0
  41. data/lib/scout/tsv/dumper.rb +85 -54
  42. data/lib/scout/tsv/index.rb +188 -20
  43. data/lib/scout/tsv/open.rb +182 -0
  44. data/lib/scout/tsv/parser.rb +200 -118
  45. data/lib/scout/tsv/path.rb +5 -6
  46. data/lib/scout/tsv/persist/adapter.rb +26 -37
  47. data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
  48. data/lib/scout/tsv/persist/serialize.rb +117 -0
  49. data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
  50. data/lib/scout/tsv/persist.rb +4 -2
  51. data/lib/scout/tsv/transformer.rb +141 -0
  52. data/lib/scout/tsv/traverse.rb +136 -37
  53. data/lib/scout/tsv/util/filter.rb +312 -0
  54. data/lib/scout/tsv/util/process.rb +73 -0
  55. data/lib/scout/tsv/util/reorder.rb +81 -0
  56. data/lib/scout/tsv/util/select.rb +265 -0
  57. data/lib/scout/tsv/util/unzip.rb +86 -0
  58. data/lib/scout/tsv/util.rb +126 -19
  59. data/lib/scout/tsv.rb +28 -5
  60. data/lib/scout/work_queue/socket.rb +6 -1
  61. data/lib/scout/work_queue/worker.rb +5 -2
  62. data/lib/scout/work_queue.rb +15 -8
  63. data/lib/scout/workflow/definition.rb +29 -2
  64. data/lib/scout/workflow/step/dependencies.rb +24 -4
  65. data/lib/scout/workflow/step/info.rb +40 -5
  66. data/lib/scout/workflow/step/progress.rb +14 -0
  67. data/lib/scout/workflow/step/provenance.rb +8 -7
  68. data/lib/scout/workflow/step/status.rb +45 -0
  69. data/lib/scout/workflow/step.rb +104 -33
  70. data/lib/scout/workflow/task/inputs.rb +14 -20
  71. data/lib/scout/workflow/task.rb +86 -47
  72. data/lib/scout/workflow/usage.rb +10 -6
  73. data/scout-gear.gemspec +30 -3
  74. data/scout_commands/workflow/task +37 -9
  75. data/scout_commands/workflow/task_old +2 -2
  76. data/test/scout/open/test_stream.rb +61 -59
  77. data/test/scout/path/test_find.rb +10 -1
  78. data/test/scout/resource/test_produce.rb +15 -0
  79. data/test/scout/test_meta_extension.rb +25 -0
  80. data/test/scout/test_named_array.rb +18 -0
  81. data/test/scout/test_persist.rb +67 -0
  82. data/test/scout/test_tmpfile.rb +1 -1
  83. data/test/scout/test_tsv.rb +222 -3
  84. data/test/scout/test_work_queue.rb +21 -18
  85. data/test/scout/tsv/persist/test_adapter.rb +11 -1
  86. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  87. data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
  88. data/test/scout/tsv/test_attach.rb +227 -0
  89. data/test/scout/tsv/test_change_id.rb +98 -0
  90. data/test/scout/tsv/test_dumper.rb +1 -1
  91. data/test/scout/tsv/test_index.rb +127 -3
  92. data/test/scout/tsv/test_open.rb +167 -0
  93. data/test/scout/tsv/test_parser.rb +45 -3
  94. data/test/scout/tsv/test_persist.rb +9 -0
  95. data/test/scout/tsv/test_transformer.rb +108 -0
  96. data/test/scout/tsv/test_traverse.rb +195 -3
  97. data/test/scout/tsv/test_util.rb +24 -0
  98. data/test/scout/tsv/util/test_filter.rb +188 -0
  99. data/test/scout/tsv/util/test_process.rb +47 -0
  100. data/test/scout/tsv/util/test_reorder.rb +94 -0
  101. data/test/scout/tsv/util/test_select.rb +58 -0
  102. data/test/scout/tsv/util/test_unzip.rb +112 -0
  103. data/test/scout/work_queue/test_socket.rb +0 -1
  104. data/test/scout/work_queue/test_worker.rb +63 -6
  105. data/test/scout/workflow/step/test_load.rb +3 -3
  106. data/test/scout/workflow/step/test_status.rb +31 -0
  107. data/test/scout/workflow/task/test_inputs.rb +14 -14
  108. data/test/scout/workflow/test_step.rb +13 -13
  109. data/test/scout/workflow/test_task.rb +168 -32
  110. data/test/scout/workflow/test_usage.rb +33 -6
  111. data/test/test_helper.rb +3 -1
  112. metadata +29 -2
data/lib/scout/tmpfile.rb CHANGED
@@ -93,19 +93,18 @@ module TmpFile
93
93
  end
94
94
  end
95
95
 
96
+ SLASH_REPLACE = '·'
96
97
  def self.tmp_for_file(file, tmp_options = {}, other_options = {})
97
- tmp_for_file = IndiferentHash.process_options tmp_options, :file
98
+ tmp_for_file, prefix, key, persistence_dir = IndiferentHash.process_options tmp_options, :file, :prefix, :key, :dir
98
99
  return tmp_for_file unless tmp_for_file.nil?
99
100
 
100
- prefix = IndiferentHash.process_options tmp_options, :prefix
101
-
102
101
  if prefix.nil?
103
- perfile = file.to_s.gsub(/\//, '>')
102
+ perfile = file.to_s.sub(/\.b?gz$/,'')
104
103
  else
105
- perfile = prefix.to_s + ":" + file.to_s.gsub(/\//, '>')
104
+ perfile = prefix.to_s + ":" + file.to_s.sub(/\.b?gz$/,'')
106
105
  end
107
106
 
108
- perfile.sub!(/\.b?gz$/,'')
107
+ perfile += "[#{ key }]" if key
109
108
 
110
109
  if other_options.include? :filters
111
110
  other_options[:filters].each do |match,value|
@@ -113,10 +112,10 @@ module TmpFile
113
112
  end
114
113
  end
115
114
 
116
- persistence_dir = IndiferentHash.process_options(tmp_options, :dir) || TmpFile.tmpdir
115
+ persistence_dir = TmpFile.tmpdir if persistence_dir.nil?
117
116
  Path.setup(persistence_dir) unless Path === persistence_dir
118
117
 
119
- filename = perfile.gsub(/\s/,'_').gsub(/\//,'>')
118
+ filename = perfile.gsub(/\s/,'_').gsub('/', SLASH_REPLACE)
120
119
  clean_options = other_options.dup
121
120
  clean_options.delete :unnamed
122
121
  clean_options.delete "unnamed"
@@ -0,0 +1,177 @@
1
module TSV

  # Decide which field of +source+ and which field of +other+ are used to pair
  # their entries.
  #
  # Resolution order when +match_key+ is not given:
  # 1. the first field name present in both +all_fields+ lists;
  # 2. the first field of +source+ that +other.identify_field+ recognizes;
  # 3. the first field of +other+ that +source.identify_field+ recognizes;
  # 4. the key field of +source+.
  #
  # A caller-supplied +other_key+ is kept unless step 2 or 3 finds a match
  # (previously it was reset to nil by every failed probe in step 2).
  #
  # Returns +[match_key, other_key]+; either is replaced by +:key+ when it
  # equals the key field of its table.
  def self.match_keys(source, other, match_key: nil, other_key: nil)
    match_key = (source.all_fields & other.all_fields).first if match_key.nil?

    if match_key.nil?
      source.all_fields.each do |f|
        found = other.identify_field(f)
        next unless found
        other_key = found == :key ? other.key_field : found
        match_key = f
        break
      end
    end

    if match_key.nil?
      other.all_fields.each do |f|
        found = source.identify_field(f)
        next unless found
        match_key = found
        other_key = f
        break
      end
    end

    match_key = source.key_field if match_key.nil?

    other_key = other.identify_field(match_key) if other_key.nil?
    other_key = other.key_field if other_key.nil?

    match_key = :key if match_key == source.key_field
    other_key = :key if other_key == other.key_field

    [match_key, other_key]
  end

  # Add fields from +other+ to the entries of +source+.
  #
  # source::        a TSV or TSV::Parser; anything else is wrapped in a
  #                 TSV::Transformer.
  # other::         a TSV, or something TSV.open can load.
  # target::        only used when +source+ is a Transformer: +:stream+ dumps
  #                 into a new TSV::Dumper, +nil+ into a fresh TSV, anything
  #                 else is used as the dumper itself.
  # fields::        fields of +other+ to attach (a single String is accepted);
  #                 defaults to every field of +other+ except its matching key
  #                 and the key field of +source+.
  # match_key / other_key:: see TSV.match_keys.
  # one2one::       forwarded to +other.reorder+ when +other+ must be re-keyed.
  # complete::      when matching on the key of +source+, also add the entries
  #                 of +other+ whose key is absent from +source+.
  # insitu::        modify the traversed value arrays in place instead of
  #                 storing a copy; defaults to true only for TSV sources.
  # persist_input:: forwarded to TSV.open as +persist+.
  # bar::           progress bar option forwarded to +traverse+; +true+ is
  #                 replaced by the log message.
  #
  # Returns +source+ (possibly the Transformer wrapping the original input).
  def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
    source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
    other = TSV.open other, persist: persist_input unless TSV === other

    fields = [fields] if String === fields

    match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

    if TSV::Transformer === source
      source.dumper = case target
                      when :stream
                        TSV::Dumper.new(source.options.merge(sep: "\t"))
                      when nil
                        TSV.setup({}, **source.options.dup)
                      else
                        target
                      end
    end

    other.with_unnamed do
      source.with_unnamed do

        other_key_name = other_key == :key ? other.key_field : other_key
        other_key_name = other.fields[other_key_name] if Integer === other_key
        fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

        # Re-key +other+ so it can be looked up directly by the matching value
        if other_key != :key
          other = other.reorder other_key, fields, one2one: one2one
        end

        other_field_positions = other.identify_field(fields)

        log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
        Log.debug log_message
        bar = log_message if TrueClass === bar

        source.fields = (source.fields + fields).uniq

        # Position in +source+ of each attached field, aligned with +fields+
        overlaps = source.identify_field(fields)

        # Stand-in for keys that are missing from +other+
        empty_other_values = case source.type
                             when :list
                               [nil] * other.fields.length
                             when :flat
                               []
                             when :double
                               [[]] * other.fields.length
                             end

        insitu = TSV === source ? true : false if insitu.nil?

        match_key_pos = source.identify_field(match_key)
        source.traverse bar: bar, unnamed: true do |orig_key,current_values|
          keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
          keys = [keys] unless Array === keys

          current_values = current_values.dup unless insitu
          keys.each do |current_key|
            other_values = other[current_key]

            # Bring the values of +other+ into the shape used by +source+
            if other_values.nil?
              other_values = empty_other_values
            elsif other.type == :flat
              other_values = [other_values]
            elsif other.type == :list && source.type == :double
              other_values = other_values.collect{|v| [v] }
            elsif other.type == :double && source.type == :list
              other_values = other_values.collect{|v| v.first }
            end

            other_values = other_values.values_at(*other_field_positions)

            other_values.zip(overlaps).each do |v,overlap|
              if source.type == :list
                # Only fill slots that are still nil or empty
                current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
              else
                current_values[overlap] ||= []
                current_values[overlap].concat(v - current_values[overlap])
              end
            end
          end
          source[orig_key] = current_values unless insitu
          nil
        end

        if complete && match_key == :key
          other.each do |new_key,other_values|
            next if source.include?(new_key)

            if other.type == :flat
              other_values = [other_values]
            elsif other.type == :list && source.type == :double
              other_values = other_values.collect{|v| [v] }
            elsif other.type == :double && source.type == :list
              other_values = other_values.collect{|v| v.first }
            end

            # Same projection as the main loop, so values line up with +overlaps+
            other_values = other_values.values_at(*other_field_positions)

            new_values = case source.type
                         when :list
                           [nil] * source.fields.length
                         when :flat
                           []
                         when :double
                           source.fields.length.times.collect{ [] }
                         end

            other_values.zip(overlaps).each do |v,overlap|
              if source.type == :list
                # Test the destination slot (the old code indexed into +v+)
                slot = new_values[overlap]
                new_values[overlap] = v if slot.nil? || String === slot && slot.empty?
              else
                next if v.nil?
                new_values[overlap] ||= []
                new_values[overlap].concat v
              end
            end
            source[new_key] = new_values
          end
        end
      end
    end

    source
  end

  # Instance-level shortcut for TSV.attach with this object as +source+.
  def attach(*args, **kwargs)
    TSV.attach(self, *args, **kwargs)
  end
end
@@ -0,0 +1,40 @@
1
module TSV
  # Re-key +source+ by +new_key_field+.
  #
  # When +new_key_field+ is not a field of +source+, it is first attached from
  # +identifiers+ and the result is re-keyed. If +identifiers+ is not given,
  # +source.identifiers+ is used; the previous guard made that fallback
  # unreachable.
  #
  # one2one::             forwarded to the traversal.
  # stream::              return the TSV::Transformer instead of its +tsv+.
  # keep::                keep the old key field as the first field.
  # persist_identifiers:: forwarded to +attach+ as +persist_input+.
  def self.change_key(source, new_key_field, identifiers: nil, one2one: false, stream: false, keep: false, persist_identifiers: nil)
    source = TSV::Parser.new source if String === source

    if source.identify_field(new_key_field, strict: true).nil?
      identifiers = source.identifiers if identifiers.nil? && source.respond_to?(:identifiers)
      if identifiers
        new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
        return new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one)
      end
    end

    fields = source.fields.dup - [new_key_field]
    fields.unshift source.key_field if keep
    transformer = TSV::Transformer.new source
    transformer.key_field = new_key_field
    transformer.fields = fields
    transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k,v|
      [k, v]
    end

    stream ? transformer : transformer.tsv
  end

  # Instance-level shortcut for TSV.change_key with this object as +source+.
  def change_key(*args, **kwargs)
    TSV.change_key(self, *args, **kwargs)
  end

  # Replace field +source_id+ of +source+ by +new_id+, which is attached from
  # +identifiers+ (default: +source.identifiers+).
  #
  # Raises ArgumentError when +source_id+ is not one of +source.fields+
  # (previously an obscure TypeError from indexing with nil).
  #
  # NOTE(review): +one2one+ is accepted but not forwarded to +attach+; confirm
  # whether that is intended.
  def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
    source = TSV::Parser.new source if String === source

    identifiers = source.identifiers if identifiers.nil?

    new_fields = source.fields.dup
    position = new_fields.index(source_id)
    raise ArgumentError, "Field #{source_id.inspect} not found in #{new_fields.inspect}" if position.nil?
    new_fields[position] = new_id

    source.attach(identifiers, fields: [new_id], insitu: insitu).slice(new_fields)
  end

  # Instance-level shortcut for TSV.change_id with this object as +source+.
  def change_id(*args, **kwargs)
    TSV.change_id(self, *args, **kwargs)
  end
end
@@ -1,38 +1,14 @@
1
1
  module TSV
2
2
  class Dumper
3
- def self.header_lines(key_field, fields, entry_hash = nil)
4
- if Hash === entry_hash
5
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
6
- preamble = entry_hash[:preamble]
7
- header_hash = entry_hash[:header_hash]
8
- end
9
-
10
- header_hash = "#" if header_hash.nil?
11
-
12
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
13
-
14
- str = ""
15
- str << preamble.strip << "\n" if preamble and not preamble.empty?
16
- if fields
17
- if fields.empty?
18
- str << header_hash << (key_field || "ID").to_s << "\n"
19
- else
20
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
21
- end
22
- end
23
-
24
- str
25
- end
26
-
27
3
  def self.header(options={})
28
- key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
29
- :key_field, :fields, :sep, :header_hash, :preamble,
4
+ key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
5
+ :key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
30
6
  :sep => "\t", :header_hash => "#", :preamble => true
31
7
 
32
- if fields.nil? || key_field.nil?
8
+ if fields.nil?
33
9
  fields_str = nil
34
10
  else
35
- fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
11
+ fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
36
12
  end
37
13
 
38
14
  if preamble && options.values.compact.any?
@@ -45,31 +21,62 @@ module TSV
45
21
  end
46
22
 
47
23
 
48
- attr_accessor :options
24
+ attr_accessor :options, :initialized, :type, :sep
49
25
  def initialize(options = {})
26
+ options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
50
27
  @sep, @type = IndiferentHash.process_options options,
51
28
  :sep, :type,
52
29
  :sep => "\t", :type => :double
53
30
  @options = options
54
31
  @sout, @sin = Open.pipe
55
- ConcurrentStream.setup(@sin, :pair => @sout)
56
- ConcurrentStream.setup(@sout, :pair => @sin)
32
+ @initialized = false
33
+ @mutex = Mutex.new
34
+ ConcurrentStream.setup(@sin, pair: @sout)
35
+ ConcurrentStream.setup(@sout, pair: @sin)
57
36
  end
58
37
 
59
- def init
60
- header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
61
- @sin.puts header if header and ! header.empty?
38
# Key field name stored under +:key_field+ in the dumper options.
def key_field
  opts = @options
  opts[:key_field]
end
41
+
42
# Field names stored under +:fields+ in the dumper options.
def fields
  opts = @options
  opts[:fields]
end
63
45
 
64
- def add(key, value)
46
# Store +key_field+ under +:key_field+ in the dumper options.
def key_field=(key_field)
  @options.store(:key_field, key_field) if false # placeholder never runs; keeps []= as the only write path
  @options[:key_field] = key_field
end
49
+
50
# Store +fields+ under +:fields+ in the dumper options.
def fields=(fields)
  opts = @options
  opts[:fields] = fields
end
53
+
54
# Key field followed by the regular fields, or nil when no fields are set.
def all_fields
  names = fields
  if names.nil?
    nil
  else
    [key_field] + names
  end
end
65
58
 
66
- case @type
67
- when :single
68
- @sin.puts key + @sep + value
69
- when :list, :flat
70
- @sin.puts key + @sep + value * @sep
71
- when :double
72
- @sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
59
+
60
# Mark the dumper as initialized and write the header, if any, to the input
# end of the pipe. The header is built by Dumper.header from the stored
# options plus the current type, separator and +preamble+ flag; both the flag
# update and the write happen while holding the dumper mutex.
def init(preamble: true)
  header_options = @options.merge(type: @type, sep: @sep, preamble: preamble)
  header = Dumper.header(header_options)
  @mutex.synchronize do
    @initialized = true
    @sin.puts header unless !header || header.empty?
  end
end
67
+
68
# Write one entry to the input end of the pipe while holding the dumper mutex.
#
# The key is converted with +to_s+ unless it is already a String. The value is
# rendered according to +@type+:
# :single::       +value.to_s+
# :list, :flat::  values joined with the separator
# :double::       each Array element joined with "|", then joined with the
#                 separator
# Nothing is written for any other type.
def add(key, value)
  @mutex.synchronize do
    key = key.to_s unless String === key

    if @type == :single
      @sin.puts key + @sep + value.to_s
    elsif @type == :list || @type == :flat
      @sin.puts key + @sep + value * @sep
    elsif @type == :double
      cells = value.collect do |v|
        Array === v ? v * "|" : v
      end
      @sin.puts key + @sep + cells * @sep
    end
  end
end
75
82
 
@@ -85,23 +92,47 @@ module TSV
85
92
  def abort(exception=nil)
86
93
  @sin.abort(exception)
87
94
  end
95
+
96
# Open the dumper's stream with TSV.open, passing any extra arguments along.
def tsv(*args)
  source = stream
  TSV.open(source, *args)
end
99
+
100
# Short description of the dumper: "Dumper:{...}" wrapping the Log
# fingerprint of +all_fields+ (an empty list when there are none).
def fingerprint
  described = Log.fingerprint(all_fields || [])
  "Dumper:{" + described + "}"
end
103
+
104
# String used when digesting the dumper; same as +fingerprint+.
def digest_str
  summary = fingerprint
  summary
end
107
+
108
+ def inspect
109
+ fingerprint
110
+ end
88
111
  end
89
112
 
90
- def stream
91
- iii self.extension_attr_hash
92
- dumper = TSV::Dumper.new self.extension_attr_hash
93
- dumper.init
94
- Thread.new do
95
- Thread.current["name"] = "Dumper thread"
96
- self.each do |k,v|
97
- dumper.add k, v
113
# Dump this object through a TSV::Dumper fed from a background thread and
# return the dumper's stream.
#
# +:preamble+ is extracted from +options+ (declared default: true) and passed
# to +dumper.init+; the remaining options are merged over
# +extension_attr_hash+ to build the dumper. The thread initializes the
# dumper, adds every entry yielded by +each+ and closes it; any StandardError
# is handed to +dumper.abort+. The method waits until the thread has set its
# name and registers the thread on the returned stream via ConcurrentStream.
def dumper_stream(options = {})
  preamble = IndiferentHash.process_options options, :preamble, :preamble => true
  dumper = TSV::Dumper.new extension_attr_hash.merge(options)

  feeder = Thread.new do
    begin
      Thread.current.report_on_exception = true
      Thread.current["name"] = "Dumper thread"
      dumper.init(preamble: preamble)
      each { |key, value| dumper.add key, value }
      dumper.close
    rescue => error
      dumper.abort(error)
    end
  end

  # Do not hand out the stream before the feeder thread has started running
  Thread.pass until feeder["name"]

  output = dumper.stream
  ConcurrentStream.setup(output, :threads => [feeder])
  output
end
103
134
 
104
- def to_s
105
- stream.read
135
+ def to_s(options = {})
136
+ dumper_stream(options).read
106
137
  end
107
138
  end
@@ -1,12 +1,16 @@
1
1
  require_relative 'parser'
2
+ require_relative 'transformer'
3
+ require_relative 'persist/fix_width_table'
2
4
  module TSV
3
- def self.index(tsv_file, target: 0, order: true, **kwargs)
4
- persist, type = IndiferentHash.process_options kwargs,
5
- :persist, :persist_type,
5
+ def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
6
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
7
+ :persist, :persist_type, :persist_update, :data_persist,
6
8
  :persist => false, :persist_type => "HDB"
7
9
  kwargs.delete :type
8
10
 
9
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
11
+ fields = :all if fields.nil?
12
+
13
+ Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
10
14
  if filename
11
15
  index = ScoutCabinet.open(filename, true, type)
12
16
  TSV.setup(index, :type => :single)
@@ -15,35 +19,199 @@ module TSV
15
19
  index = TSV.setup({}, :type => :single)
16
20
  end
17
21
 
18
- dummy_data = nil
22
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
23
+
24
+ bar = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" if TrueClass === bar
25
+
19
26
  if order
20
27
  tmp_index = {}
21
- dummy_data = Open.open(tsv_file) do |file|
22
- TSV.parse file, key_field: target, type: :double, **kwargs do |k,values|
23
- values.each_with_index do |list,i|
24
- list.each do |e|
25
- tmp_index[e] ||= []
26
- tmp_index[e][i] ||= []
27
- tmp_index[e][i] << k
28
- end
28
+ include_self = fields == :all || (Array === fields) && fields.include?(target)
29
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :double, unnamed: true, bar: bar, **kwargs do |k,values|
30
+ tmp_index[k] ||= [[k]] if include_self
31
+ values.each_with_index do |list,i|
32
+ i += 1 if include_self
33
+ list.each do |e|
34
+ tmp_index[e] ||= []
35
+ tmp_index[e][i] ||= []
36
+ tmp_index[e][i] << k
29
37
  end
30
38
  end
31
39
  end
32
40
  tmp_index.each do |e,list|
33
41
  index[e] = list.flatten.compact.uniq.first
34
42
  end
43
+
44
+ index.key_field = source_field_names * ","
45
+ index.fields = [target_key_field]
46
+
47
+ tmp_index = {}
48
+
35
49
  else
36
- dummy_data = Open.open(tsv_file) do |file|
37
- TSV.parse file, key_field: target, type: :flat, **kwargs do |k,values|
38
- values.each do |e|
39
- index[e] = k unless index.include?(e)
40
- end
50
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
51
+ values.each do |e|
52
+ index[e] = k unless index.include?(e)
53
+ end
54
+ end
55
+
56
+ index.key_field = source_field_names * ","
57
+ index.fields = [target_key_field]
58
+ end
59
+
60
+
61
+ index
62
+ end
63
+ end
64
+
65
# Instance-level shortcut for TSV.index with this object as the data source;
# all arguments and the block are forwarded unchanged.
def index(*args, **kwargs, &block)
  forwarded = [self, *args]
  TSV.index(*forwarded, **kwargs, &block)
end
68
+
69
# Build a range index (a FixWidthTable) over +tsv_file+ from the values in
# +start_field+ and +end_field+, keyed by +key_field+.
#
# Options taken from +kwargs+: +:persist+ (default false), +:persist_type+
# (default :fwt), +:persist_update+ and +:data_persist+; +:type+ is dropped
# and the rest is forwarded to TSV.traverse. With +:data_persist+ the input is
# first loaded through TSV.open with +persist: true+, unless it already is a
# TSV. That check used to read `! TSV === tsv_file`, which parses as
# `(!TSV) === tsv_file` and never matched.
#
# When Persist.persist yields no filename the table is built under :memory.
#
# NOTE(review): +start_field+/+end_field+ are not part of the options handed
# to Persist.persist; confirm persisted indices over different fields of the
# same file cannot collide.
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :prefix => "RangeIndex", :other_options => kwargs, update: persist_update)) do |filename|

    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      start_pos, end_pos = values
      if Array === start_pos
        # Several ranges under the same key: pair starts with ends
        start_pos.zip(end_pos).each do |s,e|
          index_data << [key, [s.to_i, e.to_i]]
        end
      else
        index_data << [key, [start_pos.to_i, end_pos.to_i]]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, true)
    index.add_range index_data
    index.read
    index
  end
end
102
+
103
# Instance-level shortcut for TSV.range_index with this object as the data
# source; all arguments and the block are forwarded unchanged.
def range_index(*args, **kwargs, &block)
  forwarded = [self, *args]
  TSV.range_index(*forwarded, **kwargs, &block)
end
106
+
107
# Build a position index (a FixWidthTable) over +tsv_file+ from the values in
# +pos_field+, keyed by +key_field+.
#
# Options taken from +kwargs+: +:persist+ (default false), +:persist_type+
# (default :fwt), +:persist_update+ and +:data_persist+; +:type+ is dropped
# and the rest is forwarded to TSV.traverse, which is asked for +:single+
# values cast with +:to_i+. With +:data_persist+ the input is first loaded
# through TSV.open with +persist: true+, unless it already is a TSV. That
# check used to read `! TSV === tsv_file`, which parses as
# `(!TSV) === tsv_file` and never matched.
#
# A key that yields an Array of positions gets one entry per position; that
# branch used to reference the undefined names +end_pos+ and +index_pos+.
#
# When Persist.persist yields no filename the table is built under :memory.
#
# NOTE(review): the persistence prefix is "RangeIndex", the same as in
# range_index, and +pos_field+ is not part of the options handed to
# Persist.persist; confirm persisted indices cannot collide.
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, update: persist_update, :prefix => "RangeIndex", :other_options => kwargs)) do |filename|

    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        pos.each do |p|
          index_data << [key, p]
        end
      else
        index_data << [key, pos]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end
139
+
140
# Instance-level shortcut for TSV.pos_index with this object as the data
# source; all arguments and the block are forwarded unchanged.
def pos_index(*args, **kwargs, &block)
  forwarded = [self, *args]
  TSV.pos_index(*forwarded, **kwargs, &block)
end
143
+
144
+
145
+ #def range_index(start_field = nil, end_field = nil, options = {})
146
+ # start_field ||= "Start"
147
+ # end_field ||= "End"
148
+
149
+ # options = Misc.add_defaults options,
150
+ # :persist => false, :persist_file => nil, :persist_update => false
151
+
152
+ # persist_options = Misc.pull_keys options, :persist
153
+ # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
154
+
155
+ # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
156
+ # max_key_size = 0
157
+ # index_data = []
158
+ # with_unnamed do
159
+ # with_monitor :desc => "Creating Index Data", :step => 10000 do
160
+ # through :key, [start_field, end_field] do |key, values|
161
+ # key_size = key.length
162
+ # max_key_size = key_size if key_size > max_key_size
163
+
164
+ # start_pos, end_pos = values
165
+ # if Array === start_pos
166
+ # start_pos.zip(end_pos).each do |s,e|
167
+ # index_data << [key, [s.to_i, e.to_i]]
168
+ # end
169
+ # else
170
+ # index_data << [key, [start_pos.to_i, end_pos.to_i]]
171
+ # end
172
+ # end
173
+ # end
174
+ # end
175
+
176
+ # index = FixWidthTable.get(:memory, max_key_size, true)
177
+ # index.add_range index_data
178
+ # index.read
179
+ # index
180
+ # end
181
+ #end
182
+
183
+ #def self.range_index(file, start_field = nil, end_field = nil, options = {})
184
+ # start_field ||= "Start"
185
+ # end_field ||= "End"
186
+
187
+ # data_options = Misc.pull_keys options, :data
188
+ # filename = case
189
+ # when (String === file or Path === file)
190
+ # file
191
+ # when file.respond_to?(:filename)
192
+ # file.filename
193
+ # else
194
+ # file.object_id.to_s
195
+ # end
196
+ # persist_options = Misc.pull_keys options, :persist
197
+ # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
198
+
199
+ # filters = Misc.process_options options, :filters
200
+
201
+ # if filters
202
+ # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
203
+ # end
204
+
205
+ # Persist.persist(filename, :fwt, persist_options) do
206
+ # tsv = TSV.open(file, data_options)
207
+ # if filters
208
+ # tsv.filter
209
+ # filters.each do |match, value|
210
+ # tsv.add_filter match, value
211
+ # end
212
+ # end
213
+
214
+ # tsv.range_index(start_field, end_field, options)
215
+ # end
216
+ #end
49
217
  end