scout-gear 7.2.0 → 7.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.vimproject +37 -3
  3. data/VERSION +1 -1
  4. data/lib/scout/concurrent_stream.rb +9 -8
  5. data/lib/scout/exceptions.rb +1 -0
  6. data/lib/scout/log/color.rb +0 -1
  7. data/lib/scout/log/progress/util.rb +65 -0
  8. data/lib/scout/misc/helper.rb +31 -0
  9. data/lib/scout/misc/monitor.rb +1 -1
  10. data/lib/scout/misc.rb +1 -0
  11. data/lib/scout/open/stream.rb +21 -27
  12. data/lib/scout/persist.rb +42 -28
  13. data/lib/scout/semaphore.rb +8 -1
  14. data/lib/scout/tsv/dumper.rb +13 -8
  15. data/lib/scout/tsv/index.rb +127 -15
  16. data/lib/scout/tsv/open.rb +128 -0
  17. data/lib/scout/tsv/parser.rb +70 -43
  18. data/lib/scout/tsv/path.rb +4 -4
  19. data/lib/scout/tsv/persist/adapter.rb +52 -33
  20. data/lib/scout/tsv/persist/fix_width_table.rb +324 -0
  21. data/lib/scout/tsv/persist/serialize.rb +117 -0
  22. data/lib/scout/tsv/persist/tokyocabinet.rb +3 -3
  23. data/lib/scout/tsv/persist.rb +0 -2
  24. data/lib/scout/tsv/traverse.rb +130 -35
  25. data/lib/scout/tsv/util/filter.rb +303 -0
  26. data/lib/scout/tsv/util/process.rb +73 -0
  27. data/lib/scout/tsv/util/select.rb +220 -0
  28. data/lib/scout/tsv/util.rb +77 -19
  29. data/lib/scout/tsv.rb +2 -2
  30. data/lib/scout/work_queue/worker.rb +1 -1
  31. data/lib/scout/workflow/definition.rb +8 -0
  32. data/lib/scout/workflow/step/info.rb +4 -0
  33. data/lib/scout/workflow/step/progress.rb +14 -0
  34. data/lib/scout/workflow/step.rb +10 -5
  35. data/lib/scout/workflow/task.rb +8 -4
  36. data/lib/scout/workflow/usage.rb +2 -0
  37. data/scout-gear.gemspec +33 -10
  38. data/scout_commands/workflow/task +3 -2
  39. data/scout_commands/workflow/task_old +2 -2
  40. data/test/scout/open/test_stream.rb +1 -1
  41. data/test/scout/test_persist.rb +61 -0
  42. data/test/scout/test_tmpfile.rb +1 -1
  43. data/test/scout/test_tsv.rb +10 -1
  44. data/test/scout/test_work_queue.rb +1 -0
  45. data/test/scout/tsv/persist/test_adapter.rb +10 -0
  46. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  47. data/test/scout/tsv/test_index.rb +94 -2
  48. data/test/scout/tsv/test_open.rb +9 -0
  49. data/test/scout/tsv/test_parser.rb +28 -3
  50. data/test/scout/tsv/test_persist.rb +7 -0
  51. data/test/scout/tsv/test_traverse.rb +110 -3
  52. data/test/scout/tsv/test_util.rb +23 -0
  53. data/test/scout/tsv/util/test_filter.rb +188 -0
  54. data/test/scout/tsv/util/test_process.rb +47 -0
  55. data/test/scout/tsv/util/test_select.rb +44 -0
  56. data/test/scout/work_queue/test_worker.rb +63 -6
  57. data/test/scout/workflow/step/test_load.rb +3 -3
  58. data/test/scout/workflow/test_step.rb +10 -10
  59. data/test/test_helper.rb +3 -1
  60. metadata +19 -6
@@ -1,6 +1,7 @@
1
1
  require_relative 'parser'
2
+ require_relative 'persist/fix_width_table'
2
3
  module TSV
3
- def self.index(tsv_file, target: 0, order: true, **kwargs)
4
+ def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
4
5
  persist, type = IndiferentHash.process_options kwargs,
5
6
  :persist, :persist_type,
6
7
  :persist => false, :persist_type => "HDB"
@@ -15,17 +16,15 @@ module TSV
15
16
  index = TSV.setup({}, :type => :single)
16
17
  end
17
18
 
18
- dummy_data = nil
19
+ dummy_data = TSV.setup({}, :key_field => "Key", :fields => ["Target"])
19
20
  if order
20
21
  tmp_index = {}
21
- dummy_data = Open.open(tsv_file) do |file|
22
- TSV.parse file, key_field: target, type: :double, **kwargs do |k,values|
23
- values.each_with_index do |list,i|
24
- list.each do |e|
25
- tmp_index[e] ||= []
26
- tmp_index[e][i] ||= []
27
- tmp_index[e][i] << k
28
- end
22
+ key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :double, into: dummy_data, unnamed: true, **kwargs do |k,values|
23
+ values.each_with_index do |list,i|
24
+ list.each do |e|
25
+ tmp_index[e] ||= []
26
+ tmp_index[e][i] ||= []
27
+ tmp_index[e][i] << k
29
28
  end
30
29
  end
31
30
  end
@@ -33,17 +32,130 @@ module TSV
33
32
  index[e] = list.flatten.compact.uniq.first
34
33
  end
35
34
  else
36
- dummy_data = Open.open(tsv_file) do |file|
37
- TSV.parse file, key_field: target, type: :flat, **kwargs do |k,values|
38
- values.each do |e|
39
- index[e] = k unless index.include?(e)
40
- end
35
+ key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :flat, into: dummy_data, unnamed: true, **kwargs do |k,values|
36
+ values.each do |e|
37
+ index[e] = k unless index.include?(e)
41
38
  end
42
39
  end
43
40
  end
41
+
44
42
  index.key_field = dummy_data.fields * ", "
45
43
  index.fields = [dummy_data.key_field]
46
44
  index
47
45
  end
48
46
  end
47
+
48
+ def index(*args, **kwargs, &block)
49
+ TSV.index(self, *args, **kwargs, &block)
50
+ end
51
+
52
+ def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
53
+ persist, type = IndiferentHash.process_options kwargs,
54
+ :persist, :persist_type,
55
+ :persist => false, :persist_type => :fwt
56
+ kwargs.delete :type
57
+
58
+ Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
59
+
60
+ max_key_size = 0
61
+ index_data = []
62
+ TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
63
+ key_size = key.length
64
+ max_key_size = key_size if key_size > max_key_size
65
+
66
+ start_pos, end_pos = values
67
+ if Array === start_pos
68
+ start_pos.zip(end_pos).each do |s,e|
69
+ index_data << [key, [s.to_i, e.to_i]]
70
+ end
71
+ else
72
+ index_data << [key, [start_pos.to_i, end_pos.to_i]]
73
+ end
74
+ end
75
+
76
+ filename = :memory if filename.nil?
77
+ index = FixWidthTable.get(filename, max_key_size, true)
78
+ index.add_range index_data
79
+ index.read
80
+ index
81
+ end
82
+ end
83
+
84
+ def range_index(*args, **kwargs, &block)
85
+ TSV.range_index(self, *args, **kwargs, &block)
86
+ end
87
+
88
+
89
+ #def range_index(start_field = nil, end_field = nil, options = {})
90
+ # start_field ||= "Start"
91
+ # end_field ||= "End"
92
+
93
+ # options = Misc.add_defaults options,
94
+ # :persist => false, :persist_file => nil, :persist_update => false
95
+
96
+ # persist_options = Misc.pull_keys options, :persist
97
+ # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
98
+
99
+ # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
100
+ # max_key_size = 0
101
+ # index_data = []
102
+ # with_unnamed do
103
+ # with_monitor :desc => "Creating Index Data", :step => 10000 do
104
+ # through :key, [start_field, end_field] do |key, values|
105
+ # key_size = key.length
106
+ # max_key_size = key_size if key_size > max_key_size
107
+
108
+ # start_pos, end_pos = values
109
+ # if Array === start_pos
110
+ # start_pos.zip(end_pos).each do |s,e|
111
+ # index_data << [key, [s.to_i, e.to_i]]
112
+ # end
113
+ # else
114
+ # index_data << [key, [start_pos.to_i, end_pos.to_i]]
115
+ # end
116
+ # end
117
+ # end
118
+ # end
119
+
120
+ # index = FixWidthTable.get(:memory, max_key_size, true)
121
+ # index.add_range index_data
122
+ # index.read
123
+ # index
124
+ # end
125
+ #end
126
+
127
+ #def self.range_index(file, start_field = nil, end_field = nil, options = {})
128
+ # start_field ||= "Start"
129
+ # end_field ||= "End"
130
+
131
+ # data_options = Misc.pull_keys options, :data
132
+ # filename = case
133
+ # when (String === file or Path === file)
134
+ # file
135
+ # when file.respond_to?(:filename)
136
+ # file.filename
137
+ # else
138
+ # file.object_id.to_s
139
+ # end
140
+ # persist_options = Misc.pull_keys options, :persist
141
+ # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
142
+
143
+ # filters = Misc.process_options options, :filters
144
+
145
+ # if filters
146
+ # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
147
+ # end
148
+
149
+ # Persist.persist(filename, :fwt, persist_options) do
150
+ # tsv = TSV.open(file, data_options)
151
+ # if filters
152
+ # tsv.filter
153
+ # filters.each do |match, value|
154
+ # tsv.add_filter match, value
155
+ # end
156
+ # end
157
+
158
+ # tsv.range_index(start_field, end_field, options)
159
+ # end
160
+ #end
49
161
  end
@@ -0,0 +1,128 @@
1
+ require_relative '../open'
2
+ module Open
3
+ def self.traverse_add(into, res)
4
+ case into
5
+ when TSV::Dumper
6
+ into.add *res
7
+ when TSV, Hash
8
+ key, value = res
9
+ into[key] = value
10
+ end
11
+ end
12
+
13
+ #def self.traverse(obj, into: nil, cpus: nil, bar: nil, **options, &block)
14
+ # case obj
15
+ # when TSV
16
+ # obj.traverse options[:key_field], options[:fields], **options do |k,v|
17
+ # res = yield k, v
18
+ # end
19
+ # when String
20
+ # f = Open.open(obj)
21
+ # self.traverse(f, into: into, cpus: cpus, bar: bar, **options, &block)
22
+ # when Step
23
+ # self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
24
+ # when IO
25
+ # if into && (IO === into || into.respond_to?(:stream) )
26
+ # into_thread = Thread.new do
27
+ # Thread.current.report_on_exception = false
28
+ # Thread.current["name"] = "Traverse into"
29
+ # TSV.parse obj, **options do |k,v|
30
+ # begin
31
+ # res = block.call k, v
32
+ # traverse_add into, res
33
+ # rescue
34
+ # into.abort $!
35
+ # end
36
+ # nil
37
+ # end
38
+ # into.close if into.respond_to?(:close)
39
+ # end
40
+ # Thread.pass until into_thread
41
+ # into
42
+ # else
43
+ # TSV.parse obj, **options do |k,v|
44
+ # block.call k, v
45
+ # nil
46
+ # end
47
+ # end
48
+ # end
49
+ #end
50
+
51
+ def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
52
+
53
+ if into || bar
54
+ orig_callback = callback if callback
55
+ bar = Log::ProgressBar.get_obj_bar(bar, obj)
56
+ callback = proc do |res|
57
+ bar.tick if bar
58
+ traverse_add into, res if into
59
+ orig_callback.call res if orig_callback
60
+ end
61
+
62
+ if into.respond_to?(:close)
63
+ into_thread = Thread.new do
64
+ Thread.current.report_on_exception = false
65
+ Thread.current["name"] = "Traverse into"
66
+ error = false
67
+ begin
68
+ self.traverse(obj, callback: callback, **options, &block)
69
+ into.close if into.respond_to?(:close)
70
+ bar.remove if bar
71
+ rescue Exception
72
+ into.abort($!) if into.respond_to?(:abort)
73
+ bar.remove($!) if bar
74
+ end
75
+ end
76
+ Thread.pass until into_thread
77
+ return into
78
+ end
79
+ end
80
+
81
+ begin
82
+ case obj
83
+ when TSV
84
+ obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v|
85
+ res = block.call(k, v)
86
+ callback.call res if callback
87
+ nil
88
+ end
89
+ when Array
90
+ obj.each do |line|
91
+ res = block.call(line)
92
+ callback.call res if callback
93
+ nil
94
+ end
95
+ when String
96
+ f = Open.open(obj)
97
+ self.traverse(f, cpus: cpus, callback: callback, **options, &block)
98
+ when Step
99
+ raise obj.exception if obj.error?
100
+ self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
101
+ when IO
102
+ TSV.parse obj, **options do |k,v|
103
+ res = block.call k, v
104
+ callback.call res if callback
105
+ nil
106
+ end
107
+ else
108
+ TSV.parse obj, **options do |k,v|
109
+ res = block.call k, v
110
+ callback.call res if callback
111
+ nil
112
+ end
113
+ end
114
+ bar.remove if bar
115
+ rescue
116
+ bar.abort($!) if bar
117
+ raise $!
118
+ end
119
+
120
+ into
121
+ end
122
+ end
123
+
124
+ module TSV
125
+ def self.traverse(*args, **kwargs, &block)
126
+ Open.traverse(*args, **kwargs, &block)
127
+ end
128
+ end
@@ -4,7 +4,11 @@ module TSV
4
4
  if Array === value
5
5
  value.collect{|e| cast_value(e, cast) }
6
6
  else
7
- value.send(cast)
7
+ if Proc === cast
8
+ cast.call value
9
+ else
10
+ value.send(cast)
11
+ end
8
12
  end
9
13
  end
10
14
 
@@ -75,40 +79,41 @@ module TSV
75
79
  these_items = items
76
80
  end
77
81
 
78
- these_items = case [source_type, type]
79
- when [:single, :single]
80
- these_items
81
- when [:list, :single]
82
- these_items.first
83
- when [:flat, :single]
84
- these_items.first
85
- when [:double, :single]
86
- these_items.first.first
87
- when [:single, :list]
88
- [these_items]
89
- when [:list, :list]
90
- these_items
91
- when [:flat, :list]
92
- these_items
93
- when [:double, :list]
94
- these_items.collect{|l| l.first }
95
- when [:single, :flat]
96
- [these_items]
97
- when [:list, :flat]
98
- these_items
99
- when [:flat, :flat]
100
- these_items
101
- when [:double, :flat]
102
- these_items.flatten
103
- when [:single, :double]
104
- [[these_items]]
105
- when [:list, :double]
106
- these_items.collect{|l| [l] }
107
- when [:flat, :double]
108
- [these_items]
109
- when [:double, :double]
110
- these_items
111
- end
82
+ these_items =
83
+ case [source_type, type]
84
+ when [:single, :single]
85
+ these_items
86
+ when [:list, :single]
87
+ these_items.first
88
+ when [:flat, :single]
89
+ these_items.first
90
+ when [:double, :single]
91
+ these_items.first.first
92
+ when [:single, :list]
93
+ [these_items]
94
+ when [:list, :list]
95
+ these_items
96
+ when [:flat, :list]
97
+ these_items
98
+ when [:double, :list]
99
+ these_items.collect{|l| l.first }
100
+ when [:single, :flat]
101
+ [these_items]
102
+ when [:list, :flat]
103
+ these_items
104
+ when [:flat, :flat]
105
+ these_items
106
+ when [:double, :flat]
107
+ these_items.flatten
108
+ when [:single, :double]
109
+ [[these_items]]
110
+ when [:list, :double]
111
+ these_items.collect{|l| [l] }
112
+ when [:flat, :double]
113
+ [these_items]
114
+ when [:double, :double]
115
+ these_items
116
+ end
112
117
 
113
118
  if block_given?
114
119
  res = block.call(key, these_items)
@@ -227,8 +232,8 @@ module TSV
227
232
  if key_field
228
233
  all_field_names ||= [@key_field] + @fields
229
234
  key = NamedArray.identify_name(all_field_names, key_field)
230
- kwargs[:key] = key
231
- key_field_name = all_field_names[key]
235
+ kwargs[:key] = key == :key ? 0 : key
236
+ key_field_name = key === :key ? @key_field : all_field_names[key]
232
237
  if fields.nil?
233
238
  field_names = all_field_names - [@key_field]
234
239
  end
@@ -247,21 +252,42 @@ module TSV
247
252
 
248
253
  data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
249
254
 
250
- TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type) if data
251
-
252
- data || self
255
+ if data
256
+ TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
257
+ else
258
+ self
259
+ end
253
260
  end
254
261
 
255
262
  end
256
263
 
257
- def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, **kwargs, &block)
264
+ def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
258
265
  parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
259
- kwargs = parser.options.merge(kwargs)
260
266
 
261
- type = kwargs[:type] ||= :double
267
+ cast = parser.options[:cast] || kwargs[:cast]
268
+ type = kwargs[:type] ||= parser.options[:type] ||= :double
262
269
  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
263
270
  TSV.setup(data, type: type)
264
271
  data.extend TSVAdapter
272
+ if serializer
273
+ data.serializer = serializer
274
+ elsif cast
275
+ data.serializer =
276
+ case [cast, type]
277
+ when [:to_i, :single]
278
+ :integer
279
+ when [:to_i, :list], [:to_i, :flat]
280
+ :integer_array
281
+ when [:to_f, :single]
282
+ :float
283
+ when [:to_f, :list], [:to_f, :flat]
284
+ :float_array
285
+ else
286
+ type
287
+ end
288
+ else
289
+ data.serializer = type
290
+ end
265
291
  end
266
292
 
267
293
  kwargs[:data] = {} if kwargs[:data].nil?
@@ -270,6 +296,7 @@ module TSV
270
296
  data.type = type
271
297
  data.filename = filename
272
298
  data.namespace = namespace
299
+ data.unnamed = unnamed
273
300
  data
274
301
  end
275
302
 
@@ -1,13 +1,13 @@
1
1
  module Path
2
- def tsv(...)
2
+ def tsv(*args, **kwargs, &block)
3
3
  found = self.find
4
4
  found = self.set_extension('tsv').find unless found.exists?
5
- TSV.open(found, ...)
5
+ TSV.open(found, *args, **kwargs, &block)
6
6
  end
7
7
 
8
- def index(...)
8
+ def index(*args, **kwargs, &block)
9
9
  found = self.find
10
10
  found = self.set_extension('tsv').find unless found.exists?
11
- TSV.index(found, ...)
11
+ TSV.index(found, *args, **kwargs, &block)
12
12
  end
13
13
  end
@@ -1,7 +1,8 @@
1
1
  require_relative '../../open/lock'
2
+ require_relative 'serialize'
2
3
 
3
4
  module TSVAdapter
4
- attr_accessor :persistence_path, :persistence_class, :closed, :writable
5
+ attr_accessor :persistence_path, :persistence_class, :serializer, :closed, :writable
5
6
 
6
7
  class << self
7
8
  attr_accessor :lock_dir
@@ -13,8 +14,12 @@ module TSVAdapter
13
14
  EXTENSION_ATTR_HASH_KEY = "__extension_attr_hash__"
14
15
  EXTENSION_ATTR_HASH_SERIALIZER = Marshal
15
16
 
17
+ def serializer=(serializer)
18
+ @serializer = Symbol === serializer ? SERIALIZER_ALIAS[serializer] : serializer
19
+ end
20
+
16
21
  def load_extension_attr_hash
17
- EXTENSION_ATTR_HASH_SERIALIZER.load(self[EXTENSION_ATTR_HASH_KEY])
22
+ EXTENSION_ATTR_HASH_SERIALIZER.load(StringIO.new(self[EXTENSION_ATTR_HASH_KEY]))
18
23
  end
19
24
 
20
25
  def save_extension_attr_hash
@@ -22,12 +27,16 @@ module TSVAdapter
22
27
  end
23
28
 
24
29
  def self.extended(base)
25
- if base.include?(EXTENSION_ATTR_HASH_KEY)
26
- TSV.setup(base, base.load_extension_attr_hash)
27
- elsif TSV === base
28
- base[EXTENSION_ATTR_HASH_KEY] = EXTENSION_ATTR_HASH_SERIALIZER.dump(base.extension_attr_hash)
30
+ if ! TSVAdapter === base
31
+ if (! TSVAdapter === base) && base.include?(EXTENSION_ATTR_HASH_KEY)
32
+ TSV.setup(base, base.load_extension_attr_hash)
33
+ elsif TSV === base
34
+ base[EXTENSION_ATTR_HASH_KEY] = EXTENSION_ATTR_HASH_SERIALIZER.dump(base.extension_attr_hash)
35
+ end
29
36
  end
30
37
 
38
+ base.serializer = SERIALIZER_ALIAS[base.type]
39
+
31
40
  class << base
32
41
  alias orig_set []=
33
42
  alias orig_get []
@@ -43,37 +52,47 @@ module TSVAdapter
43
52
  super(key, save_value(value))
44
53
  end
45
54
  end
46
- end
47
55
 
48
- case base.type
49
- when :single
50
- class << base
51
- def load_value(value)
52
- value
53
- end
54
- def save_value(value)
55
- value
56
- end
57
- end
58
- when :list, :flat
59
- class << base
60
- def load_value(value)
61
- value.nil? ? nil : value.split("\t")
62
- end
63
- def save_value(value)
64
- value * "\t"
65
- end
56
+ def load_value(str)
57
+ serializer.load(str)
66
58
  end
67
- when :double
68
- class << base
69
- def load_value(value)
70
- value.nil? ? nil : value.split("\t").collect{|v| v.split("|") }
71
- end
72
- def save_value(value)
73
- value.collect{|v| v * "|" } * "\t"
74
- end
59
+
60
+ def save_value(value)
61
+ serializer.dump(value)
75
62
  end
63
+
64
+
76
65
  end
66
+
67
+ #case base.type
68
+ #when :single
69
+ # class << base
70
+ # def load_value(value)
71
+ # value
72
+ # end
73
+ # def save_value(value)
74
+ # value
75
+ # end
76
+ # end
77
+ #when :list, :flat
78
+ # class << base
79
+ # def load_value(value)
80
+ # value.nil? ? nil : value.split("\t")
81
+ # end
82
+ # def save_value(value)
83
+ # value * "\t"
84
+ # end
85
+ # end
86
+ #when :double
87
+ # class << base
88
+ # def load_value(value)
89
+ # value.nil? ? nil : value.split("\t").collect{|v| v.split("|") }
90
+ # end
91
+ # def save_value(value)
92
+ # value.collect{|v| v * "|" } * "\t"
93
+ # end
94
+ # end
95
+ #end
77
96
  end
78
97
 
79
98
  def keys(*args)