scout-gear 7.2.0 → 7.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +37 -3
  3. data/VERSION +1 -1
  4. data/lib/scout/concurrent_stream.rb +9 -8
  5. data/lib/scout/exceptions.rb +1 -0
  6. data/lib/scout/log/color.rb +0 -1
  7. data/lib/scout/log/progress/util.rb +65 -0
  8. data/lib/scout/misc/helper.rb +31 -0
  9. data/lib/scout/misc/monitor.rb +1 -1
  10. data/lib/scout/misc.rb +1 -0
  11. data/lib/scout/open/stream.rb +21 -27
  12. data/lib/scout/persist.rb +42 -28
  13. data/lib/scout/semaphore.rb +8 -1
  14. data/lib/scout/tsv/dumper.rb +13 -8
  15. data/lib/scout/tsv/index.rb +127 -15
  16. data/lib/scout/tsv/open.rb +128 -0
  17. data/lib/scout/tsv/parser.rb +70 -43
  18. data/lib/scout/tsv/path.rb +4 -4
  19. data/lib/scout/tsv/persist/adapter.rb +52 -33
  20. data/lib/scout/tsv/persist/fix_width_table.rb +324 -0
  21. data/lib/scout/tsv/persist/serialize.rb +117 -0
  22. data/lib/scout/tsv/persist/tokyocabinet.rb +3 -3
  23. data/lib/scout/tsv/persist.rb +0 -2
  24. data/lib/scout/tsv/traverse.rb +130 -35
  25. data/lib/scout/tsv/util/filter.rb +303 -0
  26. data/lib/scout/tsv/util/process.rb +73 -0
  27. data/lib/scout/tsv/util/select.rb +220 -0
  28. data/lib/scout/tsv/util.rb +77 -19
  29. data/lib/scout/tsv.rb +2 -2
  30. data/lib/scout/work_queue/worker.rb +1 -1
  31. data/lib/scout/workflow/definition.rb +8 -0
  32. data/lib/scout/workflow/step/info.rb +4 -0
  33. data/lib/scout/workflow/step/progress.rb +14 -0
  34. data/lib/scout/workflow/step.rb +10 -5
  35. data/lib/scout/workflow/task.rb +8 -4
  36. data/lib/scout/workflow/usage.rb +2 -0
  37. data/scout-gear.gemspec +33 -10
  38. data/scout_commands/workflow/task +3 -2
  39. data/scout_commands/workflow/task_old +2 -2
  40. data/test/scout/open/test_stream.rb +1 -1
  41. data/test/scout/test_persist.rb +61 -0
  42. data/test/scout/test_tmpfile.rb +1 -1
  43. data/test/scout/test_tsv.rb +10 -1
  44. data/test/scout/test_work_queue.rb +1 -0
  45. data/test/scout/tsv/persist/test_adapter.rb +10 -0
  46. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  47. data/test/scout/tsv/test_index.rb +94 -2
  48. data/test/scout/tsv/test_open.rb +9 -0
  49. data/test/scout/tsv/test_parser.rb +28 -3
  50. data/test/scout/tsv/test_persist.rb +7 -0
  51. data/test/scout/tsv/test_traverse.rb +110 -3
  52. data/test/scout/tsv/test_util.rb +23 -0
  53. data/test/scout/tsv/util/test_filter.rb +188 -0
  54. data/test/scout/tsv/util/test_process.rb +47 -0
  55. data/test/scout/tsv/util/test_select.rb +44 -0
  56. data/test/scout/work_queue/test_worker.rb +63 -6
  57. data/test/scout/workflow/step/test_load.rb +3 -3
  58. data/test/scout/workflow/test_step.rb +10 -10
  59. data/test/test_helper.rb +3 -1
  60. metadata +19 -6
@@ -1,6 +1,7 @@
1
1
  require_relative 'parser'
2
+ require_relative 'persist/fix_width_table'
2
3
  module TSV
3
- def self.index(tsv_file, target: 0, order: true, **kwargs)
4
+ def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
4
5
  persist, type = IndiferentHash.process_options kwargs,
5
6
  :persist, :persist_type,
6
7
  :persist => false, :persist_type => "HDB"
@@ -15,17 +16,15 @@ module TSV
15
16
  index = TSV.setup({}, :type => :single)
16
17
  end
17
18
 
18
- dummy_data = nil
19
+ dummy_data = TSV.setup({}, :key_field => "Key", :fields => ["Target"])
19
20
  if order
20
21
  tmp_index = {}
21
- dummy_data = Open.open(tsv_file) do |file|
22
- TSV.parse file, key_field: target, type: :double, **kwargs do |k,values|
23
- values.each_with_index do |list,i|
24
- list.each do |e|
25
- tmp_index[e] ||= []
26
- tmp_index[e][i] ||= []
27
- tmp_index[e][i] << k
28
- end
22
+ key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :double, into: dummy_data, unnamed: true, **kwargs do |k,values|
23
+ values.each_with_index do |list,i|
24
+ list.each do |e|
25
+ tmp_index[e] ||= []
26
+ tmp_index[e][i] ||= []
27
+ tmp_index[e][i] << k
29
28
  end
30
29
  end
31
30
  end
@@ -33,17 +32,130 @@ module TSV
33
32
  index[e] = list.flatten.compact.uniq.first
34
33
  end
35
34
  else
36
- dummy_data = Open.open(tsv_file) do |file|
37
- TSV.parse file, key_field: target, type: :flat, **kwargs do |k,values|
38
- values.each do |e|
39
- index[e] = k unless index.include?(e)
40
- end
35
+ key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :flat, into: dummy_data, unnamed: true, **kwargs do |k,values|
36
+ values.each do |e|
37
+ index[e] = k unless index.include?(e)
41
38
  end
42
39
  end
43
40
  end
41
+
44
42
  index.key_field = dummy_data.fields * ", "
45
43
  index.fields = [dummy_data.key_field]
46
44
  index
47
45
  end
48
46
  end
47
+
48
+ def index(*args, **kwargs, &block)
49
+ TSV.index(self, *args, **kwargs, &block)
50
+ end
51
+
52
+ def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
53
+ persist, type = IndiferentHash.process_options kwargs,
54
+ :persist, :persist_type,
55
+ :persist => false, :persist_type => :fwt
56
+ kwargs.delete :type
57
+
58
+ Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
59
+
60
+ max_key_size = 0
61
+ index_data = []
62
+ TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
63
+ key_size = key.length
64
+ max_key_size = key_size if key_size > max_key_size
65
+
66
+ start_pos, end_pos = values
67
+ if Array === start_pos
68
+ start_pos.zip(end_pos).each do |s,e|
69
+ index_data << [key, [s.to_i, e.to_i]]
70
+ end
71
+ else
72
+ index_data << [key, [start_pos.to_i, end_pos.to_i]]
73
+ end
74
+ end
75
+
76
+ filename = :memory if filename.nil?
77
+ index = FixWidthTable.get(filename, max_key_size, true)
78
+ index.add_range index_data
79
+ index.read
80
+ index
81
+ end
82
+ end
83
+
84
+ def range_index(*args, **kwargs, &block)
85
+ TSV.range_index(self, *args, **kwargs, &block)
86
+ end
87
+
88
+
89
+ #def range_index(start_field = nil, end_field = nil, options = {})
90
+ # start_field ||= "Start"
91
+ # end_field ||= "End"
92
+
93
+ # options = Misc.add_defaults options,
94
+ # :persist => false, :persist_file => nil, :persist_update => false
95
+
96
+ # persist_options = Misc.pull_keys options, :persist
97
+ # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
98
+
99
+ # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
100
+ # max_key_size = 0
101
+ # index_data = []
102
+ # with_unnamed do
103
+ # with_monitor :desc => "Creating Index Data", :step => 10000 do
104
+ # through :key, [start_field, end_field] do |key, values|
105
+ # key_size = key.length
106
+ # max_key_size = key_size if key_size > max_key_size
107
+
108
+ # start_pos, end_pos = values
109
+ # if Array === start_pos
110
+ # start_pos.zip(end_pos).each do |s,e|
111
+ # index_data << [key, [s.to_i, e.to_i]]
112
+ # end
113
+ # else
114
+ # index_data << [key, [start_pos.to_i, end_pos.to_i]]
115
+ # end
116
+ # end
117
+ # end
118
+ # end
119
+
120
+ # index = FixWidthTable.get(:memory, max_key_size, true)
121
+ # index.add_range index_data
122
+ # index.read
123
+ # index
124
+ # end
125
+ #end
126
+
127
+ #def self.range_index(file, start_field = nil, end_field = nil, options = {})
128
+ # start_field ||= "Start"
129
+ # end_field ||= "End"
130
+
131
+ # data_options = Misc.pull_keys options, :data
132
+ # filename = case
133
+ # when (String === file or Path === file)
134
+ # file
135
+ # when file.respond_to?(:filename)
136
+ # file.filename
137
+ # else
138
+ # file.object_id.to_s
139
+ # end
140
+ # persist_options = Misc.pull_keys options, :persist
141
+ # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
142
+
143
+ # filters = Misc.process_options options, :filters
144
+
145
+ # if filters
146
+ # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
147
+ # end
148
+
149
+ # Persist.persist(filename, :fwt, persist_options) do
150
+ # tsv = TSV.open(file, data_options)
151
+ # if filters
152
+ # tsv.filter
153
+ # filters.each do |match, value|
154
+ # tsv.add_filter match, value
155
+ # end
156
+ # end
157
+
158
+ # tsv.range_index(start_field, end_field, options)
159
+ # end
160
+ #end
49
161
  end
@@ -0,0 +1,128 @@
1
+ require_relative '../open'
2
+ module Open
3
+ def self.traverse_add(into, res)
4
+ case into
5
+ when TSV::Dumper
6
+ into.add *res
7
+ when TSV, Hash
8
+ key, value = res
9
+ into[key] = value
10
+ end
11
+ end
12
+
13
+ #def self.traverse(obj, into: nil, cpus: nil, bar: nil, **options, &block)
14
+ # case obj
15
+ # when TSV
16
+ # obj.traverse options[:key_field], options[:fields], **options do |k,v|
17
+ # res = yield k, v
18
+ # end
19
+ # when String
20
+ # f = Open.open(obj)
21
+ # self.traverse(f, into: into, cpus: cpus, bar: bar, **options, &block)
22
+ # when Step
23
+ # self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
24
+ # when IO
25
+ # if into && (IO === into || into.respond_to?(:stream) )
26
+ # into_thread = Thread.new do
27
+ # Thread.current.report_on_exception = false
28
+ # Thread.current["name"] = "Traverse into"
29
+ # TSV.parse obj, **options do |k,v|
30
+ # begin
31
+ # res = block.call k, v
32
+ # traverse_add into, res
33
+ # rescue
34
+ # into.abort $!
35
+ # end
36
+ # nil
37
+ # end
38
+ # into.close if into.respond_to?(:close)
39
+ # end
40
+ # Thread.pass until into_thread
41
+ # into
42
+ # else
43
+ # TSV.parse obj, **options do |k,v|
44
+ # block.call k, v
45
+ # nil
46
+ # end
47
+ # end
48
+ # end
49
+ #end
50
+
51
+ def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
52
+
53
+ if into || bar
54
+ orig_callback = callback if callback
55
+ bar = Log::ProgressBar.get_obj_bar(bar, obj)
56
+ callback = proc do |res|
57
+ bar.tick if bar
58
+ traverse_add into, res if into
59
+ orig_callback.call res if orig_callback
60
+ end
61
+
62
+ if into.respond_to?(:close)
63
+ into_thread = Thread.new do
64
+ Thread.current.report_on_exception = false
65
+ Thread.current["name"] = "Traverse into"
66
+ error = false
67
+ begin
68
+ self.traverse(obj, callback: callback, **options, &block)
69
+ into.close if into.respond_to?(:close)
70
+ bar.remove if bar
71
+ rescue Exception
72
+ into.abort($!) if into.respond_to?(:abort)
73
+ bar.remove($!) if bar
74
+ end
75
+ end
76
+ Thread.pass until into_thread
77
+ return into
78
+ end
79
+ end
80
+
81
+ begin
82
+ case obj
83
+ when TSV
84
+ obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v|
85
+ res = block.call(k, v)
86
+ callback.call res if callback
87
+ nil
88
+ end
89
+ when Array
90
+ obj.each do |line|
91
+ res = block.call(line)
92
+ callback.call res if callback
93
+ nil
94
+ end
95
+ when String
96
+ f = Open.open(obj)
97
+ self.traverse(f, cpus: cpus, callback: callback, **options, &block)
98
+ when Step
99
+ raise obj.exception if obj.error?
100
+ self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
101
+ when IO
102
+ TSV.parse obj, **options do |k,v|
103
+ res = block.call k, v
104
+ callback.call res if callback
105
+ nil
106
+ end
107
+ else
108
+ TSV.parse obj, **options do |k,v|
109
+ res = block.call k, v
110
+ callback.call res if callback
111
+ nil
112
+ end
113
+ end
114
+ bar.remove if bar
115
+ rescue
116
+ bar.abort($!) if bar
117
+ raise $!
118
+ end
119
+
120
+ into
121
+ end
122
+ end
123
+
124
+ module TSV
125
+ def self.traverse(*args, **kwargs, &block)
126
+ Open.traverse(*args, **kwargs, &block)
127
+ end
128
+ end
@@ -4,7 +4,11 @@ module TSV
4
4
  if Array === value
5
5
  value.collect{|e| cast_value(e, cast) }
6
6
  else
7
- value.send(cast)
7
+ if Proc === cast
8
+ cast.call value
9
+ else
10
+ value.send(cast)
11
+ end
8
12
  end
9
13
  end
10
14
 
@@ -75,40 +79,41 @@ module TSV
75
79
  these_items = items
76
80
  end
77
81
 
78
- these_items = case [source_type, type]
79
- when [:single, :single]
80
- these_items
81
- when [:list, :single]
82
- these_items.first
83
- when [:flat, :single]
84
- these_items.first
85
- when [:double, :single]
86
- these_items.first.first
87
- when [:single, :list]
88
- [these_items]
89
- when [:list, :list]
90
- these_items
91
- when [:flat, :list]
92
- these_items
93
- when [:double, :list]
94
- these_items.collect{|l| l.first }
95
- when [:single, :flat]
96
- [these_items]
97
- when [:list, :flat]
98
- these_items
99
- when [:flat, :flat]
100
- these_items
101
- when [:double, :flat]
102
- these_items.flatten
103
- when [:single, :double]
104
- [[these_items]]
105
- when [:list, :double]
106
- these_items.collect{|l| [l] }
107
- when [:flat, :double]
108
- [these_items]
109
- when [:double, :double]
110
- these_items
111
- end
82
+ these_items =
83
+ case [source_type, type]
84
+ when [:single, :single]
85
+ these_items
86
+ when [:list, :single]
87
+ these_items.first
88
+ when [:flat, :single]
89
+ these_items.first
90
+ when [:double, :single]
91
+ these_items.first.first
92
+ when [:single, :list]
93
+ [these_items]
94
+ when [:list, :list]
95
+ these_items
96
+ when [:flat, :list]
97
+ these_items
98
+ when [:double, :list]
99
+ these_items.collect{|l| l.first }
100
+ when [:single, :flat]
101
+ [these_items]
102
+ when [:list, :flat]
103
+ these_items
104
+ when [:flat, :flat]
105
+ these_items
106
+ when [:double, :flat]
107
+ these_items.flatten
108
+ when [:single, :double]
109
+ [[these_items]]
110
+ when [:list, :double]
111
+ these_items.collect{|l| [l] }
112
+ when [:flat, :double]
113
+ [these_items]
114
+ when [:double, :double]
115
+ these_items
116
+ end
112
117
 
113
118
  if block_given?
114
119
  res = block.call(key, these_items)
@@ -227,8 +232,8 @@ module TSV
227
232
  if key_field
228
233
  all_field_names ||= [@key_field] + @fields
229
234
  key = NamedArray.identify_name(all_field_names, key_field)
230
- kwargs[:key] = key
231
- key_field_name = all_field_names[key]
235
+ kwargs[:key] = key == :key ? 0 : key
236
+ key_field_name = key === :key ? @key_field : all_field_names[key]
232
237
  if fields.nil?
233
238
  field_names = all_field_names - [@key_field]
234
239
  end
@@ -247,21 +252,42 @@ module TSV
247
252
 
248
253
  data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
249
254
 
250
- TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type) if data
251
-
252
- data || self
255
+ if data
256
+ TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
257
+ else
258
+ self
259
+ end
253
260
  end
254
261
 
255
262
  end
256
263
 
257
- def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, **kwargs, &block)
264
+ def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
258
265
  parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
259
- kwargs = parser.options.merge(kwargs)
260
266
 
261
- type = kwargs[:type] ||= :double
267
+ cast = parser.options[:cast] || kwargs[:cast]
268
+ type = kwargs[:type] ||= parser.options[:type] ||= :double
262
269
  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
263
270
  TSV.setup(data, type: type)
264
271
  data.extend TSVAdapter
272
+ if serializer
273
+ data.serializer = serializer
274
+ elsif cast
275
+ data.serializer =
276
+ case [cast, type]
277
+ when [:to_i, :single]
278
+ :integer
279
+ when [:to_i, :list], [:to_i, :flat]
280
+ :integer_array
281
+ when [:to_f, :single]
282
+ :float
283
+ when [:to_f, :list], [:to_f, :flat]
284
+ :float_array
285
+ else
286
+ type
287
+ end
288
+ else
289
+ data.serializer = type
290
+ end
265
291
  end
266
292
 
267
293
  kwargs[:data] = {} if kwargs[:data].nil?
@@ -270,6 +296,7 @@ module TSV
270
296
  data.type = type
271
297
  data.filename = filename
272
298
  data.namespace = namespace
299
+ data.unnamed = unnamed
273
300
  data
274
301
  end
275
302
 
@@ -1,13 +1,13 @@
1
1
  module Path
2
- def tsv(...)
2
+ def tsv(*args, **kwargs, &block)
3
3
  found = self.find
4
4
  found = self.set_extension('tsv').find unless found.exists?
5
- TSV.open(found, ...)
5
+ TSV.open(found, *args, **kwargs, &block)
6
6
  end
7
7
 
8
- def index(...)
8
+ def index(*args, **kwargs, &block)
9
9
  found = self.find
10
10
  found = self.set_extension('tsv').find unless found.exists?
11
- TSV.index(found, ...)
11
+ TSV.index(found, *args, **kwargs, &block)
12
12
  end
13
13
  end
@@ -1,7 +1,8 @@
1
1
  require_relative '../../open/lock'
2
+ require_relative 'serialize'
2
3
 
3
4
  module TSVAdapter
4
- attr_accessor :persistence_path, :persistence_class, :closed, :writable
5
+ attr_accessor :persistence_path, :persistence_class, :serializer, :closed, :writable
5
6
 
6
7
  class << self
7
8
  attr_accessor :lock_dir
@@ -13,8 +14,12 @@ module TSVAdapter
13
14
  EXTENSION_ATTR_HASH_KEY = "__extension_attr_hash__"
14
15
  EXTENSION_ATTR_HASH_SERIALIZER = Marshal
15
16
 
17
+ def serializer=(serializer)
18
+ @serializer = Symbol === serializer ? SERIALIZER_ALIAS[serializer] : serializer
19
+ end
20
+
16
21
  def load_extension_attr_hash
17
- EXTENSION_ATTR_HASH_SERIALIZER.load(self[EXTENSION_ATTR_HASH_KEY])
22
+ EXTENSION_ATTR_HASH_SERIALIZER.load(StringIO.new(self[EXTENSION_ATTR_HASH_KEY]))
18
23
  end
19
24
 
20
25
  def save_extension_attr_hash
@@ -22,12 +27,16 @@ module TSVAdapter
22
27
  end
23
28
 
24
29
  def self.extended(base)
25
- if base.include?(EXTENSION_ATTR_HASH_KEY)
26
- TSV.setup(base, base.load_extension_attr_hash)
27
- elsif TSV === base
28
- base[EXTENSION_ATTR_HASH_KEY] = EXTENSION_ATTR_HASH_SERIALIZER.dump(base.extension_attr_hash)
30
+ if ! TSVAdapter === base
31
+ if (! TSVAdapter === base) && base.include?(EXTENSION_ATTR_HASH_KEY)
32
+ TSV.setup(base, base.load_extension_attr_hash)
33
+ elsif TSV === base
34
+ base[EXTENSION_ATTR_HASH_KEY] = EXTENSION_ATTR_HASH_SERIALIZER.dump(base.extension_attr_hash)
35
+ end
29
36
  end
30
37
 
38
+ base.serializer = SERIALIZER_ALIAS[base.type]
39
+
31
40
  class << base
32
41
  alias orig_set []=
33
42
  alias orig_get []
@@ -43,37 +52,47 @@ module TSVAdapter
43
52
  super(key, save_value(value))
44
53
  end
45
54
  end
46
- end
47
55
 
48
- case base.type
49
- when :single
50
- class << base
51
- def load_value(value)
52
- value
53
- end
54
- def save_value(value)
55
- value
56
- end
57
- end
58
- when :list, :flat
59
- class << base
60
- def load_value(value)
61
- value.nil? ? nil : value.split("\t")
62
- end
63
- def save_value(value)
64
- value * "\t"
65
- end
56
+ def load_value(str)
57
+ serializer.load(str)
66
58
  end
67
- when :double
68
- class << base
69
- def load_value(value)
70
- value.nil? ? nil : value.split("\t").collect{|v| v.split("|") }
71
- end
72
- def save_value(value)
73
- value.collect{|v| v * "|" } * "\t"
74
- end
59
+
60
+ def save_value(value)
61
+ serializer.dump(value)
75
62
  end
63
+
64
+
76
65
  end
66
+
67
+ #case base.type
68
+ #when :single
69
+ # class << base
70
+ # def load_value(value)
71
+ # value
72
+ # end
73
+ # def save_value(value)
74
+ # value
75
+ # end
76
+ # end
77
+ #when :list, :flat
78
+ # class << base
79
+ # def load_value(value)
80
+ # value.nil? ? nil : value.split("\t")
81
+ # end
82
+ # def save_value(value)
83
+ # value * "\t"
84
+ # end
85
+ # end
86
+ #when :double
87
+ # class << base
88
+ # def load_value(value)
89
+ # value.nil? ? nil : value.split("\t").collect{|v| v.split("|") }
90
+ # end
91
+ # def save_value(value)
92
+ # value.collect{|v| v * "|" } * "\t"
93
+ # end
94
+ # end
95
+ #end
77
96
  end
78
97
 
79
98
  def keys(*args)