scout-gear 10.4.0 → 10.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +100 -656
  3. data/Rakefile +1 -0
  4. data/VERSION +1 -1
  5. data/bin/scout +1 -3
  6. data/lib/scout/association/fields.rb +170 -0
  7. data/lib/scout/association/index.rb +229 -0
  8. data/lib/scout/association/item.rb +227 -0
  9. data/lib/scout/association/util.rb +7 -0
  10. data/lib/scout/association.rb +100 -0
  11. data/lib/scout/entity/format.rb +62 -0
  12. data/lib/scout/entity/identifiers.rb +111 -0
  13. data/lib/scout/entity/object.rb +20 -0
  14. data/lib/scout/entity/property.rb +165 -0
  15. data/lib/scout/entity.rb +40 -0
  16. data/lib/scout/offsite/step.rb +2 -2
  17. data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
  18. data/lib/scout/persist/engine/packed_index.rb +100 -0
  19. data/lib/scout/persist/engine/sharder.rb +219 -0
  20. data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
  21. data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
  22. data/lib/scout/persist/engine.rb +4 -0
  23. data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
  24. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
  25. data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
  26. data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
  27. data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
  28. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
  29. data/lib/scout/persist/tsv/adapter.rb +6 -0
  30. data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
  31. data/lib/scout/persist/tsv.rb +107 -0
  32. data/lib/scout/tsv/annotation/repo.rb +83 -0
  33. data/lib/scout/tsv/annotation.rb +169 -0
  34. data/lib/scout/tsv/attach.rb +95 -19
  35. data/lib/scout/tsv/change_id/translate.rb +148 -0
  36. data/lib/scout/tsv/change_id.rb +3 -0
  37. data/lib/scout/tsv/csv.rb +85 -0
  38. data/lib/scout/tsv/dumper.rb +113 -25
  39. data/lib/scout/tsv/entity.rb +5 -0
  40. data/lib/scout/tsv/index.rb +88 -36
  41. data/lib/scout/tsv/open.rb +21 -8
  42. data/lib/scout/tsv/parser.rb +153 -90
  43. data/lib/scout/tsv/path.rb +7 -2
  44. data/lib/scout/tsv/stream.rb +48 -6
  45. data/lib/scout/tsv/transformer.rb +4 -3
  46. data/lib/scout/tsv/traverse.rb +26 -18
  47. data/lib/scout/tsv/util/process.rb +7 -0
  48. data/lib/scout/tsv/util/reorder.rb +25 -15
  49. data/lib/scout/tsv/util/select.rb +9 -1
  50. data/lib/scout/tsv/util/sort.rb +90 -2
  51. data/lib/scout/tsv/util/unzip.rb +56 -0
  52. data/lib/scout/tsv/util.rb +52 -5
  53. data/lib/scout/tsv.rb +45 -27
  54. data/lib/scout/work_queue/socket.rb +8 -0
  55. data/lib/scout/work_queue/worker.rb +22 -5
  56. data/lib/scout/work_queue.rb +38 -24
  57. data/lib/scout/workflow/definition.rb +11 -10
  58. data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
  59. data/lib/scout/workflow/deployment/trace.rb +205 -0
  60. data/lib/scout/workflow/deployment.rb +1 -0
  61. data/lib/scout/workflow/documentation.rb +1 -1
  62. data/lib/scout/workflow/step/archive.rb +42 -0
  63. data/lib/scout/workflow/step/children.rb +51 -0
  64. data/lib/scout/workflow/step/config.rb +1 -1
  65. data/lib/scout/workflow/step/dependencies.rb +24 -7
  66. data/lib/scout/workflow/step/file.rb +19 -0
  67. data/lib/scout/workflow/step/info.rb +37 -9
  68. data/lib/scout/workflow/step/progress.rb +11 -2
  69. data/lib/scout/workflow/step/status.rb +8 -1
  70. data/lib/scout/workflow/step.rb +80 -25
  71. data/lib/scout/workflow/task/dependencies.rb +4 -1
  72. data/lib/scout/workflow/task/inputs.rb +91 -41
  73. data/lib/scout/workflow/task.rb +54 -57
  74. data/lib/scout/workflow/usage.rb +1 -1
  75. data/lib/scout/workflow/util.rb +4 -0
  76. data/lib/scout/workflow.rb +110 -13
  77. data/lib/scout-gear.rb +2 -0
  78. data/lib/scout.rb +0 -1
  79. data/scout-gear.gemspec +80 -23
  80. data/scout_commands/rbbt +2 -0
  81. data/test/data/person/brothers +4 -0
  82. data/test/data/person/identifiers +10 -0
  83. data/test/data/person/marriages +3 -0
  84. data/test/data/person/parents +6 -0
  85. data/test/scout/association/test_fields.rb +105 -0
  86. data/test/scout/association/test_index.rb +70 -0
  87. data/test/scout/association/test_item.rb +21 -0
  88. data/test/scout/entity/test_format.rb +19 -0
  89. data/test/scout/entity/test_identifiers.rb +58 -0
  90. data/test/scout/entity/test_object.rb +0 -0
  91. data/test/scout/entity/test_property.rb +345 -0
  92. data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
  93. data/test/scout/persist/engine/test_packed_index.rb +99 -0
  94. data/test/scout/persist/engine/test_sharder.rb +31 -0
  95. data/test/scout/persist/engine/test_tkrzw.rb +0 -0
  96. data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
  97. data/test/scout/persist/test_tsv.rb +146 -0
  98. data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
  99. data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
  100. data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
  101. data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
  102. data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
  103. data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
  104. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
  105. data/test/scout/persist/tsv/test_serialize.rb +12 -0
  106. data/test/scout/test_association.rb +51 -0
  107. data/test/scout/test_entity.rb +40 -0
  108. data/test/scout/test_tsv.rb +33 -4
  109. data/test/scout/test_work_queue.rb +3 -2
  110. data/test/scout/test_workflow.rb +16 -15
  111. data/test/scout/tsv/annotation/test_repo.rb +150 -0
  112. data/test/scout/tsv/change_id/test_translate.rb +178 -0
  113. data/test/scout/tsv/test_annotation.rb +52 -0
  114. data/test/scout/tsv/test_attach.rb +226 -1
  115. data/test/scout/tsv/test_change_id.rb +25 -0
  116. data/test/scout/tsv/test_csv.rb +50 -0
  117. data/test/scout/tsv/test_dumper.rb +38 -0
  118. data/test/scout/tsv/test_entity.rb +0 -0
  119. data/test/scout/tsv/test_index.rb +82 -0
  120. data/test/scout/tsv/test_open.rb +44 -0
  121. data/test/scout/tsv/test_parser.rb +70 -0
  122. data/test/scout/tsv/test_stream.rb +22 -0
  123. data/test/scout/tsv/test_transformer.rb +27 -3
  124. data/test/scout/tsv/test_traverse.rb +78 -0
  125. data/test/scout/tsv/util/test_process.rb +16 -0
  126. data/test/scout/tsv/util/test_reorder.rb +67 -0
  127. data/test/scout/tsv/util/test_sort.rb +28 -1
  128. data/test/scout/tsv/util/test_unzip.rb +32 -0
  129. data/test/scout/work_queue/test_socket.rb +4 -1
  130. data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
  131. data/test/scout/workflow/deployment/test_trace.rb +25 -0
  132. data/test/scout/workflow/step/test_archive.rb +28 -0
  133. data/test/scout/workflow/step/test_children.rb +25 -0
  134. data/test/scout/workflow/step/test_info.rb +16 -0
  135. data/test/scout/workflow/task/test_dependencies.rb +16 -16
  136. data/test/scout/workflow/task/test_inputs.rb +45 -1
  137. data/test/scout/workflow/test_definition.rb +52 -0
  138. data/test/scout/workflow/test_step.rb +57 -0
  139. data/test/scout/workflow/test_task.rb +26 -1
  140. data/test/scout/workflow/test_usage.rb +4 -4
  141. data/test/test_helper.rb +23 -1
  142. metadata +71 -14
  143. data/lib/scout/tsv/persist.rb +0 -27
  144. data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
  145. data/test/scout/tsv/test_persist.rb +0 -45
@@ -5,26 +5,36 @@ module TSV
5
5
  res = self.annotate({})
6
6
  res.type = kwargs[:type] if kwargs.include?(:type)
7
7
  kwargs[:one2one] = one2one
8
- key_field_name, field_names = traverse key_field, fields, **kwargs do |k,v|
9
- if @type == :double && merge && res.include?(k)
10
- current = res[k]
11
- if merge == :concat
12
- v.each_with_index do |new,i|
13
- next if new.empty?
14
- current[i].concat(new)
8
+ key_field_name, field_names = with_unnamed do
9
+ traverse key_field, fields, **kwargs do |k,v|
10
+ if res.type == :double && merge && res.include?(k)
11
+ current = res[k]
12
+ if merge == :concat
13
+ v.each_with_index do |new,i|
14
+ next if new.empty?
15
+ current[i].concat(new)
16
+ end
17
+ else
18
+ merged = []
19
+ v.each_with_index do |new,i|
20
+ next if new.empty?
21
+ merged[i] = current[i] + new
22
+ end
23
+ res[k] = merged
15
24
  end
16
- else
17
- merged = []
18
- v.each_with_index do |new,i|
19
- next if new.empty?
20
- merged[i] = current[i] + new
25
+ elsif res.type == :flat
26
+ res[k] ||= []
27
+ if merge == :concat
28
+ res[k].concat v
29
+ else
30
+ res[k] += v
21
31
  end
22
- res[k] = merged
32
+ else
33
+ res[k] = v
23
34
  end
24
- else
25
- res[k] = v
26
35
  end
27
36
  end
37
+
28
38
  res.key_field = key_field_name
29
39
  res.fields = field_names
30
40
  res
@@ -16,7 +16,7 @@ module TSV
16
16
  end
17
17
 
18
18
  if field
19
- field = fields.index(field) if fields && String === field
19
+ field = NamedArray.identify_name(fields, field) if fields && String === field
20
20
  set = field == :key ? [key] : (type == :double ? values[field].split(sep) : values[field])
21
21
  else
22
22
  set = [key, (type == :double ? values.collect{|v| v.split(sep) } : values)]
@@ -262,4 +262,12 @@ module TSV
262
262
  end
263
263
  new
264
264
  end
265
+
266
+ def chunked_values_at(keys, max = 5000)
267
+ Misc.ordered_divide(keys, max).inject([]) do |acc,c|
268
+ new = self.values_at(*c)
269
+ new.annotate acc if new.respond_to? :annotate and acc.empty?
270
+ acc.concat(new)
271
+ end
272
+ end
265
273
  end
@@ -22,6 +22,79 @@ module TSV
22
22
  end
23
23
  end
24
24
 
25
+ if not block_given?
26
+ if fields == :all
27
+ if just_keys
28
+ keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
29
+ keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
30
+ else
31
+ elems.sort_by{|key, value| key }
32
+ end
33
+ else
34
+ sorted = elems.sort do |a, b|
35
+ a_value = a.last
36
+ b_value = b.last
37
+ a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
38
+ b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
39
+ case
40
+ when (a_empty and b_empty)
41
+ 0
42
+ when a_empty
43
+ -1
44
+ when b_empty
45
+ 1
46
+ when Array === a_value
47
+ if a_value.length == 1 and b_value.length == 1
48
+ a_value.first <=> b_value.first
49
+ else
50
+ a_value.length <=> b_value.length
51
+ end
52
+ else
53
+ a_value <=> b_value
54
+ end
55
+ end
56
+ if just_keys
57
+ keys = sorted.collect{|key, value| key}
58
+ keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
59
+ keys
60
+ else
61
+ sorted.collect{|key, value| [key, self[key]]}
62
+ end
63
+ end
64
+ else
65
+ if just_keys
66
+ keys = elems.sort_by(&block).collect{|key, value| key}
67
+ keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
68
+ keys
69
+ else
70
+ elems.sort_by(&block).collect{|key, value| [key, self[key]]}
71
+ end
72
+ end
73
+ end
74
+
75
+ def sort(field = nil, just_keys = false, &block)
76
+ field = :all if field.nil?
77
+
78
+ if field == :all
79
+ elems = collect
80
+ else
81
+ elems = []
82
+ case type
83
+ when :single
84
+ through :key, field do |key, field|
85
+ elems << [key, field]
86
+ end
87
+ when :list, :flat
88
+ through :key, field do |key, fields|
89
+ elems << [key, fields.first]
90
+ end
91
+ when :double
92
+ through :key, field do |key, fields|
93
+ elems << [key, fields.first]
94
+ end
95
+ end
96
+ end
97
+
25
98
  if not block_given?
26
99
  if fields == :all
27
100
  if just_keys
@@ -63,12 +136,27 @@ module TSV
63
136
  end
64
137
  else
65
138
  if just_keys
66
- keys = elems.sort_by(&block).collect{|key, value| key}
139
+ keys = elems.sort(&block).collect{|key, value| key}
67
140
  keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
68
141
  keys
69
142
  else
70
- elems.sort_by(&block).collect{|key, value| [key, self[key]]}
143
+ elems.sort(&block).collect{|key, value| [key, self[key]]}
71
144
  end
72
145
  end
73
146
  end
147
+
148
+ def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
149
+ pstart = psize * (pnum - 1)
150
+ pend = psize * pnum - 1
151
+ field = :key if field == "key"
152
+ keys = sort_by(field || :key, true, &block)
153
+ keys.reverse! if reverse
154
+
155
+ if just_keys
156
+ keys[pstart..pend]
157
+ else
158
+ select :key => keys[pstart..pend]
159
+ end
160
+ end
161
+
74
162
  end
@@ -83,4 +83,60 @@ module TSV
83
83
  def unzip(*args, **kwargs)
84
84
  TSV.unzip(self, *args, **kwargs)
85
85
  end
86
+
87
+ def unzip_replicates
88
+ raise "Can only unzip replicates in :double TSVs" unless type == :double
89
+
90
+ new = {}
91
+ self.with_unnamed do
92
+ through do |k,vs|
93
+ NamedArray.zip_fields(vs).each_with_index do |v,i|
94
+ new[k + "(#{i})"] = v
95
+ end
96
+ end
97
+ end
98
+
99
+ self.annotate(new)
100
+ new.type = :list
101
+
102
+ new
103
+ end
104
+
105
+ def zip(merge = false, field = "New Field", sep = ":")
106
+ new = {}
107
+ self.annotate new
108
+
109
+ new.type = :double if merge
110
+
111
+ new.with_unnamed do
112
+ if merge
113
+ self.through do |key,values|
114
+ new_key, new_value = key.split(sep)
115
+ new_values = values + [[new_value] * values.first.length]
116
+ if new.include? new_key
117
+ current = new[new_key]
118
+ current.each_with_index do |v,i|
119
+ v.concat(new_values[i])
120
+ end
121
+ else
122
+ new[new_key] = new_values
123
+ end
124
+ end
125
+ else
126
+ self.through do |key,values|
127
+ new_key, new_value = key.split(sep)
128
+ new_values = values + [new_value]
129
+ new[new_key] = new_values
130
+ end
131
+ end
132
+ end
133
+
134
+ if self.key_field and self.fields
135
+ new.key_field = self.key_field.partition(sep).first
136
+ new.fields = new.fields + [field]
137
+ end
138
+
139
+ new
140
+ end
141
+
86
142
  end
@@ -9,9 +9,43 @@ require_relative 'util/unzip'
9
9
  require_relative 'util/sort'
10
10
  require_relative 'util/melt'
11
11
  module TSV
12
+
13
+ def self.field_match_counts(file, values, options = {})
14
+ options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
15
+ persist_options = IndiferentHash.pull_keys options, :persist
16
+
17
+ filename = TSV === file ? file.filename : file
18
+ path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
19
+ tsv = TSV === file ? file : TSV.open(file, options)
20
+
21
+ text = ""
22
+ fields = nil
23
+ tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
24
+ names.zip(fields).each do |list, format|
25
+ list = [list] unless Array === list
26
+ list.delete_if do |name| name.empty? end
27
+ next if list.empty?
28
+ text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
29
+ end
30
+ text << [gene, tsv.key_field] * "\t" << "\n"
31
+ end
32
+ text
33
+ end
34
+
35
+ TmpFile.with_file(values.uniq * "\n", false) do |value_file|
36
+ cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
37
+ begin
38
+ TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
39
+ rescue
40
+ Log.exception $!
41
+ TSV.setup({}, :type => :single, :cast => :to_i)
42
+ end
43
+ end
44
+ end
45
+
12
46
  def self.identify_field(key_field, fields, name, strict: nil)
13
47
  return :key if name == :key || (! strict && NamedArray.field_match(key_field, name))
14
- name.collect!{|n| key_field == n ? :key : n } if Array === name
48
+ name.collect!{|n| NamedArray.field_match(key_field, n) ? :key : n } if Array === name
15
49
  NamedArray.identify_name(fields, name, strict: strict)
16
50
  end
17
51
 
@@ -21,12 +55,12 @@ module TSV
21
55
 
22
56
  def [](key, *rest)
23
57
  v = super(key, *rest)
24
- NamedArray.setup(v, @fields, key) unless @unnamed || ! (Array === v)
58
+ NamedArray.setup(v, @fields, key) unless @unnamed || @type == :flat || ! (Array === v)
25
59
  v
26
60
  end
27
61
 
28
62
  def options
29
- extension_attr_hash
63
+ annotation_hash
30
64
  end
31
65
 
32
66
  def zip_new(key, values, insitu: :lax)
@@ -51,7 +85,7 @@ module TSV
51
85
  def each(*args, &block)
52
86
  if block_given?
53
87
  super(*args) do |k,v|
54
- NamedArray.setup(v, @fields) unless @unnamed || ! (Array === v)
88
+ NamedArray.setup(v, @fields) unless @unnamed || @type == :flat || ! (Array === v)
55
89
  block.call(k, v)
56
90
  end
57
91
  else
@@ -115,8 +149,16 @@ Example:
115
149
  [@key_field] + @fields
116
150
  end
117
151
 
152
+ def self.all_fields(file)
153
+ if file.respond_to?(:all_fields)
154
+ file.all_fields
155
+ else
156
+ TSV.parse_header(file)["all_fields"]
157
+ end
158
+ end
159
+
118
160
  def options
119
- self.extension_attr_hash
161
+ self.annotation_hash
120
162
  end
121
163
 
122
164
  def fingerprint
@@ -130,4 +172,9 @@ Example:
130
172
  def inspect
131
173
  fingerprint
132
174
  end
175
+
176
+ def merge(other)
177
+ self.annotate(super(other))
178
+ end
179
+
133
180
  end
data/lib/scout/tsv.rb CHANGED
@@ -1,9 +1,9 @@
1
- require 'scout/meta_extension'
1
+ require 'scout/annotation'
2
2
  require_relative 'tsv/util'
3
3
  require_relative 'tsv/parser'
4
4
  require_relative 'tsv/dumper'
5
5
  require_relative 'tsv/transformer'
6
- require_relative 'tsv/persist'
6
+ require_relative 'persist/tsv'
7
7
  require_relative 'tsv/index'
8
8
  require_relative 'tsv/path'
9
9
  require_relative 'tsv/traverse'
@@ -11,10 +11,13 @@ require_relative 'tsv/open'
11
11
  require_relative 'tsv/attach'
12
12
  require_relative 'tsv/change_id'
13
13
  require_relative 'tsv/stream'
14
+ require_relative 'tsv/entity'
15
+ require_relative 'tsv/annotation'
16
+ require_relative 'tsv/csv'
14
17
 
15
18
  module TSV
16
- extend MetaExtension
17
- extension_attr :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers
19
+ extend Annotation
20
+ annotation :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers, :entity_options, :serializer
18
21
 
19
22
  def self.str2options(str)
20
23
  field_options,_sep, rest = str.partition("#")
@@ -59,6 +62,10 @@ module TSV
59
62
  end
60
63
  original_setup(obj, *rest, &block)
61
64
  end
65
+
66
+ obj.save_annotation_hash if obj.respond_to?(:save_annotation_hash)
67
+
68
+ obj
62
69
  end
63
70
  end
64
71
 
@@ -68,35 +75,37 @@ module TSV
68
75
  end
69
76
 
70
77
  def self.open(file, options = {})
71
- grep, invert_grep = IndiferentHash.process_options options, :grep, :invert_grep, :persist => false
78
+ grep, invert_grep, nocache, monitor, entity_options = IndiferentHash.process_options options, :grep, :invert_grep, :nocache, :monitor, :entity_options
72
79
 
73
80
  persist_options = IndiferentHash.pull_keys options, :persist
74
- persist_options = IndiferentHash.add_defaults persist_options, :prefix => "TSV", :type => :HDB
81
+ persist_options = IndiferentHash.add_defaults persist_options, prefix: "TSV", type: :HDB, persist: false
82
+ persist_options[:data] ||= options[:data]
75
83
 
76
84
  file = StringIO.new file if String === file && ! (Path === file) && file.index("\n")
77
85
 
78
- source_name, other_options = case file
79
- when StringIO
80
- [file.inspect, options]
81
- when TSV::Parser
82
- [file.options[:filename], file.options]
83
- else
84
- [file, options]
85
- end
86
-
87
- Persist.persist(source_name, persist_options[:type], persist_options.merge(:other_options => other_options)) do |filename|
88
- if filename
89
- data = case persist_options[:type]
90
- when :HDB, :BDB
91
- ScoutCabinet.open(filename, true, persist_options[:type])
92
- when :tkh, :tkt, :tks
93
- ScoutTKRZW.open(filename, true, persist_options[:type])
94
- end
86
+ source_name, options =
87
+ case file
88
+ when StringIO
89
+ [file.inspect, options]
90
+ when TSV::Parser
91
+ [file.options[:filename], file.options]
95
92
  else
96
- data = nil
93
+ [file, options]
97
94
  end
95
+
96
+ Persist.tsv(source_name, options, persist_options: persist_options) do |data|
98
97
  options[:data] = data if data
99
- options[:filename] = TSV::Parser === file ? file.options[:filename] : file
98
+ options[:filename] ||= if TSV::Parser === file
99
+ file.options[:filename]
100
+ elsif Path === file
101
+ file
102
+ elsif file.respond_to?(:filename)
103
+ file.filename
104
+ elsif Path.is_filename?(file)
105
+ file
106
+ else
107
+ nil
108
+ end
100
109
 
101
110
  if data
102
111
  Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
@@ -104,14 +113,23 @@ module TSV
104
113
  Log.debug "TSV open #{Log.fingerprint file}"
105
114
  end
106
115
 
107
- if TSV::Parser === file
116
+ tsv = if TSV::Parser === file
108
117
  TSV.parse(file, **options)
109
118
  else
110
- Open.open(file, grep: grep, invert_grep: invert_grep) do |f|
119
+ options[:tsv_invert_grep] ||= invert_grep if invert_grep
120
+ Open.open(file, grep: grep, invert_grep: invert_grep, nocache: nocache) do |f|
111
121
  TSV.parse(f, **options)
112
122
  end
113
123
  end
124
+
125
+ tsv.entity_options = entity_options
126
+
127
+ tsv
114
128
  end
115
129
  end
130
+
131
+ def to_hash
132
+ self.dup
133
+ end
116
134
  end
117
135
 
@@ -17,6 +17,10 @@ class WorkQueue
17
17
  ScoutSemaphore.create_semaphore(@read_sem,1)
18
18
  end
19
19
 
20
+ def socket_id
21
+ @key
22
+ end
23
+
20
24
  def clean
21
25
  @cleaned = true
22
26
  @sread.close unless @sread.closed?
@@ -37,6 +41,10 @@ class WorkQueue
37
41
  when nil
38
42
  size_head = [0,"N"].pack 'La'
39
43
  str = size_head
44
+ when Annotation::AnnotatedObject
45
+ payload = @serializer.dump(obj)
46
+ size_head = [payload.bytesize,"S"].pack 'La'
47
+ str = size_head << payload
40
48
  when String
41
49
  payload = obj
42
50
  size_head = [payload.bytesize,"C"].pack 'La'
@@ -1,13 +1,24 @@
1
1
  class WorkQueue
2
2
  class Worker
3
- attr_accessor :pid, :ignore_ouput
3
+ attr_accessor :pid, :ignore_ouput, :queue_id
4
4
  def initialize(ignore_ouput = false)
5
5
  @ignore_output = ignore_ouput
6
6
  end
7
7
 
8
+ def worker_short_id
9
+ [object_id, pid].compact * "@"
10
+ end
11
+
12
+ def worker_id
13
+ [worker_short_id, queue_id] * "->"
14
+ end
15
+
8
16
  def run
9
17
  @pid = Process.fork do
10
- Log.debug "Worker start with #{Process.pid}"
18
+ Signal.trap("INT") do
19
+ Kernel.exit! -1
20
+ end
21
+ Log.low "Worker start #{worker_id}"
11
22
  yield
12
23
  end
13
24
  end
@@ -15,6 +26,12 @@ class WorkQueue
15
26
  def process(input, output = nil, &block)
16
27
  run do
17
28
  begin
29
+ if output
30
+ Open.purge_pipes(output.swrite)
31
+ else
32
+ Open.purge_pipes
33
+ end
34
+
18
35
  while obj = input.read
19
36
  if DoneProcessing === obj
20
37
  output.write DoneProcessing.new
@@ -36,15 +53,15 @@ class WorkQueue
36
53
 
37
54
  def abort
38
55
  begin
39
- Log.debug "Aborting worker #{@pid}"
40
- Process.kill "INT", @pid
56
+ Log.medium "Aborting worker #{worker_id}"
57
+ Process.kill "INT", @pid
41
58
  rescue Errno::ECHILD
42
59
  rescue Errno::ESRCH
43
60
  end
44
61
  end
45
62
 
46
63
  def join
47
- Log.debug "Joining worker #{@pid}"
64
+ Log.low "Joining worker #{worker_id}"
48
65
  Process.waitpid @pid
49
66
  end
50
67
 
@@ -6,18 +6,29 @@ require 'timeout'
6
6
  class WorkQueue
7
7
  attr_accessor :workers, :worker_proc, :callback
8
8
 
9
+ def new_worker
10
+ worker = Worker.new
11
+ worker.queue_id = queue_id
12
+ worker
13
+ end
14
+
9
15
  def initialize(workers = 0, &block)
10
16
  workers = workers.to_i if String === workers
11
17
  @input = WorkQueue::Socket.new
12
18
  @output = WorkQueue::Socket.new
13
- @workers = workers.times.collect{ Worker.new }
19
+ @workers = workers.times.collect{ new_worker }
14
20
  @worker_proc = block
15
21
  @worker_mutex = Mutex.new
16
22
  @removed_workers = []
23
+ Log.medium "Starting queue #{queue_id} with workers: #{Log.fingerprint @workers.collect{|w| w.worker_short_id }} and sockets #{@input.socket_id} and #{@output.socket_id}"
24
+ end
25
+
26
+ def queue_id
27
+ [object_id, Process.pid] * "@"
17
28
  end
18
29
 
19
30
  def add_worker(&block)
20
- worker = Worker.new
31
+ worker = new_worker
21
32
  @worker_mutex.synchronize do
22
33
  @workers.push(worker)
23
34
  if block_given?
@@ -41,9 +52,11 @@ class WorkQueue
41
52
  @worker_mutex.synchronize do
42
53
  worker = @workers.index{|w| w.pid == pid}
43
54
  if worker
44
- Log.low "Removed worker #{pid}"
45
55
  @workers.delete_at(worker)
46
56
  @removed_workers << pid
57
+ Log.low "Removed worker #{pid} from #{queue_id}"
58
+ else
59
+ Log.medium "Worker #{pid} not from #{queue_id}"
47
60
  end
48
61
  end
49
62
  end
@@ -56,14 +69,14 @@ class WorkQueue
56
69
  @reader = Thread.new(Thread.current) do |parent|
57
70
  begin
58
71
  Thread.current.report_on_exception = false
59
- Thread.current["name"] = "Output reader #{Process.pid}"
72
+ Thread.current["name"] = "Output reader #{queue_id}"
60
73
  @done_workers ||= []
61
74
  while true
62
75
  obj = @output.read
63
76
  if DoneProcessing === obj
64
77
 
65
78
  done = @worker_mutex.synchronize do
66
- Log.low "Worker #{obj.pid} done"
79
+ Log.low "Worker #{obj.pid} from #{queue_id} done"
67
80
  @done_workers << obj.pid
68
81
  @closed && @done_workers.length == @removed_workers.length + @workers.length
69
82
  end
@@ -78,12 +91,12 @@ class WorkQueue
78
91
  rescue DoneProcessing
79
92
  rescue Aborted
80
93
  rescue WorkerException
81
- Log.error "Exception in worker #{obj.pid} in queue #{Process.pid}: #{obj.worker_exception.message}"
94
+ Log.error "Exception in worker #{obj.pid} in queue #{queue_id}: #{obj.worker_exception.message}"
82
95
  self.abort
83
96
  @input.abort obj.worker_exception
84
97
  raise obj.worker_exception
85
98
  rescue
86
- Log.error "Exception processing output in queue #{Process.pid}: #{$!.message}"
99
+ Log.error "Exception processing output in queue #{queue_id}: #{$!.message}"
87
100
  self.abort
88
101
  raise $!
89
102
  end
@@ -95,25 +108,19 @@ class WorkQueue
95
108
 
96
109
  @waiter = Thread.new do
97
110
  Thread.current.report_on_exception = false
98
- Thread.current["name"] = "Worker waiter #{Process.pid}"
111
+ Thread.current["name"] = "Worker waiter #{queue_id}"
99
112
  while true
100
113
  break if @worker_mutex.synchronize{ @workers.empty? }
101
- begin
102
- Timeout.timeout(1) do
103
- begin
104
- pid, status = Process.wait2
105
- remove_worker(pid) if pid
106
- rescue Exception
107
- Log.exception $!
108
- end
109
- end
110
- rescue Timeout::Error
111
- pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
112
- pids.each do |p|
113
- pid, status = Process.wait2 p, Process::WNOHANG
114
+ threads = @workers.collect do |w|
115
+ t = Thread.new do
116
+ Thread.current["name"] = "Worker waiter #{queue_id} worker #{w.pid}"
117
+ pid, status = Process.wait2 w.pid
114
118
  remove_worker(pid) if pid
115
119
  end
120
+ Thread.pass until t["name"]
121
+ t
116
122
  end
123
+ threads.each do |t| t.join end
117
124
  end
118
125
  end
119
126
 
@@ -131,16 +138,23 @@ class WorkQueue
131
138
  end
132
139
 
133
140
  def abort
134
- Log.low "Aborting #{@workers.length} workers in queue #{Process.pid}"
141
+ Log.low "Aborting #{@workers.length} workers in queue #{queue_id}"
135
142
  @worker_mutex.synchronize do
136
- @workers.each{|w| w.abort }
143
+ @workers.each do |w|
144
+ ScoutSemaphore.post_semaphore(@output.write_sem) if @output
145
+ ScoutSemaphore.post_semaphore(@input.read_sem) if @input
146
+ w.abort
147
+ end
137
148
  end
138
149
  end
139
150
 
140
151
  def close
141
152
  @closed = true
142
153
  @worker_mutex.synchronize{ @workers.length }.times do
143
- @input.write DoneProcessing.new() unless @input.closed_write?
154
+ begin
155
+ @input.write DoneProcessing.new() unless @input.closed_write?
156
+ rescue IOError
157
+ end
144
158
  end
145
159
  end
146
160