rgfa 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/bin/gfadiff.rb +420 -0
  3. data/bin/rgfa-findcrisprs.rb +208 -0
  4. data/bin/rgfa-mergelinear.rb +14 -0
  5. data/bin/rgfa-simdebruijn.rb +86 -0
  6. data/lib/rgfa.rb +376 -0
  7. data/lib/rgfa/byte_array.rb +74 -0
  8. data/lib/rgfa/cigar.rb +157 -0
  9. data/lib/rgfa/connectivity.rb +131 -0
  10. data/lib/rgfa/containments.rb +97 -0
  11. data/lib/rgfa/error.rb +3 -0
  12. data/lib/rgfa/field_array.rb +87 -0
  13. data/lib/rgfa/field_parser.rb +109 -0
  14. data/lib/rgfa/field_validator.rb +241 -0
  15. data/lib/rgfa/field_writer.rb +108 -0
  16. data/lib/rgfa/headers.rb +76 -0
  17. data/lib/rgfa/line.rb +721 -0
  18. data/lib/rgfa/line/containment.rb +87 -0
  19. data/lib/rgfa/line/header.rb +92 -0
  20. data/lib/rgfa/line/link.rb +379 -0
  21. data/lib/rgfa/line/path.rb +106 -0
  22. data/lib/rgfa/line/segment.rb +209 -0
  23. data/lib/rgfa/linear_paths.rb +285 -0
  24. data/lib/rgfa/lines.rb +155 -0
  25. data/lib/rgfa/links.rb +242 -0
  26. data/lib/rgfa/logger.rb +192 -0
  27. data/lib/rgfa/multiplication.rb +156 -0
  28. data/lib/rgfa/numeric_array.rb +196 -0
  29. data/lib/rgfa/paths.rb +98 -0
  30. data/lib/rgfa/rgl.rb +194 -0
  31. data/lib/rgfa/segment_ends_path.rb +9 -0
  32. data/lib/rgfa/segment_info.rb +162 -0
  33. data/lib/rgfa/segments.rb +99 -0
  34. data/lib/rgfa/sequence.rb +65 -0
  35. data/lib/rgfatools.rb +102 -0
  36. data/lib/rgfatools/artifacts.rb +29 -0
  37. data/lib/rgfatools/copy_number.rb +126 -0
  38. data/lib/rgfatools/invertible_segments.rb +104 -0
  39. data/lib/rgfatools/linear_paths.rb +140 -0
  40. data/lib/rgfatools/multiplication.rb +194 -0
  41. data/lib/rgfatools/p_bubbles.rb +66 -0
  42. data/lib/rgfatools/superfluous_links.rb +64 -0
  43. metadata +97 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2f8b7edb6df7ada4a6f53db674a3b306e00e2861
4
+ data.tar.gz: 87d224980a807f6b8e98917b845d48a12ec11e4b
5
+ SHA512:
6
+ metadata.gz: c38a9ef751a5220dd59991a4b0788d8860d66ead6880c03de53ab047970cb3ad4719b23c4dd1b7b1d262dfe29cc84b82e105d10e938e83231ad3969efdf86edd
7
+ data.tar.gz: b87e1dcd4f7ddccb77bdb03edad2a52ae23e8b5c9f22629f4aa57206c75306b7e7f813e5154c76245de14113d532e983795957721bd0ecce9d772a7e95ee6259
@@ -0,0 +1,420 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rgfa"
4
+
5
# Record-type switches select which kinds of lines are compared. Every switch
# found on the command line is removed from ARGV; if none was given, all
# record types are selected.
all_rt = %w[-h -s -l -c -p]
rt = all_rt.map { |rtopt| ARGV.delete(rtopt) }.compact
rt = all_rt if rt.empty?

# Output-mode switches, also removed from ARGV (nil when absent).
out_identical = ARGV.delete("-i")
out_script = ARGV.delete("-script")

# Once the switches are stripped, exactly the two file names must remain.
unless ARGV.size == 2
  STDERR.puts(
    "Compare two RGFA files",
    "",
    "Usage: #{$0} [-h] [-s] [-l] [-c] [-p] [-i] [-script] <gfa1> <gfa2>",
    "",
    "If a combination of -h,-s,-l,-c and/or -p is specified, then",
    "only record of the specified type [h=headers, s=segments, ",
    "l=links, c=containments, p=paths] are compared. ",
    "(default: -h -s -l -c -p)",
    "",
    "Other options:",
    " -i: output msg if identical",
    " -script: create ruby script to transform gfa1 in gfa2"
  )
  exit 1
end
32
+
33
# In -script mode the tool prints a Ruby program on standard output. This is
# its preamble: shebang, a comment banner naming both inputs, the require
# line and the statement loading the first graph into the variable `gfa`,
# which all later generated statements operate on.
if out_script
  puts "#!/usr/bin/env ruby"
  puts
  puts "#"
  puts "# This script was automatically generated using gfadiff.rb"
  puts "#"
  puts "# Purpose: edit gfa1 into gfa2"
  puts "#"
  puts "# gfa1: #{ARGV[0]}"
  puts "# gfa2: #{ARGV[1]}"
  puts "#"
  puts
  puts "require \"rgfa\""
  puts
  # The file name is emitted through #inspect rather than being pasted
  # between hand-written quotes, so that names containing quotes, backslashes
  # or "#{" still produce a valid string literal in the generated script.
  puts "gfa = RGFA.from_file(#{ARGV[0].inspect})"
  puts
end
50
+
51
# Read the two input files, in command-line order, into separate RGFA
# objects; validations are switched off on each object before reading.
gfa1, gfa2 = ARGV.first(2).map do |filename|
  graph = RGFA.new
  graph.turn_off_validations
  graph.read_file(filename)
  graph
end
57
+
58
# Header comparison (selected by -h).
#
# In plain mode one line is printed per differing value, starting with "<"
# for values of the first file and ">" for values of the second, followed by
# a bracketed category tag. In -script mode Ruby statements editing
# `gfa.header` are printed instead.
#
# Fields are handled in three groups: only in the first header, only in the
# second header, and present in both.
if rt.include?("-h")
  h1 = gfa1.header
  h2 = gfa2.header
  if h1 == h2
    if out_identical
      puts "# Header values are identical"
    elsif out_script
      puts "# Headers"
      puts "# ... are identical"
      puts
    end
  else
    if out_script
      puts "# Headers"
      puts
    end

    # Fields which exist only in the first header.
    (h1.optional_fieldnames - h2.optional_fieldnames).each do |k|
      if out_script
        puts "gfa.header.delete_field(#{k.inspect})"
      else
        v = h1.get(k)
        if v.kind_of?(RGFA::FieldArray)
          t = v.datatype
          v.each do |elem|
            field = elem.to_gfa_field(datatype: t)
            puts "<\t[headers/exclusive/multivalue/#{k}]\t#{field}"
          end
        else
          v = h1.field_to_s(k, optfield: true)
          # "<" marks first-file values everywhere else in this tool; the
          # marker printed here used to be a stray "M".
          puts "<\t[headers/exclusive]\t#{k.inspect}\t#{v}"
        end
      end
    end

    # Fields which exist only in the second header.
    (h2.optional_fieldnames - h1.optional_fieldnames).each do |k|
      v = h2.get(k)
      if out_script
        t = h2.get_datatype(k)
        puts "gfa.header.set_datatype(#{k.inspect}, #{t.inspect})"
        if v.kind_of?(RGFA::FieldArray)
          t = v.datatype
          v.each do |elem|
            puts "gfa.header.add(#{k.inspect}, #{elem.inspect}, "+
                 "#{t.inspect})"
          end
        else
          puts "gfa.header.#{k}=#{v.inspect}"
        end
      else
        if v.kind_of?(RGFA::FieldArray)
          t = v.datatype
          v.each do |elem|
            field = elem.to_gfa_field(datatype: t)
            puts ">\t[headers/exclusive/multivalue/#{k}]\t#{field}"
          end
        else
          v = h2.field_to_s(k, optfield: true)
          puts ">\t[headers/exclusive]\t#{k.inspect}\t#{v}"
        end
      end
    end

    # Fields which exist in both headers: compare datatype and value(s).
    (h1.optional_fieldnames & h2.optional_fieldnames).each do |k|
      v1 = h1.get(k)
      v2 = h2.get(k)
      multi1 = v1.kind_of?(RGFA::FieldArray)
      multi2 = v2.kind_of?(RGFA::FieldArray)
      # Single values are wrapped in an array, multiple values are sorted,
      # so that both cases can be compared element-wise below.
      v1a = multi1 ? v1.sort : [v1]
      v2a = multi2 ? v2.sort : [v2]
      t1 = multi1 ? v1.datatype : h1.get_datatype(k)
      t2 = multi2 ? v2.datatype : h2.get_datatype(k)
      m1 = multi1 ? "multivalue/" : ""
      m2 = multi2 ? "multivalue/" : ""
      if out_script
        if t1 != t2 || v1a != v2a
          # Replace the field as a whole: delete it, then re-add the
          # value(s) of the second file.
          puts "gfa.header.delete(#{k.inspect})"
          v2a.each do |elem|
            field = elem.to_gfa_field(datatype: t2)
            puts "gfa.header.add(#{k.inspect}, #{field.inspect}, "+
                 "#{t2.inspect})"
          end
        end
      else
        if t1 != t2
          # Different datatypes: report all values of both sides.
          v1a.each do |elem|
            field = elem.to_gfa_field(datatype: t1)
            puts "<\t[headers/typediff/#{m1}#{k}]\t#{field}"
          end
          v2a.each do |elem|
            field = elem.to_gfa_field(datatype: t2)
            puts ">\t[headers/typediff/#{m2}#{k}]\t#{field}"
          end
        else
          # Same datatype: report only the values missing on the other side.
          (v1a-v2a).each do |elem|
            field = elem.to_gfa_field(datatype: t1)
            puts "<\t[headers/valuediff/#{m1}#{k}]\t#{field}"
          end
          (v2a-v1a).each do |elem|
            field = elem.to_gfa_field(datatype: t2)
            puts ">\t[headers/valuediff/#{m2}#{k}]\t#{field}"
          end
        end
      end
    end
    if out_script
      puts
    end
  end
end
163
+
164
# Compares the named records of one type in two graphs and prints the
# differences on standard output.
#
# The record type is given as a method-name stem: the names are obtained by
# calling "<rt>_names" on each graph and a single record by calling "<rt>"
# with a name (the script uses "segment" and "path").
#
# Plain mode prints one line per difference, starting with "<" (first graph)
# or ">" (second graph) and a bracketed category tag. Script mode prints
# Ruby statements operating on a variable named `gfa`.
#
# @param gfa1 [Object] first graph
# @param gfa2 [Object] second graph
# @param rt [String] record type stem, e.g. "segment" or "path"
# @param out_script [Object] truthy to print Ruby statements instead of a
#   diff listing
# @param out_identical [Object] truthy to print a message when no difference
#   was found (plain mode only)
# @return [nil]
def diff_segments_or_paths(gfa1,gfa2,rt,out_script,out_identical)
  rts = rt + "s"
  rtsU = rts[0].upcase + rts[1..-1]
  s1names = gfa1.send("#{rt}_names").sort
  s2names = gfa2.send("#{rt}_names").sort
  difffound = false

  # Script statements which give optional field +fn+ of record +sn+ the
  # datatype and value it has in +line+. The datatype is written with
  # #inspect so that it appears as a literal in the generated script
  # (a bare `Z` or `i` would be read as a constant / local variable there).
  emit_set_field = lambda do |line, sn, fn|
    v = line.get(fn)
    t = line.get_datatype(fn)
    puts "gfa.#{rt}(#{sn.inspect}).set_datatype(#{fn.inspect},#{t.inspect})"
    puts "gfa.#{rt}(#{sn.inspect}).#{fn}=#{v.inspect}"
  end

  if out_script
    puts "# #{rtsU}"
    puts
  end

  # Records which exist only in the first graph.
  (s1names - s2names).each do |sn|
    difffound = true
    segstr = gfa1.send(rt,sn).to_s
    if out_script
      puts "gfa.rm(#{sn.inspect})"
    else
      puts "<\t[#{rts}/exclusive]\t#{segstr}"
    end
  end

  # Records which exist only in the second graph.
  (s2names - s1names).each do |sn|
    difffound = true
    segstr = gfa2.send(rt,sn).to_s
    if out_script
      puts "gfa << #{segstr.inspect}"
    else
      puts ">\t[#{rts}/exclusive]\t#{segstr}"
    end
  end

  # Records which exist in both graphs: compare them field by field.
  (s1names & s2names).each do |sn|
    s1 = gfa1.send(rt,sn)
    s2 = gfa2.send(rt,sn)

    # Required fields are compared by their string representation.
    s1.required_fieldnames.each do |fn|
      v1 = s1.field_to_s(fn)
      v2 = s2.field_to_s(fn)
      next if v1 == v2
      difffound = true
      if out_script
        puts "gfa.#{rt}(#{sn.inspect}).#{fn}=#{v2.inspect}"
      else
        puts "<\t[#{rts}/reqfields/valuediff/#{sn}]\t#{v1}"
        puts ">\t[#{rts}/reqfields/valuediff/#{sn}]\t#{v2}"
      end
    end

    s1f = s1.optional_fieldnames
    s2f = s2.optional_fieldnames

    # Optional fields only in the first record.
    (s1f - s2f).each do |fn|
      difffound = true
      if out_script
        puts "gfa.#{rt}(#{sn.inspect}).delete_field(#{fn.inspect})"
      else
        v = s1.field_to_s(fn, optfield: true)
        puts "<\t[#{rts}/optfields/exclusive/#{sn}]\t#{v}"
      end
    end

    # Optional fields only in the second record.
    (s2f - s1f).each do |fn|
      difffound = true
      if out_script
        emit_set_field.call(s2, sn, fn)
      else
        v = s2.field_to_s(fn, optfield: true)
        puts ">\t[#{rts}/optfields/exclusive/#{sn}]\t#{v}"
      end
    end

    # Optional fields in both records: compared as complete tag strings, so
    # a datatype change is reported as well as a value change.
    (s1f & s2f).each do |fn|
      v1 = s1.field_to_s(fn, optfield: true)
      v2 = s2.field_to_s(fn, optfield: true)
      next if v1 == v2
      difffound = true
      if out_script
        emit_set_field.call(s2, sn, fn)
      else
        puts "<\t[#{rts}/optfields/valuediff/#{sn}]\t#{v1}"
        puts ">\t[#{rts}/optfields/valuediff/#{sn}]\t#{v2}"
      end
    end
  end

  if !difffound
    if out_script
      puts "# ... are identical"
    elsif out_identical
      puts "# #{rtsU} are identical"
    end
  end
  puts if out_script
end
257
+
258
# Segment comparison (selected by -s).
diff_segments_or_paths(gfa1, gfa2, "segment", out_script, out_identical) if rt.include?("-s")
261
+
262
# TODO: diff of single optfields
#
# Link comparison (selected by -l).
#
# Links are collected per segment end (:B and :E). Links touching a segment
# which exists in only one graph are reported as "exclusive_segments"; for
# segments present in both graphs, the links found at an end in one graph
# but not in the other are reported as "different". In script mode nothing
# is printed for the links of segments found only in the first graph.
if rt.include?("-l")
  names1 = gfa1.segment_names.sort
  names2 = gfa2.segment_names.sort
  ends = [:B, :E]
  links_differ = false

  if out_script
    puts "# Links"
    puts
  end

  # Links of segments found only in the first graph.
  excl_links1 = []
  (names1 - names2).each do |sn|
    links_differ = true
    ends.each { |et| excl_links1.concat(gfa1.links_of([sn, et])) }
  end
  unless out_script
    excl_links1.uniq.each { |l| puts "<\t[links/exclusive_segments]\t#{l}" }
  end

  # Links of segments found only in the second graph.
  excl_links2 = []
  (names2 - names1).each do |sn|
    links_differ = true
    ends.each { |et| excl_links2.concat(gfa2.links_of([sn, et])) }
  end
  excl_links2.uniq.each do |l|
    if out_script
      puts "gfa << #{l.to_s.inspect}"
    else
      puts ">\t[links/exclusive_segments]\t#{l}"
    end
  end

  # Per-end differences for the segments which both graphs contain.
  changed_links1 = []
  changed_links2 = []
  (names1 & names2).each do |sn|
    ends.each do |et|
      at_end1 = gfa1.links_of([sn, et])
      at_end2 = gfa2.links_of([sn, et])
      missing_in_2 = at_end1 - at_end2
      missing_in_1 = at_end2 - at_end1
      unless missing_in_2.empty?
        links_differ = true
        changed_links1 += missing_in_2
      end
      unless missing_in_1.empty?
        links_differ = true
        changed_links2 += missing_in_1
      end
    end
  end

  # Links already reported with an exclusive segment are not repeated.
  (changed_links1 - excl_links1).uniq.each do |l|
    if out_script
      puts "gfa.rm(gfa.link_from_to(#{l.from.to_sym.inspect}, "+
           "#{l.from_orient.inspect}, "+
           "#{l.to.to_sym.inspect}, "+
           "#{l.to_orient.inspect}, "+
           "#{l.overlap.to_s.inspect}.to_cigar))"
    else
      puts "<\t[links/different]\t#{l}"
    end
  end
  (changed_links2 - excl_links2).uniq.each do |l|
    if out_script
      puts "gfa << #{l.to_s.inspect}"
    else
      puts ">\t[links/different]\t#{l}"
    end
  end

  unless links_differ
    if out_script
      puts "# ... are identical"
    elsif out_identical
      puts "# Links are identical"
    end
  end
  puts if out_script
end
338
+
339
# TODO: this code is similar to -l; make generic and merge
#
# Containment comparison (selected by -c).
#
# Containments involving a segment which exists in only one graph are
# reported as "exclusive_segments"; for segments present in both graphs, the
# containments (in either role) found in one graph but not in the other are
# reported as "different". In script mode nothing is printed for the
# containments of segments found only in the first graph.
if rt.include?("-c")
  difffound = false
  s1names = gfa1.segment_names.sort
  s2names = gfa2.segment_names.sort

  # Section heading of the generated script, as printed by the other
  # sections; without it "# ... are identical" below has nothing to refer to.
  if out_script
    puts "# Containments"
    puts
  end

  # Containments of segments found only in the first graph.
  cexcl1 = []
  (s1names - s2names).each do |sn|
    difffound = true
    cexcl1 += gfa1.contained_in(sn)
    cexcl1 += gfa1.containing(sn)
  end
  cexcl1.uniq.each do |c|
    if !out_script
      puts "<\t[containments/exclusive_segments]\t#{c.to_s}"
    end
  end

  # Containments of segments found only in the second graph.
  cexcl2 = []
  (s2names - s1names).each do |sn|
    difffound = true
    cexcl2 += gfa2.contained_in(sn)
    cexcl2 += gfa2.containing(sn)
  end
  cexcl2.uniq.each do |c|
    if out_script
      puts "gfa << #{c.to_s.inspect}"
    else
      puts ">\t[containments/exclusive_segments]\t#{c.to_s}"
    end
  end

  # Differences for the segments which both graphs contain.
  cdiff1 = []
  cdiff2 = []
  (s1names & s2names).each do |sn|
    c1 = gfa1.contained_in(sn)
    c2 = gfa2.contained_in(sn)
    c1 += gfa1.containing(sn)
    c2 += gfa2.containing(sn)
    d1 = c1 - c2
    d2 = c2 - c1
    if !d1.empty?
      difffound = true
      cdiff1 += d1
    end
    if !d2.empty?
      difffound = true
      cdiff2 += d2
    end
  end

  # Containments already reported with an exclusive segment are not repeated.
  (cdiff1-cexcl1).uniq.each do |l|
    if out_script
      # TODO: handle multiple containments for a segments pair
      puts "gfa.rm(gfa.containment(#{l.from.to_sym.inspect}, "+
           "#{l.to.to_sym.inspect}))"
    else
      puts "<\t[containments/different]\t#{l.to_s}"
    end
  end
  (cdiff2-cexcl2).uniq.each do |l|
    if out_script
      puts "gfa << #{l.to_s.inspect}"
    else
      puts ">\t[containments/different]\t#{l.to_s}"
    end
  end

  if !difffound
    if out_script
      puts "# ... are identical"
    elsif out_identical
      puts "# Containments are identical"
    end
  end
  puts if out_script
end
411
+
412
# Path comparison (selected by -p).
diff_segments_or_paths(gfa1, gfa2, "path", out_script, out_identical) if rt.include?("-p")
415
+
416
# Last statements of the generated script: a blank line, a comment and the
# statement which prints the edited graph.
puts "", "# Output graph", "puts gfa" if out_script
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "rgfatools"
4
+
5
# crisprs have a structure ARU1RU..RUnRB where |U|~|R| in [24..50]

# When true, find_crisprs prints a per-segment table before searching.
$debugmode = false
# When true, find_crisprs prints only the spacer sequences (sorted, upcased).
$spacersonly = false

class RGFA

  # Searches the graph for segments which look like the repeat of a CRISPR
  # array and prints a report on standard output for each one found.
  #
  # First, the +cn+ value of every segment is set to half of its coverage
  # (rounded). Then each segment whose length is within +minlen+..+maxlen+
  # and whose +cn+ is at least +minrepeats+ is taken as a candidate: starting
  # from both of its ends, #search_circle collects paths which return to the
  # segment ("circles") and paths which do not ("linear"). A candidate is
  # reported if both ends have exactly one linear path, the numbers of
  # circles found from the two ends differ by at most one, and at least
  # +minrepeats+ circles were found from the E end.
  #
  # @param minrepeats [Integer] minimum +cn+ of a candidate segment and
  #   minimum number of circles
  # @param minlen [Integer] minimum candidate length; also the minimum
  #   distance passed to #search_circle
  # @param maxlen [Integer] maximum candidate length; the maximum search
  #   distance is maxlen*2 plus the candidate length
  # @return [Object] the return value of the final iteration over the
  #   segment names (not meaningful to callers)
  def find_crisprs(minrepeats=3,minlen=24,maxlen=50)
    segment_names.each do |sn|
      s = segment(sn)
      s.cn = (s.coverage(unit_length: @default[:unit_length],
                         count_tag: @default[:count_tag])/2).round
    end
    output_segment_infos if $debugmode
    # Remaining number of visits per segment name, separately for searches
    # started from the B end and from the E end. It is carried over between
    # candidates only when a candidate is accepted (see below).
    maxvisits_global = {:B => {}, :E => {}}
    segment_names.each do |sn|
      s = segment(sn)
      next if s.length < minlen || s.length > maxlen
      next if s.cn < minrepeats
      circles = {}
      linear = {}
      maxvisits = {}
      [:B, :E].each do |rt|
        maxvisits[rt] = maxvisits_global[rt].dup
        maxvisits[rt][sn] ||= s.cn
        circles[rt] = []
        linear[rt] = []
        segment_end = [s, rt].to_segment_end
        links_of(segment_end).each do |l|
          search_circle(segment_end.invert_end_type,
                        segment_end,
                        l,
                        maxvisits[rt],0,
                        minlen,
                        maxlen*2+s.length,
                        [segment_end],
                        circles[rt],
                        linear[rt])
        end
        if maxvisits[rt][sn.to_sym] > 0
          # The candidate still has visits left: repeat each found path as
          # many times as the remaining visits of all its segments allow.
          multi = {:l => [], :c => []}
          [[linear[rt],:l], [circles[rt],:c]].each do |paths, pt|
            paths.each do |c|
              min_mv = s.cn
              # For circles the last path element is left out.
              upto = (pt == :l ? -1 : -2)
              c[0..upto].each do |csn, et|
                mv = maxvisits[rt][csn.to_sym]
                if mv < min_mv
                  min_mv = mv
                end
              end
              if min_mv > 0
                min_mv.times { multi[pt] << c.dup }
                c[0..upto].each do |csn, et|
                  maxvisits[rt][csn.to_sym] -= min_mv
                end
              end
            end
          end
          circles[rt] += multi[:c]
          linear[rt] += multi[:l]
        end
      end
      next if (circles[:E].size - circles[:B].size).abs > 1
      next if (linear[:E].size - linear[:B].size).abs > 0
      next if linear[:E].size != 1
      merged_circles = circles[:E].map {|c| merge_crisprs_path(c,s,:E)}
      before = merge_crisprs_path(linear[:B].first,s,:B)
      after = merge_crisprs_path(linear[:E].first,s,:E)
      next if merged_circles.size < minrepeats
      # The candidate is accepted: keep its visit counters for the next ones.
      maxvisits_global = maxvisits
      # A spacer much longer than the repeat may hide a further repeat
      # instance; such spacers are counted separately.
      instances = merged_circles.size + 1
      possible_instances =
        merged_circles.count {|seq| seq.length > s.length + minlen}
      if $spacersonly
        puts merged_circles.sort.map(&:upcase)
      else
        puts "CRISP signature found in segment #{s.name}"
        puts
        # The last 50 characters of the preceding sequence; the whole
        # sequence if it is shorter than that (the slice is nil then).
        puts " Before: sequence = ...#{before[-50..-1] || before}"
        puts
        if possible_instances > 0
          instances = "#{instances}..#{instances+possible_instances}"
        end
        puts " Repeat: instances = #{instances}; "+
             "length = #{s.length};\t"+
             "sequence = #{s.sequence}"
        puts
        puts " Spacers:"
        asterisk = false
        merged_circles.each_with_index do |seq, i|
          if seq.length > s.length + minlen
            str = "=#{s.length}+2*#{(seq.length.to_f - s.length)/2}"
            asterisk = true
            this_asterisk = true
          else
            str = ""
            this_asterisk = false
          end
          puts " (#{i+1}#{this_asterisk ? "*" : ""})\t"+
               "length = #{seq.length}#{str};\tsequence = #{seq}"
        end
        if asterisk
          puts
          puts " * = possibly containing inexact repeat instance"
        end
        puts
        puts "After: sequence = #{after[0..49]}..."
      end
    end
  end

  private

  # Debug output: prints one tab-separated line per segment with its name,
  # cn, the summed cn of the neighbours at its B and at its E end, the
  # number of links at its B and at its E end, its KC value and its length.
  def output_segment_infos
    segment_names.each do |sn|
      s = segment(sn)
      puts "#{s.name}\t#{s.cn}\t"+
        "#{neighbours([s.name,:B]).map{|nb|segment(nb.segment).cn}.inject(:+)}\t"+
        "#{neighbours([s.name,:E]).map{|nb|segment(nb.segment).cn}.inject(:+)}\t"+
        "#{links_of([s.name,:B]).size}\t"+
        "#{links_of([s.name,:E]).size}\t"+
        "#{s.KC}\t#{s.length}"
    end
  end

  # Merges the segments of a path and returns the merged sequence without
  # its first and last +repeat.sequence.length+ characters.
  #
  # @param segpath [Array] path, as collected by #search_circle
  # @param repeat [Object] the repeat segment; only its sequence length is
  #   used
  # @param repeat_end [Symbol] end of the repeat from which the path was
  #   found; for :B the reverse complement is returned
  # @return [String] the trimmed sequence
  def merge_crisprs_path(segpath, repeat, repeat_end)
    merged = create_merged_segment(segpath, merged_name: :short,
                                   disable_tracking: true)[0]
    sequence = merged.sequence[repeat.
                sequence.length..-(1+repeat.sequence.length)]
    sequence = sequence.rc if repeat_end == :B
    return sequence
  end

  # Recursive depth-first search along the links of the graph.
  #
  # Follows link +l+ from segment end +from+. If the segment end reached is
  # +goal+, the path is stored in +circles+ (unless the distance travelled
  # is below +mindist+). If the distance exceeds +maxdist+ or there is no
  # link to continue with, the path is stored in +linear+. Whenever a path
  # is stored, the remaining visits of its segments are decremented.
  #
  # @param goal [Object] segment end which closes a circle
  # @param from [Object] segment end the link is followed from
  # @param l [Object] link to follow
  # @param maxvisits [Hash] remaining visits by segment name (modified)
  # @param dist [Integer] distance travelled so far
  # @param mindist [Integer] minimum distance for a circle to be stored
  # @param maxdist [Integer] distance beyond which the search stops
  # @param path [Array] segment ends visited so far (not modified)
  # @param circles [Array] collects the circular paths (modified)
  # @param linear [Array] collects the linear paths (modified)
  # @return [nil]
  def search_circle(goal, from, l, maxvisits, dist, mindist,
                    maxdist, path, circles, linear)
    dest = l.other_end(from)
    dest.segment = segment(dest.segment)
    maxvisits[dest.name] ||= dest.segment.cn
    se = dest.invert_end_type
    if dest == goal
      return if dist < mindist
      new_path = path.dup
      new_path << se
      # The closing element is the start segment again: count it once only.
      new_path[0..-2].each {|x| maxvisits[x.name] -= 1}
      circles << new_path
      return
    end
    return if maxvisits[dest.name] == 0
    # Do not pass through a segment which is already part of the path.
    return if path.any?{|x|x.name==dest.name}
    new_path = path.dup
    new_path << se
    dist += dest.segment.length - l.overlap.first.len
    if dist > maxdist
      new_path.each {|x| maxvisits[x.name] -= 1}
      linear << new_path
      return
    end
    ls = links_of(se)
    if ls.empty?
      new_path.each {|x| maxvisits[x.name] -= 1}
      linear << new_path
      return
    end
    ls.each do |next_l|
      next_dest = segment(next_l.other_end(se).segment)
      maxvisits[next_dest.name] ||= next_dest.cn
      next if maxvisits[next_dest.name] == 0
      search_circle(goal,se,next_l,maxvisits,dist,mindist,maxdist,new_path,
                    circles,linear)
    end
    return
  end

end
198
+
199
# Command line driver: expects the name of a GFA file as first argument.
abort "Usage: #{$0} <gfa>" if ARGV.empty?

gfa = RGFA.from_file(ARGV[0])
gfa.set_default_count_tag(:KC)
# If the header has no ks value, it is derived from the length of the first
# segment; the count unit length is then set to ks-1.
gfa.header.ks ||= gfa.segments[0].length + 1
gfa.set_count_unit_length(gfa.header.ks - 1)
gfa.find_crisprs
208
+