rgfa 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/gfadiff.rb +420 -0
- data/bin/rgfa-findcrisprs.rb +208 -0
- data/bin/rgfa-mergelinear.rb +14 -0
- data/bin/rgfa-simdebruijn.rb +86 -0
- data/lib/rgfa.rb +376 -0
- data/lib/rgfa/byte_array.rb +74 -0
- data/lib/rgfa/cigar.rb +157 -0
- data/lib/rgfa/connectivity.rb +131 -0
- data/lib/rgfa/containments.rb +97 -0
- data/lib/rgfa/error.rb +3 -0
- data/lib/rgfa/field_array.rb +87 -0
- data/lib/rgfa/field_parser.rb +109 -0
- data/lib/rgfa/field_validator.rb +241 -0
- data/lib/rgfa/field_writer.rb +108 -0
- data/lib/rgfa/headers.rb +76 -0
- data/lib/rgfa/line.rb +721 -0
- data/lib/rgfa/line/containment.rb +87 -0
- data/lib/rgfa/line/header.rb +92 -0
- data/lib/rgfa/line/link.rb +379 -0
- data/lib/rgfa/line/path.rb +106 -0
- data/lib/rgfa/line/segment.rb +209 -0
- data/lib/rgfa/linear_paths.rb +285 -0
- data/lib/rgfa/lines.rb +155 -0
- data/lib/rgfa/links.rb +242 -0
- data/lib/rgfa/logger.rb +192 -0
- data/lib/rgfa/multiplication.rb +156 -0
- data/lib/rgfa/numeric_array.rb +196 -0
- data/lib/rgfa/paths.rb +98 -0
- data/lib/rgfa/rgl.rb +194 -0
- data/lib/rgfa/segment_ends_path.rb +9 -0
- data/lib/rgfa/segment_info.rb +162 -0
- data/lib/rgfa/segments.rb +99 -0
- data/lib/rgfa/sequence.rb +65 -0
- data/lib/rgfatools.rb +102 -0
- data/lib/rgfatools/artifacts.rb +29 -0
- data/lib/rgfatools/copy_number.rb +126 -0
- data/lib/rgfatools/invertible_segments.rb +104 -0
- data/lib/rgfatools/linear_paths.rb +140 -0
- data/lib/rgfatools/multiplication.rb +194 -0
- data/lib/rgfatools/p_bubbles.rb +66 -0
- data/lib/rgfatools/superfluous_links.rb +64 -0
- metadata +97 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
#
|
2
|
+
# Extensions of the String class to handle nucleotidic sequences
|
3
|
+
#
|
4
|
+
module RGFA::Sequence
|
5
|
+
|
6
|
+
# Computes the reverse complement of a nucleotidic sequence
|
7
|
+
#
|
8
|
+
# @return [String] reverse complement, without newlines and spaces
|
9
|
+
# @return [String] "*" if string is "*"
|
10
|
+
#
|
11
|
+
# @param tolerant [Boolean] <i>(defaults to: +false+)</i>
|
12
|
+
# if true, anything non-sequence is complemented to itself
|
13
|
+
# @param rnasequence [Boolean] <i>(defaults to: +false+)</i>
|
14
|
+
# if true, any A and a is complemented into U and u, respectively; otherwise
|
15
|
+
# this is done only if a U or u is found in the sequence; otherwise DNA is assumed
|
16
|
+
#
|
17
|
+
# @raise [RuntimeError] if not +tolerant+ and chars are found for which
|
18
|
+
# no Watson-Crick complement is defined
|
19
|
+
# @raise [RuntimeError] if sequence contains both U and T
|
20
|
+
#
|
21
|
+
# @example
|
22
|
+
# "ACTG".rc # => "CAGT"
|
23
|
+
# "acGT".rc # => "ACgt"
|
24
|
+
# @example Undefined sequence is represented by "*":
|
25
|
+
# "*".rc # => "*"
|
26
|
+
# @example Extended IUPAC Alphabet:
|
27
|
+
# "ARBN".rc # => "NVYT"
|
28
|
+
# @example Usage with RNA sequences:
|
29
|
+
# "ACUG".rc # => "CAGU"
|
30
|
+
# "ACG".rc(rnasequence: true) # => "CGU"
|
31
|
+
# "ACUT".rc # (raises RuntimeError, both U and T)
|
32
|
+
# Computes the reverse complement of the string, interpreted as a
# nucleotide sequence.
#
# The RNA/DNA decision and the U/T conflict check are done on the whole
# string before complementing, so that the result does not depend on the
# order in which U and T occur (e.g. both "ACUT" and "ACTU" raise).
#
# @param tolerant [Boolean] if true, characters without an entry in WCC
#   are complemented to themselves instead of raising
# @param rnasequence [Boolean] if true, the output uses U/u instead of T/t;
#   it is switched on automatically when the string contains U or u
# @return [String] the reverse complement; "*" if the string is "*"
# @raise [RuntimeError] if the sequence is handled as RNA and contains T/t
# @raise [RuntimeError] if not +tolerant+ and a character has no complement
def rc(tolerant: false, rnasequence: false)
  return "*" if self == "*"
  rnasequence ||= count("Uu") > 0
  if rnasequence && count("Tt") > 0
    raise "String contains both U/u and T/t"
  end
  retval = each_char.map do |c|
    wcc = WCC.fetch(c, tolerant ? c : nil)
    raise "#{self}: no Watson-Crick complement for #{c}" if wcc.nil?
    wcc
  end.reverse.join
  # WCC always yields T/t as complement of A/a; convert for RNA output.
  # tr! returns nil when nothing changed, so its result is not used.
  retval.tr!("tT", "uU") if rnasequence
  retval
end
|
49
|
+
|
50
|
+
# Watson-Crick Complements
|
51
|
+
# Lookup table mapping each supported character to its complement.
# Covers upper- and lowercase nucleotides and IUPAC ambiguity codes;
# U/u map to A/a; "-", "." and "=" map to themselves; spaces and newlines
# map to the empty string, so they are dropped from the output of #rc.
# Frozen, as it is a shared constant that is only ever read.
WCC = {
  "a"=>"t", "t"=>"a", "A"=>"T", "T"=>"A",
  "c"=>"g", "g"=>"c", "C"=>"G", "G"=>"C",
  "b"=>"v", "B"=>"V", "v"=>"b", "V"=>"B",
  "h"=>"d", "H"=>"D", "d"=>"h", "D"=>"H",
  "R"=>"Y", "Y"=>"R", "r"=>"y", "y"=>"r",
  "K"=>"M", "M"=>"K", "k"=>"m", "m"=>"k",
  "S"=>"S", "s"=>"s", "w"=>"w", "W"=>"W",
  "n"=>"n", "N"=>"N", "u"=>"a", "U"=>"A",
  "-"=>"-", "."=>".", "="=>"=",
  " "=>"", "\n"=>""
}.freeze
|
61
|
+
end
|
62
|
+
|
63
|
+
class String
|
64
|
+
include RGFA::Sequence
|
65
|
+
end
|
data/lib/rgfatools.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
require "rgfa"
|
2
|
+
|
3
|
+
#
|
4
|
+
# Module defining additional methods for the RGFA class.
|
5
|
+
#
|
6
|
+
# RGFATools is an extension to the RGFA library, which allows performing further
|
7
|
+
# operations. Thereby additional conventions are required, with respect to the
|
8
|
+
# GFA specification, which are compatible with it.
|
9
|
+
#
|
10
|
+
# The methods defined here allow, e.g., to randomly orient a segment which has
|
11
|
+
# the same connections on both sides, to compute copy numbers and multiply or
|
12
|
+
# delete segments according to them, to distribute the links of copies after
|
13
|
+
# multiplying a segment, or to eliminate edges in the graph which are
|
14
|
+
# incompatible with a Hamiltonian path.
|
15
|
+
#
|
16
|
+
# Custom optional fields are defined, such as "cn" for the copy number of a
|
17
|
+
# segment, "or" for the original segment(s) of a duplicated or merged segment,
|
18
|
+
# "mp" for the starting position of original segments in a merged segment, "rp"
|
19
|
+
# for the position of possible inversions due to arbitrary orientation of some
|
20
|
+
# segments by the program.
|
21
|
+
#
|
22
|
+
# Furthermore a convention for the naming of the segments is introduced, which
|
23
|
+
# gives a special meaning to the characters "_^()".
|
24
|
+
#
|
25
|
+
# @developer
|
26
|
+
# In the main file is only the method redefinition infrastructure
|
27
|
+
# (private methods). The public methods are in the included modules.
|
28
|
+
#
|
29
|
+
module RGFATools
|
30
|
+
end
|
31
|
+
|
32
|
+
require_relative "rgfatools/artifacts"
|
33
|
+
require_relative "rgfatools/copy_number"
|
34
|
+
require_relative "rgfatools/invertible_segments"
|
35
|
+
require_relative "rgfatools/multiplication"
|
36
|
+
require_relative "rgfatools/superfluous_links"
|
37
|
+
require_relative "rgfatools/linear_paths"
|
38
|
+
require_relative "rgfatools/p_bubbles"
|
39
|
+
|
40
|
+
module RGFATools
|
41
|
+
|
42
|
+
include RGFATools::Artifacts
|
43
|
+
include RGFATools::CopyNumber
|
44
|
+
include RGFATools::InvertibleSegments
|
45
|
+
include RGFATools::Multiplication
|
46
|
+
include RGFATools::SuperfluousLinks
|
47
|
+
include RGFATools::LinearPaths
|
48
|
+
include RGFATools::PBubbles
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# Hook called by Ruby when RGFATools is included into +klass+.
#
# For each module mixed into RGFATools:
# - if the module itself defines a +Redefined+ constant, the methods
#   listed there are redefined in +klass+ (see redefine_methods);
# - if the module itself defines a +ClassMethods+ constant, +klass+ is
#   extended with it.
#
# +const_defined?+ is called with +inherit = false+: with the default,
# a top-level constant of the same name would also be reported for a
# module, while the subsequent +included_module::Name+ lookup would not
# find it and raise a NameError.
#
# @param klass [Module] the class or module including RGFATools
# @return [Array<Module>] the list of modules which were inspected
def self.included(klass)
  included_modules.each do |included_module|
    if included_module.const_defined?(:Redefined, false)
      redefine_methods(included_module::Redefined, klass)
    end
    if included_module.const_defined?(:ClassMethods, false)
      klass.extend(included_module::ClassMethods)
    end
  end
end
|
62
|
+
|
63
|
+
# Replaces methods of +klass+ by their RGFATools variants.
#
# For each name +m+ in +redefined_methods+, the current implementation is
# kept as +m_without_rgfatools+ and +m+ becomes an alias of
# +m_with_rgfatools+ (which must already be defined in +klass+).
# If +m+ was private, all three names are made private afterwards.
#
# The operation is idempotent: a method which already has a
# +_without_rgfatools+ alias is skipped. Aliasing it a second time would
# point +m_without_rgfatools+ to the wrapper itself, so that the wrapper
# would call itself endlessly.
#
# @param redefined_methods [Array<Symbol>] names of the methods to redefine
# @param klass [Module] the class in which the methods are redefined
# @return [void]
def self.redefine_methods(redefined_methods, klass)
  klass.class_eval do
    redefined_methods.each do |redefined_method|
      original = :"#{redefined_method}_without_rgfatools"
      extended = :"#{redefined_method}_with_rgfatools"
      next if klass.method_defined?(original) ||
              klass.private_method_defined?(original)
      was_private = klass.private_instance_methods.include?(redefined_method)
      public redefined_method
      alias_method original, redefined_method
      alias_method redefined_method, extended
      private redefined_method, original, extended if was_private
    end
  end
end
|
78
|
+
|
79
|
+
# Program name which add_program_name_to_header stores in the +pn+ header field.
ProgramName = "RGFATools"
|
80
|
+
|
81
|
+
# Stores the RGFATools program name in the +pn+ field of the header,
# using set_header_field.
def add_program_name_to_header
  program_name = RGFATools::ProgramName
  set_header_field(:pn, program_name)
end
|
84
|
+
|
85
|
+
end
|
86
|
+
|
87
|
+
class RGFA
|
88
|
+
include RGFATools
|
89
|
+
|
90
|
+
# Enable {RGFATools} extensions of RGFA methods
|
91
|
+
# @return [void]
|
92
|
+
def enable_extensions
|
93
|
+
# This flag is checked by add_segment_to_merged_with_rgfatools to decide
# whether the extended behaviour shall be run.
@extensions_enabled = true
|
94
|
+
end
|
95
|
+
|
96
|
+
# Disable {RGFATools} extensions of RGFA methods
|
97
|
+
# @return [void]
|
98
|
+
def disable_extensions
|
99
|
+
# With the flag unset, add_segment_to_merged_with_rgfatools falls back to the
# original method, unless the :enable_tracking option is passed to it.
@extensions_enabled = false
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#
|
2
|
+
# Methods which edit the graph components without traversal
|
3
|
+
#
|
4
|
+
module RGFATools::Artifacts
|
5
|
+
|
6
|
+
# Remove connected components whose sum of lengths of the segments
|
7
|
+
# is under a specified value.
|
8
|
+
# @param minlen [Integer] the minimum length
|
9
|
+
# @return [RGFA] self
|
10
|
+
# Removes every connected component whose segments have a total length
# below +minlen+.
#
# @param minlen [Integer] the minimum length
# @return [RGFA] self
def remove_small_components(minlen)
  too_small = connected_components.select do |component|
    total_length = component.map { |name| segment(name).length }.reduce(:+)
    total_length < minlen
  end
  rm(too_small)
  self
end
|
15
|
+
|
16
|
+
# Remove end segments, whose sequence length is under a specified value.
|
17
|
+
# @param minlen [Integer] the minimum length
|
18
|
+
# @return [RGFA] self
|
19
|
+
# Removes the segments which are shorter than +minlen+, have no
# connections on at least one side and are not cut segments.
#
# @param minlen [Integer] the minimum length
# @return [RGFA] self
def remove_dead_ends(minlen)
  segments.each do |seg|
    conn = connectivity(seg)
    next unless seg.length < minlen
    next unless conn[0] == 0 || conn[1] == 0
    next if cut_segment?(seg)
    rm(seg)
  end
  self
end
|
28
|
+
|
29
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#
|
2
|
+
# Methods which compute copy numbers of segments from their coverage and apply them
|
3
|
+
#
|
4
|
+
module RGFATools::CopyNumber
|
5
|
+
|
6
|
+
# Sets the count tag to use as default by coverage computations
|
7
|
+
# <i>(defaults to: +:RC+)</i>.
|
8
|
+
#
|
9
|
+
# @return [RGFA] self
|
10
|
+
# @param tag [Symbol] the tag to use
|
11
|
+
# Stores +tag+ as the default count tag (+@default[:count_tag]+).
#
# @param tag [Symbol] the tag to use
# @return [RGFA] self
def set_default_count_tag(tag)
  @default[:count_tag] = tag
  self
end
|
15
|
+
|
16
|
+
# Sets the unit length (k-mer size, average read length or average fragment
|
17
|
+
# length) to use for coverage computation
|
18
|
+
# <i>(defaults to: 1)</i>.
|
19
|
+
#
|
20
|
+
# @param unit_length [Integer] the unit length to use
|
21
|
+
# @return [RGFA] self
|
22
|
+
# Stores +unit_length+ as the default unit length
# (+@default[:unit_length]+).
#
# @param unit_length [Integer] the unit length to use
# @return [RGFA] self
def set_count_unit_length(unit_length)
  @default[:unit_length] = unit_length
  self
end
|
26
|
+
|
27
|
+
# Delete segments which have a coverage under a specified value.
|
28
|
+
#
|
29
|
+
# @param mincov [Integer] the minimum coverage
|
30
|
+
# @!macro [new] count_tag
|
31
|
+
# @param count_tag [Symbol] <i>(defaults to: +:RC+ or the value set by
|
32
|
+
# {#set_default_count_tag})</i> the count tag to use for coverage
|
33
|
+
# computation
|
34
|
+
# @!macro [new] unit_length
|
35
|
+
# @param unit_length [Integer] <i>(defaults to: 1 or the value set by
|
36
|
+
# {#set_count_unit_length})</i> the unit length to use for coverage
|
37
|
+
# computation
|
38
|
+
#
|
39
|
+
# @return [RGFA] self
|
40
|
+
# Deletes all segments whose coverage is lower than +mincov+.
# The segments to delete are collected first; the deletion is done
# afterwards, by segment name.
#
# @param mincov [Integer] the minimum coverage
# @param count_tag [Symbol] the count tag to use for coverage computation
# @param unit_length [Integer] the unit length to use for coverage
#   computation
# @return [RGFA] self
def delete_low_coverage_segments(mincov,
                                 count_tag: @default[:count_tag],
                                 unit_length: @default[:unit_length])
  low_coverage = segments.select do |seg|
    seg.coverage(count_tag: count_tag, unit_length: unit_length) < mincov
  end
  low_coverage.map(&:name).compact.each { |name| delete_segment(name) }
  self
end
|
52
|
+
|
53
|
+
# @param mincov [Integer] <i>(defaults to: 1/4 of +single_copy_coverage+)</i>
|
54
|
+
# the minimum coverage, cn for segments under this value is set to 0
|
55
|
+
# @param single_copy_coverage [Integer]
|
56
|
+
# the coverage that shall be considered to be single copy
|
57
|
+
# @param cn_tag [Symbol] <i>(defaults to: +:cn+)</i>
|
58
|
+
# the tag to use for storing the copy number
|
59
|
+
# @!macro count_tag
|
60
|
+
# @!macro unit_length
|
61
|
+
# @return [RGFA] self
|
62
|
+
# Computes a copy number for each segment and stores it in +cn_tag+.
#
# The copy number is 0 if the coverage is under +mincov+, 1 if it is
# under +single_copy_coverage+, otherwise the rounded ratio of coverage
# and +single_copy_coverage+.
#
# @param single_copy_coverage [Integer] coverage considered single copy
# @param mincov [Numeric] the minimum coverage
# @param count_tag [Symbol] the count tag to use for coverage computation
# @param cn_tag [Symbol] the tag in which the copy number is stored
# @param unit_length [Integer] the unit length to use for coverage
#   computation
# @return [RGFA] self
def compute_copy_numbers(single_copy_coverage,
                         mincov: single_copy_coverage * 0.25,
                         count_tag: @default[:count_tag],
                         cn_tag: :cn,
                         unit_length: @default[:unit_length])
  segments.each do |seg|
    coverage = seg.coverage!(count_tag: count_tag,
                             unit_length: unit_length).to_f
    copies =
      if coverage < mincov
        0
      elsif coverage < single_copy_coverage
        1
      else
        (coverage / single_copy_coverage).round
      end
    seg.set(cn_tag, copies)
  end
  self
end
|
80
|
+
|
81
|
+
# Applies the computed copy number to a segment
|
82
|
+
# @!macro [new] apply_copy_number
|
83
|
+
# @return [RGFA] self
|
84
|
+
# @param [:lowcase, :upcase, :number, :copy] copy_names_suffix
|
85
|
+
# <i>(Defaults to: +:lowcase+)</i>
|
86
|
+
# Symbol representing a system to compute the names from the name of
|
87
|
+
# the original segment. See "Automatic computation of the copy names"
|
88
|
+
# in #multiply_extended.
|
89
|
+
# @param count_tag [Symbol] tag to use for storing the copy number
|
90
|
+
# (default: cn)
|
91
|
+
# @param distribute [Symbol] distribution policy, see #multiply_extended
|
92
|
+
# @param origin_tag [Symbol] tag to use for storing the origin (default: or)
|
93
|
+
# @param conserve_components [Boolean] when factor is 0,
|
94
|
+
# do not remove segments if doing so increases the number of components
|
95
|
+
# in the graph (default: true)
|
96
|
+
# @param segment [RGFA::Line::Segment, Symbol] segment or segment name
|
97
|
+
# Multiplies a segment by the factor stored in its +count_tag+ tag,
# using multiply_extended.
#
# @param segment [RGFA::Line::Segment, Symbol] segment or segment name
# @param count_tag [Symbol] the tag from which the copy number is read
# @param distribute [Symbol] passed to multiply_extended
# @param copy_names_suffix [Symbol] passed to multiply_extended as
#   +copy_names+
# @param origin_tag [Symbol] passed to multiply_extended
# @param conserve_components [Boolean] passed to multiply_extended
# @return [RGFA] self
def apply_copy_number(segment, count_tag: :cn,
                      distribute: :auto,
                      copy_names_suffix: :lowcase, origin_tag: :or,
                      conserve_components: true)
  seg, seg_name = segment_and_segment_name(segment)
  multiply_extended(seg_name, seg.get!(count_tag),
                    distribute: distribute,
                    copy_names: copy_names_suffix,
                    conserve_components: conserve_components,
                    origin_tag: origin_tag)
  self
end
|
110
|
+
|
111
|
+
# Applies the computed copy number to all segments
|
112
|
+
# @!macro apply_copy_number
|
113
|
+
# Multiplies every segment by the factor stored in its +count_tag+ tag,
# using multiply_extended. The segments are processed in ascending order
# of copy number.
#
# @param count_tag [Symbol] the tag from which the copy number is read
# @param distribute [Symbol] passed to multiply_extended
# @param copy_names_suffix [Symbol] passed to multiply_extended as
#   +copy_names+
# @param origin_tag [Symbol] passed to multiply_extended
# @param conserve_components [Boolean] passed to multiply_extended
# @return [RGFA] self
def apply_copy_numbers(count_tag: :cn, distribute: :auto,
                       copy_names_suffix: :lowcase, origin_tag: :or,
                       conserve_components: true)
  by_copy_number = segments.sort_by { |seg| seg.get!(count_tag) }
  by_copy_number.each do |seg|
    multiply_extended(seg.name, seg.get(count_tag),
                      distribute: distribute,
                      copy_names: copy_names_suffix,
                      conserve_components: conserve_components,
                      origin_tag: origin_tag)
  end
  self
end
|
125
|
+
|
126
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#
|
2
|
+
# Methods which select an orientation for segments with the same links at both ends
|
3
|
+
#
|
4
|
+
module RGFATools::InvertibleSegments
|
5
|
+
|
6
|
+
# Selects a random orientation for all invertible segments
|
7
|
+
# @return [RGFA] self
|
8
|
+
# Orients each segment which has the same links on both ends.
#
# @return [RGFA] self
def randomly_orient_invertibles
  segment_names.each do |name|
    next unless segment_same_links_both_ends?(name)
    randomly_orient_proven_invertible_segment(name)
  end
  self
end
|
16
|
+
|
17
|
+
# Selects a random orientation for an invertible segment
|
18
|
+
# @return [RGFA] self
|
19
|
+
# @!macro segment_param
|
20
|
+
# Orients a single segment, which must have the same links on both ends.
#
# @param segment [RGFA::Line, Symbol] the segment or its name
# @return [RGFA] self
# @raise [RuntimeError] if the links at the two ends differ
def randomly_orient_invertible(segment)
  segment_name =
    if segment.kind_of?(RGFA::Line)
      segment.name
    else
      segment
    end
  unless segment_same_links_both_ends?(segment_name)
    raise "Only segments with links to the same or equivalent segments " \
          "at both ends can be randomly oriented"
  end
  randomly_orient_proven_invertible_segment(segment_name)
  self
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
# Chooses one link to keep on each end of the segment and deletes the
# other links, then annotates the segment as oriented.
#
# Two links of the E end are selected: the first link of each group, if
# partitioned_links_of returns two groups; both links, if it returns a
# single group of two links. Nothing is done in any other case, or if one
# of the two target segment ends has fewer than two links.
#
# @param segment_name [Symbol] name of the segment
# @return [void]
def randomly_orient_proven_invertible_segment(segment_name)
  groups = partitioned_links_of([segment_name, :E])
  kept =
    if groups.size == 2
      [groups[0][0], groups[1][0]]
    elsif groups.size == 1 && groups[0].size == 2
      [groups[0][0], groups[0][1]]
    end
  return unless kept
  tokeep1_other_end = kept[0].other_end([segment_name, :E])
  tokeep2_other_end = kept[1].other_end([segment_name, :E])
  return if links_of(tokeep1_other_end).size < 2
  return if links_of(tokeep2_other_end).size < 2
  delete_other_links([segment_name, :E], tokeep1_other_end)
  delete_other_links([segment_name, :B], tokeep2_other_end)
  annotate_random_orientation(segment_name)
end
|
49
|
+
|
50
|
+
# Returns, for each link of +segment_end+, the other end of the link,
# joined into a single string, so that targets can be compared.
#
# @param segment_end [Array] the segment end
# @return [Array<String>]
def link_targets_for_cmp(segment_end)
  targets = []
  links_of(segment_end).each do |link|
    targets << link.other_end(segment_end).join
  end
  targets
end
|
53
|
+
|
54
|
+
# Checks if the link targets of the E end and of the B end of a segment
# are equal.
#
# NOTE(review): the two lists are compared as arrays, so the result also
# depends on the order in which links_of returns the links -- confirm
# that this is intended.
#
# @param segment_name [Symbol] name of the segment
# @return [Boolean]
def segment_same_links_both_ends?(segment_name)
  link_targets_for_cmp([segment_name, :E]) ==
    link_targets_for_cmp([segment_name, :B])
end
|
59
|
+
|
60
|
+
# Computes a string signature for a segment end, used for grouping
# equivalent link targets. It consists of three tab-separated parts:
# the link targets of the segment end, the link targets of the opposite
# end of the same segment, and the value of the +or+ tag of the segment.
#
# @param segment_end [Array] the segment end
# @return [String]
def segment_signature(segment_end)
  seg = segment!(segment_end[0])
  same_side = link_targets_for_cmp(segment_end).join(",")
  other_side = link_targets_for_cmp(segment_end.invert_end_type).join(",")
  tags = [:or].map { |field| seg.send(field) }.join("\t")
  "#{same_side}\t#{other_side}\t#{tags}"
end
|
68
|
+
|
69
|
+
# Groups the links of +segment_end+ by the signature of the segment end
# at the other side of each link (see segment_signature).
#
# @param segment_end [Array] the segment end
# @return [Array<Array>] the groups of links, in order of first appearance
def partitioned_links_of(segment_end)
  by_signature = links_of(segment_end).group_by do |link|
    segment_signature(link.other_end(segment_end))
  end
  by_signature.values
end
|
76
|
+
|
77
|
+
# Annotates a segment after an orientation has been selected for it:
# the positions of the oriented region are appended to the +rn+ tag and
# the region is marked with round parentheses in the segment name.
#
# If the +or+ tag lists more than two origins and starts/ends with pairs
# of the form +x+ ... +x^+ (or +x^+ ... +x+), these outer pairs are
# skipped, so that the parentheses and the positions (taken from +mp+, if
# available) refer to the inner region only.
#
# @param segment_name [Symbol] name of the segment
# @return [void]
def annotate_random_orientation(segment_name)
  segment = segment!(segment_name)
  n = segment.name.to_s.split("_")
  pairs = 0
  pos = [1, segment.LN]
  if segment.or
    o = segment.or.to_s.split(",")
    # The size check is part of the loop condition: when all origins pair
    # up (e.g. "a,b,b^,a^"), checking the size only once before the loop
    # would empty the list and then call + on nil. At least the innermost
    # pair is kept as the oriented region.
    while o.size > 2 and
          (o.last == o.first + "^" or o.last + "^" == o.first)
      pairs += 1
      o.pop
      o.shift
    end
    if segment.mp
      pos = [segment.mp[pairs*2], segment.mp[-1-pairs*2]]
    end
  end
  rn = segment.rn
  rn ||= []
  rn += pos
  segment.rn = rn
  n[pairs] = "(" + n[pairs]
  n[-1-pairs] = n[-1-pairs] + ")"
  rename(segment.name, n.join("_"))
end
|
103
|
+
|
104
|
+
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
#
|
2
|
+
# Methods for the RGFA class, which involve a traversal of the graph following
|
3
|
+
# links
|
4
|
+
#
|
5
|
+
module RGFATools::LinearPaths
|
6
|
+
|
7
|
+
# @!method merge_linear_path(segpath, **options)
|
8
|
+
# Merge a linear path, i.e. a path of segments without extra-branches.
|
9
|
+
# @!macro [new] merge_more
|
10
|
+
# Extends the RGFA method, with additional functionality:
|
11
|
+
# - +name+: the name of the merged segment is set to the name of the
|
12
|
+
# single segments joined by underscore (+_+). If a name already
|
13
|
+
# contained an underscore, it is split before merging. Whenever a
|
14
|
+
# segment is reversed complemented, its name (or the name of all its
|
15
|
+
# components) is suffixed with a +^+; if the last letter was already
|
16
|
+
# +^+, it is removed; if it contained +_+ the name is split, the
|
17
|
+
# elements reversed and joined back using +_+; round parentheses are
|
18
|
+
# removed from the name before processing and added back after it.
|
19
|
+
# - +:or+: keeps track of the origin of the merged segment; the
|
20
|
+
# origin tag is set to an array of :or or name
|
21
|
+
# (if no :or available) tags of the segment which have been merged;
|
22
|
+
# the character +^+ is assigned the same meaning as in +name+
|
23
|
+
# - +:rn+: tag used to store possible inversion positions and
|
24
|
+
# it is updated by this method; i.e. it is passed from the single
|
25
|
+
# segments to the merged segment, and the coordinates updated
|
26
|
+
# - +:mp+: tag used to store the position of the
|
27
|
+
# single segments in the merged segment; it is created or updated by
|
28
|
+
# this method
|
29
|
+
# Note that the extensions to the original method will only be run
|
30
|
+
# if either #enable_extensions has been called on RGFA object
|
31
|
+
# or the enable_tracking parameter is set.
|
32
|
+
# After calling #enable_extensions, you may still obtain the original
|
33
|
+
# behaviour by setting the disable_tracking parameter.
|
34
|
+
# @!macro merge_more
|
35
|
+
#
|
36
|
+
# @!macro [new] merge_lim
|
37
|
+
#
|
38
|
+
# Limitations: all containments and paths involving merged segments are
|
39
|
+
# deleted.
|
40
|
+
# @!macro merge_lim
|
41
|
+
#
|
42
|
+
# @param segpath [Array<RGFA::SegmentEnd>] a linear path, such as that
|
43
|
+
# retrieved by #linear_path (see RGFA API documentation)
|
44
|
+
# @!macro [new] merge_options
|
45
|
+
# @param options [Hash] optional keyword arguments
|
46
|
+
# @option options [String, :short, nil] :merged_name (nil)
|
47
|
+
# if nil, the merged_name is automatically computed; if :short,
|
48
|
+
# a name is computed starting with "merged1" and calling next until
|
49
|
+
# an available name is found; if String, the name to use
|
50
|
+
# @option options [Boolean] :cut_counts (false)
|
51
|
+
# if true, total count in merged segment m, composed of segments
|
52
|
+
# s of set S is multiplied by the factor Sum(|s in S|)/|m|
|
53
|
+
# @option options [Boolean] :enable_tracking (false)
|
54
|
+
# if true, the extended method with RGFATools is called,
|
55
|
+
# no matter if RGFA#enable_extensions was called.
|
56
|
+
# @option options [Boolean] :disable_tracking (false)
|
57
|
+
# if true, the original method of RGFA without RGFATools is called,
|
58
|
+
# no matter if RGFA#enable_extensions was called.
|
59
|
+
# @!macro merge_options
|
60
|
+
#
|
61
|
+
# @return [RGFA] self
|
62
|
+
# @see #merge_linear_paths
|
63
|
+
|
64
|
+
# @!method merge_linear_paths(**options)
|
65
|
+
# Merge all linear paths in the graph, i.e.
|
66
|
+
# paths of segments without extra-branches
|
67
|
+
# @!macro merge_more
|
68
|
+
# @!macro merge_lim
|
69
|
+
# @!macro merge_options
|
70
|
+
#
|
71
|
+
# @return [RGFA] self
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
# Names of the methods which are replaced by their +_with_rgfatools+ variant;
# RGFATools.included passes this list to RGFATools.redefine_methods.
Redefined = [:add_segment_to_merged]
|
76
|
+
|
77
|
+
# Computes the name of the reverse complement of a (possibly composite)
# segment name.
#
# The name is split at +separator+; for each part a trailing +^+ is
# toggled (removed if present, appended otherwise); an opening parenthesis
# at the start of a part becomes a closing one at its end and vice versa;
# finally the order of the parts is reversed.
#
# @param name [#to_s] the name
# @param separator [String] the separator between the parts of the name
# @return [String] the reversed name
def reverse_segment_name(name, separator)
  flipped = name.to_s.split(separator).map do |part|
    core = part
    had_open = core.start_with?("(")
    core = core[1..-1] if had_open
    had_close = core.end_with?(")")
    core = core[0..-2] if had_close
    core = core.end_with?("^") ? core.chomp("^") : "#{core}^"
    "#{'(' if had_close}#{core}#{')' if had_open}"
  end
  flipped.reverse.join(separator)
end
|
89
|
+
|
90
|
+
# Converts 1-based positions into the coordinates of the reverse strand
# (+lastpos - pos + 1+) and returns them in reversed order.
#
# @param pos_array [Array<Integer>, nil] the positions
# @param lastpos [Integer, nil] the last position (sequence length)
# @return [Array<Integer>, nil] nil if any of the arguments is nil
def reverse_pos_array(pos_array, lastpos)
  if pos_array.nil? || lastpos.nil?
    nil
  else
    pos_array.reverse.map { |pos| lastpos - pos + 1 }
  end
end
|
94
|
+
|
95
|
+
# RGFATools variant of add_segment_to_merged: adds +segment+ to the merged
# segment under construction and keeps track of name, origin (+or+),
# inversion positions (+rn+) and positions of the single segments (+mp+).
#
# The original method is called instead, unless the +:enable_tracking+
# option is set, if the +:disable_tracking+ option is set or the
# extensions have not been enabled.
#
# Placeholder sequences ("*") are sticky: if the sequence of +segment+ or
# the sequence merged so far is "*", the merged sequence is "*". Bases are
# never appended to a placeholder and a placeholder is never sliced by
# +cut+.
#
# @param merged [RGFA::Line::Segment] the merged segment (modified)
# @param segment [RGFA::Line::Segment] the segment to add
# @param reversed [Boolean] if true, the reverse complement of +segment+
#   (sequence, name, origin and positions) is added
# @param cut [Integer] number of characters to skip at the beginning of
#   the sequence of +segment+; positions are shifted accordingly
# @param init [Boolean] if true, +merged+ is initialized from +segment+;
#   otherwise +segment+ is appended to +merged+
# @param options [Hash] the options +:enable_tracking+,
#   +:disable_tracking+ and +:merged_name+ are read
# @return [void]
def add_segment_to_merged_with_rgfatools(merged, segment, reversed, cut, init,
                                         options)
  if !options[:enable_tracking] and
     (options[:disable_tracking] or !@extensions_enabled)
    return add_segment_to_merged_without_rgfatools(merged, segment, reversed,
                                                   cut, init, options)
  end
  placeholder = (segment.sequence == "*")
  if placeholder
    s = "*"
  else
    s = (reversed ? segment.sequence.rc[cut..-1] : segment.sequence[cut..-1])
  end
  n = (reversed ? reverse_segment_name(segment.name, "_") : segment.name.to_s)
  rn = (reversed ? reverse_pos_array(segment.rn, segment.LN) : segment.rn)
  mp = (reversed ? reverse_pos_array(segment.mp, segment.LN) : segment.mp)
  mp = [1, segment.LN] if mp.nil? and segment.LN
  if segment.or.nil?
    o = n
  else
    o = (reversed ? reverse_segment_name(segment.or, ",") : segment.or)
  end
  if init
    merged.sequence = s
    merged.name = options[:merged_name].nil? ? n : options[:merged_name]
    merged.LN = segment.LN
    merged.rn = rn
    merged.or = o
    merged.mp = mp
  else
    if placeholder or merged.sequence == "*"
      merged.sequence = "*"
    else
      merged.sequence += s
    end
    merged.name = "#{merged.name}_#{n}" if options[:merged_name].nil?
    if merged.LN
      # positions of the added segment are shifted by the length merged so
      # far, minus the characters which were cut away
      if rn
        rn = rn.map {|pos| pos - cut + merged.LN}
        merged.rn = merged.rn.nil? ? rn : merged.rn + rn
      end
      if mp and merged.mp
        merged.mp += mp.map {|pos| pos - cut + merged.LN}
      end
      if segment.LN
        merged.LN += (segment.LN - cut)
      else
        merged.LN = nil
      end
    else
      # without a known length the positions cannot be computed anymore
      merged.mp = nil
    end
    merged.or = merged.or.nil? ? o : "#{merged.or},#{o}"
  end
end
|
139
|
+
|
140
|
+
end
|