gfa 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/gfa-add-gaf +7 -5
- data/bin/gfa-paths-to-fasta +7 -0
- data/lib/gfa/graph.rb +1 -1
- data/lib/gfa/record/path.rb +55 -0
- data/lib/gfa/record/segment.rb +8 -0
- data/lib/gfa/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8069525ab289a4299c4d51df9a58a083df95292cd7bc59b83516ac4c5e7508f2
|
4
|
+
data.tar.gz: c1398a889d1f49431bfa01b196084ee9c448c60426fb179801151e7731538c03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d97f574a6081727f5687e259a954881351bf7767ec36d1f5ac8b2e6eef939a59a9406077619b306d9782e99357ec2cb98536b4e73474a9aad75c61614c7f7d13
|
7
|
+
data.tar.gz: 385a463ad705401ec083375b917d3654633f44e3828fcc93546422fb80712fc4bc22dbc2dd128dbdf1db44a8c60ed4d7a31df558db8786eb1fca12225a29b2ab
|
data/bin/gfa-add-gaf
CHANGED
@@ -18,11 +18,13 @@ unless degree
|
|
18
18
|
<input-gaf> Input GAF file to read
|
19
19
|
<output> Output GFA file to write
|
20
20
|
<degree> Maximum degree of separation between the segment set in the GAF
|
21
|
-
and any other
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
21
|
+
and any other segments included in the output GFA.
|
22
|
+
- -1: include the complete original GAF without subsetting.
|
23
|
+
- 0: only segments in the GAF are included.
|
24
|
+
- 1: only the target segments in the GAF, records linking to
|
25
|
+
them, and segments linked by those records are included.
|
26
|
+
- Any integer > 1: include additional expansion rounds for
|
27
|
+
those linked segments.
|
26
28
|
<pref> A prefix to name all recorded paths
|
27
29
|
By default: Based on the GAF file name
|
28
30
|
<threads> If passed, parallelize process with these many threads
|
data/bin/gfa-paths-to-fasta
CHANGED
@@ -26,4 +26,11 @@ end
|
|
26
26
|
$stderr.puts "Loading GFA: #{input}"
|
27
27
|
gfa = GFA.load_parallel(input, (threads || 1).to_i)
|
28
28
|
|
29
|
+
$stderr.puts "Saving path sequences: #{output}"
|
30
|
+
File.open(output, 'w') do |fasta|
|
31
|
+
gfa.paths.set.each do |path|
|
32
|
+
fasta.puts '>%s' % path.path_name.value
|
33
|
+
fasta.puts path.sequence(gfa)
|
34
|
+
end
|
35
|
+
end
|
29
36
|
|
data/lib/gfa/graph.rb
CHANGED
@@ -146,7 +146,7 @@ class GFA
|
|
146
146
|
|
147
147
|
def internally_linking_records(segments, edges)
|
148
148
|
$stderr.puts '- Gathering internally linking records'
|
149
|
-
segments = Hash[segments.
|
149
|
+
segments = Hash[segments.map { |i| [i.name.value, true]}]
|
150
150
|
edges.select { |record| record.segment_names_a.all? { |s| segments[s] } }
|
151
151
|
end
|
152
152
|
|
data/lib/gfa/record/path.rb
CHANGED
@@ -42,4 +42,59 @@ class GFA::Record::Path < GFA::Record
|
|
42
42
|
|
43
43
|
segment_names_a.any? { |name| segment.name == name }
|
44
44
|
end
|
45
|
+
|
46
|
+
##
|
47
|
+
# Array of GFA::Field::String with the sequences from each segment featuring
|
48
|
+
# the correct orientation from a +gfa+ (which *must* be indexed)
|
49
|
+
#
|
50
|
+
# TODO: Distinguish between a direct path (separated by comma) and a
|
51
|
+
# jump (separated by semicolon). Jumps include a distance estimate
|
52
|
+
# (column 6, optional) which could be used to add Ns between segment
|
53
|
+
# sequences (from GFA 1.2)
|
54
|
+
def segment_sequences(gfa)
|
55
|
+
raise "Unindexed GFA" unless gfa.indexed?
|
56
|
+
segment_names.value.split(/[,;]/).map do |i|
|
57
|
+
orientation = i[-1]
|
58
|
+
i[-1] = ''
|
59
|
+
segment = gfa.segments[i]
|
60
|
+
|
61
|
+
case orientation
|
62
|
+
when '+' ; segment.sequence
|
63
|
+
when '-' ; segment.rc
|
64
|
+
else ; raise "Unknown orientation: #{orientation} (path: #{path_name})"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Produce the contiguous path sequence based on the segment sequences and
|
71
|
+
# orientations from a +gfa+ (which *must* be indexed)
|
72
|
+
#
|
73
|
+
# TODO: Estimate gaps (Ns) from Jump distances (see +segment_sequences+)
|
74
|
+
#
|
75
|
+
# TODO: Attempt reading CIGAR values from the path first, the corresponding
|
76
|
+
# links next, and actually performing the pairwise overlap as last resort
|
77
|
+
#
|
78
|
+
# TODO: Support ambiguous IUPAC codes for overlap evaluation
|
79
|
+
def sequence(gfa)
|
80
|
+
segment_sequences(gfa).map(&:value)
|
81
|
+
.inject('') { |a, b| a + after_overlap(a, b) }
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
##
|
86
|
+
# Find the overlap between sequences +a+ and +b+ (Strings) and return
|
87
|
+
# only the part of +b+ after the overlap. Assumes that +a+ starts
|
88
|
+
# at the same point or before +b+. If no overlap is found, returns +b+
|
89
|
+
# in its entirety.
|
90
|
+
def after_overlap(a, b)
|
91
|
+
(0 .. a.length - 1).each do |a_from|
|
92
|
+
a_to = b.length + a_from > a.length ? a.length : b.length + a_from
|
93
|
+
b_to = b.length + a_from > a.length ? a.length - a_from : b.length
|
94
|
+
if a[a_from .. a_to - 1] == b[0 .. b_to - 1]
|
95
|
+
return b[b_to .. b.length].to_s
|
96
|
+
end
|
97
|
+
end
|
98
|
+
b
|
99
|
+
end
|
45
100
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -32,4 +32,12 @@ class GFA::Record::Segment < GFA::Record
|
|
32
32
|
def length
|
33
33
|
sequence.value.length
|
34
34
|
end
|
35
|
+
|
36
|
+
##
|
37
|
+
# Returns the reverse-complement of the sequence (as a Z field)
|
38
|
+
def rc
|
39
|
+
GFA::Field::String.new(
|
40
|
+
sequence.value.upcase.reverse.tr('ACGTURYSWKMBDHVN', 'TGCAAYRSWMKVHDBN')
|
41
|
+
)
|
42
|
+
end
|
35
43
|
end
|
data/lib/gfa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|