gfa 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/gfa-paths-to-fasta +7 -0
- data/lib/gfa/record/path.rb +55 -0
- data/lib/gfa/record/segment.rb +8 -0
- data/lib/gfa/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8069525ab289a4299c4d51df9a58a083df95292cd7bc59b83516ac4c5e7508f2
|
4
|
+
data.tar.gz: c1398a889d1f49431bfa01b196084ee9c448c60426fb179801151e7731538c03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d97f574a6081727f5687e259a954881351bf7767ec36d1f5ac8b2e6eef939a59a9406077619b306d9782e99357ec2cb98536b4e73474a9aad75c61614c7f7d13
|
7
|
+
data.tar.gz: 385a463ad705401ec083375b917d3654633f44e3828fcc93546422fb80712fc4bc22dbc2dd128dbdf1db44a8c60ed4d7a31df558db8786eb1fca12225a29b2ab
|
data/bin/gfa-paths-to-fasta
CHANGED
@@ -26,4 +26,11 @@ end
|
|
26
26
|
$stderr.puts "Loading GFA: #{input}"
|
27
27
|
gfa = GFA.load_parallel(input, (threads || 1).to_i)
|
28
28
|
|
29
|
+
# Write one FASTA record per path: a header line with the path name followed
# by the path sequence as reconstructed by GFA::Record::Path#sequence.
$stderr.puts "Saving path sequences: #{output}"
File.open(output, 'w') do |fasta|
  gfa.paths.set.each do |path|
    header = format('>%s', path.path_name.value)
    # The header is written before the sequence is computed, so the two
    # writes are kept as separate calls.
    fasta.puts header
    fasta.puts path.sequence(gfa)
  end
end
|
29
36
|
|
data/lib/gfa/record/path.rb
CHANGED
@@ -42,4 +42,59 @@ class GFA::Record::Path < GFA::Record
|
|
42
42
|
|
43
43
|
segment_names_a.any? { |name| segment.name == name }
|
44
44
|
end
|
45
|
+
|
46
|
+
##
|
47
|
+
# Array of GFA::Field::String with the sequences from each segment featuring
|
48
|
+
# the correct orientation from a +gfa+ (which *must* be indexed)
|
49
|
+
#
|
50
|
+
# TODO: Distinguish between a direct path (separated by comma) and a
|
51
|
+
# jump (separated by semicolon). Jumps include a distance estimate
|
52
|
+
# (column 6, optional) which could be used to add Ns between segment
|
53
|
+
# sequences (from GFA 1.2)
|
54
|
+
def segment_sequences(gfa)
  # Returns one sequence per segment listed in this path, in path order:
  # the segment's +sequence+ for '+' entries, and its +rc+ for '-' entries.
  #
  # Raises RuntimeError if +gfa+ is not indexed, if an entry does not end in
  # '+' or '-', or if an entry names a segment that +gfa+ does not return.
  raise "Unindexed GFA" unless gfa.indexed?

  # Entries are separated by ',' or ';' and each one is the segment name
  # followed by a single orientation character.
  segment_names.value.split(/[,;]/).map do |entry|
    orientation = entry[-1]
    name = entry[0...-1]

    unless ['+', '-'].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    segment = gfa.segments[name]
    # Fail with a descriptive message instead of a NoMethodError on nil
    # when the path references a segment that is not in the graph.
    raise "Unknown segment: #{name} (path: #{path_name})" if segment.nil?

    orientation == '+' ? segment.sequence : segment.rc
  end
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# Produce the contiguous path sequence based on the segment sequences and
|
71
|
+
# orientations from a +gfa+ (which *must* be indexed)
|
72
|
+
#
|
73
|
+
# TODO: Estimate gaps (Ns) from Jump distances (see +segment_sequences+)
|
74
|
+
#
|
75
|
+
# TODO: Attempt reading CIGAR values from the path first, the corresponding
|
76
|
+
# links next, and actually performing the pairwise overlap as last resort
|
77
|
+
#
|
78
|
+
# TODO: Support ambiguous IUPAC codes for overlap evaluation
|
79
|
+
def sequence(gfa)
  # Concatenates the oriented segment sequences of this path into a single
  # String, skipping at each junction the part of the next segment that
  # +after_overlap+ reports as already present.
  #
  # Appends into one mutable buffer rather than building a new String per
  # segment, so the accumulated sequence is not copied at every step.
  segment_sequences(gfa).each_with_object(''.dup) do |field, assembled|
    assembled << after_overlap(assembled, field.value)
  end
end
|
83
|
+
|
84
|
+
private
|
85
|
+
##
|
86
|
+
# Find the overlap between sequences +a+ and +b+ (Strings) and return
|
87
|
+
# only the part of +b+ after the overlap. Assumes that +a+ starts
|
88
|
+
# at the same point or before +b+. If no overlap is found, returns +b+
|
89
|
+
# in its entirety.
|
90
|
+
def after_overlap(a, b)
  # Returns the part of +b+ that extends past its overlap with the end of
  # +a+ (both Strings), or +b+ itself when no overlap is found. An empty
  # String is returned when all of +b+ is a suffix of +a+.
  #
  # Only matches between a suffix of +a+ and a prefix of +b+ count as an
  # overlap. An occurrence of +b+ elsewhere inside +a+ (e.g. a repeated
  # segment visited earlier in the path) is not an overlap and must not
  # cause +b+ to be dropped.
  return b if a.empty? || b.empty?

  # Longest candidate first, so the largest overlap wins.
  [a.length, b.length].min.downto(1) do |len|
    return b[len..-1].to_s if a.end_with?(b[0, len])
  end
  b
end
|
45
100
|
end
|
data/lib/gfa/record/segment.rb
CHANGED
@@ -32,4 +32,12 @@ class GFA::Record::Segment < GFA::Record
|
|
32
32
|
def length
|
33
33
|
sequence.value.length
|
34
34
|
end
|
35
|
+
|
36
|
+
##
|
37
|
+
# Returns the reverse-complement of the sequence (as a Z field)
|
38
|
+
def rc
  # Builds the reverse-complement of this segment's sequence and wraps it
  # in a new GFA::Field::String.
  #
  # The sequence is uppercased first because the translation table only
  # lists uppercase codes; U is complemented to A, like T.
  bases = sequence.value.upcase
  complemented = bases.tr('ACGTURYSWKMBDHVN', 'TGCAAYRSWMKVHDBN')
  GFA::Field::String.new(complemented.reverse)
end
|
35
43
|
end
|
data/lib/gfa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|