gfa 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf18e4a80f49fc9c0ae4574c5c7bbc3c97fd41eaf5c7aff9fdd7991b7d562ccf
4
- data.tar.gz: ac6999ffbbbeba45f9b81c6081ac16339ce4bb55502a77b116c77f91870af991
3
+ metadata.gz: 8069525ab289a4299c4d51df9a58a083df95292cd7bc59b83516ac4c5e7508f2
4
+ data.tar.gz: c1398a889d1f49431bfa01b196084ee9c448c60426fb179801151e7731538c03
5
5
  SHA512:
6
- metadata.gz: c8ae1b273166574d8520aa1f747ac0dbd08e8f31846754ca6c3fa7f0432d5f31583aa915fcebb04a220e90fe14aeeb4f06dec2a5c23527571c9bb337de0ef9a5
7
- data.tar.gz: f5fce0959cace1fe961d92fab2946bf8b67571ee6ceef3cd6e7047d8c46f0855b28e6b703ffbfe327ef8d09e1174b94ee98423ae36bb858bab76615849e0bd94
6
+ metadata.gz: d97f574a6081727f5687e259a954881351bf7767ec36d1f5ac8b2e6eef939a59a9406077619b306d9782e99357ec2cb98536b4e73474a9aad75c61614c7f7d13
7
+ data.tar.gz: 385a463ad705401ec083375b917d3654633f44e3828fcc93546422fb80712fc4bc22dbc2dd128dbdf1db44a8c60ed4d7a31df558db8786eb1fca12225a29b2ab
@@ -26,4 +26,11 @@ end
26
26
  $stderr.puts "Loading GFA: #{input}"
27
27
  gfa = GFA.load_parallel(input, (threads || 1).to_i)
28
28
 
29
+ $stderr.puts "Saving path sequences: #{output}"
30
+ File.open(output, 'w') do |fasta|
31
+ gfa.paths.set.each do |path|
32
+ fasta.puts '>%s' % path.path_name.value
33
+ fasta.puts path.sequence(gfa)
34
+ end
35
+ end
29
36
 
@@ -42,4 +42,59 @@ class GFA::Record::Path < GFA::Record
42
42
 
43
43
  segment_names_a.any? { |name| segment.name == name }
44
44
  end
45
+
46
##
# Array of GFA::Field::String with the sequences from each segment featuring
# the correct orientation from a +gfa+ (which *must* be indexed)
#
# Raises a RuntimeError if +gfa+ is not indexed, if a path element does not
# end in a valid orientation (+ or -), or if a segment named in the path
# cannot be found in +gfa+
#
# TODO: Distinguish between a direct path (separated by comma) and a
# jump (separated by semicolon). Jumps include a distance estimate
# (column 6, optional) which could be used to add Ns between segment
# sequences (from GFA 1.2)
def segment_sequences(gfa)
  raise "Unindexed GFA" unless gfa.indexed?

  segment_names.value.split(/[,;]/).map do |oriented_name|
    # The last character is the orientation, everything before it is the
    # segment name. Slicing (instead of editing in place) also keeps empty
    # elements from raising an IndexError
    orientation = oriented_name[-1]
    name = oriented_name[0...-1].to_s

    unless ['+', '-'].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    segment = gfa.segments[name]
    # Fail with an explicit message rather than a NoMethodError on nil
    raise "Unknown segment: #{name} (path: #{path_name})" if segment.nil?

    orientation == '+' ? segment.sequence : segment.rc
  end
end
68
+
69
##
# Produce the contiguous path sequence based on the segment sequences and
# orientations from a +gfa+ (which *must* be indexed)
#
# Segments are appended in path order; for each one, only the part that
# follows its overlap with the sequence built so far is added. Returns a
# new String (empty if the path has no segments)
#
# TODO: Estimate gaps (Ns) from Jump distances (see +segment_sequences+)
#
# TODO: Attempt reading CIGAR values from the path first, the corresponding
# links next, and actually performing the pairwise overlap as last resort
#
# TODO: Support ambiguous IUPAC codes for overlap evaluation
def sequence(gfa)
  # Append to a single buffer instead of building a new String with + on
  # every step, which would re-copy the whole accumulated sequence each time
  segment_sequences(gfa).each_with_object(''.dup) do |field, contig|
    contig << after_overlap(contig, field.value)
  end
end
83
+
84
+ private
85
##
# Find the overlap between the end of sequence +a+ and the start of
# sequence +b+ (Strings) and return only the part of +b+ after the overlap.
# The longest suffix of +a+ that is also a prefix of +b+ is used. If no
# overlap is found, returns +b+ in its entirety.
#
# Only suffix-prefix overlaps are considered: a copy of +b+ (or of its
# start) located in the interior of +a+ is not an overlap. Otherwise a
# segment that happens to occur earlier in an accumulated path sequence
# (e.g., a repeated segment) would be dropped from the result.
def after_overlap(a, b)
  longest = [a.length, b.length].min
  # Try the longest candidate first so the largest overlap wins
  longest.downto(1) do |len|
    return b[len..-1].to_s if a.end_with?(b[0, len])
  end
  b
end
45
100
  end
@@ -32,4 +32,12 @@ class GFA::Record::Segment < GFA::Record
32
32
  def length
33
33
  sequence.value.length
34
34
  end
35
+
36
##
# Reverse-complement of the segment sequence, returned as a new
# GFA::Field::String (Z field). The sequence is upcased before
# complementing; IUPAC ambiguity codes are complemented too, and U is
# complemented to A (like T)
def rc
  complemented = sequence.value.upcase.tr(
    'ACGTURYSWKMBDHVN', 'TGCAAYRSWMKVHDBN'
  )
  GFA::Field::String.new(complemented.reverse)
end
35
43
  end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.6.1'
2
+ VERSION = '0.6.2'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-05 00:00:00.000000000 Z
11
+ date: 2023-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl