gfa 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf18e4a80f49fc9c0ae4574c5c7bbc3c97fd41eaf5c7aff9fdd7991b7d562ccf
4
- data.tar.gz: ac6999ffbbbeba45f9b81c6081ac16339ce4bb55502a77b116c77f91870af991
3
+ metadata.gz: 8069525ab289a4299c4d51df9a58a083df95292cd7bc59b83516ac4c5e7508f2
4
+ data.tar.gz: c1398a889d1f49431bfa01b196084ee9c448c60426fb179801151e7731538c03
5
5
  SHA512:
6
- metadata.gz: c8ae1b273166574d8520aa1f747ac0dbd08e8f31846754ca6c3fa7f0432d5f31583aa915fcebb04a220e90fe14aeeb4f06dec2a5c23527571c9bb337de0ef9a5
7
- data.tar.gz: f5fce0959cace1fe961d92fab2946bf8b67571ee6ceef3cd6e7047d8c46f0855b28e6b703ffbfe327ef8d09e1174b94ee98423ae36bb858bab76615849e0bd94
6
+ metadata.gz: d97f574a6081727f5687e259a954881351bf7767ec36d1f5ac8b2e6eef939a59a9406077619b306d9782e99357ec2cb98536b4e73474a9aad75c61614c7f7d13
7
+ data.tar.gz: 385a463ad705401ec083375b917d3654633f44e3828fcc93546422fb80712fc4bc22dbc2dd128dbdf1db44a8c60ed4d7a31df558db8786eb1fca12225a29b2ab
@@ -26,4 +26,11 @@ end
26
26
  $stderr.puts "Loading GFA: #{input}"
27
27
  gfa = GFA.load_parallel(input, (threads || 1).to_i)
28
28
 
29
+ $stderr.puts "Saving path sequences: #{output}"
30
+ File.open(output, 'w') do |fasta|
31
+ gfa.paths.set.each do |path|
32
+ fasta.puts '>%s' % path.path_name.value
33
+ fasta.puts path.sequence(gfa)
34
+ end
35
+ end
29
36
 
@@ -42,4 +42,59 @@ class GFA::Record::Path < GFA::Record
42
42
 
43
43
  segment_names_a.any? { |name| segment.name == name }
44
44
  end
45
+
46
+ ##
47
+ # Array of GFA::Field::String with the sequences from each segment featuring
48
+ # the correct orientation from a +gfa+ (which *must* be indexed)
49
+ #
50
+ # TODO: Distinguish between a direct path (separated by comma) and a
51
+ # jump (separated by semicolon). Jumps include a distance estimate
52
+ # (column 6, optional) which could be used to add Ns between segment
53
+ # sequences (from GFA 1.2)
54
def segment_sequences(gfa)
  # Returns an Array with one sequence field per step of this path, already
  # oriented: the segment's own +sequence+ for '+' steps and its +rc+ for
  # '-' steps. Steps are read from +segment_names+, separated by ',' or ';'.
  #
  # Raises RuntimeError when +gfa+ is not indexed, when a step does not end
  # in '+' or '-', or when +gfa.segments+ returns nothing for a step's name.
  raise "Unindexed GFA" unless gfa.indexed?

  segment_names.value.split(/[,;]/).map do |step|
    # The last character is the orientation sign and everything before it is
    # the segment name. Slicing (rather than assigning into the token) leaves
    # the token untouched and also copes with an empty token.
    orientation = step[-1]
    name = step[0...-1]

    unless %w[+ -].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    # Fail with a descriptive message instead of a NoMethodError on nil when
    # the path references a segment that the lookup does not return.
    segment = gfa.segments[name]
    raise "Unknown segment: #{name} (path: #{path_name})" if segment.nil?

    orientation == '+' ? segment.sequence : segment.rc
  end
end
68
+
69
+ ##
70
+ # Produce the contiguous path sequence based on the segment sequences and
71
+ # orientations from a +gfa+ (which *must* be indexed)
72
+ #
73
+ # TODO: Estimate gaps (Ns) from Jump distances (see +segment_sequences+)
74
+ #
75
+ # TODO: Attempt reading CIGAR values from the path first, the corresponding
76
+ # links next, and actually performing the pairwise overlap as last resort
77
+ #
78
+ # TODO: Support ambiguous IUPAC codes for overlap evaluation
79
def sequence(gfa)
  # Plain String values of the oriented segment sequences, in path order.
  pieces = segment_sequences(gfa).map(&:value)

  # Stitch left to right: each piece contributes only what +after_overlap+
  # reports as lying beyond the contig assembled so far.
  pieces.reduce('') do |contig, piece|
    contig + after_overlap(contig, piece)
  end
end
83
+
84
+ private
85
+ ##
86
+ # Find the overlap between sequences +a+ and +b+ (Strings) and return
87
+ # only the part of +b+ after the overlap. Assumes that +a+ starts
88
+ # at the same point or before +b+. If no overlap is found, returns +b+
89
+ # in its entirety.
90
def after_overlap(a, b)
  # Returns the part of +b+ (String) that extends beyond +a+ (String).
  # The leftmost position in +a+ at which +b+ matches wins, so a +b+ fully
  # contained in +a+ contributes nothing, and otherwise the longest suffix of
  # +a+ that is also a prefix of +b+ is dropped from +b+. Returns +b+ itself
  # when there is no overlap at all.

  # +b+ occurs completely inside +a+ (this also covers an empty +b+).
  return ''.dup if a.include?(b)

  # Otherwise only a proper prefix of +b+ can coincide with a suffix of +a+.
  # Trying the longest candidate first is the same as scanning +a+ from the
  # left for the first matching start position.
  longest = [a.length, b.length - 1].min
  longest.downto(1) do |len|
    return b[len, b.length - len] if a.end_with?(b[0, len])
  end

  b
end
45
100
  end
@@ -32,4 +32,12 @@ class GFA::Record::Segment < GFA::Record
32
32
  def length
33
33
  sequence.value.length
34
34
  end
35
+
36
+ ##
37
+ # Returns the reverse-complement of the sequence (as a Z field)
38
def rc
  # Upper-cased IUPAC nucleotide codes and, position by position, their
  # complements (U is complemented to A; S, W and N map to themselves).
  bases       = 'ACGTURYSWKMBDHVN'
  complements = 'TGCAAYRSWMKVHDBN'

  # Complementing is per character, so it commutes with reversing.
  revcomp = sequence.value.upcase.tr(bases, complements).reverse
  GFA::Field::String.new(revcomp)
end
35
43
  end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.6.1'
2
+ VERSION = '0.6.2'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-05 00:00:00.000000000 Z
11
+ date: 2023-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl