gfa 0.6.1 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf18e4a80f49fc9c0ae4574c5c7bbc3c97fd41eaf5c7aff9fdd7991b7d562ccf
4
- data.tar.gz: ac6999ffbbbeba45f9b81c6081ac16339ce4bb55502a77b116c77f91870af991
3
+ metadata.gz: 6e858670d104b7602c27ef71b2942f355027eb3e34dfc9c9aa5d9aa6a3a8ebbc
4
+ data.tar.gz: 4a905a283972c8d0704c90feb81a90becc73af7dffb328cf108145c0e0103023
5
5
  SHA512:
6
- metadata.gz: c8ae1b273166574d8520aa1f747ac0dbd08e8f31846754ca6c3fa7f0432d5f31583aa915fcebb04a220e90fe14aeeb4f06dec2a5c23527571c9bb337de0ef9a5
7
- data.tar.gz: f5fce0959cace1fe961d92fab2946bf8b67571ee6ceef3cd6e7047d8c46f0855b28e6b703ffbfe327ef8d09e1174b94ee98423ae36bb858bab76615849e0bd94
6
+ metadata.gz: cda7d12b5f2c6bd18e991d8591a2788d85a2dd29c99bfcc0e0d20c40978199e81099875453d9f34a188e4f723177acc09c2990d25062ef7082556a96f97b56ac
7
+ data.tar.gz: 18e5ff044ffd143e95fbc8525756215437528468b094136f622a30cfde7a51b8d25ca4c1e8a26300a3027ec79fafdbdda343af3cebf9e1c2813467c983b67873
data/bin/gfa-add-gaf CHANGED
@@ -8,7 +8,7 @@ $LOAD_PATH.push File.expand_path('../../lib', File.realpath(__FILE__))
8
8
 
9
9
  require 'gfa'
10
10
 
11
- input_gfa, input_gaf, output, degree, threads = ARGV
11
+ input_gfa, input_gaf, output, degree, pref, threads = ARGV
12
12
 
13
13
  unless degree
14
14
  $stderr.puts <<~HELP
@@ -26,4 +26,11 @@ end
26
26
  $stderr.puts "Loading GFA: #{input}"
27
27
  gfa = GFA.load_parallel(input, (threads || 1).to_i)
28
28
 
29
+ $stderr.puts "Saving path sequences: #{output}"
30
+ File.open(output, 'w') do |fasta|
31
+ gfa.paths.set.each do |path|
32
+ fasta.puts '>%s' % path.path_name.value
33
+ fasta.puts path.sequence(gfa)
34
+ end
35
+ end
29
36
 
@@ -42,4 +42,59 @@ class GFA::Record::Path < GFA::Record
42
42
 
43
43
  segment_names_a.any? { |name| segment.name == name }
44
44
  end
45
+
46
+ ##
47
+ # Array of GFA::Field::String with the sequences from each segment featuring
48
+ # the correct orientation from a +gfa+ (which *must* be indexed)
49
+ #
50
+ # TODO: Distinguish between a direct path (separated by comma) and a
51
+ # jump (separated by semicolon). Jumps include a distance estimate
52
+ # (column 6, optional) which could be used to add Ns between segment
53
+ # sequences (from GFA 1.2)
54
##
# Returns an Array with the sequence of every segment visited by this path,
# in path order and in the orientation in which the path traverses it.
#
# +gfa+ must be indexed, because segments are looked up by name through
# +gfa.segments[name]+. Forward steps (+) contribute +segment.sequence+ and
# reverse steps (-) contribute +segment.rc+.
#
# Raises RuntimeError if +gfa+ is not indexed, if a step carries an
# orientation other than + or -, or if a step names a segment that the
# lookup does not return.
def segment_sequences(gfa)
  raise "Unindexed GFA" unless gfa.indexed?

  # Steps are separated by commas or semicolons; both are treated alike here
  segment_names.value.split(/[,;]/).map do |step|
    # The last character of a step is its orientation sign. Slicing (instead
    # of editing the token in place) also copes with an empty step, which
    # then fails the orientation check below rather than raising IndexError.
    name = step[0...-1]
    orientation = step[-1]
    unless %w[+ -].include?(orientation)
      raise "Unknown orientation: #{orientation} (path: #{path_name})"
    end

    segment = gfa.segments[name]
    # Report the offending name instead of failing later with a
    # NoMethodError on nil
    raise "Unknown segment: #{name} (path: #{path_name})" unless segment

    orientation == '+' ? segment.sequence : segment.rc
  end
end
68
+
69
+ ##
70
+ # Produce the contiguous path sequence based on the segment sequences and
71
+ # orientations from a +gfa+ (which *must* be indexed)
72
+ #
73
+ # TODO: Estimate gaps (Ns) from Jump distances (see +segment_sequences+)
74
+ #
75
+ # TODO: Attempt reading CIGAR values from the path first, the corresponding
76
+ # links next, and actually performing the pairwise overlap as last resort
77
+ #
78
+ # TODO: Support ambiguous IUPAC codes for overlap evaluation
79
##
# Builds the contiguous sequence of this path as a String by concatenating
# the oriented segment sequences from +segment_sequences+ (so +gfa+ must be
# indexed), trimming from each segment the prefix that overlaps the end of
# the sequence built so far. Returns an empty String for a path without
# segments.
def sequence(gfa)
  segment_sequences(gfa).map(&:value).inject(''.dup) do |path_seq, seg_seq|
    # Only the last +seg_seq.length+ characters of the path can overlap the
    # start of the next segment. Handing the whole accumulated path to
    # +after_overlap+ would let it match the segment anywhere earlier in the
    # path (e.g. a repeat visited twice) and drop that segment entirely, and
    # would rescan the full path for every segment.
    window_start = [path_seq.length - seg_seq.length, 0].max
    # Append in place instead of copying the accumulator on every step
    path_seq << after_overlap(path_seq[window_start..-1], seg_seq)
  end
end
83
+
84
+ private
85
+ ##
86
+ # Find the overlap between sequences +a+ and +b+ (Strings) and return
87
+ # only the part of +b+ after the overlap. Assumes that +a+ starts
88
+ # at the same point or before +b+. If no overlap is found, returns +b+
89
+ # in its entirety.
90
##
# Returns the part of +b+ that follows its overlap with +a+ (both Strings).
#
# Every start offset in +a+ is tried from left to right, so the first match
# is the longest one. At each offset the compared span is all of +b+, or only
# the remainder of +a+ when +b+ would run past its end. If +b+ matches
# entirely inside +a+ the result is an empty String; if nothing matches,
# +b+ is returned whole.
def after_overlap(a, b)
  a.length.times do |a_from|
    # One start/length slice replaces the paired ternary clamps; unlike an
    # inclusive +x .. y - 1+ range it stays empty when the span is zero
    span = [b.length, a.length - a_from].min
    return b[span..-1].to_s if a[a_from, span] == b[0, span]
  end
  b
end
45
100
  end
@@ -32,4 +32,12 @@ class GFA::Record::Segment < GFA::Record
32
32
  def length
33
33
  sequence.value.length
34
34
  end
35
+
36
+ ##
37
+ # Returns the reverse-complement of the sequence (as a Z field)
38
##
# Reverse-complement of this segment's sequence, wrapped in a new
# GFA::Field::String. The sequence is upcased first; IUPAC ambiguity codes
# are complemented as well and U is complemented to A.
def rc
  complemented = sequence.value.upcase.tr(
    'ACGTURYSWKMBDHVN', 'TGCAAYRSWMKVHDBN'
  )
  GFA::Field::String.new(complemented.reverse)
end
35
43
  end
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.6.1'
2
+ VERSION = '0.6.3'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-05 00:00:00.000000000 Z
11
+ date: 2023-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl