gfa 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a7a646a57c98f25bda2120b3e04783099433d7aa7c8f0658a1bfdcd07e0dea35
4
- data.tar.gz: c8aec87b21fd9b1d7972228fa7118cf71b40f58a5dd20e385e6e80ef86b2ef92
3
+ metadata.gz: 13cc01c1ed08457fc0a4cf409e87ab0e0c52e6de6f9a9019bb15de87d462bac7
4
+ data.tar.gz: 9013cf534626775e09c9aa2006ff47d5f03fc5c5580cde1d0f325124de911a79
5
5
  SHA512:
6
- metadata.gz: 3f4c54a483e4cd6baaf66fbddda2e7eb41a13cd14bd492c5132a2d2610bdd2f283d7a92c261dca10f54fb9e03611ca7f7c05036fa783c361d4b8e752e9fd6b53
7
- data.tar.gz: 2b17c35013f3c9dbdb99a20e060c2dde13732c139f0551f3ce5b65e317ad5d1918b84447570542b11b67f5d65387729606de914b18994c420e5fe8b41b9bf5af
6
+ metadata.gz: fe4dfc8fa6e24fcc734a91d5101c9d9ab3315ddf5cc26ef943ae01695e573c367d31ae53a7a1325a809f0eedcaaae97d0453fd590f079c0f6a63eecdf89cc10f
7
+ data.tar.gz: 3957432e59163f82cce3eddc240a03b70ffe1693c3dad9d7f373655108c454f04b5dbe618f4f65adcaf7c98ed8327323c3d2b1fee97083685612a02678780ac8
data/lib/gfa/common.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'gfa/version'
2
+ require 'gfa/matrix'
2
3
  require 'gfa/record_set'
3
4
  require 'gfa/field'
4
5
 
@@ -10,6 +11,41 @@ class GFA
10
11
  end
11
12
  end
12
13
 
14
# Starts a new progress bar on $stderr that expects +n+ calls to +advance+.
#
# Resets the step counter and the number of ticks already drawn, records the
# start time, and prints the empty 50-character frame followed by +n+. The
# carriage return moves the cursor back to the start of the line so that the
# bar head can be drawn over the frame.
def self.advance_bar(n)
  @advance_bar_n = n
  @advance_bar_i = 0
  @advance_bar_p = 0
  @advance_bar_s = Time.now
  empty_frame = " [#{' ' * 50}] #{n}\r"
  $stderr.print empty_frame
  $stderr.print ' [>'
end
22
+
23
# Registers one completed step of the progress bar started by +advance_bar+.
#
# Draws as many ticks as are needed for the bar (50 ticks wide) to catch up
# with the completed fraction. On the step that reaches the expected total,
# it closes the bar and replaces the line with the elapsed time, reported in
# seconds, minutes, or hours.
def self.advance
  @advance_bar_i += 1
  until 50 * @advance_bar_i / @advance_bar_n <= @advance_bar_p
    # The backspace erases the previous head ('>') before redrawing it
    $stderr.print "\b=>"
    @advance_bar_p += 1
  end
  return unless @advance_bar_i == @advance_bar_n

  # Final step: close the bar and go back to the start of the line
  $stderr.print "\b]\r"
  elapsed = Time.now - @advance_bar_s
  unit = 'sec'
  # Move up to the next larger unit only while the figure exceeds 60
  %w[min h].each do |larger_unit|
    break unless elapsed > 60

    elapsed /= 60
    unit = larger_unit
  end

  summary = format('Time elapsed: %.1f %s', elapsed, unit)
  $stderr.puts format(' [ %-48s ]', summary)
end
48
+
13
49
  # Instance-level
14
50
  attr :gfa_version, :records, :opts
15
51
 
data/lib/gfa/graph.rb CHANGED
@@ -22,10 +22,36 @@ class GFA
22
22
  implicit_graph(opts).to_adjacency
23
23
  end
24
24
 
25
##
# Calculate and store internally a matrix representing all edges.
#
# The matrix is square, with one row and one column per segment. For every
# edge, the positions of the segments it involves are looked up, and the cell
# of each ordered pair of distinct positions is set to +true+, so both
# directions are marked. Progress is reported on $stderr.
#
# Returns the matrix, which is also kept in +@edge_matrix+
def calculate_edge_matrix!
  $stderr.puts '- Building edge matrix'
  @edge_matrix = GFA::Matrix.new(segments.size, segments.size)
  self.class.advance_bar(all_edges.size)
  all_edges.each do |edge|
    self.class.advance
    positions = edge.segments(self).map { |segment| segments.position(segment) }
    # Visit every (from, to) combination, skipping the diagonal
    positions.product(positions) do |from, to|
      next if from == to

      @edge_matrix[from, to] = true
    end
  end
  @edge_matrix
end
42
+
43
##
# Returns the matrix representing all edges. The matrix is built with
# +calculate_edge_matrix!+ the first time it is requested, and the stored
# copy is returned afterwards
def edge_matrix
  @edge_matrix || calculate_edge_matrix!
end
48
+
25
49
  ##
26
50
  # Extracts the subset of records associated to +segments+, which is an Array
27
- # with values of any class in: Integer (segment index),
28
- # String or GFA::Field::String (segment names), or GFA::Record::Segment.
51
+ # with values of any class in:
52
+ # - Integer: segment index,
53
+ # - String or GFA::Field::String: segment names, or
54
+ # - GFA::Record::Segment: the actual segments themselves
29
55
  #
30
56
  # +degree+ indicates the maximum degree of separation between the original
31
57
  # segment set and any additional segments. Use 0 to include only the segments
@@ -63,8 +89,7 @@ class GFA
63
89
  segments.each { |segment| gfa << segment }
64
90
 
65
91
  # Expand graph
66
- linking, edges = linking_records(gfa.segments, degree: degree)
67
- linking += internally_linking_records(segments, edges)
92
+ linking = linking_records(gfa.segments, degree: degree)
68
93
  linking.each { |record| gfa << record }
69
94
 
70
95
  # Return
@@ -84,70 +109,54 @@ class GFA
84
109
  # of the +segments+ set (assumes they have already been evaluated). This is
85
110
  # only used for internal heuristics
86
111
  #
87
- # Returns an Array of with two elements:
88
- # 0. An array of GFA::Record objects with all the identified linking records
89
- # 1. An array of GFA::Record objects with all edges that were not identified
112
+ # Returns an array of GFA::Record objects with all the identified linking
113
+ # records (edges)
90
114
  #
91
115
  # IMPORTANT NOTE 1: The object +segments+ will be modified to include all
92
116
  # linked segments. If you don't want this behaviour, please make sure to pass
93
117
  # a duplicate of the object instead.
94
- #
95
- # IMPORTANT NOTE 2: The list of linking records may not comprehensively
96
- # include all records linking the identified expanded segment set. To ensure
97
- # a consistent set is identified, use:
98
- # linking, edges = gfa.linking_records(segments)
99
- # linking += gfa.internally_linking_records(segments, edges)
100
- #
101
- def linking_records(segments, degree: 1, edges: nil, _ignore: 0)
118
+ def linking_records(segments, degree: 1)
102
119
  unless segments.is_a? GFA::RecordSet::SegmentSet
103
120
  raise "Unrecognised class: #{segments.class}"
104
121
  end
105
122
 
106
- # Gather edges to evaluate
107
- edges ||= all_edges
108
- return [[], edges] if degree <= 0
109
-
110
- # Links, Containments, Jumps (from, to) and Paths (segment_names)
111
- linking = []
112
- eval_set = _ignore == 0 ? segments.set : segments.set[_ignore..]
113
- edges.delete_if do |edge|
114
- if eval_set.any? { |segment| edge.include? segment }
115
- linking << edge
116
- true # Remove from the edge set to speed up future recursions
117
- else
118
- false # Keep it, possibly linking future recursions
119
- end
120
- end
121
-
122
- # Recurse and return
123
- if degree >= 1
124
- pre = segments.size
125
-
126
- # Add additional linked segments
127
- linking.each do |record|
128
- record.segments(self).each do |other_seg|
129
- segments << other_seg unless segments[other_seg.name]
123
+ return [] if degree <= 0
124
+
125
+ edge_matrix # Just to trigger matrix calculation
126
+ degree.times do |round|
127
+ $stderr.puts "- Expansion round #{round + 1}"
128
+ self.class.advance_bar(segments.size)
129
+ pre_expansion = segments.size
130
+ new_segments = []
131
+ segments.set.each do |segment|
132
+ self.class.advance
133
+ idx = self.segments.position(segment)
134
+ edge_matrix[nil, idx].each_with_index do |edge, target_k|
135
+ new_segments << target_k if edge
130
136
  end
131
137
  end
132
-
133
- # Recurse only if new segments were discovered
134
- if segments.size > pre
135
- $stderr.puts "- Recursion [#{degree}]: " \
136
- "#{pre} -> #{segments.size}\t(#{edges.size})"
137
- linking +=
138
- linking_records(
139
- segments,
140
- degree: degree - 1, edges: edges, _ignore: pre
141
- )[0]
142
- end
138
+ new_segments = new_segments.uniq.map { |i| self.segments[i] }
139
+ new_segments.each { |i| segments << i unless segments[i.name] }
140
+ new = segments.size - pre_expansion
141
+ $stderr.puts " #{new} segments found, total: #{segments.size}"
142
+ break if new == 0
143
143
  end
144
- [linking, edges]
144
+
145
+ internally_linking_records(segments, all_edges)
145
146
  end
146
147
 
147
148
  def internally_linking_records(segments, edges)
149
+ unless segments.is_a? GFA::RecordSet::SegmentSet
150
+ raise "Unrecognised class: #{segments.class}"
151
+ end
152
+
148
153
  $stderr.puts '- Gathering internally linking records'
149
- segments = Hash[segments.map { |i| [i.name.value, true]}]
150
- edges.select { |record| record.segment_names_a.all? { |s| segments[s] } }
154
+ s_names = Hash[segments.set.map { |i| [i.name.value, true]}]
155
+ self.class.advance_bar(edges.size)
156
+ edges.select do |record|
157
+ self.class.advance
158
+ record.segment_names_a.all? { |s| s_names[s] }
159
+ end
151
160
  end
152
161
 
153
162
  ##
@@ -155,7 +164,7 @@ class GFA
155
164
  # from the GFA. I.e., all links, jumps, containments, and paths.
156
165
  def all_edges
157
166
  edge_t = %i[Link Jump Containment Path]
158
- edges = edge_t.flat_map { |t| records[t].set } if edges.nil?
167
+ @edges ||= edge_t.flat_map { |t| records[t].set }
159
168
  end
160
169
 
161
170
  private
data/lib/gfa/matrix.rb ADDED
@@ -0,0 +1,96 @@
1
+
2
##
# A class to represent sparse matrices internally used for graph operations.
#
# Cells are stored in a Hash (+values+) keyed by a column-major linear index
# (<tt>col * rows + row</tt>). Cells that were never set read as the default
# value given at initialization. Row and column numbers are not checked
# against the matrix dimensions.
#
# The class is declared nested inside GFA so that this file can also be
# loaded on its own.
class GFA
  class Matrix
    attr_accessor :rows, :columns, :values

    ##
    # Initialize a Matrix with +rows+ and +columns+ (both should be Integer),
    # and a default +value+ (+nil+ if missing).
    #
    # Raises a RuntimeError if either dimension is not an Integer or is
    # negative
    def initialize(rows, columns, value = nil)
      raise 'Matrix rows must be an integer' unless rows.is_a? Integer
      raise 'Matrix columns must be an integer' unless columns.is_a? Integer
      raise 'Matrix rows must be positive' if rows < 0
      raise 'Matrix columns must be positive' if columns < 0

      @rows = rows
      @columns = columns
      @values = Hash.new(value)
    end

    ##
    # Returns the values of the cells selected by +row+ and +col+ as an Array
    # (see #index for the selection rules). A single cell is returned as an
    # Array with one element
    def [](row = nil, col = nil)
      index(row, col).map { |i| values[i] }
    end

    ##
    # Sets the cells selected by +row+ and +col+ (see #index). If both are
    # Integer, +value+ is stored in that single cell. If either is +nil+,
    # +value+ must be an Array with exactly one element per selected cell.
    #
    # Raises a RuntimeError if a range is selected and +value+ is not an
    # Array, or if the number of values differs from the number of cells
    def []=(row, col, value)
      # Named +new_values+ so that it does not shadow the +values+ accessor
      new_values = row.nil? || col.nil? ? value : [value]
      unless new_values.is_a? Array
        raise 'Value must be an array if setting a range of cells'
      end

      idx = index(row, col)
      if idx.size != new_values.size
        raise "Expected #{idx.size} values, but only got #{new_values.size}"
      end

      idx.each_with_index.map { |i, k| @values[i] = new_values[k] }
    end

    ##
    # Determines the index of +row+ and +col+ (both must be defined Integer),
    # sets its value to an empty Array if not yet defined, and appends
    # +value+.
    #
    # Raises a RuntimeError if +row+ or +col+ are not Integer, or if the cell
    # already holds a value that is not an Array
    def append(row, col, value)
      raise 'row must be a defined integer' unless row.is_a?(Integer)
      raise 'col must be a defined integer' unless col.is_a?(Integer)

      idx = index(row, col).first
      @values[idx] ||= []
      unless @values[idx].is_a? Array
        raise 'The values exists and it is not an array'
      end

      @values[idx] << value
    end

    ##
    # Returns the list of linear indexes determined by +row+ and +col+:
    # - If +row+ and +col+ are Integer, an Array with the index of that cell
    # - If both +row+ and +col+ are +nil+, a Range with the indexes of all the
    #   cells in the matrix
    # - If only +row+ is +nil+, a Range with the indexes of the entire column
    # - If only +col+ is +nil+, an Array with the indexes of the entire row
    def index(row = nil, col = nil)
      if row.nil? && col.nil?
        # All cells: the matrix is sparse, so the count comes from the
        # dimensions and not from the number of values stored so far
        (0..rows * columns - 1)
      elsif row.nil?
        # Entire column: a contiguous run in column-major order
        first = col_offset(col)
        (first..first + rows - 1)
      elsif col.nil?
        # Entire row: the same offset within every column
        ric = row_in_column(row)
        (0..columns - 1).map { |i| col_offset(i) + ric }
      else
        # Single cell
        [col_offset(col) + row_in_column(row)]
      end
    end

    ##
    # Index of the first cell of the +col+. The column is a 0-based
    # index, with negative integers representing columns counted from
    # the end (-1 being the last column)
    def col_offset(col)
      col = columns + col if col < 0
      col * rows
    end

    ##
    # Index of the +row+ as if it was in the first column. The row
    # is a 0-based index, with negative integers representing rows
    # counted from the end (-1 being the last row)
    def row_in_column(row)
      row = rows + row if row < 0
      row
    end
  end
end
data/lib/gfa/parser.rb CHANGED
@@ -48,6 +48,7 @@ class GFA
48
48
  blk = (lno.to_f / thr).ceil
49
49
 
50
50
  # Launch children processes
51
+ advance_bar(blk)
51
52
  io = []
52
53
  pid = []
53
54
  thr.times do |i|
@@ -56,7 +57,10 @@ class GFA
56
57
  io[i][0].close
57
58
  o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
58
59
  records = []
59
- read_records(file, o) { |record| records << record }
60
+ read_records(file, o) do |record|
61
+ advance if i == 0
62
+ records << record
63
+ end
60
64
  Marshal.dump(records, io[i][1])
61
65
  exit!(0)
62
66
  end
@@ -10,7 +10,8 @@ module GFA::Record::HasFromTo
10
10
  ##
11
11
  # Extracts all linked segments from +gfa+ (which *must* be indexed)
12
12
  def segments(gfa)
13
- raise "Unindexed GFA" unless gfa.indexed?
13
+ raise 'Unindexed GFA' unless gfa.indexed?
14
+
14
15
  [gfa.segments[from.value], gfa.segments[to.value]]
15
16
  end
16
17
 
@@ -27,10 +27,9 @@ class GFA::Record::Path < GFA::Record
27
27
  ##
28
28
  # Extracts all linked segments from +gfa+ (which *must* be indexed)
29
29
  def segments(gfa)
30
- raise "Unindexed GFA" unless gfa.indexed?
31
- segment_names_a.map do |name|
32
- gfa.segments[name]
33
- end
30
+ raise 'Unindexed GFA' unless gfa.indexed?
31
+
32
+ segment_names_a.map { |name| gfa.segments[name] }
34
33
  end
35
34
 
36
35
  ##
@@ -23,12 +23,13 @@ class GFA::RecordSet
23
23
 
24
24
  # Instance-level
25
25
 
26
- attr_reader :set, :index, :gfa
26
+ attr_reader :set, :index, :position, :gfa
27
27
 
28
28
  def initialize(gfa = nil)
29
- @set = []
30
- @index = {}
31
- @gfa = gfa || GFA.new
29
+ @set = []
30
+ @index = {}
31
+ @position = {}
32
+ @gfa = gfa || GFA.new
32
33
  end
33
34
 
34
35
  def [](k)
@@ -69,6 +70,7 @@ class GFA::RecordSet
69
70
  raise "Wrong type of record: #{v.type}" if v.type != type
70
71
 
71
72
  @set << v
73
+ @position[index_id(v)] = set.size - 1
72
74
  index!(v)
73
75
  end
74
76
 
@@ -109,6 +111,12 @@ class GFA::RecordSet
109
111
  @index[k]
110
112
  end
111
113
 
114
# Returns the position registered for +v+ in the +@position+ table, or +nil+
# if there is none. +v+ can be a GFA::Record (looked up by its +index_id+), a
# GFA::Field (looked up by its +value+), or the key itself.
def position(v)
  key =
    case v
    when GFA::Record then index_id(v)
    else v
    end
  # The index ID of a record may itself be a field, so unwrap it afterwards
  key = key.value if key.is_a? GFA::Field
  @position[key]
end
119
+
112
120
  def merge!(record_set)
113
121
  raise "Not a record set" unless record_set.is_a?(GFA::RecordSet)
114
122
  if record_set.type != type
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.7.0'
2
+ VERSION = '0.8.0'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-10 00:00:00.000000000 Z
11
+ date: 2023-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl
@@ -81,6 +81,7 @@ files:
81
81
  - lib/gfa/field/string.rb
82
82
  - lib/gfa/generator.rb
83
83
  - lib/gfa/graph.rb
84
+ - lib/gfa/matrix.rb
84
85
  - lib/gfa/modules.rb
85
86
  - lib/gfa/parser.rb
86
87
  - lib/gfa/record.rb