gfa 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a7a646a57c98f25bda2120b3e04783099433d7aa7c8f0658a1bfdcd07e0dea35
4
- data.tar.gz: c8aec87b21fd9b1d7972228fa7118cf71b40f58a5dd20e385e6e80ef86b2ef92
3
+ metadata.gz: 13cc01c1ed08457fc0a4cf409e87ab0e0c52e6de6f9a9019bb15de87d462bac7
4
+ data.tar.gz: 9013cf534626775e09c9aa2006ff47d5f03fc5c5580cde1d0f325124de911a79
5
5
  SHA512:
6
- metadata.gz: 3f4c54a483e4cd6baaf66fbddda2e7eb41a13cd14bd492c5132a2d2610bdd2f283d7a92c261dca10f54fb9e03611ca7f7c05036fa783c361d4b8e752e9fd6b53
7
- data.tar.gz: 2b17c35013f3c9dbdb99a20e060c2dde13732c139f0551f3ce5b65e317ad5d1918b84447570542b11b67f5d65387729606de914b18994c420e5fe8b41b9bf5af
6
+ metadata.gz: fe4dfc8fa6e24fcc734a91d5101c9d9ab3315ddf5cc26ef943ae01695e573c367d31ae53a7a1325a809f0eedcaaae97d0453fd590f079c0f6a63eecdf89cc10f
7
+ data.tar.gz: 3957432e59163f82cce3eddc240a03b70ffe1693c3dad9d7f373655108c454f04b5dbe618f4f65adcaf7c98ed8327323c3d2b1fee97083685612a02678780ac8
data/lib/gfa/common.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'gfa/version'
2
+ require 'gfa/matrix'
2
3
  require 'gfa/record_set'
3
4
  require 'gfa/field'
4
5
 
@@ -10,6 +11,41 @@ class GFA
10
11
  end
11
12
  end
12
13
 
14
##
# Starts a 50-column text progress bar on $stderr that expects +n+ calls to
# GFA.advance before it is complete. Any previous bar state is discarded
def self.advance_bar(n)
  @advance_bar_n = n
  @advance_bar_i = 0
  @advance_bar_p = 0
  # Monotonic clock, so the elapsed time is immune to wall-clock adjustments
  @advance_bar_s = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  $stderr.print ' [' + (' ' * 50) + ']' + " #{n}\r"
  $stderr.print ' [>'
end

##
# Registers one step of the bar started with GFA.advance_bar, redrawing it as
# needed. Once the expected number of steps is reached, the bar is replaced by
# the time elapsed. It does nothing if no bar was started or if the bar
# expects no steps
def self.advance
  # A missing or zero total cannot be drawn (and would divide by zero below)
  return unless @advance_bar_n.to_i.positive?

  @advance_bar_i += 1

  # Filled columns, capped at the bar width in case of extra calls
  filled = [50 * @advance_bar_i / @advance_bar_n, 50].min
  while filled > @advance_bar_p
    $stderr.print "\b=>"
    @advance_bar_p += 1
  end
  return unless @advance_bar_i == @advance_bar_n

  $stderr.print "\b]\r"
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @advance_bar_s
  unit = 'sec'

  # Promote seconds to minutes, and minutes to hours, while above 60
  %w[min h].each do |larger_unit|
    break unless elapsed > 60

    elapsed /= 60
    unit = larger_unit
  end

  $stderr.puts ' [ %-48s ]' % "Time elapsed: #{'%.1f' % elapsed} #{unit}"
end
48
+
13
49
  # Instance-level
14
50
  attr :gfa_version, :records, :opts
15
51
 
data/lib/gfa/graph.rb CHANGED
@@ -22,10 +22,36 @@ class GFA
22
22
  implicit_graph(opts).to_adjacency
23
23
  end
24
24
 
25
+ ##
26
+ # Calculate and store internally a matrix representing all edges.
27
def calculate_edge_matrix!
  $stderr.puts '- Building edge matrix'
  @edge_matrix = GFA::Matrix.new(segments.size, segments.size)
  self.class.advance_bar(all_edges.size)
  all_edges.each do |edge|
    self.class.advance

    # Positions of the segments touched by this edge. Segments that are not
    # indexed have no position (nil) and a nil coordinate would address a
    # whole row or column of the matrix, so they are dropped. An edge may
    # also list the same segment more than once, hence +uniq+
    idx = edge.segments(self).map { |i| segments.position(i) }.compact.uniq

    # Flag every ordered pair of distinct segments as connected
    idx.permutation(2) { |i, j| @edge_matrix[i, j] = true }
  end
  @edge_matrix
end
42
+
43
+ ##
44
+ # Returns the matrix representing all edges
45
def edge_matrix
  # Built lazily on the first access and reused afterwards
  @edge_matrix ||= calculate_edge_matrix!
end
48
+
25
49
  ##
26
50
  # Extracts the subset of records associated to +segments+, which is an Array
27
- # with values of any class in: Integer (segment index),
28
- # String or GFA::Field::String (segment names), or GFA::Record::Segment.
51
+ # with values of any class in:
52
+ # - Integer: segment index,
53
+ # - String or GFA::Field::String: segment names, or
54
+ # - GFA::Record::Segment: the actual segments themselves
29
55
  #
30
56
  # +degree+ indicates the maximum degree of separation between the original
31
57
  # segment set and any additional segments. Use 0 to include only the segments
@@ -63,8 +89,7 @@ class GFA
63
89
  segments.each { |segment| gfa << segment }
64
90
 
65
91
  # Expand graph
66
- linking, edges = linking_records(gfa.segments, degree: degree)
67
- linking += internally_linking_records(segments, edges)
92
+ linking = linking_records(gfa.segments, degree: degree)
68
93
  linking.each { |record| gfa << record }
69
94
 
70
95
  # Return
@@ -84,70 +109,54 @@ class GFA
84
109
  # of the +segments+ set (assumes they have already been evaluated). This is
85
110
  # only used for internal heuristics
86
111
  #
87
- # Returns an Array of with two elements:
88
- # 0. An array of GFA::Record objects with all the identified linking records
89
- # 1. An array of GFA::Record objects with all edges that were not identified
112
+ # Returns an array of GFA::Record objects with all the identified linking
113
+ # records (edges)
90
114
  #
91
115
  # IMPORTANT NOTE 1: The object +segments+ will be modified to include all
92
116
  # linked segments. If you don't want this behaviour, please make sure to pass
93
117
  # a duplicate of the object instead.
94
- #
95
- # IMPORTANT NOTE 2: The list of linking records may not comprehensively
96
- # include all records linking the identified expanded segment set. To ensure
97
- # a consistent set is identified, use:
98
- # linking, edges = gfa.linking_records(segments)
99
- # linking += gfa.internally_linking_records(segments, edges)
100
- #
101
def linking_records(segments, degree: 1)
  unless segments.is_a? GFA::RecordSet::SegmentSet
    raise "Unrecognised class: #{segments.class}"
  end

  return [] if degree <= 0

  edge_matrix # Just to trigger matrix calculation
  degree.times do |round|
    $stderr.puts "- Expansion round #{round + 1}"
    self.class.advance_bar(segments.size)
    pre_expansion = segments.size

    # Positions (in this GFA) of every segment adjacent to the current set
    neighbours = []
    segments.set.each do |segment|
      self.class.advance
      idx = self.segments.position(segment)

      # A segment unknown to this GFA has no column in the matrix; asking
      # for a nil column would return every cell instead of a single column
      next if idx.nil?

      edge_matrix[nil, idx].each_with_index do |linked, target_k|
        neighbours << target_k if linked
      end
    end

    # +segments+ is modified in place, only adding segments not yet in it
    neighbours.uniq.each do |k|
      candidate = self.segments[k]
      segments << candidate unless segments[candidate.name]
    end

    added = segments.size - pre_expansion
    $stderr.puts " #{added} segments found, total: #{segments.size}"

    # Further rounds cannot discover anything if this one added nothing
    break if added.zero?
  end

  internally_linking_records(segments, all_edges)
end
146
147
 
147
148
  def internally_linking_records(segments, edges)
149
+ unless segments.is_a? GFA::RecordSet::SegmentSet
150
+ raise "Unrecognised class: #{segments.class}"
151
+ end
152
+
148
153
  $stderr.puts '- Gathering internally linking records'
149
- segments = Hash[segments.map { |i| [i.name.value, true]}]
150
- edges.select { |record| record.segment_names_a.all? { |s| segments[s] } }
154
+ s_names = Hash[segments.set.map { |i| [i.name.value, true]}]
155
+ self.class.advance_bar(edges.size)
156
+ edges.select do |record|
157
+ self.class.advance
158
+ record.segment_names_a.all? { |s| s_names[s] }
159
+ end
151
160
  end
152
161
 
153
162
  ##
@@ -155,7 +164,7 @@ class GFA
155
164
  # from the GFA. I.e., all links, jumps, containments, and paths.
156
165
  def all_edges
157
166
  edge_t = %i[Link Jump Containment Path]
158
- edges = edge_t.flat_map { |t| records[t].set } if edges.nil?
167
+ @edges ||= edge_t.flat_map { |t| records[t].set }
159
168
  end
160
169
 
161
170
  private
data/lib/gfa/matrix.rb ADDED
@@ -0,0 +1,96 @@
1
+
2
class GFA
  ##
  # A class to represent sparse matrices internally used for graph operations.
  # Only the cells that were explicitly set are stored, in a Hash keyed by the
  # column-major linear index of the cell (+column * rows + row+)
  class Matrix
    attr_accessor :rows, :columns, :values

    ##
    # Initialize a Matrix with +rows+ and +columns+ (both should be Integer),
    # and a default +value+ (+nil+ if missing)
    def initialize(rows, columns, value = nil)
      raise 'Matrix rows must be an integer' unless rows.is_a? Integer
      raise 'Matrix columns must be an integer' unless columns.is_a? Integer
      raise 'Matrix rows must be positive' if rows < 0
      raise 'Matrix columns must be positive' if columns < 0

      @rows = rows
      @columns = columns
      @values = Hash.new(value)
    end

    ##
    # Returns an Array with the values of the cells selected by +row+ and
    # +col+ (see #index), using the default value for cells never set
    def [](row = nil, col = nil)
      index(row, col).map { |i| values[i] }
    end

    ##
    # Sets the cells selected by +row+ and +col+ (see #index). If both are
    # Integer, +value+ is stored in that single cell. If either is +nil+,
    # +value+ must be an Array with exactly one element per selected cell
    def []=(row, col, value)
      new_values = row.nil? || col.nil? ? value : [value]
      unless new_values.is_a? Array
        raise 'Value must be an array if setting a range of cells'
      end

      idx = index(row, col)
      if idx.size != new_values.size
        raise "Expected #{idx.size} values, but only got #{new_values.size}"
      end

      idx.each_with_index { |i, k| @values[i] = new_values[k] }
    end

    ##
    # Determines the index of +row+ and +col+ (both must be defined Integer),
    # sets its value to an empty Array if not yet defined, and appends +value+.
    # Raises an error if the value already exists but it's not an array
    def append(row, col, value)
      raise 'row must be a defined integer' unless row.is_a?(Integer)
      raise 'col must be a defined integer' unless col.is_a?(Integer)

      idx = index(row, col).first
      @values[idx] ||= []
      unless @values[idx].is_a? Array
        raise 'The values exists and it is not an array'
      end

      @values[idx] << value
    end

    ##
    # Returns the list of indexes determined by +row+ and +col+ as an Array
    # or a Range:
    # - If +row+ and +col+ are Integer, it returns the index of the given cell
    # - If both +row+ and +col+ are +nil+, it returns the indexes of all cells
    # - If +row+ xor +col+ are +nil+, it returns the indexes of the entire
    #   column or row, respectively.
    def index(row = nil, col = nil)
      if row.nil? && col.nil?
        # All cells: the count comes from the dimensions, not from the Hash,
        # which only holds the cells that have been set
        (0..rows * columns - 1)
      elsif row.nil?
        # Entire column (contiguous in column-major order)
        (col_offset(col)..col_offset(col) + rows - 1)
      elsif col.nil?
        # Entire row (one cell per column)
        ric = row_in_column(row)
        (0..columns - 1).map { |i| col_offset(i) + ric }
      else
        # Single cell
        [col_offset(col) + row_in_column(row)]
      end
    end

    ##
    # Index of the first cell of the +col+. The column is a 0-based
    # index, with negative integers representing columns counted from
    # the end (-1 being the last column)
    def col_offset(col)
      col = columns + col if col < 0
      col * rows
    end

    ##
    # Index of the +row+ as if it was in the first column. The row
    # is a 0-based index, with negative integers representing rows
    # counted from the end (-1 being the last row)
    def row_in_column(row)
      row = rows + row if row < 0
      row
    end
  end
end
data/lib/gfa/parser.rb CHANGED
@@ -48,6 +48,7 @@ class GFA
48
48
  blk = (lno.to_f / thr).ceil
49
49
 
50
50
  # Launch children processes
51
+ advance_bar(blk)
51
52
  io = []
52
53
  pid = []
53
54
  thr.times do |i|
@@ -56,7 +57,10 @@ class GFA
56
57
  io[i][0].close
57
58
  o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
58
59
  records = []
59
- read_records(file, o) { |record| records << record }
60
+ read_records(file, o) do |record|
61
+ advance if i == 0
62
+ records << record
63
+ end
60
64
  Marshal.dump(records, io[i][1])
61
65
  exit!(0)
62
66
  end
@@ -10,7 +10,8 @@ module GFA::Record::HasFromTo
10
10
  ##
11
11
  # Extracts all linked segments from +gfa+ (which *must* be indexed)
12
12
  def segments(gfa)
13
- raise "Unindexed GFA" unless gfa.indexed?
13
+ raise 'Unindexed GFA' unless gfa.indexed?
14
+
14
15
  [gfa.segments[from.value], gfa.segments[to.value]]
15
16
  end
16
17
 
@@ -27,10 +27,9 @@ class GFA::Record::Path < GFA::Record
27
27
  ##
28
28
  # Extracts all linked segments from +gfa+ (which *must* be indexed)
29
29
  def segments(gfa)
30
- raise "Unindexed GFA" unless gfa.indexed?
31
- segment_names_a.map do |name|
32
- gfa.segments[name]
33
- end
30
+ raise 'Unindexed GFA' unless gfa.indexed?
31
+
32
+ segment_names_a.map { |name| gfa.segments[name] }
34
33
  end
35
34
 
36
35
  ##
@@ -23,12 +23,13 @@ class GFA::RecordSet
23
23
 
24
24
  # Instance-level
25
25
 
26
- attr_reader :set, :index, :gfa
26
+ attr_reader :set, :index, :position, :gfa
27
27
 
28
28
  def initialize(gfa = nil)
29
- @set = []
30
- @index = {}
31
- @gfa = gfa || GFA.new
29
+ @set = []
30
+ @index = {}
31
+ @position = {}
32
+ @gfa = gfa || GFA.new
32
33
  end
33
34
 
34
35
  def [](k)
@@ -69,6 +70,7 @@ class GFA::RecordSet
69
70
  raise "Wrong type of record: #{v.type}" if v.type != type
70
71
 
71
72
  @set << v
73
+ @position[index_id(v)] = set.size - 1
72
74
  index!(v)
73
75
  end
74
76
 
@@ -109,6 +111,12 @@ class GFA::RecordSet
109
111
  @index[k]
110
112
  end
111
113
 
114
##
# Returns the value stored in the position table for +v+, which can be a
# GFA::Record (resolved with index_id), a GFA::Field (resolved to its value),
# or the key itself. Returns +nil+ if the key is not registered
def position(v)
  key = v.is_a?(GFA::Record) ? index_id(v) : v
  key = key.value if key.is_a?(GFA::Field)
  @position[key]
end
119
+
112
120
  def merge!(record_set)
113
121
  raise "Not a record set" unless record_set.is_a?(GFA::RecordSet)
114
122
  if record_set.type != type
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.7.0'
2
+ VERSION = '0.8.0'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-10 00:00:00.000000000 Z
11
+ date: 2023-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl
@@ -81,6 +81,7 @@ files:
81
81
  - lib/gfa/field/string.rb
82
82
  - lib/gfa/generator.rb
83
83
  - lib/gfa/graph.rb
84
+ - lib/gfa/matrix.rb
84
85
  - lib/gfa/modules.rb
85
86
  - lib/gfa/parser.rb
86
87
  - lib/gfa/record.rb