gfa 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gfa/common.rb +36 -0
- data/lib/gfa/graph.rb +63 -54
- data/lib/gfa/matrix.rb +96 -0
- data/lib/gfa/parser.rb +5 -1
- data/lib/gfa/record/has_from_to.rb +2 -1
- data/lib/gfa/record/path.rb +3 -4
- data/lib/gfa/record_set.rb +12 -4
- data/lib/gfa/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13cc01c1ed08457fc0a4cf409e87ab0e0c52e6de6f9a9019bb15de87d462bac7
|
4
|
+
data.tar.gz: 9013cf534626775e09c9aa2006ff47d5f03fc5c5580cde1d0f325124de911a79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe4dfc8fa6e24fcc734a91d5101c9d9ab3315ddf5cc26ef943ae01695e573c367d31ae53a7a1325a809f0eedcaaae97d0453fd590f079c0f6a63eecdf89cc10f
|
7
|
+
data.tar.gz: 3957432e59163f82cce3eddc240a03b70ffe1693c3dad9d7f373655108c454f04b5dbe618f4f65adcaf7c98ed8327323c3d2b1fee97083685612a02678780ac8
|
data/lib/gfa/common.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'gfa/version'
|
2
|
+
require 'gfa/matrix'
|
2
3
|
require 'gfa/record_set'
|
3
4
|
require 'gfa/field'
|
4
5
|
|
@@ -10,6 +11,41 @@ class GFA
|
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
14
|
+
def self.advance_bar(n)
|
15
|
+
@advance_bar_n = n
|
16
|
+
@advance_bar_i = 0
|
17
|
+
@advance_bar_p = 0
|
18
|
+
@advance_bar_s = Time.now
|
19
|
+
$stderr.print ' [' + (' ' * 50) + ']' + " #{n}\r"
|
20
|
+
$stderr.print ' [>'
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.advance
|
24
|
+
@advance_bar_i += 1
|
25
|
+
# $stderr.print "#{@advance_bar_i}"[-1] + "\b"
|
26
|
+
while 50 * @advance_bar_i / @advance_bar_n > @advance_bar_p
|
27
|
+
$stderr.print "\b=>"
|
28
|
+
@advance_bar_p += 1
|
29
|
+
end
|
30
|
+
return unless @advance_bar_i == @advance_bar_n
|
31
|
+
|
32
|
+
$stderr.print "\b]\r"
|
33
|
+
t_t = Time.now - @advance_bar_s
|
34
|
+
t_u = 'sec'
|
35
|
+
|
36
|
+
if t_t > 60
|
37
|
+
t_t /= 60
|
38
|
+
t_u = 'min'
|
39
|
+
end
|
40
|
+
|
41
|
+
if t_t > 60
|
42
|
+
t_t /= 60
|
43
|
+
t_u = 'h'
|
44
|
+
end
|
45
|
+
|
46
|
+
$stderr.puts ' [ %-48s ]' % "Time elapsed: #{'%.1f' % t_t} #{t_u}"
|
47
|
+
end
|
48
|
+
|
13
49
|
# Instance-level
|
14
50
|
attr :gfa_version, :records, :opts
|
15
51
|
|
data/lib/gfa/graph.rb
CHANGED
@@ -22,10 +22,36 @@ class GFA
|
|
22
22
|
implicit_graph(opts).to_adjacency
|
23
23
|
end
|
24
24
|
|
25
|
+
##
|
26
|
+
# Calculate and store internally a matrix representing all edges.
|
27
|
+
def calculate_edge_matrix!
|
28
|
+
$stderr.puts '- Building edge matrix'
|
29
|
+
@edge_matrix = GFA::Matrix.new(segments.size, segments.size)
|
30
|
+
self.class.advance_bar(all_edges.size)
|
31
|
+
all_edges.each do |edge|
|
32
|
+
self.class.advance
|
33
|
+
idx = edge.segments(self).map { |i| segments.position(i) }
|
34
|
+
idx.each do |i|
|
35
|
+
idx.each do |j|
|
36
|
+
@edge_matrix[i, j] = true unless i == j
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
@edge_matrix
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Returns the matrix representing all edges
|
45
|
+
def edge_matrix
|
46
|
+
@edge_matrix or calculate_edge_matrix!
|
47
|
+
end
|
48
|
+
|
25
49
|
##
|
26
50
|
# Extracts the subset of records associated to +segments+, which is an Array
|
27
|
-
# with values of any class in:
|
28
|
-
#
|
51
|
+
# with values of any class in:
|
52
|
+
# - Integer: segment index,
|
53
|
+
# - String or GFA::Field::String: segment names, or
|
54
|
+
# - GFA::Record::Segment: the actual segments themselves
|
29
55
|
#
|
30
56
|
# +degree+ indicates the maximum degree of separation between the original
|
31
57
|
# segment set and any additional segments. Use 0 to include only the segments
|
@@ -63,8 +89,7 @@ class GFA
|
|
63
89
|
segments.each { |segment| gfa << segment }
|
64
90
|
|
65
91
|
# Expand graph
|
66
|
-
linking
|
67
|
-
linking += internally_linking_records(segments, edges)
|
92
|
+
linking = linking_records(gfa.segments, degree: degree)
|
68
93
|
linking.each { |record| gfa << record }
|
69
94
|
|
70
95
|
# Return
|
@@ -84,70 +109,54 @@ class GFA
|
|
84
109
|
# of the +segments+ set (assumes they have already been evaluated). This is
|
85
110
|
# only used for internal heuristics
|
86
111
|
#
|
87
|
-
# Returns an
|
88
|
-
#
|
89
|
-
# 1. An array of GFA::Record objects with all edges that were not identified
|
112
|
+
# Returns an array of GFA::Record objects with all the identified linking
|
113
|
+
# records (edges)
|
90
114
|
#
|
91
115
|
# IMPORTANT NOTE 1: The object +segments+ will be modified to include all
|
92
116
|
# linked segments. If you don't want this behaviour, please make sure to pass
|
93
117
|
# a duplicate of the object instead.
|
94
|
-
|
95
|
-
# IMPORTANT NOTE 2: The list of linking records may not comprehensively
|
96
|
-
# include all records linking the identified expanded segment set. To ensure
|
97
|
-
# a consistent set is identified, use:
|
98
|
-
# linking, edges = gfa.linking_records(segments)
|
99
|
-
# linking += gfa.internally_linking_records(segments, edges)
|
100
|
-
#
|
101
|
-
def linking_records(segments, degree: 1, edges: nil, _ignore: 0)
|
118
|
+
def linking_records(segments, degree: 1)
|
102
119
|
unless segments.is_a? GFA::RecordSet::SegmentSet
|
103
120
|
raise "Unrecognised class: #{segments.class}"
|
104
121
|
end
|
105
122
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
# Recurse and return
|
123
|
-
if degree >= 1
|
124
|
-
pre = segments.size
|
125
|
-
|
126
|
-
# Add additional linked segments
|
127
|
-
linking.each do |record|
|
128
|
-
record.segments(self).each do |other_seg|
|
129
|
-
segments << other_seg unless segments[other_seg.name]
|
123
|
+
return [] if degree <= 0
|
124
|
+
|
125
|
+
edge_matrix # Just to trigger matrix calculation
|
126
|
+
degree.times do |round|
|
127
|
+
$stderr.puts "- Expansion round #{round + 1}"
|
128
|
+
self.class.advance_bar(segments.size)
|
129
|
+
pre_expansion = segments.size
|
130
|
+
new_segments = []
|
131
|
+
segments.set.each do |segment|
|
132
|
+
self.class.advance
|
133
|
+
idx = self.segments.position(segment)
|
134
|
+
edge_matrix[nil, idx].each_with_index do |edge, target_k|
|
135
|
+
new_segments << target_k if edge
|
130
136
|
end
|
131
137
|
end
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
linking +=
|
138
|
-
linking_records(
|
139
|
-
segments,
|
140
|
-
degree: degree - 1, edges: edges, _ignore: pre
|
141
|
-
)[0]
|
142
|
-
end
|
138
|
+
new_segments = new_segments.uniq.map { |i| self.segments[i] }
|
139
|
+
new_segments.each { |i| segments << i unless segments[i.name] }
|
140
|
+
new = segments.size - pre_expansion
|
141
|
+
$stderr.puts " #{new} segments found, total: #{segments.size}"
|
142
|
+
break if new == 0
|
143
143
|
end
|
144
|
-
|
144
|
+
|
145
|
+
internally_linking_records(segments, all_edges)
|
145
146
|
end
|
146
147
|
|
147
148
|
def internally_linking_records(segments, edges)
|
149
|
+
unless segments.is_a? GFA::RecordSet::SegmentSet
|
150
|
+
raise "Unrecognised class: #{segments.class}"
|
151
|
+
end
|
152
|
+
|
148
153
|
$stderr.puts '- Gathering internally linking records'
|
149
|
-
|
150
|
-
|
154
|
+
s_names = Hash[segments.set.map { |i| [i.name.value, true]}]
|
155
|
+
self.class.advance_bar(edges.size)
|
156
|
+
edges.select do |record|
|
157
|
+
self.class.advance
|
158
|
+
record.segment_names_a.all? { |s| s_names[s] }
|
159
|
+
end
|
151
160
|
end
|
152
161
|
|
153
162
|
##
|
@@ -155,7 +164,7 @@ class GFA
|
|
155
164
|
# from the GFA. I.e., all links, jumps, containments, and paths.
|
156
165
|
def all_edges
|
157
166
|
edge_t = %i[Link Jump Containment Path]
|
158
|
-
edges
|
167
|
+
@edges ||= edge_t.flat_map { |t| records[t].set }
|
159
168
|
end
|
160
169
|
|
161
170
|
private
|
data/lib/gfa/matrix.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# A class to represent sparse matrices internally used for graph operations
|
4
|
+
class GFA::Matrix
|
5
|
+
|
6
|
+
attr_accessor :rows, :columns, :values
|
7
|
+
|
8
|
+
##
|
9
|
+
# Initialize a Matrix with +rows+ and +columns+ (both should be Integer),
|
10
|
+
# and a default +value+ (+nil+ if missing)
|
11
|
+
def initialize(rows, columns, value = nil)
|
12
|
+
raise 'Matrix rows must be an integer' unless rows.is_a? Integer
|
13
|
+
raise 'Matrix columns must be an integer' unless columns.is_a? Integer
|
14
|
+
raise 'Matrix rows must be positive' if rows < 0
|
15
|
+
raise 'Matrix columns must be positive' if columns < 0
|
16
|
+
|
17
|
+
@rows = rows
|
18
|
+
@columns = columns
|
19
|
+
@values = Hash.new(value)
|
20
|
+
end
|
21
|
+
|
22
|
+
def [](row = nil, col = nil)
|
23
|
+
index(row, col).map { |i| values[i] }
|
24
|
+
end
|
25
|
+
|
26
|
+
def []=(row, col, value)
|
27
|
+
values = (row.nil? || col.nil?) ? value : [value]
|
28
|
+
unless values.is_a? Array
|
29
|
+
raise 'Value must be an array if setting a range of cells'
|
30
|
+
end
|
31
|
+
|
32
|
+
idx = index(row, col)
|
33
|
+
if idx.size != values.size
|
34
|
+
raise "Expected #{idx.size} values, but only got #{values.size}"
|
35
|
+
end
|
36
|
+
idx.each_with_index.map { |i, k| @values[i] = values[k] }
|
37
|
+
end
|
38
|
+
|
39
|
+
##
|
40
|
+
# Determines the index of +row+ and +col+ (both must be defined Integer),
|
41
|
+
# sets its value to an empty Array if not yet defined, and appends +value+.
|
42
|
+
# Raises an error if the value already exists but is not an Array
|
43
|
+
def append(row, col, value)
|
44
|
+
raise 'wow must be a defined integer' unless row.is_a?(Integer)
|
45
|
+
raise 'col must be a defined integer' unless col.is_a?(Integer)
|
46
|
+
|
47
|
+
idx = index(row, col).first
|
48
|
+
@values[idx] ||= []
|
49
|
+
unless @values[idx].is_a? Array
|
50
|
+
raise 'The values exists and it is not an array'
|
51
|
+
end
|
52
|
+
|
53
|
+
@values[idx] << value
|
54
|
+
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# Returns the list of indexes determined by +row+ and +col+ as an Array:
|
58
|
+
# - If +row+ and +col+ are Integer, it returns the index of the given cell
|
59
|
+
# - If both +row+ and +col+ are +nil+, it returns the indexes for all values
|
60
|
+
# - If +row+ xor +col+ are +nil+, it returns the indexes of the entire column
|
61
|
+
# or row, respectively.
|
62
|
+
def index(row = nil, col = nil)
|
63
|
+
if row.nil? && col.nil?
|
64
|
+
# All values
|
65
|
+
(0 .. values.size - 1) # .to_a
|
66
|
+
elsif row.nil?
|
67
|
+
# Entire column
|
68
|
+
(col_offset(col) .. col_offset(col) + rows - 1) # .to_a
|
69
|
+
elsif col.nil?
|
70
|
+
# Entire row
|
71
|
+
ric = row_in_column(row)
|
72
|
+
(0 .. columns - 1).map { |i| col_offset(i) + ric }
|
73
|
+
else
|
74
|
+
# Single value
|
75
|
+
[col_offset(col) + row_in_column(row)]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# Index of the first cell of the +col+. The column is a 0-based
|
81
|
+
# index, with negative integers representing columns counted from
|
82
|
+
# the end (-1 being the last column)
|
83
|
+
def col_offset(col)
|
84
|
+
col = cols + col if col < 0
|
85
|
+
col * rows
|
86
|
+
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# Index of the +row+ as if it was in the first column. The row
|
90
|
+
# is a 0-based index, with negative integers representing rows
|
91
|
+
# counted from the end (-1 being the last row)
|
92
|
+
def row_in_column(row)
|
93
|
+
row = rows + row if row < 0
|
94
|
+
row
|
95
|
+
end
|
96
|
+
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -48,6 +48,7 @@ class GFA
|
|
48
48
|
blk = (lno.to_f / thr).ceil
|
49
49
|
|
50
50
|
# Launch children processes
|
51
|
+
advance_bar(blk)
|
51
52
|
io = []
|
52
53
|
pid = []
|
53
54
|
thr.times do |i|
|
@@ -56,7 +57,10 @@ class GFA
|
|
56
57
|
io[i][0].close
|
57
58
|
o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
|
58
59
|
records = []
|
59
|
-
read_records(file, o)
|
60
|
+
read_records(file, o) do |record|
|
61
|
+
advance if i == 0
|
62
|
+
records << record
|
63
|
+
end
|
60
64
|
Marshal.dump(records, io[i][1])
|
61
65
|
exit!(0)
|
62
66
|
end
|
data/lib/gfa/record/has_from_to.rb
CHANGED
@@ -10,7 +10,8 @@ module GFA::Record::HasFromTo
|
|
10
10
|
##
|
11
11
|
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
12
12
|
def segments(gfa)
|
13
|
-
raise
|
13
|
+
raise 'Unindexed GFA' unless gfa.indexed?
|
14
|
+
|
14
15
|
[gfa.segments[from.value], gfa.segments[to.value]]
|
15
16
|
end
|
16
17
|
|
data/lib/gfa/record/path.rb
CHANGED
@@ -27,10 +27,9 @@ class GFA::Record::Path < GFA::Record
|
|
27
27
|
##
|
28
28
|
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
29
29
|
def segments(gfa)
|
30
|
-
raise
|
31
|
-
|
32
|
-
|
33
|
-
end
|
30
|
+
raise 'Unindexed GFA' unless gfa.indexed?
|
31
|
+
|
32
|
+
segment_names_a.map { |name| gfa.segments[name] }
|
34
33
|
end
|
35
34
|
|
36
35
|
##
|
data/lib/gfa/record_set.rb
CHANGED
@@ -23,12 +23,13 @@ class GFA::RecordSet
|
|
23
23
|
|
24
24
|
# Instance-level
|
25
25
|
|
26
|
-
attr_reader :set, :index, :gfa
|
26
|
+
attr_reader :set, :index, :position, :gfa
|
27
27
|
|
28
28
|
def initialize(gfa = nil)
|
29
|
-
@set
|
30
|
-
@index
|
31
|
-
@
|
29
|
+
@set = []
|
30
|
+
@index = {}
|
31
|
+
@position = {}
|
32
|
+
@gfa = gfa || GFA.new
|
32
33
|
end
|
33
34
|
|
34
35
|
def [](k)
|
@@ -69,6 +70,7 @@ class GFA::RecordSet
|
|
69
70
|
raise "Wrong type of record: #{v.type}" if v.type != type
|
70
71
|
|
71
72
|
@set << v
|
73
|
+
@position[index_id(v)] = set.size - 1
|
72
74
|
index!(v)
|
73
75
|
end
|
74
76
|
|
@@ -109,6 +111,12 @@ class GFA::RecordSet
|
|
109
111
|
@index[k]
|
110
112
|
end
|
111
113
|
|
114
|
+
def position(v)
|
115
|
+
v = index_id(v) if v.is_a? GFA::Record
|
116
|
+
v = v.value if v.is_a? GFA::Field
|
117
|
+
@position[v]
|
118
|
+
end
|
119
|
+
|
112
120
|
def merge!(record_set)
|
113
121
|
raise "Not a record set" unless record_set.is_a?(GFA::RecordSet)
|
114
122
|
if record_set.type != type
|
data/lib/gfa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|
@@ -81,6 +81,7 @@ files:
|
|
81
81
|
- lib/gfa/field/string.rb
|
82
82
|
- lib/gfa/generator.rb
|
83
83
|
- lib/gfa/graph.rb
|
84
|
+
- lib/gfa/matrix.rb
|
84
85
|
- lib/gfa/modules.rb
|
85
86
|
- lib/gfa/parser.rb
|
86
87
|
- lib/gfa/record.rb
|