gfa 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gfa/common.rb +36 -0
- data/lib/gfa/graph.rb +63 -54
- data/lib/gfa/matrix.rb +96 -0
- data/lib/gfa/parser.rb +5 -1
- data/lib/gfa/record/has_from_to.rb +2 -1
- data/lib/gfa/record/path.rb +3 -4
- data/lib/gfa/record_set.rb +12 -4
- data/lib/gfa/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13cc01c1ed08457fc0a4cf409e87ab0e0c52e6de6f9a9019bb15de87d462bac7
|
4
|
+
data.tar.gz: 9013cf534626775e09c9aa2006ff47d5f03fc5c5580cde1d0f325124de911a79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe4dfc8fa6e24fcc734a91d5101c9d9ab3315ddf5cc26ef943ae01695e573c367d31ae53a7a1325a809f0eedcaaae97d0453fd590f079c0f6a63eecdf89cc10f
|
7
|
+
data.tar.gz: 3957432e59163f82cce3eddc240a03b70ffe1693c3dad9d7f373655108c454f04b5dbe618f4f65adcaf7c98ed8327323c3d2b1fee97083685612a02678780ac8
|
data/lib/gfa/common.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'gfa/version'
|
2
|
+
require 'gfa/matrix'
|
2
3
|
require 'gfa/record_set'
|
3
4
|
require 'gfa/field'
|
4
5
|
|
@@ -10,6 +11,41 @@ class GFA
|
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
14
|
+
def self.advance_bar(n)
|
15
|
+
@advance_bar_n = n
|
16
|
+
@advance_bar_i = 0
|
17
|
+
@advance_bar_p = 0
|
18
|
+
@advance_bar_s = Time.now
|
19
|
+
$stderr.print ' [' + (' ' * 50) + ']' + " #{n}\r"
|
20
|
+
$stderr.print ' [>'
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.advance
|
24
|
+
@advance_bar_i += 1
|
25
|
+
# $stderr.print "#{@advance_bar_i}"[-1] + "\b"
|
26
|
+
while 50 * @advance_bar_i / @advance_bar_n > @advance_bar_p
|
27
|
+
$stderr.print "\b=>"
|
28
|
+
@advance_bar_p += 1
|
29
|
+
end
|
30
|
+
return unless @advance_bar_i == @advance_bar_n
|
31
|
+
|
32
|
+
$stderr.print "\b]\r"
|
33
|
+
t_t = Time.now - @advance_bar_s
|
34
|
+
t_u = 'sec'
|
35
|
+
|
36
|
+
if t_t > 60
|
37
|
+
t_t /= 60
|
38
|
+
t_u = 'min'
|
39
|
+
end
|
40
|
+
|
41
|
+
if t_t > 60
|
42
|
+
t_t /= 60
|
43
|
+
t_u = 'h'
|
44
|
+
end
|
45
|
+
|
46
|
+
$stderr.puts ' [ %-48s ]' % "Time elapsed: #{'%.1f' % t_t} #{t_u}"
|
47
|
+
end
|
48
|
+
|
13
49
|
# Instance-level
|
14
50
|
attr :gfa_version, :records, :opts
|
15
51
|
|
data/lib/gfa/graph.rb
CHANGED
@@ -22,10 +22,36 @@ class GFA
|
|
22
22
|
implicit_graph(opts).to_adjacency
|
23
23
|
end
|
24
24
|
|
25
|
+
##
|
26
|
+
# Calculate and store internally a matrix representing all edges.
|
27
|
+
def calculate_edge_matrix!
|
28
|
+
$stderr.puts '- Building edge matrix'
|
29
|
+
@edge_matrix = GFA::Matrix.new(segments.size, segments.size)
|
30
|
+
self.class.advance_bar(all_edges.size)
|
31
|
+
all_edges.each do |edge|
|
32
|
+
self.class.advance
|
33
|
+
idx = edge.segments(self).map { |i| segments.position(i) }
|
34
|
+
idx.each do |i|
|
35
|
+
idx.each do |j|
|
36
|
+
@edge_matrix[i, j] = true unless i == j
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
@edge_matrix
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Returns the matrix representing all edges
|
45
|
+
def edge_matrix
|
46
|
+
@edge_matrix or calculate_edge_matrix!
|
47
|
+
end
|
48
|
+
|
25
49
|
##
|
26
50
|
# Extracts the subset of records associated to +segments+, which is an Array
|
27
|
-
# with values of any class in:
|
28
|
-
#
|
51
|
+
# with values of any class in:
|
52
|
+
# - Integer: segment index,
|
53
|
+
# - String or GFA::Field::String: segment names, or
|
54
|
+
# - GFA::Record::Segment: the actual segments themselves
|
29
55
|
#
|
30
56
|
# +degree+ indicates the maximum degree of separation between the original
|
31
57
|
# segment set and any additional segments. Use 0 to include only the segments
|
@@ -63,8 +89,7 @@ class GFA
|
|
63
89
|
segments.each { |segment| gfa << segment }
|
64
90
|
|
65
91
|
# Expand graph
|
66
|
-
linking
|
67
|
-
linking += internally_linking_records(segments, edges)
|
92
|
+
linking = linking_records(gfa.segments, degree: degree)
|
68
93
|
linking.each { |record| gfa << record }
|
69
94
|
|
70
95
|
# Return
|
@@ -84,70 +109,54 @@ class GFA
|
|
84
109
|
# of the +segments+ set (assumes they have already been evaluated). This is
|
85
110
|
# only used for internal heuristics
|
86
111
|
#
|
87
|
-
# Returns an
|
88
|
-
#
|
89
|
-
# 1. An array of GFA::Record objects with all edges that were not identified
|
112
|
+
# Returns an array of GFA::Record objects with all the identified linking
|
113
|
+
# records (edges)
|
90
114
|
#
|
91
115
|
# IMPORTANT NOTE 1: The object +segments+ will be modified to include all
|
92
116
|
# linked segments. If you don't want this behaviour, please make sure to pass
|
93
117
|
# a duplicate of the object instead.
|
94
|
-
|
95
|
-
# IMPORTANT NOTE 2: The list of linking records may not comprehensively
|
96
|
-
# include all records linking the identified expanded segment set. To ensure
|
97
|
-
# a consistent set is identified, use:
|
98
|
-
# linking, edges = gfa.linking_records(segments)
|
99
|
-
# linking += gfa.internally_linking_records(segments, edges)
|
100
|
-
#
|
101
|
-
def linking_records(segments, degree: 1, edges: nil, _ignore: 0)
|
118
|
+
def linking_records(segments, degree: 1)
|
102
119
|
unless segments.is_a? GFA::RecordSet::SegmentSet
|
103
120
|
raise "Unrecognised class: #{segments.class}"
|
104
121
|
end
|
105
122
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
# Recurse and return
|
123
|
-
if degree >= 1
|
124
|
-
pre = segments.size
|
125
|
-
|
126
|
-
# Add additional linked segments
|
127
|
-
linking.each do |record|
|
128
|
-
record.segments(self).each do |other_seg|
|
129
|
-
segments << other_seg unless segments[other_seg.name]
|
123
|
+
return [] if degree <= 0
|
124
|
+
|
125
|
+
edge_matrix # Just to trigger matrix calculation
|
126
|
+
degree.times do |round|
|
127
|
+
$stderr.puts "- Expansion round #{round + 1}"
|
128
|
+
self.class.advance_bar(segments.size)
|
129
|
+
pre_expansion = segments.size
|
130
|
+
new_segments = []
|
131
|
+
segments.set.each do |segment|
|
132
|
+
self.class.advance
|
133
|
+
idx = self.segments.position(segment)
|
134
|
+
edge_matrix[nil, idx].each_with_index do |edge, target_k|
|
135
|
+
new_segments << target_k if edge
|
130
136
|
end
|
131
137
|
end
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
linking +=
|
138
|
-
linking_records(
|
139
|
-
segments,
|
140
|
-
degree: degree - 1, edges: edges, _ignore: pre
|
141
|
-
)[0]
|
142
|
-
end
|
138
|
+
new_segments = new_segments.uniq.map { |i| self.segments[i] }
|
139
|
+
new_segments.each { |i| segments << i unless segments[i.name] }
|
140
|
+
new = segments.size - pre_expansion
|
141
|
+
$stderr.puts " #{new} segments found, total: #{segments.size}"
|
142
|
+
break if new == 0
|
143
143
|
end
|
144
|
-
|
144
|
+
|
145
|
+
internally_linking_records(segments, all_edges)
|
145
146
|
end
|
146
147
|
|
147
148
|
def internally_linking_records(segments, edges)
|
149
|
+
unless segments.is_a? GFA::RecordSet::SegmentSet
|
150
|
+
raise "Unrecognised class: #{segments.class}"
|
151
|
+
end
|
152
|
+
|
148
153
|
$stderr.puts '- Gathering internally linking records'
|
149
|
-
|
150
|
-
|
154
|
+
s_names = Hash[segments.set.map { |i| [i.name.value, true]}]
|
155
|
+
self.class.advance_bar(edges.size)
|
156
|
+
edges.select do |record|
|
157
|
+
self.class.advance
|
158
|
+
record.segment_names_a.all? { |s| s_names[s] }
|
159
|
+
end
|
151
160
|
end
|
152
161
|
|
153
162
|
##
|
@@ -155,7 +164,7 @@ class GFA
|
|
155
164
|
# from the GFA. I.e., all links, jumps, containments, and paths.
|
156
165
|
def all_edges
|
157
166
|
edge_t = %i[Link Jump Containment Path]
|
158
|
-
edges
|
167
|
+
@edges ||= edge_t.flat_map { |t| records[t].set }
|
159
168
|
end
|
160
169
|
|
161
170
|
private
|
data/lib/gfa/matrix.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# A class to represent sparse matrices internally used for graph operations
|
4
|
+
class GFA::Matrix
|
5
|
+
|
6
|
+
attr_accessor :rows, :columns, :values
|
7
|
+
|
8
|
+
##
|
9
|
+
# Initialize a Matrix with +rows+ and +columns+ (both should be Integer),
|
10
|
+
# and a default +value+ (+nil+ if missing)
|
11
|
+
def initialize(rows, columns, value = nil)
|
12
|
+
raise 'Matrix rows must be an integer' unless rows.is_a? Integer
|
13
|
+
raise 'Matrix columns must be an integer' unless columns.is_a? Integer
|
14
|
+
raise 'Matrix rows must be positive' if rows < 0
|
15
|
+
raise 'Matrix columns must be positive' if columns < 0
|
16
|
+
|
17
|
+
@rows = rows
|
18
|
+
@columns = columns
|
19
|
+
@values = Hash.new(value)
|
20
|
+
end
|
21
|
+
|
22
|
+
def [](row = nil, col = nil)
|
23
|
+
index(row, col).map { |i| values[i] }
|
24
|
+
end
|
25
|
+
|
26
|
+
def []=(row, col, value)
|
27
|
+
values = (row.nil? || col.nil?) ? value : [value]
|
28
|
+
unless values.is_a? Array
|
29
|
+
raise 'Value must be an array if setting a range of cells'
|
30
|
+
end
|
31
|
+
|
32
|
+
idx = index(row, col)
|
33
|
+
if idx.size != values.size
|
34
|
+
raise "Expected #{idx.size} values, but only got #{values.size}"
|
35
|
+
end
|
36
|
+
idx.each_with_index.map { |i, k| @values[i] = values[k] }
|
37
|
+
end
|
38
|
+
|
39
|
+
##
|
40
|
+
# Determines the index of +row+ and +col+ (both must be defined Integer),
|
41
|
+
# sets its value to an empty Array if not yet defined, and appends +value+.
|
42
|
+
# Raises an error if the value already exists but is not an array
|
43
|
+
def append(row, col, value)
|
44
|
+
raise 'wow must be a defined integer' unless row.is_a?(Integer)
|
45
|
+
raise 'col must be a defined integer' unless col.is_a?(Integer)
|
46
|
+
|
47
|
+
idx = index(row, col).first
|
48
|
+
@values[idx] ||= []
|
49
|
+
unless @values[idx].is_a? Array
|
50
|
+
raise 'The values exists and it is not an array'
|
51
|
+
end
|
52
|
+
|
53
|
+
@values[idx] << value
|
54
|
+
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# Returns the list of indexes determined by +row+ and +col+ as an Array:
|
58
|
+
# - If +row+ and +col+ are Integer, it returns the index of the given cell
|
59
|
+
# - If both +row+ and +col+ are +nil+, it returns the indexes for all values
|
60
|
+
# - If +row+ xor +col+ are +nil+, it returns the indexes of the entire column
|
61
|
+
# or row, respectively.
|
62
|
+
def index(row = nil, col = nil)
|
63
|
+
if row.nil? && col.nil?
|
64
|
+
# All values
|
65
|
+
(0 .. values.size - 1) # .to_a
|
66
|
+
elsif row.nil?
|
67
|
+
# Entire column
|
68
|
+
(col_offset(col) .. col_offset(col) + rows - 1) # .to_a
|
69
|
+
elsif col.nil?
|
70
|
+
# Entire row
|
71
|
+
ric = row_in_column(row)
|
72
|
+
(0 .. columns - 1).map { |i| col_offset(i) + ric }
|
73
|
+
else
|
74
|
+
# Single value
|
75
|
+
[col_offset(col) + row_in_column(row)]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# Index of the first cell of the +col+. The column is a 0-based
|
81
|
+
# index, with negative integers representing columns counted from
|
82
|
+
# the end (-1 being the last column)
|
83
|
+
def col_offset(col)
|
84
|
+
col = cols + col if col < 0
|
85
|
+
col * rows
|
86
|
+
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# Index of the +row+ as if it was in the first column. The row
|
90
|
+
# is a 0-based index, with negative integers representing rows
|
91
|
+
# counted from the end (-1 being the last row)
|
92
|
+
def row_in_column(row)
|
93
|
+
row = rows + row if row < 0
|
94
|
+
row
|
95
|
+
end
|
96
|
+
end
|
data/lib/gfa/parser.rb
CHANGED
@@ -48,6 +48,7 @@ class GFA
|
|
48
48
|
blk = (lno.to_f / thr).ceil
|
49
49
|
|
50
50
|
# Launch children processes
|
51
|
+
advance_bar(blk)
|
51
52
|
io = []
|
52
53
|
pid = []
|
53
54
|
thr.times do |i|
|
@@ -56,7 +57,10 @@ class GFA
|
|
56
57
|
io[i][0].close
|
57
58
|
o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
|
58
59
|
records = []
|
59
|
-
read_records(file, o)
|
60
|
+
read_records(file, o) do |record|
|
61
|
+
advance if i == 0
|
62
|
+
records << record
|
63
|
+
end
|
60
64
|
Marshal.dump(records, io[i][1])
|
61
65
|
exit!(0)
|
62
66
|
end
|
@@ -10,7 +10,8 @@ module GFA::Record::HasFromTo
|
|
10
10
|
##
|
11
11
|
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
12
12
|
def segments(gfa)
|
13
|
-
raise
|
13
|
+
raise 'Unindexed GFA' unless gfa.indexed?
|
14
|
+
|
14
15
|
[gfa.segments[from.value], gfa.segments[to.value]]
|
15
16
|
end
|
16
17
|
|
data/lib/gfa/record/path.rb
CHANGED
@@ -27,10 +27,9 @@ class GFA::Record::Path < GFA::Record
|
|
27
27
|
##
|
28
28
|
# Extracts all linked segments from +gfa+ (which *must* be indexed)
|
29
29
|
def segments(gfa)
|
30
|
-
raise
|
31
|
-
|
32
|
-
|
33
|
-
end
|
30
|
+
raise 'Unindexed GFA' unless gfa.indexed?
|
31
|
+
|
32
|
+
segment_names_a.map { |name| gfa.segments[name] }
|
34
33
|
end
|
35
34
|
|
36
35
|
##
|
data/lib/gfa/record_set.rb
CHANGED
@@ -23,12 +23,13 @@ class GFA::RecordSet
|
|
23
23
|
|
24
24
|
# Instance-level
|
25
25
|
|
26
|
-
attr_reader :set, :index, :gfa
|
26
|
+
attr_reader :set, :index, :position, :gfa
|
27
27
|
|
28
28
|
def initialize(gfa = nil)
|
29
|
-
@set
|
30
|
-
@index
|
31
|
-
@
|
29
|
+
@set = []
|
30
|
+
@index = {}
|
31
|
+
@position = {}
|
32
|
+
@gfa = gfa || GFA.new
|
32
33
|
end
|
33
34
|
|
34
35
|
def [](k)
|
@@ -69,6 +70,7 @@ class GFA::RecordSet
|
|
69
70
|
raise "Wrong type of record: #{v.type}" if v.type != type
|
70
71
|
|
71
72
|
@set << v
|
73
|
+
@position[index_id(v)] = set.size - 1
|
72
74
|
index!(v)
|
73
75
|
end
|
74
76
|
|
@@ -109,6 +111,12 @@ class GFA::RecordSet
|
|
109
111
|
@index[k]
|
110
112
|
end
|
111
113
|
|
114
|
+
def position(v)
|
115
|
+
v = index_id(v) if v.is_a? GFA::Record
|
116
|
+
v = v.value if v.is_a? GFA::Field
|
117
|
+
@position[v]
|
118
|
+
end
|
119
|
+
|
112
120
|
def merge!(record_set)
|
113
121
|
raise "Not a record set" unless record_set.is_a?(GFA::RecordSet)
|
114
122
|
if record_set.type != type
|
data/lib/gfa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|
@@ -81,6 +81,7 @@ files:
|
|
81
81
|
- lib/gfa/field/string.rb
|
82
82
|
- lib/gfa/generator.rb
|
83
83
|
- lib/gfa/graph.rb
|
84
|
+
- lib/gfa/matrix.rb
|
84
85
|
- lib/gfa/modules.rb
|
85
86
|
- lib/gfa/parser.rb
|
86
87
|
- lib/gfa/record.rb
|