gfa 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/gfa-add-gaf +3 -2
- data/lib/gfa/common.rb +8 -10
- data/lib/gfa/graph.rb +46 -23
- data/lib/gfa/parser.rb +6 -3
- data/lib/gfa/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8919a697174aabef8c92bf7024d855c31c2adce2402e05474882cc03754e9ce2
|
4
|
+
data.tar.gz: 3a03b9fd5446938df35974eb0e225a35ecdc69539546ff0c6873fdba0cb28928
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f72d49eb2fa08a89a8bf919cbc73161f65a1c2d083aa783856f33a5d872895a450d6de87a52977bfe69e5de35b0677cd00473b73d6a515449407c3705cfa3744
|
7
|
+
data.tar.gz: b57b09ffd99e0744fbf686d328acf778d4dcb502ae88cb79cfc3722d2c0b0d415569db0a9cce27367e6c16048ac3788efd7d4af3bfa2f3bbce7e66fad8b0f776
|
data/bin/gfa-add-gaf
CHANGED
@@ -34,7 +34,8 @@ unless degree
|
|
34
34
|
end
|
35
35
|
|
36
36
|
$stderr.puts "Loading GFA: #{input_gfa}"
|
37
|
-
|
37
|
+
threads = (threads || 1).to_i
|
38
|
+
gfa = GFA.load_parallel(input_gfa, threads)
|
38
39
|
|
39
40
|
$stderr.puts "Loading GAF: #{input_gaf}"
|
40
41
|
$stderr.puts "- Minimum identity: #{0.95}"
|
@@ -65,7 +66,7 @@ $stderr.puts "- Found #{segments.size} linked segments"
|
|
65
66
|
degree = degree.to_i
|
66
67
|
if degree >= 0
|
67
68
|
$stderr.puts 'Subsetting graph'
|
68
|
-
gfa = gfa.subgraph(segments, degree: degree)
|
69
|
+
gfa = gfa.subgraph(segments, degree: degree, threads: threads)
|
69
70
|
end
|
70
71
|
|
71
72
|
$stderr.puts "Saving GFA: #{output}"
|
data/lib/gfa/common.rb
CHANGED
@@ -22,7 +22,6 @@ class GFA
|
|
22
22
|
|
23
23
|
def self.advance
|
24
24
|
@advance_bar_i += 1
|
25
|
-
# $stderr.print "#{@advance_bar_i}"[-1] + "\b"
|
26
25
|
while 50 * @advance_bar_i / @advance_bar_n > @advance_bar_p
|
27
26
|
$stderr.print "\b=>"
|
28
27
|
@advance_bar_p += 1
|
@@ -33,17 +32,16 @@ class GFA
|
|
33
32
|
t_t = Time.now - @advance_bar_s
|
34
33
|
t_u = 'sec'
|
35
34
|
|
36
|
-
|
37
|
-
t_t
|
38
|
-
|
35
|
+
[[60, 'min'], [60, 'h'], [24, 'd'], [30, 'mo']].each do |t_a|
|
36
|
+
break if t_t < t_a[0]
|
37
|
+
t_t /= t_a[0]
|
38
|
+
t_u = t_a[1]
|
39
39
|
end
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
$stderr.puts ' [ %-48s ]' % "Time elapsed: #{'%.1f' % t_t} #{t_u}"
|
41
|
+
ram_gb = `ps -o rss= -p #{$$}`.to_f / 1024 / 1024
|
42
|
+
info1 = 'Time elapsed: %.1f %s' % [t_t, t_u]
|
43
|
+
info2 = '%.1fG RAM' % ram_gb
|
44
|
+
$stderr.puts ' [ %-36s%12s ]' % [info1, info2]
|
47
45
|
end
|
48
46
|
|
49
47
|
# Instance-level
|
data/lib/gfa/graph.rb
CHANGED
@@ -62,10 +62,13 @@ class GFA
|
|
62
62
|
# If +headers+, it includes all the original headers. Otherwise it only
|
63
63
|
# only includes the version header (might be inferred).
|
64
64
|
#
|
65
|
+
# +threads+ indicates the number of threads to use in the processing of this
|
66
|
+
# operation, currently only affecting expansion rounds.
|
67
|
+
#
|
65
68
|
# All comments are ignored even if originally parsed. Walks are currently
|
66
69
|
# ignored too. If the current GFA object doesn't have an index, it builds one
|
67
70
|
# and forces +index: true+. The output object inherits all options.
|
68
|
-
def subgraph(segments, degree: 1, headers: true)
|
71
|
+
def subgraph(segments, degree: 1, headers: true, threads: 2)
|
69
72
|
# Prepare objects
|
70
73
|
unless opts[:index]
|
71
74
|
opts[:index] = true
|
@@ -89,7 +92,7 @@ class GFA
|
|
89
92
|
segments.each { |segment| gfa << segment }
|
90
93
|
|
91
94
|
# Expand graph
|
92
|
-
linking = linking_records(gfa.segments, degree: degree)
|
95
|
+
linking = linking_records(gfa.segments, degree: degree, threads: threads)
|
93
96
|
linking.each { |record| gfa << record }
|
94
97
|
|
95
98
|
# Return
|
@@ -99,23 +102,17 @@ class GFA
|
|
99
102
|
##
|
100
103
|
# Finds all the records linking to any segments in +segments+, a
|
101
104
|
# GFA::RecordSet::SegmentSet object, and expands to links with up to
|
102
|
-
# +degree+ degrees of separation
|
103
|
-
#
|
104
|
-
# It only evaluates the edges given in the +edges+ Array of GFA::Record
|
105
|
-
# values. If +edges+ is +nil+, it uses the full set of edges in the gfa.
|
106
|
-
# Edge GFA::Record objects can be of type Link, Containment, Jump, or Path
|
107
|
-
#
|
108
|
-
# If +_ignore+ is passed, it ignores this number of segments at the beginning
|
109
|
-
# of the +segments+ set (assumes they have already been evaluated). This is
|
110
|
-
# only used for internal heuristics
|
105
|
+
# +degree+ degrees of separation. This operation uses +threads+ Threads
|
106
|
+
# (with shared RAM)
|
111
107
|
#
|
112
108
|
# Returns an array of GFA::Record objects with all the identified linking
|
113
|
-
# records (edges)
|
109
|
+
# records (edges). Edge GFA::Record objects can be of type: Link, Containment,
|
110
|
+
# Jump, or Path
|
114
111
|
#
|
115
|
-
# IMPORTANT NOTE
|
112
|
+
# IMPORTANT NOTE: The object +segments+ will be modified to include all
|
116
113
|
# linked segments. If you don't want this behaviour, please make sure to pass
|
117
|
-
# a duplicate of the object instead
|
118
|
-
def linking_records(segments, degree: 1)
|
114
|
+
# a duplicate of the object instead
|
115
|
+
def linking_records(segments, degree: 1, threads: 2)
|
119
116
|
unless segments.is_a? GFA::RecordSet::SegmentSet
|
120
117
|
raise "Unrecognised class: #{segments.class}"
|
121
118
|
end
|
@@ -123,17 +120,43 @@ class GFA
|
|
123
120
|
edge_matrix unless degree == 0 # Just to trigger matrix calculation
|
124
121
|
degree.times do |round|
|
125
122
|
$stderr.puts "- Expansion round #{round + 1}"
|
126
|
-
self.class.advance_bar(segments.size)
|
123
|
+
self.class.advance_bar(segments.size + 1)
|
127
124
|
pre_expansion = segments.size
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
125
|
+
|
126
|
+
# Launch children processes
|
127
|
+
io = []
|
128
|
+
pid = []
|
129
|
+
threads.times do |t|
|
130
|
+
io[t] = IO.pipe
|
131
|
+
pid << fork do
|
132
|
+
new_segments = Set.new
|
133
|
+
segments.set.each_with_index do |segment, k|
|
134
|
+
self.class.advance if t == 0
|
135
|
+
next unless (k % threads) == t
|
136
|
+
idx = self.segments.position(segment)
|
137
|
+
edge_matrix[nil, idx].each_with_index do |edge, target_k|
|
138
|
+
new_segments << target_k if edge
|
139
|
+
end
|
140
|
+
end
|
141
|
+
Marshal.dump(new_segments, io[t][1])
|
142
|
+
self.class.advance if t == 0
|
143
|
+
exit!(0)
|
134
144
|
end
|
145
|
+
io[t][1].close
|
146
|
+
end
|
147
|
+
|
148
|
+
# Collect and merge results
|
149
|
+
new_segments = Set.new
|
150
|
+
io.each_with_index do |pipe, k|
|
151
|
+
result = pipe[0].read
|
152
|
+
Process.wait(pid[k])
|
153
|
+
self.class.advance_bar(io.size) if k == 0
|
154
|
+
raise "Child process failed: #{k}" if result.empty?
|
155
|
+
new_segments += Marshal.load(result)
|
156
|
+
pipe[0].close
|
157
|
+
self.class.advance
|
135
158
|
end
|
136
|
-
new_segments = new_segments.
|
159
|
+
new_segments = new_segments.map { |i| self.segments[i] }
|
137
160
|
new_segments.each { |i| segments << i unless segments[i.name] }
|
138
161
|
new = segments.size - pre_expansion
|
139
162
|
$stderr.puts " #{new} segments found, total: #{segments.size}"
|
data/lib/gfa/parser.rb
CHANGED
@@ -39,7 +39,7 @@ class GFA
|
|
39
39
|
# and the same +opts+ supported by +load+. Defaults to the +load+ method
|
40
40
|
# instead if +thr <= 1+.
|
41
41
|
def self.load_parallel(file, thr, opts = {})
|
42
|
-
return self.load(file, opts) if thr
|
42
|
+
return self.load(file, opts) if thr < 1
|
43
43
|
|
44
44
|
# Prepare data
|
45
45
|
lno = 0
|
@@ -48,7 +48,7 @@ class GFA
|
|
48
48
|
blk = (lno.to_f / thr).ceil
|
49
49
|
|
50
50
|
# Launch children processes
|
51
|
-
advance_bar(blk)
|
51
|
+
advance_bar(blk + 1)
|
52
52
|
io = []
|
53
53
|
pid = []
|
54
54
|
thr.times do |i|
|
@@ -58,10 +58,11 @@ class GFA
|
|
58
58
|
o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
|
59
59
|
records = []
|
60
60
|
read_records(file, o) do |record|
|
61
|
-
advance if i == 0
|
62
61
|
records << record
|
62
|
+
advance if i == 0
|
63
63
|
end
|
64
64
|
Marshal.dump(records, io[i][1])
|
65
|
+
advance if i == 0
|
65
66
|
exit!(0)
|
66
67
|
end
|
67
68
|
io[i][1].close
|
@@ -72,9 +73,11 @@ class GFA
|
|
72
73
|
io.each_with_index do |pipe, k|
|
73
74
|
result = pipe[0].read
|
74
75
|
Process.wait(pid[k])
|
76
|
+
advance_bar(io.size) if k == 0
|
75
77
|
raise "Child process failed: #{k}" if result.empty?
|
76
78
|
Marshal.load(result).each { |record| gfa << record }
|
77
79
|
pipe[0].close
|
80
|
+
advance
|
78
81
|
end
|
79
82
|
|
80
83
|
return gfa
|
data/lib/gfa/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gfa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.2
|
4
|
+
version: 0.9.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rgl
|