gfa 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80f578050737b697fa9205a6e66dd5a3206c3c8e49dad24f066e76444829334a
4
- data.tar.gz: 57d8d6436d92e009c867ac1939e2b9522b392931f3c404d0f2964b58111bb0d1
3
+ metadata.gz: 8919a697174aabef8c92bf7024d855c31c2adce2402e05474882cc03754e9ce2
4
+ data.tar.gz: 3a03b9fd5446938df35974eb0e225a35ecdc69539546ff0c6873fdba0cb28928
5
5
  SHA512:
6
- metadata.gz: 96145760f6f613ebfab88463c3dc2587dfd319ad7deccaec943b7ce93491b974ef5045e63b11ecf0c6cb4583b7b92c97b0132465838b7648e8bb1cd7e59cb5f6
7
- data.tar.gz: bc1ae46b2da9278db465afff02995679da7b4c71a558cea04396a8343eb998f4e28cbc1cb729886f030cbb1fe6093edca73945d85307a1cbf2a3bfe3aed74cef
6
+ metadata.gz: f72d49eb2fa08a89a8bf919cbc73161f65a1c2d083aa783856f33a5d872895a450d6de87a52977bfe69e5de35b0677cd00473b73d6a515449407c3705cfa3744
7
+ data.tar.gz: b57b09ffd99e0744fbf686d328acf778d4dcb502ae88cb79cfc3722d2c0b0d415569db0a9cce27367e6c16048ac3788efd7d4af3bfa2f3bbce7e66fad8b0f776
data/bin/gfa-add-gaf CHANGED
@@ -34,7 +34,8 @@ unless degree
34
34
  end
35
35
 
36
36
  $stderr.puts "Loading GFA: #{input_gfa}"
37
- gfa = GFA.load_parallel(input_gfa, (threads || 1).to_i)
37
+ threads = (threads || 1).to_i
38
+ gfa = GFA.load_parallel(input_gfa, threads)
38
39
 
39
40
  $stderr.puts "Loading GAF: #{input_gaf}"
40
41
  $stderr.puts "- Minimum identity: #{0.95}"
@@ -65,7 +66,7 @@ $stderr.puts "- Found #{segments.size} linked segments"
65
66
  degree = degree.to_i
66
67
  if degree >= 0
67
68
  $stderr.puts 'Subsetting graph'
68
- gfa = gfa.subgraph(segments, degree: degree)
69
+ gfa = gfa.subgraph(segments, degree: degree, threads: threads)
69
70
  end
70
71
 
71
72
  $stderr.puts "Saving GFA: #{output}"
data/lib/gfa/common.rb CHANGED
@@ -22,7 +22,6 @@ class GFA
22
22
 
23
23
  def self.advance
24
24
  @advance_bar_i += 1
25
- # $stderr.print "#{@advance_bar_i}"[-1] + "\b"
26
25
  while 50 * @advance_bar_i / @advance_bar_n > @advance_bar_p
27
26
  $stderr.print "\b=>"
28
27
  @advance_bar_p += 1
@@ -33,17 +32,16 @@ class GFA
33
32
  t_t = Time.now - @advance_bar_s
34
33
  t_u = 'sec'
35
34
 
36
- if t_t > 60
37
- t_t /= 60
38
- t_u = 'min'
35
+ [[60, 'min'], [60, 'h'], [24, 'd'], [30, 'mo']].each do |t_a|
36
+ break if t_t < t_a[0]
37
+ t_t /= t_a[0]
38
+ t_u = t_a[1]
39
39
  end
40
40
 
41
- if t_t > 60
42
- t_t /= 60
43
- t_u = 'h'
44
- end
45
-
46
- $stderr.puts ' [ %-48s ]' % "Time elapsed: #{'%.1f' % t_t} #{t_u}"
41
+ ram_gb = `ps -o rss= -p #{$$}`.to_f / 1024 / 1024
42
+ info1 = 'Time elapsed: %.1f %s' % [t_t, t_u]
43
+ info2 = '%.1fG RAM' % ram_gb
44
+ $stderr.puts ' [ %-36s%12s ]' % [info1, info2]
47
45
  end
48
46
 
49
47
  # Instance-level
data/lib/gfa/graph.rb CHANGED
@@ -62,10 +62,13 @@ class GFA
62
62
  # If +headers+, it includes all the original headers. Otherwise it only
63
63
  # includes the version header (might be inferred).
64
64
  #
65
+ # +threads+ indicates the number of threads to use in the processing of this
66
+ # operation, currently only affecting expansion rounds.
67
+ #
65
68
  # All comments are ignored even if originally parsed. Walks are currently
66
69
  # ignored too. If the current GFA object doesn't have an index, it builds one
67
70
  # and forces +index: true+. The output object inherits all options.
68
- def subgraph(segments, degree: 1, headers: true)
71
+ def subgraph(segments, degree: 1, headers: true, threads: 2)
69
72
  # Prepare objects
70
73
  unless opts[:index]
71
74
  opts[:index] = true
@@ -89,7 +92,7 @@ class GFA
89
92
  segments.each { |segment| gfa << segment }
90
93
 
91
94
  # Expand graph
92
- linking = linking_records(gfa.segments, degree: degree)
95
+ linking = linking_records(gfa.segments, degree: degree, threads: threads)
93
96
  linking.each { |record| gfa << record }
94
97
 
95
98
  # Return
@@ -99,23 +102,17 @@ class GFA
99
102
  ##
100
103
  # Finds all the records linking to any segments in +segments+, a
101
104
  # GFA::RecordSet::SegmentSet object, and expands to links with up to
102
- # +degree+ degrees of separation
103
- #
104
- # It only evaluates the edges given in the +edges+ Array of GFA::Record
105
- # values. If +edges+ is +nil+, it uses the full set of edges in the gfa.
106
- # Edge GFA::Record objects can be of type Link, Containment, Jump, or Path
107
- #
108
- # If +_ignore+ is passed, it ignores this number of segments at the beginning
109
- # of the +segments+ set (assumes they have already been evaluated). This is
110
- # only used for internal heuristics
105
+ # +degree+ degrees of separation. This operation uses +threads+ Threads
106
+ # (with shared RAM)
111
107
  #
112
108
  # Returns an array of GFA::Record objects with all the identified linking
113
- # records (edges)
109
+ # records (edges). Edge GFA::Record objects can be of type: Link, Containment,
110
+ # Jump, or Path
114
111
  #
115
- # IMPORTANT NOTE 1: The object +segments+ will be modified to include all
112
+ # IMPORTANT NOTE: The object +segments+ will be modified to include all
116
113
  # linked segments. If you don't want this behaviour, please make sure to pass
117
- # a duplicate of the object instead.
118
- def linking_records(segments, degree: 1)
114
+ # a duplicate of the object instead
115
+ def linking_records(segments, degree: 1, threads: 2)
119
116
  unless segments.is_a? GFA::RecordSet::SegmentSet
120
117
  raise "Unrecognised class: #{segments.class}"
121
118
  end
@@ -123,17 +120,43 @@ class GFA
123
120
  edge_matrix unless degree == 0 # Just to trigger matrix calculation
124
121
  degree.times do |round|
125
122
  $stderr.puts "- Expansion round #{round + 1}"
126
- self.class.advance_bar(segments.size)
123
+ self.class.advance_bar(segments.size + 1)
127
124
  pre_expansion = segments.size
128
- new_segments = []
129
- segments.set.each do |segment|
130
- self.class.advance
131
- idx = self.segments.position(segment)
132
- edge_matrix[nil, idx].each_with_index do |edge, target_k|
133
- new_segments << target_k if edge
125
+
126
+ # Launch children processes
127
+ io = []
128
+ pid = []
129
+ threads.times do |t|
130
+ io[t] = IO.pipe
131
+ pid << fork do
132
+ new_segments = Set.new
133
+ segments.set.each_with_index do |segment, k|
134
+ self.class.advance if t == 0
135
+ next unless (k % threads) == t
136
+ idx = self.segments.position(segment)
137
+ edge_matrix[nil, idx].each_with_index do |edge, target_k|
138
+ new_segments << target_k if edge
139
+ end
140
+ end
141
+ Marshal.dump(new_segments, io[t][1])
142
+ self.class.advance if t == 0
143
+ exit!(0)
134
144
  end
145
+ io[t][1].close
146
+ end
147
+
148
+ # Collect and merge results
149
+ new_segments = Set.new
150
+ io.each_with_index do |pipe, k|
151
+ result = pipe[0].read
152
+ Process.wait(pid[k])
153
+ self.class.advance_bar(io.size) if k == 0
154
+ raise "Child process failed: #{k}" if result.empty?
155
+ new_segments += Marshal.load(result)
156
+ pipe[0].close
157
+ self.class.advance
135
158
  end
136
- new_segments = new_segments.uniq.map { |i| self.segments[i] }
159
+ new_segments = new_segments.map { |i| self.segments[i] }
137
160
  new_segments.each { |i| segments << i unless segments[i.name] }
138
161
  new = segments.size - pre_expansion
139
162
  $stderr.puts " #{new} segments found, total: #{segments.size}"
data/lib/gfa/parser.rb CHANGED
@@ -39,7 +39,7 @@ class GFA
39
39
  # and the same +opts+ supported by +load+. Defaults to the +load+ method
40
40
  # instead if +thr <= 1+.
41
41
  def self.load_parallel(file, thr, opts = {})
42
- return self.load(file, opts) if thr <= 1
42
+ return self.load(file, opts) if thr < 1
43
43
 
44
44
  # Prepare data
45
45
  lno = 0
@@ -48,7 +48,7 @@ class GFA
48
48
  blk = (lno.to_f / thr).ceil
49
49
 
50
50
  # Launch children processes
51
- advance_bar(blk)
51
+ advance_bar(blk + 1)
52
52
  io = []
53
53
  pid = []
54
54
  thr.times do |i|
@@ -58,10 +58,11 @@ class GFA
58
58
  o = opts.merge(line_range: [i * blk, (i + 1) * blk - 1])
59
59
  records = []
60
60
  read_records(file, o) do |record|
61
- advance if i == 0
62
61
  records << record
62
+ advance if i == 0
63
63
  end
64
64
  Marshal.dump(records, io[i][1])
65
+ advance if i == 0
65
66
  exit!(0)
66
67
  end
67
68
  io[i][1].close
@@ -72,9 +73,11 @@ class GFA
72
73
  io.each_with_index do |pipe, k|
73
74
  result = pipe[0].read
74
75
  Process.wait(pid[k])
76
+ advance_bar(io.size) if k == 0
75
77
  raise "Child process failed: #{k}" if result.empty?
76
78
  Marshal.load(result).each { |record| gfa << record }
77
79
  pipe[0].close
80
+ advance
78
81
  end
79
82
 
80
83
  return gfa
data/lib/gfa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class GFA
2
- VERSION = '0.9.2'
2
+ VERSION = '0.9.3'
3
3
  VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
4
4
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
5
5
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gfa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ version: 0.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-03 00:00:00.000000000 Z
11
+ date: 2024-08-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rgl