miga-base 1.3.20.4 → 1.3.20.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/distances.rb +101 -58
- data/lib/miga/json.rb +1 -1
- data/lib/miga/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba2efaad584d52c37115ad8a3f6076d0824f150189ba6611fa272239e434233c
|
4
|
+
data.tar.gz: dc0c901d5362f0080d831364ff8b5dce0709d04815efb91eb2445bf76a0b6fa0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9696bf176847dd66eb8cd4bd92ba3d69446e833f89998634fe594fc30d9d6af505f64c09e832b2f3dd8e6bac7a6d88bd733e84866ae556f0a0545e7e537f0460
|
7
|
+
data.tar.gz: 3162a6f5c23a8af7b7973a83d22500fcab04ac55f8915ceddbb5eb11308d0b97ee433832fbc93990ba2be2b2fcb3a5f6dc66234893975eec4656e4b6e2f68fe6
|
@@ -39,17 +39,20 @@ module MiGA::Cli::Action::Doctor::Distances
|
|
39
39
|
project = cli.load_project
|
40
40
|
ref_ds = project.each_dataset.select(&:ref?)
|
41
41
|
|
42
|
-
# Read and
|
42
|
+
# Read and write data
|
43
43
|
tmp = partial_bidir_tmp(project, ref_ds)
|
44
|
-
|
44
|
+
fixed_ds = merge_bidir_tmp(tmp)
|
45
45
|
FileUtils.rm_rf(tmp)
|
46
46
|
|
47
|
-
#
|
48
|
-
|
49
|
-
cli.
|
50
|
-
|
47
|
+
# Fix tables if needed
|
48
|
+
unless fixed_ds.empty?
|
49
|
+
cli.say ' - Filled datasets: %i' % fixed_ds.size
|
50
|
+
%i[aai_distances ani_distances].each do |res_name|
|
51
|
+
res = cli.load_project.result(res_name) or next
|
52
|
+
cli.say ' - Recalculating tables: %s' % res_name
|
53
|
+
res.recalculate!('Distances updated for bidirectionality').save
|
54
|
+
end
|
51
55
|
end
|
52
|
-
cli.say
|
53
56
|
end
|
54
57
|
|
55
58
|
##
|
@@ -69,93 +72,133 @@ module MiGA::Cli::Action::Doctor::Distances
|
|
69
72
|
|
70
73
|
#---- Auxiliary functions -----
|
71
74
|
|
75
|
+
##
# Calculates the number of chunks that should be produced during the
# bidirectional checks for +n+ reference datasets (Integer).
#
# The result is the larger of the configured number of threads
# (+cli[:threads]+) and the number of 1024-dataset blocks needed to cover
# +n+ (rounded up), capped at +n+. Returns an Integer.
def partial_bidir_chunks(n)
  # Float division is required: Integer#/ already truncates, which would turn
  # the following +ceil+ into a no-op and undercount the blocks
  blocks = (n / 1024.0).ceil
  wanted = [cli[:threads], blocks].max

  # Never request more chunks than there are datasets
  [wanted, n].min
end
|
83
|
+
|
72
84
|
##
|
73
85
|
# Make a temporary directory holding partial bidirectionality reports (one per
|
74
86
|
# chunk) as Marshal dumps. Requires a MiGA::Project +project+
|
75
87
|
# and the iterator of the reference datasets +ref_ds+. Returns the path to the
|
76
|
-
# temporal directory created
|
88
|
+
# temporary directory created
|
77
89
|
def partial_bidir_tmp(project, ref_ds)
|
78
90
|
n = ref_ds.size
|
91
|
+
chunks = partial_bidir_chunks(n)
|
79
92
|
|
80
93
|
# Check first if a previous run is complete (and recover it)
|
81
94
|
tmp = File.join(project.path, 'doctor-bidirectional.tmp')
|
82
|
-
tmp_done = File.join(tmp, 'done.txt')
|
95
|
+
tmp_done = File.join(tmp, 'read-done.txt')
|
83
96
|
if File.size?(tmp_done) &&
|
84
|
-
File.readlines(tmp_done)[0].chomp.to_i ==
|
97
|
+
File.readlines(tmp_done)[0].chomp.to_i == chunks
|
85
98
|
return tmp
|
86
99
|
end
|
87
100
|
|
88
101
|
# Read data first (threaded)
|
89
102
|
FileUtils.mkdir_p(tmp)
|
90
|
-
|
91
|
-
|
92
|
-
|
103
|
+
chunks_e = 0 .. chunks - 1
|
104
|
+
MiGA::Parallel.distribute(chunks_e, cli[:threads]) do |chunk, k, thr|
|
105
|
+
cli.advance('Reading:', k, chunks, false) if thr == 0
|
106
|
+
dist = {}
|
93
107
|
[:aai, :ani].each do |metric|
|
94
|
-
|
108
|
+
dist[metric] = {}
|
95
109
|
ref_ds.each_with_index do |ds, idx|
|
96
|
-
if idx %
|
97
|
-
cli.advance('Reading:', idx + 1, n, false) if thr == 0
|
110
|
+
if idx % chunks == chunk
|
98
111
|
row = read_bidirectional(ds, metric)
|
99
|
-
|
112
|
+
dist[metric][ds.name] = row unless row.empty?
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
if thr == 0
|
107
|
-
cli.advance('Reading:', n, n, false)
|
108
|
-
cli.say
|
109
|
-
end
|
116
|
+
file = File.join(tmp, "#{chunk}.marshal")
|
117
|
+
File.open("#{file}.tmp", 'w') { |fh| Marshal.dump(dist, fh) }
|
118
|
+
File.rename("#{file}.tmp", file)
|
110
119
|
end
|
120
|
+
cli.advance('Reading:', chunks, chunks, false)
|
121
|
+
cli.say
|
111
122
|
|
112
123
|
# Save information to indicate that the run is complete and return
|
113
|
-
File.open(tmp_done, 'w') { |fh| fh.puts
|
124
|
+
File.open(tmp_done, 'w') { |fh| fh.puts chunks }
|
114
125
|
return tmp
|
115
126
|
end
|
116
127
|
|
117
128
|
##
|
118
129
|
# Read partial temporary reports of bidirectionality (located in +tmp+), and
|
119
|
-
#
|
120
|
-
#
|
130
|
+
# fill databases with missing values. Returns the names of the datasets fixed
|
131
|
+
# as a Set.
|
121
132
|
def merge_bidir_tmp(tmp)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
133
|
+
tmp_done = File.join(tmp, 'read-done.txt')
|
134
|
+
chunks = File.readlines(tmp_done)[0].chomp.to_i
|
135
|
+
|
136
|
+
lower_triangle = []
|
137
|
+
chunks.times.each do |i|
|
138
|
+
(0 .. i).to_a.each { |j| lower_triangle << [i, j] }
|
139
|
+
end
|
140
|
+
MiGA::Parallel.distribute(lower_triangle, cli[:threads]) do |cell, k, thr|
|
141
|
+
cli.advance('Writing:', k, lower_triangle.size, false) if thr == 0
|
142
|
+
fixed_ds = merge_bidir_tmp_pair(tmp, cell[0], cell[1])
|
143
|
+
File.open(File.join(tmp, "#{cell[0]}-#{cell[1]}.txt"), 'w') do |fh|
|
144
|
+
fixed_ds.each { |ds| fh.puts ds }
|
145
|
+
end
|
146
|
+
end
|
147
|
+
cli.advance('Writing:', lower_triangle.size, lower_triangle.size, false)
|
148
|
+
cli.say
|
149
|
+
lower_triangle.map do |cell|
|
150
|
+
Set.new.tap do |y|
|
151
|
+
File.open(File.join(tmp, "#{cell[0]}-#{cell[1]}.txt"), 'r') do |fh|
|
152
|
+
fh.each { |ln| y << ln.chomp }
|
153
|
+
end
|
131
154
|
end
|
155
|
+
end.inject(Set.new, :+)
|
156
|
+
end
|
132
157
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
158
|
+
##
# Loads the Marshal reports of chunks +x+ and +y+ stored in +tmp+ and
# cross-references them to fill databases with missing values. When both
# indexes are the same the report is compared against itself once; otherwise
# the comparison runs in both directions (+x+ against +y+ first, then +y+
# against +x+). Returns the names of the fixed datasets as a Set.
def merge_bidir_tmp_pair(tmp, x, y)
  # Reads back the report written for one chunk index
  load_chunk = lambda do |idx|
    Marshal.load(File.read(File.join(tmp, "#{idx}.marshal")))
  end

  first = load_chunk.call(x)
  return merge_bidir_tmp_cell(first, first) if x == y

  second = load_chunk.call(y)
  forward = merge_bidir_tmp_cell(first, second)
  backward = merge_bidir_tmp_cell(second, first)
  forward + backward
end
|
172
|
+
|
173
|
+
##
|
174
|
+
# Find missing values in a "chunks cell" and fill databases. Returns the names
|
175
|
+
# of the fixed datasets as a Set.
|
176
|
+
def merge_bidir_tmp_cell(dist_x, dist_y)
|
177
|
+
# Find missing values
|
178
|
+
dist = {}
|
179
|
+
datasets = Set.new
|
180
|
+
dist_x.each do |metric, distances_x|
|
181
|
+
dist[metric] = {}
|
182
|
+
distances_x.each do |qry_x, row_x|
|
183
|
+
dist_y[metric].each do |qry_y, row_y|
|
184
|
+
# Ignore if missing in dist_x
|
185
|
+
next unless dist_x[metric][qry_x]&.include?(qry_y)
|
186
|
+
# Ignore if already in dist_y
|
187
|
+
next if dist_y[metric][qry_y]&.include?(qry_x)
|
188
|
+
# Save otherwise
|
189
|
+
dist[metric][qry_x] ||= {}
|
190
|
+
dist[metric][qry_x][qry_y] = dist_x[metric][qry_x][qry_y]
|
191
|
+
datasets << qry_y
|
151
192
|
end
|
152
|
-
raise "Incomplete thread dump: #{file}" unless metric == :end
|
153
193
|
end
|
154
|
-
File.open("#{file}.marshal", 'w') { |fh| Marshal.dump(dist, fh) }
|
155
194
|
end
|
156
|
-
cli.say
|
157
195
|
|
158
|
-
|
196
|
+
# Save them in databases
|
197
|
+
datasets.each do |ds_name|
|
198
|
+
ds = cli.load_project.dataset(ds_name)
|
199
|
+
save_bidirectional(ds, dist)
|
200
|
+
end
|
201
|
+
datasets
|
159
202
|
end
|
160
203
|
end
|
161
204
|
|
data/lib/miga/json.rb
CHANGED
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 20,
|
15
|
+
VERSION = [1.3, 20, 6].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 9,
|
23
|
+
VERSION_DATE = Date.new(2024, 9, 12)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.20.
|
4
|
+
version: 1.3.20.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|