gn_crossmap 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/gn_crossmap/resolver.rb +21 -20
- data/lib/gn_crossmap/resolver_job.rb +5 -4
- data/lib/gn_crossmap/result_processor.rb +0 -2
- data/lib/gn_crossmap/stats.rb +27 -4
- data/lib/gn_crossmap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49f923dfdab693728fe296afcf09fa275ce870ad
|
4
|
+
data.tar.gz: da0ac647957f8c28c438af7954b1e35473bcfc28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37e8d0183db3322a0bf627c336737a6dc42ae4471c4f52554ea761cfcdcc71210c6d0a793c948623a647cfbc97f7d782538049f4a62bdb97de6bded8ac2998ff
|
7
|
+
data.tar.gz: db2ccccf36fd9bfe04f77e3d75eca76286d82b4cc3f8e79d78fb54c443b6f991f2a235ba396a7bc4c41ec997a15aaec283eb676e577ac52358cb39017154d7d3
|
data/CHANGELOG.md
CHANGED
data/lib/gn_crossmap/resolver.rb
CHANGED
@@ -31,7 +31,7 @@ module GnCrossmap
|
|
31
31
|
private
|
32
32
|
|
33
33
|
def wrap_up
|
34
|
-
@stats.stats[:resolution_stop] = Time.now
|
34
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
35
35
|
@stats.stats[:status] = :finish
|
36
36
|
@processor.writer.close
|
37
37
|
end
|
@@ -52,7 +52,7 @@ module GnCrossmap
|
|
52
52
|
|
53
53
|
def resolution_stats(records_num)
|
54
54
|
@stats.stats[:total_records] = records_num
|
55
|
-
@stats.stats[:resolution_start] = Time.now
|
55
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
56
56
|
@stats.stats[:status] = :resolution
|
57
57
|
end
|
58
58
|
|
@@ -85,20 +85,6 @@ module GnCrossmap
|
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
-
# rubocop:disable Metrics/AbcSize
|
89
|
-
def update_stats(job_stats)
|
90
|
-
s = @stats.stats
|
91
|
-
s[:current_speed] = job_stats.stats[:current_speed]
|
92
|
-
s[:speed] = s[:current_speed] * @threads unless s[:speed]
|
93
|
-
s[:speed] = s[:speed] * (1 - @smoothing) +
|
94
|
-
s[:current_speed] * @smoothing * @threads
|
95
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
96
|
-
s[:resolved_records] += job_stats.stats[:resolved_records]
|
97
|
-
s[:eta] = (s[:total_records] - s[:resolved_records]) / s[:speed]
|
98
|
-
s[:matches][7] += job_stats.stats[:matches][7]
|
99
|
-
end
|
100
|
-
# rubocop:enable all
|
101
|
-
|
102
88
|
def create_job(batch)
|
103
89
|
names, batch_data = collect_names(batch)
|
104
90
|
rb = ResolverJob.new(names, batch_data, @resolver_url, @ds_id)
|
@@ -120,19 +106,34 @@ module GnCrossmap
|
|
120
106
|
batch_data[id] = row[:original]
|
121
107
|
@processor.input[id] = { rank: row[:rank] }
|
122
108
|
str << "#{id}|#{row[:name]}"
|
123
|
-
end
|
109
|
+
end
|
124
110
|
[names, batch_data]
|
125
111
|
end
|
126
112
|
|
113
|
+
# rubocop:disable Metrics/AbcSize
|
114
|
+
def update_stats(job_stats)
|
115
|
+
s = @stats.stats
|
116
|
+
current_speed = job_stats.stats[:current_speed] *
|
117
|
+
@stats.penalty(@threads)
|
118
|
+
|
119
|
+
s[:resolution][:completed_records] +=
|
120
|
+
job_stats.stats[:resolution][:completed_records]
|
121
|
+
@stats.update_eta(current_speed)
|
122
|
+
s[:matches][7] += job_stats.stats[:matches][7]
|
123
|
+
end
|
124
|
+
|
127
125
|
def with_log
|
128
126
|
yield
|
129
127
|
s = @count + 1
|
130
128
|
@count += @batch
|
131
129
|
e = [@count, @stats.stats[:total_records]].min
|
132
|
-
|
133
|
-
|
134
|
-
@stats.stats[:
|
130
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
131
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
132
|
+
@stats.stats[:total_records],
|
133
|
+
@stats.stats[:resolution][:speed].to_i,
|
134
|
+
Time.at(eta))
|
135
135
|
GnCrossmap.log(msg)
|
136
136
|
end
|
137
137
|
end
|
138
138
|
end
|
139
|
+
# rubocop:enable all
|
data/lib/gn_crossmap/resolver_job.rb
CHANGED
@@ -4,7 +4,8 @@ module GnCrossmap
|
|
4
4
|
# Remote resolution for parallel jobs
|
5
5
|
class ResolverJob
|
6
6
|
def initialize(names, batch_data, resolver_url, ds_id)
|
7
|
-
@
|
7
|
+
@names_size = names.size
|
8
|
+
@names = names.join("\n")
|
8
9
|
@batch_data = batch_data
|
9
10
|
@resolver_url = resolver_url
|
10
11
|
@ds_id = ds_id
|
@@ -26,7 +27,7 @@ module GnCrossmap
|
|
26
27
|
rescue RestClient::Exception
|
27
28
|
single_remote_resolve(names)
|
28
29
|
ensure
|
29
|
-
|
30
|
+
update_stats(batch_start)
|
30
31
|
end
|
31
32
|
|
32
33
|
def single_remote_resolve(names)
|
@@ -49,12 +50,12 @@ module GnCrossmap
|
|
49
50
|
|
50
51
|
def process_resolver_error(err, name)
|
51
52
|
@stats.stats[:matches][7] += 1
|
52
|
-
@stats.stats[:resolved_records] += 1
|
53
53
|
GnCrossmap.logger.error("Resolver broke on '#{name}': #{err.message}")
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
56
|
+
def update_stats(batch_start)
|
57
57
|
@stats.stats[:current_speed] = @batch_data.size / (Time.now - batch_start)
|
58
|
+
@stats.stats[:resolution][:completed_records] = @names_size
|
58
59
|
end
|
59
60
|
end
|
60
61
|
end
|
data/lib/gn_crossmap/result_processor.rb
CHANGED
@@ -31,7 +31,6 @@ module GnCrossmap
|
|
31
31
|
|
32
32
|
def write_empty_result(datum)
|
33
33
|
@stats.stats[:matches][0] += 1
|
34
|
-
@stats.stats[:resolved_records] += 1
|
35
34
|
res = compile_empty_result(datum)
|
36
35
|
@writer.write(res)
|
37
36
|
end
|
@@ -56,7 +55,6 @@ module GnCrossmap
|
|
56
55
|
def collect_stats(datum)
|
57
56
|
match_num = datum[:results].map { |d| d[:match_type] }.min
|
58
57
|
@stats.stats[:matches][match_num] += 1
|
59
|
-
@stats.stats[:resolved_records] += 1
|
60
58
|
end
|
61
59
|
|
62
60
|
def compile_result(datum, result, match_size)
|
data/lib/gn_crossmap/stats.rb
CHANGED
@@ -7,20 +7,43 @@ module GnCrossmap
|
|
7
7
|
|
8
8
|
def initialize
|
9
9
|
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
|
11
|
-
|
12
|
-
resolution_start: nil, resolution_stop: nil,
|
13
|
-
speed: nil, current_speed: nil, eta: nil,
|
10
|
+
ingestion_span: nil, ingestion_start: nil,
|
11
|
+
resolution: eta_struct,
|
14
12
|
matches: match_types, errors: [] }
|
13
|
+
@smooth = 0.05
|
14
|
+
end
|
15
|
+
|
16
|
+
def penalty(threads)
|
17
|
+
pnlt = 0.7
|
18
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
19
|
+
end
|
20
|
+
|
21
|
+
def update_eta(current_speed)
|
22
|
+
eta = @stats[:resolution]
|
23
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
24
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
25
|
+
eta[:eta] = (@stats[:total_records] -
|
26
|
+
@stats[:resolution][:completed_records]) /
|
27
|
+
eta[:speed]
|
15
28
|
end
|
16
29
|
|
17
30
|
private
|
18
31
|
|
32
|
+
def eta_struct
|
33
|
+
{ start_time: nil, completed_records: 0,
|
34
|
+
speed: nil, eta: nil, stop_time: nil }
|
35
|
+
end
|
36
|
+
|
19
37
|
def match_types
|
20
38
|
matches = GnCrossmap::MATCH_TYPES.keys
|
21
39
|
matches.each_with_object({}) do |key, obj|
|
22
40
|
obj[key] = 0
|
23
41
|
end
|
24
42
|
end
|
43
|
+
|
44
|
+
def penalty_adj(threads, val, pnlt)
|
45
|
+
return val if threads < 2
|
46
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
47
|
+
end
|
25
48
|
end
|
26
49
|
end
|
data/lib/gn_crossmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.0
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|