gn_crossmap 4.0.0 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/gn_crossmap/resolver.rb +21 -20
- data/lib/gn_crossmap/resolver_job.rb +5 -4
- data/lib/gn_crossmap/result_processor.rb +0 -2
- data/lib/gn_crossmap/stats.rb +27 -4
- data/lib/gn_crossmap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49f923dfdab693728fe296afcf09fa275ce870ad
|
4
|
+
data.tar.gz: da0ac647957f8c28c438af7954b1e35473bcfc28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37e8d0183db3322a0bf627c336737a6dc42ae4471c4f52554ea761cfcdcc71210c6d0a793c948623a647cfbc97f7d782538049f4a62bdb97de6bded8ac2998ff
|
7
|
+
data.tar.gz: db2ccccf36fd9bfe04f77e3d75eca76286d82b4cc3f8e79d78fb54c443b6f991f2a235ba396a7bc4c41ec997a15aaec283eb676e577ac52358cb39017154d7d3
|
data/CHANGELOG.md
CHANGED
data/lib/gn_crossmap/resolver.rb
CHANGED
@@ -31,7 +31,7 @@ module GnCrossmap
|
|
31
31
|
private
|
32
32
|
|
33
33
|
def wrap_up
|
34
|
-
@stats.stats[:resolution_stop] = Time.now
|
34
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
35
35
|
@stats.stats[:status] = :finish
|
36
36
|
@processor.writer.close
|
37
37
|
end
|
@@ -52,7 +52,7 @@ module GnCrossmap
|
|
52
52
|
|
53
53
|
def resolution_stats(records_num)
|
54
54
|
@stats.stats[:total_records] = records_num
|
55
|
-
@stats.stats[:resolution_start] = Time.now
|
55
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
56
56
|
@stats.stats[:status] = :resolution
|
57
57
|
end
|
58
58
|
|
@@ -85,20 +85,6 @@ module GnCrossmap
|
|
85
85
|
end
|
86
86
|
end
|
87
87
|
|
88
|
-
# rubocop:disable Metrics/AbcSize
|
89
|
-
def update_stats(job_stats)
|
90
|
-
s = @stats.stats
|
91
|
-
s[:current_speed] = job_stats.stats[:current_speed]
|
92
|
-
s[:speed] = s[:current_speed] * @threads unless s[:speed]
|
93
|
-
s[:speed] = s[:speed] * (1 - @smoothing) +
|
94
|
-
s[:current_speed] * @smoothing * @threads
|
95
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
96
|
-
s[:resolved_records] += job_stats.stats[:resolved_records]
|
97
|
-
s[:eta] = (s[:total_records] - s[:resolved_records]) / s[:speed]
|
98
|
-
s[:matches][7] += job_stats.stats[:matches][7]
|
99
|
-
end
|
100
|
-
# rubocop:enable all
|
101
|
-
|
102
88
|
def create_job(batch)
|
103
89
|
names, batch_data = collect_names(batch)
|
104
90
|
rb = ResolverJob.new(names, batch_data, @resolver_url, @ds_id)
|
@@ -120,19 +106,34 @@ module GnCrossmap
|
|
120
106
|
batch_data[id] = row[:original]
|
121
107
|
@processor.input[id] = { rank: row[:rank] }
|
122
108
|
str << "#{id}|#{row[:name]}"
|
123
|
-
end
|
109
|
+
end
|
124
110
|
[names, batch_data]
|
125
111
|
end
|
126
112
|
|
113
|
+
# rubocop:disable Metrics/AbcSize
|
114
|
+
def update_stats(job_stats)
|
115
|
+
s = @stats.stats
|
116
|
+
current_speed = job_stats.stats[:current_speed] *
|
117
|
+
@stats.penalty(@threads)
|
118
|
+
|
119
|
+
s[:resolution][:completed_records] +=
|
120
|
+
job_stats.stats[:resolution][:completed_records]
|
121
|
+
@stats.update_eta(current_speed)
|
122
|
+
s[:matches][7] += job_stats.stats[:matches][7]
|
123
|
+
end
|
124
|
+
|
127
125
|
def with_log
|
128
126
|
yield
|
129
127
|
s = @count + 1
|
130
128
|
@count += @batch
|
131
129
|
e = [@count, @stats.stats[:total_records]].min
|
132
|
-
|
133
|
-
|
134
|
-
@stats.stats[:
|
130
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
131
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
132
|
+
@stats.stats[:total_records],
|
133
|
+
@stats.stats[:resolution][:speed].to_i,
|
134
|
+
Time.at(eta))
|
135
135
|
GnCrossmap.log(msg)
|
136
136
|
end
|
137
137
|
end
|
138
138
|
end
|
139
|
+
# rubocop:enable all
|
@@ -4,7 +4,8 @@ module GnCrossmap
|
|
4
4
|
# Remote resolution for parallel jobs
|
5
5
|
class ResolverJob
|
6
6
|
def initialize(names, batch_data, resolver_url, ds_id)
|
7
|
-
@
|
7
|
+
@names_size = names.size
|
8
|
+
@names = names.join("\n")
|
8
9
|
@batch_data = batch_data
|
9
10
|
@resolver_url = resolver_url
|
10
11
|
@ds_id = ds_id
|
@@ -26,7 +27,7 @@ module GnCrossmap
|
|
26
27
|
rescue RestClient::Exception
|
27
28
|
single_remote_resolve(names)
|
28
29
|
ensure
|
29
|
-
|
30
|
+
update_stats(batch_start)
|
30
31
|
end
|
31
32
|
|
32
33
|
def single_remote_resolve(names)
|
@@ -49,12 +50,12 @@ module GnCrossmap
|
|
49
50
|
|
50
51
|
def process_resolver_error(err, name)
|
51
52
|
@stats.stats[:matches][7] += 1
|
52
|
-
@stats.stats[:resolved_records] += 1
|
53
53
|
GnCrossmap.logger.error("Resolver broke on '#{name}': #{err.message}")
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
56
|
+
def update_stats(batch_start)
|
57
57
|
@stats.stats[:current_speed] = @batch_data.size / (Time.now - batch_start)
|
58
|
+
@stats.stats[:resolution][:completed_records] = @names_size
|
58
59
|
end
|
59
60
|
end
|
60
61
|
end
|
@@ -31,7 +31,6 @@ module GnCrossmap
|
|
31
31
|
|
32
32
|
def write_empty_result(datum)
|
33
33
|
@stats.stats[:matches][0] += 1
|
34
|
-
@stats.stats[:resolved_records] += 1
|
35
34
|
res = compile_empty_result(datum)
|
36
35
|
@writer.write(res)
|
37
36
|
end
|
@@ -56,7 +55,6 @@ module GnCrossmap
|
|
56
55
|
def collect_stats(datum)
|
57
56
|
match_num = datum[:results].map { |d| d[:match_type] }.min
|
58
57
|
@stats.stats[:matches][match_num] += 1
|
59
|
-
@stats.stats[:resolved_records] += 1
|
60
58
|
end
|
61
59
|
|
62
60
|
def compile_result(datum, result, match_size)
|
data/lib/gn_crossmap/stats.rb
CHANGED
@@ -7,20 +7,43 @@ module GnCrossmap
|
|
7
7
|
|
8
8
|
def initialize
|
9
9
|
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
|
11
|
-
|
12
|
-
resolution_start: nil, resolution_stop: nil,
|
13
|
-
speed: nil, current_speed: nil, eta: nil,
|
10
|
+
ingestion_span: nil, ingestion_start: nil,
|
11
|
+
resolution: eta_struct,
|
14
12
|
matches: match_types, errors: [] }
|
13
|
+
@smooth = 0.05
|
14
|
+
end
|
15
|
+
|
16
|
+
def penalty(threads)
|
17
|
+
pnlt = 0.7
|
18
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
19
|
+
end
|
20
|
+
|
21
|
+
def update_eta(current_speed)
|
22
|
+
eta = @stats[:resolution]
|
23
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
24
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
25
|
+
eta[:eta] = (@stats[:total_records] -
|
26
|
+
@stats[:resolution][:completed_records]) /
|
27
|
+
eta[:speed]
|
15
28
|
end
|
16
29
|
|
17
30
|
private
|
18
31
|
|
32
|
+
def eta_struct
|
33
|
+
{ start_time: nil, completed_records: 0,
|
34
|
+
speed: nil, eta: nil, stop_time: nil }
|
35
|
+
end
|
36
|
+
|
19
37
|
def match_types
|
20
38
|
matches = GnCrossmap::MATCH_TYPES.keys
|
21
39
|
matches.each_with_object({}) do |key, obj|
|
22
40
|
obj[key] = 0
|
23
41
|
end
|
24
42
|
end
|
43
|
+
|
44
|
+
def penalty_adj(threads, val, pnlt)
|
45
|
+
return val if threads < 2
|
46
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
47
|
+
end
|
25
48
|
end
|
26
49
|
end
|
data/lib/gn_crossmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.0
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|