gn_list_resolver 3.3.1.1 → 4.0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +2 -1
- data/lib/gn_list_resolver/resolver.rb +22 -14
- data/lib/gn_list_resolver/resolver.rb.orig +39 -7
- data/lib/gn_list_resolver/resolver_job.rb +2 -1
- data/lib/gn_list_resolver/result_processor.rb +0 -3
- data/lib/gn_list_resolver/stats.rb +28 -5
- data/lib/gn_list_resolver/stats.rb.orig +62 -0
- data/lib/gn_list_resolver/version.rb +1 -1
- data/lib/gn_list_resolver/version.rb.orig +3 -3
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47abe84b5c8bd829c01d9c50978fe71b3f14aef6
|
4
|
+
data.tar.gz: 7852f24f3dc3ba40e3175c92275bcde26eda0463
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e97f1949f12a97c814adb4e4a4b88223ac00bc5bc6095a553ed22461304f4986a2d32cc7071f8871b83a7957ba76c1fe102e6fd776cfea52a33278e9e1909584
|
7
|
+
data.tar.gz: f458552148b3981f6f9c5017ee1b8c9b90bed16e332ef06f5340f88815050905e51277e472be23089be94032c84032f770e3e338dcf7cf00b7dccb64e181e109
|
data/CHANGELOG.md
CHANGED
@@ -25,6 +25,14 @@
|
|
25
25
|
@dimus, @alexander-myltsev - sync with `gn_crossmap`'s 3.2.0, current name is
|
26
26
|
still a bit broken
|
27
27
|
|
28
|
+
## 4.0.1
|
29
|
+
|
30
|
+
* @dimus - Clean up resolution stats
|
31
|
+
|
32
|
+
## 4.0.0
|
33
|
+
|
34
|
+
* @dimus - Better estimation for time left for resolver and speed
|
35
|
+
|
28
36
|
## 3.3.1
|
29
37
|
|
30
38
|
* @dimus - Better error message in logger
|
data/README.md
CHANGED
@@ -166,7 +166,8 @@ end
|
|
166
166
|
|resolution_stop |time when resolution of names stopped |
|
167
167
|
|resolution_span |time of intermediate checkpoint of resolving names |
|
168
168
|
|resolved_records |number of names already processed |
|
169
|
-
|
|
169
|
+
|speed |weighted speed average |
|
170
|
+
|current_speed |speed of resolution for the last batch |
|
170
171
|
|matches |Distribution of processed data by match type (see below) |
|
171
172
|
|errors |First 0-10 errors found during the csv file processing |
|
172
173
|
|
@@ -16,6 +16,7 @@ module GnListResolver
|
|
16
16
|
@count = 0
|
17
17
|
@jobs = []
|
18
18
|
@batch = 1000
|
19
|
+
@smoothing = 0.05
|
19
20
|
end
|
20
21
|
|
21
22
|
def resolve(data)
|
@@ -32,7 +33,7 @@ module GnListResolver
|
|
32
33
|
private
|
33
34
|
|
34
35
|
def wrap_up
|
35
|
-
@stats.stats[:resolution_stop] = Time.now
|
36
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
36
37
|
@stats.stats[:status] = :finish
|
37
38
|
@processor.writer.close
|
38
39
|
end
|
@@ -53,7 +54,7 @@ module GnListResolver
|
|
53
54
|
|
54
55
|
def resolution_stats(records_num)
|
55
56
|
@stats.stats[:total_records] = records_num
|
56
|
-
@stats.stats[:resolution_start] = Time.now
|
57
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
57
58
|
@stats.stats[:status] = :resolution
|
58
59
|
end
|
59
60
|
|
@@ -82,17 +83,10 @@ module GnListResolver
|
|
82
83
|
update_stats(stats)
|
83
84
|
@processor.process(results, current_data)
|
84
85
|
else
|
85
|
-
|
86
|
+
GnListResolver.logger.error(job.reason.message)
|
86
87
|
end
|
87
88
|
end
|
88
89
|
|
89
|
-
def update_stats(job_stats)
|
90
|
-
s = @stats.stats
|
91
|
-
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
92
|
-
s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
|
93
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
94
|
-
end
|
95
|
-
|
96
90
|
def create_job(batch)
|
97
91
|
batch_data = collect_names(batch)
|
98
92
|
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
@@ -116,14 +110,28 @@ module GnListResolver
|
|
116
110
|
batch_data
|
117
111
|
end
|
118
112
|
|
113
|
+
# rubocop:disable Metrics/AbcSize
|
114
|
+
def update_stats(job_stats)
|
115
|
+
s = @stats.stats
|
116
|
+
current_speed = job_stats.stats[:current_speed] *
|
117
|
+
@stats.penalty(@threads)
|
118
|
+
|
119
|
+
s[:resolution][:completed_records] +=
|
120
|
+
job_stats.stats[:resolution][:completed_records]
|
121
|
+
@stats.update_eta(current_speed)
|
122
|
+
end
|
123
|
+
|
119
124
|
def with_log
|
125
|
+
yield
|
120
126
|
s = @count + 1
|
121
127
|
@count += @batch
|
122
128
|
e = [@count, @stats.stats[:total_records]].min
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
129
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
130
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
131
|
+
@stats.stats[:total_records],
|
132
|
+
@stats.stats[:resolution][:speed].to_i,
|
133
|
+
Time.at(eta))
|
134
|
+
GnListResolver.log(msg)
|
127
135
|
end
|
128
136
|
end
|
129
137
|
end
|
@@ -15,7 +15,12 @@ module GnListResolver
|
|
15
15
|
new(writer, @stats, @with_classification)
|
16
16
|
@count = 0
|
17
17
|
@jobs = []
|
18
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
18
19
|
@batch = 1000
|
20
|
+
=======
|
21
|
+
@batch = 200
|
22
|
+
@smoothing = 0.05
|
23
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
19
24
|
end
|
20
25
|
|
21
26
|
def resolve(data)
|
@@ -32,7 +37,7 @@ module GnListResolver
|
|
32
37
|
private
|
33
38
|
|
34
39
|
def wrap_up
|
35
|
-
@stats.stats[:resolution_stop] = Time.now
|
40
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
36
41
|
@stats.stats[:status] = :finish
|
37
42
|
@processor.writer.close
|
38
43
|
end
|
@@ -53,7 +58,7 @@ module GnListResolver
|
|
53
58
|
|
54
59
|
def resolution_stats(records_num)
|
55
60
|
@stats.stats[:total_records] = records_num
|
56
|
-
@stats.stats[:resolution_start] = Time.now
|
61
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
57
62
|
@stats.stats[:status] = :resolution
|
58
63
|
end
|
59
64
|
|
@@ -82,14 +87,11 @@ module GnListResolver
|
|
82
87
|
update_stats(stats)
|
83
88
|
@processor.process(results, current_data)
|
84
89
|
else
|
85
|
-
|
86
|
-
GnListResolver.logger.error("Remote resolver server failed")
|
87
|
-
=======
|
88
|
-
GnCrossmap.logger.error(job.reason.message)
|
89
|
-
>>>>>>> 36115cc... better error log:lib/gn_crossmap/resolver.rb
|
90
|
+
GnResolver.logger.error(job.reason.message)
|
90
91
|
end
|
91
92
|
end
|
92
93
|
|
94
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
93
95
|
def update_stats(job_stats)
|
94
96
|
s = @stats.stats
|
95
97
|
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
@@ -97,6 +99,8 @@ module GnListResolver
|
|
97
99
|
s[:resolution_span] = Time.now - s[:resolution_start]
|
98
100
|
end
|
99
101
|
|
102
|
+
=======
|
103
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
100
104
|
def create_job(batch)
|
101
105
|
batch_data = collect_names(batch)
|
102
106
|
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
@@ -116,18 +120,46 @@ module GnListResolver
|
|
116
120
|
id = row[:id].strip
|
117
121
|
batch_data[id] = row[:original]
|
118
122
|
@processor.input[id] = { rank: row[:rank] }
|
123
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
119
124
|
end
|
120
125
|
batch_data
|
126
|
+
=======
|
127
|
+
str << "#{id}|#{row[:name]}"
|
128
|
+
end
|
129
|
+
[names, batch_data]
|
130
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
|
+
end
|
132
|
+
|
133
|
+
# rubocop:disable Metrics/AbcSize
|
134
|
+
def update_stats(job_stats)
|
135
|
+
s = @stats.stats
|
136
|
+
current_speed = job_stats.stats[:current_speed] *
|
137
|
+
@stats.penalty(@threads)
|
138
|
+
|
139
|
+
s[:resolution][:completed_records] +=
|
140
|
+
job_stats.stats[:resolution][:completed_records]
|
141
|
+
@stats.update_eta(current_speed)
|
142
|
+
s[:matches][7] += job_stats.stats[:matches][7]
|
121
143
|
end
|
122
144
|
|
123
145
|
def with_log
|
146
|
+
yield
|
124
147
|
s = @count + 1
|
125
148
|
@count += @batch
|
126
149
|
e = [@count, @stats.stats[:total_records]].min
|
150
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
127
151
|
GnListResolver.log("Resolve #{s}-#{e} out of " \
|
128
152
|
"#{@stats.stats[:total_records]} records at " \
|
129
153
|
"#{RESOLVER_URL}")
|
130
154
|
yield
|
155
|
+
=======
|
156
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
157
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
158
|
+
@stats.stats[:total_records],
|
159
|
+
@stats.stats[:resolution][:speed].to_i,
|
160
|
+
Time.at(eta))
|
161
|
+
GnCrossmap.log(msg)
|
162
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
163
|
end
|
132
164
|
end
|
133
165
|
end
|
@@ -33,7 +33,8 @@ module GnListResolver
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def stats_add_batch_time(batch_start)
|
36
|
-
@stats.stats[:
|
36
|
+
@stats.stats[:current_speed] = @names.size / (Time.now - batch_start)
|
37
|
+
@stats.stats[:resolution][:completed_records] = @names.size
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
@@ -24,7 +24,6 @@ module GnListResolver
|
|
24
24
|
|
25
25
|
def write_empty_result(datum)
|
26
26
|
@stats.stats[:matches][MATCH_TYPE_EMPTY] += 1
|
27
|
-
@stats.stats[:resolved_records] += 1
|
28
27
|
res = compile_empty_result(datum)
|
29
28
|
@writer.write(res)
|
30
29
|
end
|
@@ -54,9 +53,7 @@ module GnListResolver
|
|
54
53
|
else
|
55
54
|
match_type_min.match_type.kind.to_sym
|
56
55
|
end
|
57
|
-
require "byebug"; byebug if @stats.stats[:matches][match_type_value].nil?
|
58
56
|
@stats.stats[:matches][match_type_value] += 1
|
59
|
-
@stats.stats[:resolved_records] += 1
|
60
57
|
end
|
61
58
|
|
62
59
|
def compile_result(datum, result, match_size)
|
@@ -7,11 +7,24 @@ module GnListResolver
|
|
7
7
|
|
8
8
|
def initialize
|
9
9
|
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
ingestion_span: nil, ingestion_start: nil,
|
11
|
+
resolution: eta_struct,
|
12
|
+
matches: init_matches, errors: [] }
|
13
|
+
@smooth = 0.05
|
14
|
+
end
|
15
|
+
|
16
|
+
def penalty(threads)
|
17
|
+
pnlt = 0.7
|
18
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
19
|
+
end
|
20
|
+
|
21
|
+
def update_eta(current_speed)
|
22
|
+
eta = @stats[:resolution]
|
23
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
24
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
25
|
+
eta[:eta] = (@stats[:total_records] -
|
26
|
+
@stats[:resolution][:completed_records]) /
|
27
|
+
eta[:speed]
|
15
28
|
end
|
16
29
|
|
17
30
|
private
|
@@ -19,5 +32,15 @@ module GnListResolver
|
|
19
32
|
def init_matches
|
20
33
|
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
21
34
|
end
|
35
|
+
|
36
|
+
def eta_struct
|
37
|
+
{ start_time: nil, completed_records: 0,
|
38
|
+
speed: nil, eta: nil, stop_time: nil }
|
39
|
+
end
|
40
|
+
|
41
|
+
def penalty_adj(threads, val, pnlt)
|
42
|
+
return val if threads < 2
|
43
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
44
|
+
end
|
22
45
|
end
|
23
46
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GnListResolver
|
4
|
+
# Collects statistics about list resolving process
|
5
|
+
class Stats
|
6
|
+
attr_accessor :stats
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
+
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
11
|
+
resolved_records: 0, ingestion_span: nil,
|
12
|
+
resolution_span: nil, ingestion_start: nil,
|
13
|
+
resolution_start: nil, resolution_stop: nil,
|
14
|
+
last_batches_time: [], matches: init_matches,
|
15
|
+
errors: [] }
|
16
|
+
=======
|
17
|
+
ingestion_span: nil, ingestion_start: nil,
|
18
|
+
resolution: eta_struct,
|
19
|
+
matches: match_types, errors: [] }
|
20
|
+
@smooth = 0.05
|
21
|
+
end
|
22
|
+
|
23
|
+
def penalty(threads)
|
24
|
+
pnlt = 0.7
|
25
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
26
|
+
end
|
27
|
+
|
28
|
+
def update_eta(current_speed)
|
29
|
+
eta = @stats[:resolution]
|
30
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
31
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
32
|
+
eta[:eta] = (@stats[:total_records] -
|
33
|
+
@stats[:resolution][:completed_records]) /
|
34
|
+
eta[:speed]
|
35
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
41
|
+
def init_matches
|
42
|
+
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
43
|
+
=======
|
44
|
+
def eta_struct
|
45
|
+
{ start_time: nil, completed_records: 0,
|
46
|
+
speed: nil, eta: nil, stop_time: nil }
|
47
|
+
end
|
48
|
+
|
49
|
+
def match_types
|
50
|
+
matches = GnCrossmap::MATCH_TYPES.keys
|
51
|
+
matches.each_with_object({}) do |key, obj|
|
52
|
+
obj[key] = 0
|
53
|
+
end
|
54
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
55
|
+
end
|
56
|
+
|
57
|
+
def penalty_adj(threads, val, pnlt)
|
58
|
+
return val if threads < 2
|
59
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -3,11 +3,11 @@
|
|
3
3
|
# Namespace module for crossmapping checklists to GN sources
|
4
4
|
<<<<<<< HEAD:lib/gn_list_resolver/version.rb
|
5
5
|
module GnListResolver
|
6
|
-
VERSION = "3.3.1.
|
6
|
+
VERSION = "3.3.1.1"
|
7
7
|
=======
|
8
8
|
module GnCrossmap
|
9
|
-
VERSION = "
|
10
|
-
>>>>>>>
|
9
|
+
VERSION = "4.0.1"
|
10
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
|
11
11
|
|
12
12
|
def self.version
|
13
13
|
VERSION
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_list_resolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1.1
|
4
|
+
version: 4.0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: biodiversity
|
@@ -231,6 +231,7 @@ files:
|
|
231
231
|
- lib/gn_list_resolver/result_processor.rb
|
232
232
|
- lib/gn_list_resolver/sci_name_collector.rb
|
233
233
|
- lib/gn_list_resolver/stats.rb
|
234
|
+
- lib/gn_list_resolver/stats.rb.orig
|
234
235
|
- lib/gn_list_resolver/version.rb
|
235
236
|
- lib/gn_list_resolver/version.rb.orig
|
236
237
|
- lib/gn_list_resolver/writer.rb
|