gn_list_resolver 3.3.1.1 → 4.0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +2 -1
- data/lib/gn_list_resolver/resolver.rb +22 -14
- data/lib/gn_list_resolver/resolver.rb.orig +39 -7
- data/lib/gn_list_resolver/resolver_job.rb +2 -1
- data/lib/gn_list_resolver/result_processor.rb +0 -3
- data/lib/gn_list_resolver/stats.rb +28 -5
- data/lib/gn_list_resolver/stats.rb.orig +62 -0
- data/lib/gn_list_resolver/version.rb +1 -1
- data/lib/gn_list_resolver/version.rb.orig +3 -3
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47abe84b5c8bd829c01d9c50978fe71b3f14aef6
|
4
|
+
data.tar.gz: 7852f24f3dc3ba40e3175c92275bcde26eda0463
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e97f1949f12a97c814adb4e4a4b88223ac00bc5bc6095a553ed22461304f4986a2d32cc7071f8871b83a7957ba76c1fe102e6fd776cfea52a33278e9e1909584
|
7
|
+
data.tar.gz: f458552148b3981f6f9c5017ee1b8c9b90bed16e332ef06f5340f88815050905e51277e472be23089be94032c84032f770e3e338dcf7cf00b7dccb64e181e109
|
data/CHANGELOG.md
CHANGED
@@ -25,6 +25,14 @@
|
|
25
25
|
@dimus, @alexander-myltsev - sync with `gn_crossmap`'s 3.2.0, current name is
|
26
26
|
still a bit broken
|
27
27
|
|
28
|
+
## 4.0.1
|
29
|
+
|
30
|
+
* @dimus - Clean up resolution stats
|
31
|
+
|
32
|
+
## 4.0.0
|
33
|
+
|
34
|
+
* @dimus - Better estimation for time left for resolver and speed
|
35
|
+
|
28
36
|
## 3.3.1
|
29
37
|
|
30
38
|
* @dimus - Better error message in logger
|
data/README.md
CHANGED
@@ -166,7 +166,8 @@ end
|
|
166
166
|
|resolution_stop |time when resolution of names stopped |
|
167
167
|
|resolution_span |time of intermediate checkpoint of resolving names |
|
168
168
|
|resolved_records |number of names already processed |
|
169
|
-
|
|
169
|
+
|speed |weighted speed average |
|
170
|
+
|current_speed |speed of resolution for the last batch |
|
170
171
|
|matches |Distribution of processed data by match type (see below) |
|
171
172
|
|errors |First 0-10 errors found during the csv file processing |
|
172
173
|
|
@@ -16,6 +16,7 @@ module GnListResolver
|
|
16
16
|
@count = 0
|
17
17
|
@jobs = []
|
18
18
|
@batch = 1000
|
19
|
+
@smoothing = 0.05
|
19
20
|
end
|
20
21
|
|
21
22
|
def resolve(data)
|
@@ -32,7 +33,7 @@ module GnListResolver
|
|
32
33
|
private
|
33
34
|
|
34
35
|
def wrap_up
|
35
|
-
@stats.stats[:resolution_stop] = Time.now
|
36
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
36
37
|
@stats.stats[:status] = :finish
|
37
38
|
@processor.writer.close
|
38
39
|
end
|
@@ -53,7 +54,7 @@ module GnListResolver
|
|
53
54
|
|
54
55
|
def resolution_stats(records_num)
|
55
56
|
@stats.stats[:total_records] = records_num
|
56
|
-
@stats.stats[:resolution_start] = Time.now
|
57
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
57
58
|
@stats.stats[:status] = :resolution
|
58
59
|
end
|
59
60
|
|
@@ -82,17 +83,10 @@ module GnListResolver
|
|
82
83
|
update_stats(stats)
|
83
84
|
@processor.process(results, current_data)
|
84
85
|
else
|
85
|
-
|
86
|
+
GnListResolver.logger.error(job.reason.message)
|
86
87
|
end
|
87
88
|
end
|
88
89
|
|
89
|
-
def update_stats(job_stats)
|
90
|
-
s = @stats.stats
|
91
|
-
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
92
|
-
s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
|
93
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
94
|
-
end
|
95
|
-
|
96
90
|
def create_job(batch)
|
97
91
|
batch_data = collect_names(batch)
|
98
92
|
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
@@ -116,14 +110,28 @@ module GnListResolver
|
|
116
110
|
batch_data
|
117
111
|
end
|
118
112
|
|
113
|
+
# rubocop:disable Metrics/AbcSize
|
114
|
+
def update_stats(job_stats)
|
115
|
+
s = @stats.stats
|
116
|
+
current_speed = job_stats.stats[:current_speed] *
|
117
|
+
@stats.penalty(@threads)
|
118
|
+
|
119
|
+
s[:resolution][:completed_records] +=
|
120
|
+
job_stats.stats[:resolution][:completed_records]
|
121
|
+
@stats.update_eta(current_speed)
|
122
|
+
end
|
123
|
+
|
119
124
|
def with_log
|
125
|
+
yield
|
120
126
|
s = @count + 1
|
121
127
|
@count += @batch
|
122
128
|
e = [@count, @stats.stats[:total_records]].min
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
129
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
130
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
131
|
+
@stats.stats[:total_records],
|
132
|
+
@stats.stats[:resolution][:speed].to_i,
|
133
|
+
Time.at(eta))
|
134
|
+
GnListResolver.log(msg)
|
127
135
|
end
|
128
136
|
end
|
129
137
|
end
|
@@ -15,7 +15,12 @@ module GnListResolver
|
|
15
15
|
new(writer, @stats, @with_classification)
|
16
16
|
@count = 0
|
17
17
|
@jobs = []
|
18
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
18
19
|
@batch = 1000
|
20
|
+
=======
|
21
|
+
@batch = 200
|
22
|
+
@smoothing = 0.05
|
23
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
19
24
|
end
|
20
25
|
|
21
26
|
def resolve(data)
|
@@ -32,7 +37,7 @@ module GnListResolver
|
|
32
37
|
private
|
33
38
|
|
34
39
|
def wrap_up
|
35
|
-
@stats.stats[:resolution_stop] = Time.now
|
40
|
+
@stats.stats[:resolution][:stop_time] = Time.now
|
36
41
|
@stats.stats[:status] = :finish
|
37
42
|
@processor.writer.close
|
38
43
|
end
|
@@ -53,7 +58,7 @@ module GnListResolver
|
|
53
58
|
|
54
59
|
def resolution_stats(records_num)
|
55
60
|
@stats.stats[:total_records] = records_num
|
56
|
-
@stats.stats[:resolution_start] = Time.now
|
61
|
+
@stats.stats[:resolution][:start_time] = Time.now
|
57
62
|
@stats.stats[:status] = :resolution
|
58
63
|
end
|
59
64
|
|
@@ -82,14 +87,11 @@ module GnListResolver
|
|
82
87
|
update_stats(stats)
|
83
88
|
@processor.process(results, current_data)
|
84
89
|
else
|
85
|
-
|
86
|
-
GnListResolver.logger.error("Remote resolver server failed")
|
87
|
-
=======
|
88
|
-
GnCrossmap.logger.error(job.reason.message)
|
89
|
-
>>>>>>> 36115cc... better error log:lib/gn_crossmap/resolver.rb
|
90
|
+
GnResolver.logger.error(job.reason.message)
|
90
91
|
end
|
91
92
|
end
|
92
93
|
|
94
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
93
95
|
def update_stats(job_stats)
|
94
96
|
s = @stats.stats
|
95
97
|
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
@@ -97,6 +99,8 @@ module GnListResolver
|
|
97
99
|
s[:resolution_span] = Time.now - s[:resolution_start]
|
98
100
|
end
|
99
101
|
|
102
|
+
=======
|
103
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
100
104
|
def create_job(batch)
|
101
105
|
batch_data = collect_names(batch)
|
102
106
|
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
@@ -116,18 +120,46 @@ module GnListResolver
|
|
116
120
|
id = row[:id].strip
|
117
121
|
batch_data[id] = row[:original]
|
118
122
|
@processor.input[id] = { rank: row[:rank] }
|
123
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
119
124
|
end
|
120
125
|
batch_data
|
126
|
+
=======
|
127
|
+
str << "#{id}|#{row[:name]}"
|
128
|
+
end
|
129
|
+
[names, batch_data]
|
130
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
|
+
end
|
132
|
+
|
133
|
+
# rubocop:disable Metrics/AbcSize
|
134
|
+
def update_stats(job_stats)
|
135
|
+
s = @stats.stats
|
136
|
+
current_speed = job_stats.stats[:current_speed] *
|
137
|
+
@stats.penalty(@threads)
|
138
|
+
|
139
|
+
s[:resolution][:completed_records] +=
|
140
|
+
job_stats.stats[:resolution][:completed_records]
|
141
|
+
@stats.update_eta(current_speed)
|
142
|
+
s[:matches][7] += job_stats.stats[:matches][7]
|
121
143
|
end
|
122
144
|
|
123
145
|
def with_log
|
146
|
+
yield
|
124
147
|
s = @count + 1
|
125
148
|
@count += @batch
|
126
149
|
e = [@count, @stats.stats[:total_records]].min
|
150
|
+
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
127
151
|
GnListResolver.log("Resolve #{s}-#{e} out of " \
|
128
152
|
"#{@stats.stats[:total_records]} records at " \
|
129
153
|
"#{RESOLVER_URL}")
|
130
154
|
yield
|
155
|
+
=======
|
156
|
+
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
157
|
+
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
158
|
+
@stats.stats[:total_records],
|
159
|
+
@stats.stats[:resolution][:speed].to_i,
|
160
|
+
Time.at(eta))
|
161
|
+
GnCrossmap.log(msg)
|
162
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
163
|
end
|
132
164
|
end
|
133
165
|
end
|
@@ -33,7 +33,8 @@ module GnListResolver
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def stats_add_batch_time(batch_start)
|
36
|
-
@stats.stats[:
|
36
|
+
@stats.stats[:current_speed] = @names.size / (Time.now - batch_start)
|
37
|
+
@stats.stats[:resolution][:completed_records] = @names.size
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
@@ -24,7 +24,6 @@ module GnListResolver
|
|
24
24
|
|
25
25
|
def write_empty_result(datum)
|
26
26
|
@stats.stats[:matches][MATCH_TYPE_EMPTY] += 1
|
27
|
-
@stats.stats[:resolved_records] += 1
|
28
27
|
res = compile_empty_result(datum)
|
29
28
|
@writer.write(res)
|
30
29
|
end
|
@@ -54,9 +53,7 @@ module GnListResolver
|
|
54
53
|
else
|
55
54
|
match_type_min.match_type.kind.to_sym
|
56
55
|
end
|
57
|
-
require "byebug"; byebug if @stats.stats[:matches][match_type_value].nil?
|
58
56
|
@stats.stats[:matches][match_type_value] += 1
|
59
|
-
@stats.stats[:resolved_records] += 1
|
60
57
|
end
|
61
58
|
|
62
59
|
def compile_result(datum, result, match_size)
|
@@ -7,11 +7,24 @@ module GnListResolver
|
|
7
7
|
|
8
8
|
def initialize
|
9
9
|
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
ingestion_span: nil, ingestion_start: nil,
|
11
|
+
resolution: eta_struct,
|
12
|
+
matches: init_matches, errors: [] }
|
13
|
+
@smooth = 0.05
|
14
|
+
end
|
15
|
+
|
16
|
+
def penalty(threads)
|
17
|
+
pnlt = 0.7
|
18
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
19
|
+
end
|
20
|
+
|
21
|
+
def update_eta(current_speed)
|
22
|
+
eta = @stats[:resolution]
|
23
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
24
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
25
|
+
eta[:eta] = (@stats[:total_records] -
|
26
|
+
@stats[:resolution][:completed_records]) /
|
27
|
+
eta[:speed]
|
15
28
|
end
|
16
29
|
|
17
30
|
private
|
@@ -19,5 +32,15 @@ module GnListResolver
|
|
19
32
|
def init_matches
|
20
33
|
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
21
34
|
end
|
35
|
+
|
36
|
+
def eta_struct
|
37
|
+
{ start_time: nil, completed_records: 0,
|
38
|
+
speed: nil, eta: nil, stop_time: nil }
|
39
|
+
end
|
40
|
+
|
41
|
+
def penalty_adj(threads, val, pnlt)
|
42
|
+
return val if threads < 2
|
43
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
44
|
+
end
|
22
45
|
end
|
23
46
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GnListResolver
|
4
|
+
# Collects statistics about list resolving process
|
5
|
+
class Stats
|
6
|
+
attr_accessor :stats
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
+
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
11
|
+
resolved_records: 0, ingestion_span: nil,
|
12
|
+
resolution_span: nil, ingestion_start: nil,
|
13
|
+
resolution_start: nil, resolution_stop: nil,
|
14
|
+
last_batches_time: [], matches: init_matches,
|
15
|
+
errors: [] }
|
16
|
+
=======
|
17
|
+
ingestion_span: nil, ingestion_start: nil,
|
18
|
+
resolution: eta_struct,
|
19
|
+
matches: match_types, errors: [] }
|
20
|
+
@smooth = 0.05
|
21
|
+
end
|
22
|
+
|
23
|
+
def penalty(threads)
|
24
|
+
pnlt = 0.7
|
25
|
+
penalty_adj(threads.to_i, 1, pnlt)
|
26
|
+
end
|
27
|
+
|
28
|
+
def update_eta(current_speed)
|
29
|
+
eta = @stats[:resolution]
|
30
|
+
eta[:speed] = current_speed if eta[:speed].nil?
|
31
|
+
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
32
|
+
eta[:eta] = (@stats[:total_records] -
|
33
|
+
@stats[:resolution][:completed_records]) /
|
34
|
+
eta[:speed]
|
35
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
41
|
+
def init_matches
|
42
|
+
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
43
|
+
=======
|
44
|
+
def eta_struct
|
45
|
+
{ start_time: nil, completed_records: 0,
|
46
|
+
speed: nil, eta: nil, stop_time: nil }
|
47
|
+
end
|
48
|
+
|
49
|
+
def match_types
|
50
|
+
matches = GnCrossmap::MATCH_TYPES.keys
|
51
|
+
matches.each_with_object({}) do |key, obj|
|
52
|
+
obj[key] = 0
|
53
|
+
end
|
54
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
55
|
+
end
|
56
|
+
|
57
|
+
def penalty_adj(threads, val, pnlt)
|
58
|
+
return val if threads < 2
|
59
|
+
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -3,11 +3,11 @@
|
|
3
3
|
# Namespace module for crossmapping checklists to GN sources
|
4
4
|
<<<<<<< HEAD:lib/gn_list_resolver/version.rb
|
5
5
|
module GnListResolver
|
6
|
-
VERSION = "3.3.1.
|
6
|
+
VERSION = "3.3.1.1"
|
7
7
|
=======
|
8
8
|
module GnCrossmap
|
9
|
-
VERSION = "
|
10
|
-
>>>>>>>
|
9
|
+
VERSION = "4.0.1"
|
10
|
+
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
|
11
11
|
|
12
12
|
def self.version
|
13
13
|
VERSION
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_list_resolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1.1
|
4
|
+
version: 4.0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: biodiversity
|
@@ -231,6 +231,7 @@ files:
|
|
231
231
|
- lib/gn_list_resolver/result_processor.rb
|
232
232
|
- lib/gn_list_resolver/sci_name_collector.rb
|
233
233
|
- lib/gn_list_resolver/stats.rb
|
234
|
+
- lib/gn_list_resolver/stats.rb.orig
|
234
235
|
- lib/gn_list_resolver/version.rb
|
235
236
|
- lib/gn_list_resolver/version.rb.orig
|
236
237
|
- lib/gn_list_resolver/writer.rb
|