gn_list_resolver 3.3.1.1 → 4.0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bca6cb993b435d76acf6a14bb0c4031892a6a949
4
- data.tar.gz: d7c2c9e5c5132cd93b4dc03857427a0e6e5ed8e2
3
+ metadata.gz: 47abe84b5c8bd829c01d9c50978fe71b3f14aef6
4
+ data.tar.gz: 7852f24f3dc3ba40e3175c92275bcde26eda0463
5
5
  SHA512:
6
- metadata.gz: 64966a526648716221e6024c753fb9cb28b3af1450d928c1e9bc5f70cb614587ac7481674d93dbac1368a0ca90e621930d170ed5db5bf31d94a6478944919a58
7
- data.tar.gz: aabced616396747ce23661c945b29b11b7aebc297ee57618ed5e2a659d581abf446f1af42aafd1032492e54b20b8cc49ccd550650a73222f15d6c4bb69e1d0b3
6
+ metadata.gz: e97f1949f12a97c814adb4e4a4b88223ac00bc5bc6095a553ed22461304f4986a2d32cc7071f8871b83a7957ba76c1fe102e6fd776cfea52a33278e9e1909584
7
+ data.tar.gz: f458552148b3981f6f9c5017ee1b8c9b90bed16e332ef06f5340f88815050905e51277e472be23089be94032c84032f770e3e338dcf7cf00b7dccb64e181e109
@@ -25,6 +25,14 @@
25
25
  @dimus, @alexander-myltsev - sync with `gn_crossmap`'s 3.2.0, current name is
26
26
  still a bit broken
27
27
 
28
+ ## 4.0.1
29
+
30
+ * @dimus - Clean up resolution stats
31
+
32
+ ## 4.0.0
33
+
34
+ * @dimus - Better estimation for time left for resolver and speed
35
+
28
36
  ## 3.3.1
29
37
 
30
38
  * @dimus - Better error message in logger
data/README.md CHANGED
@@ -166,7 +166,8 @@ end
166
166
  |resolution_stop |time when resolution of names stopped |
167
167
  |resolution_span |time of intermediate checkpoint of resolving names |
168
168
  |resolved_records |number of names already processed |
169
- |last_batches_time |time required to process the last batch of names |
169
+ |speed |weighted speed average |
170
+ |current_speed |speed of resolution for the last batch |
170
171
  |matches |Distribution of processed data by match type (see below) |
171
172
  |errors |First 0-10 errors found during the csv file processing |
172
173
 
@@ -16,6 +16,7 @@ module GnListResolver
16
16
  @count = 0
17
17
  @jobs = []
18
18
  @batch = 1000
19
+ @smoothing = 0.05
19
20
  end
20
21
 
21
22
  def resolve(data)
@@ -32,7 +33,7 @@ module GnListResolver
32
33
  private
33
34
 
34
35
  def wrap_up
35
- @stats.stats[:resolution_stop] = Time.now
36
+ @stats.stats[:resolution][:stop_time] = Time.now
36
37
  @stats.stats[:status] = :finish
37
38
  @processor.writer.close
38
39
  end
@@ -53,7 +54,7 @@ module GnListResolver
53
54
 
54
55
  def resolution_stats(records_num)
55
56
  @stats.stats[:total_records] = records_num
56
- @stats.stats[:resolution_start] = Time.now
57
+ @stats.stats[:resolution][:start_time] = Time.now
57
58
  @stats.stats[:status] = :resolution
58
59
  end
59
60
 
@@ -82,17 +83,10 @@ module GnListResolver
82
83
  update_stats(stats)
83
84
  @processor.process(results, current_data)
84
85
  else
85
- GnResolver.logger.error(job.reason.message)
86
+ GnListResolver.logger.error(job.reason.message)
86
87
  end
87
88
  end
88
89
 
89
- def update_stats(job_stats)
90
- s = @stats.stats
91
- s[:last_batches_time].shift if s[:last_batches_time].size > 2
92
- s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
93
- s[:resolution_span] = Time.now - s[:resolution_start]
94
- end
95
-
96
90
  def create_job(batch)
97
91
  batch_data = collect_names(batch)
98
92
  rb = ResolverJob.new(batch, batch_data, @ds_id)
@@ -116,14 +110,28 @@ module GnListResolver
116
110
  batch_data
117
111
  end
118
112
 
113
+ # rubocop:disable Metrics/AbcSize
114
+ def update_stats(job_stats)
115
+ s = @stats.stats
116
+ current_speed = job_stats.stats[:current_speed] *
117
+ @stats.penalty(@threads)
118
+
119
+ s[:resolution][:completed_records] +=
120
+ job_stats.stats[:resolution][:completed_records]
121
+ @stats.update_eta(current_speed)
122
+ end
123
+
119
124
  def with_log
125
+ yield
120
126
  s = @count + 1
121
127
  @count += @batch
122
128
  e = [@count, @stats.stats[:total_records]].min
123
- GnListResolver.log("Resolve #{s}-#{e} out of " \
124
- "#{@stats.stats[:total_records]} records at " \
125
- "#{RESOLVER_URL}")
126
- yield
129
+ eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
130
+ msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
131
+ @stats.stats[:total_records],
132
+ @stats.stats[:resolution][:speed].to_i,
133
+ Time.at(eta))
134
+ GnListResolver.log(msg)
127
135
  end
128
136
  end
129
137
  end
@@ -15,7 +15,12 @@ module GnListResolver
15
15
  new(writer, @stats, @with_classification)
16
16
  @count = 0
17
17
  @jobs = []
18
+ <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
18
19
  @batch = 1000
20
+ =======
21
+ @batch = 200
22
+ @smoothing = 0.05
23
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
19
24
  end
20
25
 
21
26
  def resolve(data)
@@ -32,7 +37,7 @@ module GnListResolver
32
37
  private
33
38
 
34
39
  def wrap_up
35
- @stats.stats[:resolution_stop] = Time.now
40
+ @stats.stats[:resolution][:stop_time] = Time.now
36
41
  @stats.stats[:status] = :finish
37
42
  @processor.writer.close
38
43
  end
@@ -53,7 +58,7 @@ module GnListResolver
53
58
 
54
59
  def resolution_stats(records_num)
55
60
  @stats.stats[:total_records] = records_num
56
- @stats.stats[:resolution_start] = Time.now
61
+ @stats.stats[:resolution][:start_time] = Time.now
57
62
  @stats.stats[:status] = :resolution
58
63
  end
59
64
 
@@ -82,14 +87,11 @@ module GnListResolver
82
87
  update_stats(stats)
83
88
  @processor.process(results, current_data)
84
89
  else
85
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
86
- GnListResolver.logger.error("Remote resolver server failed")
87
- =======
88
- GnCrossmap.logger.error(job.reason.message)
89
- >>>>>>> 36115cc... better error log:lib/gn_crossmap/resolver.rb
90
+ GnResolver.logger.error(job.reason.message)
90
91
  end
91
92
  end
92
93
 
94
+ <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
93
95
  def update_stats(job_stats)
94
96
  s = @stats.stats
95
97
  s[:last_batches_time].shift if s[:last_batches_time].size > 2
@@ -97,6 +99,8 @@ module GnListResolver
97
99
  s[:resolution_span] = Time.now - s[:resolution_start]
98
100
  end
99
101
 
102
+ =======
103
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
100
104
  def create_job(batch)
101
105
  batch_data = collect_names(batch)
102
106
  rb = ResolverJob.new(batch, batch_data, @ds_id)
@@ -116,18 +120,46 @@ module GnListResolver
116
120
  id = row[:id].strip
117
121
  batch_data[id] = row[:original]
118
122
  @processor.input[id] = { rank: row[:rank] }
123
+ <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
119
124
  end
120
125
  batch_data
126
+ =======
127
+ str << "#{id}|#{row[:name]}"
128
+ end
129
+ [names, batch_data]
130
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
131
+ end
132
+
133
+ # rubocop:disable Metrics/AbcSize
134
+ def update_stats(job_stats)
135
+ s = @stats.stats
136
+ current_speed = job_stats.stats[:current_speed] *
137
+ @stats.penalty(@threads)
138
+
139
+ s[:resolution][:completed_records] +=
140
+ job_stats.stats[:resolution][:completed_records]
141
+ @stats.update_eta(current_speed)
142
+ s[:matches][7] += job_stats.stats[:matches][7]
121
143
  end
122
144
 
123
145
  def with_log
146
+ yield
124
147
  s = @count + 1
125
148
  @count += @batch
126
149
  e = [@count, @stats.stats[:total_records]].min
150
+ <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
127
151
  GnListResolver.log("Resolve #{s}-#{e} out of " \
128
152
  "#{@stats.stats[:total_records]} records at " \
129
153
  "#{RESOLVER_URL}")
130
154
  yield
155
+ =======
156
+ eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
157
+ msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
158
+ @stats.stats[:total_records],
159
+ @stats.stats[:resolution][:speed].to_i,
160
+ Time.at(eta))
161
+ GnCrossmap.log(msg)
162
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
131
163
  end
132
164
  end
133
165
  end
@@ -33,7 +33,8 @@ module GnListResolver
33
33
  end
34
34
 
35
35
  def stats_add_batch_time(batch_start)
36
- @stats.stats[:last_batches_time] << Time.now - batch_start
36
+ @stats.stats[:current_speed] = @names.size / (Time.now - batch_start)
37
+ @stats.stats[:resolution][:completed_records] = @names.size
37
38
  end
38
39
  end
39
40
  end
@@ -24,7 +24,6 @@ module GnListResolver
24
24
 
25
25
  def write_empty_result(datum)
26
26
  @stats.stats[:matches][MATCH_TYPE_EMPTY] += 1
27
- @stats.stats[:resolved_records] += 1
28
27
  res = compile_empty_result(datum)
29
28
  @writer.write(res)
30
29
  end
@@ -54,9 +53,7 @@ module GnListResolver
54
53
  else
55
54
  match_type_min.match_type.kind.to_sym
56
55
  end
57
- require "byebug"; byebug if @stats.stats[:matches][match_type_value].nil?
58
56
  @stats.stats[:matches][match_type_value] += 1
59
- @stats.stats[:resolved_records] += 1
60
57
  end
61
58
 
62
59
  def compile_result(datum, result, match_size)
@@ -7,11 +7,24 @@ module GnListResolver
7
7
 
8
8
  def initialize
9
9
  @stats = { status: :init, total_records: 0, ingested_records: 0,
10
- resolved_records: 0, ingestion_span: nil,
11
- resolution_span: nil, ingestion_start: nil,
12
- resolution_start: nil, resolution_stop: nil,
13
- last_batches_time: [], matches: init_matches,
14
- errors: [] }
10
+ ingestion_span: nil, ingestion_start: nil,
11
+ resolution: eta_struct,
12
+ matches: init_matches, errors: [] }
13
+ @smooth = 0.05
14
+ end
15
+
16
+ def penalty(threads)
17
+ pnlt = 0.7
18
+ penalty_adj(threads.to_i, 1, pnlt)
19
+ end
20
+
21
+ def update_eta(current_speed)
22
+ eta = @stats[:resolution]
23
+ eta[:speed] = current_speed if eta[:speed].nil?
24
+ eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
25
+ eta[:eta] = (@stats[:total_records] -
26
+ @stats[:resolution][:completed_records]) /
27
+ eta[:speed]
15
28
  end
16
29
 
17
30
  private
@@ -19,5 +32,15 @@ module GnListResolver
19
32
  def init_matches
20
33
  MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
21
34
  end
35
+
36
+ def eta_struct
37
+ { start_time: nil, completed_records: 0,
38
+ speed: nil, eta: nil, stop_time: nil }
39
+ end
40
+
41
+ def penalty_adj(threads, val, pnlt)
42
+ return val if threads < 2
43
+ val + penalty_adj(threads - 1, (val * pnlt), pnlt)
44
+ end
22
45
  end
23
46
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GnListResolver
4
+ # Collects statistics about list resolving process
5
+ class Stats
6
+ attr_accessor :stats
7
+
8
+ def initialize
9
+ @stats = { status: :init, total_records: 0, ingested_records: 0,
10
+ <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
11
+ resolved_records: 0, ingestion_span: nil,
12
+ resolution_span: nil, ingestion_start: nil,
13
+ resolution_start: nil, resolution_stop: nil,
14
+ last_batches_time: [], matches: init_matches,
15
+ errors: [] }
16
+ =======
17
+ ingestion_span: nil, ingestion_start: nil,
18
+ resolution: eta_struct,
19
+ matches: match_types, errors: [] }
20
+ @smooth = 0.05
21
+ end
22
+
23
+ def penalty(threads)
24
+ pnlt = 0.7
25
+ penalty_adj(threads.to_i, 1, pnlt)
26
+ end
27
+
28
+ def update_eta(current_speed)
29
+ eta = @stats[:resolution]
30
+ eta[:speed] = current_speed if eta[:speed].nil?
31
+ eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
32
+ eta[:eta] = (@stats[:total_records] -
33
+ @stats[:resolution][:completed_records]) /
34
+ eta[:speed]
35
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
36
+ end
37
+
38
+ private
39
+
40
+ <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
41
+ def init_matches
42
+ MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
43
+ =======
44
+ def eta_struct
45
+ { start_time: nil, completed_records: 0,
46
+ speed: nil, eta: nil, stop_time: nil }
47
+ end
48
+
49
+ def match_types
50
+ matches = GnCrossmap::MATCH_TYPES.keys
51
+ matches.each_with_object({}) do |key, obj|
52
+ obj[key] = 0
53
+ end
54
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
55
+ end
56
+
57
+ def penalty_adj(threads, val, pnlt)
58
+ return val if threads < 2
59
+ val + penalty_adj(threads - 1, (val * pnlt), pnlt)
60
+ end
61
+ end
62
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnListResolver
5
- VERSION = "3.3.1.1"
5
+ VERSION = "4.0.1.0"
6
6
 
7
7
  def self.version
8
8
  VERSION
@@ -3,11 +3,11 @@
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  <<<<<<< HEAD:lib/gn_list_resolver/version.rb
5
5
  module GnListResolver
6
- VERSION = "3.3.1.0"
6
+ VERSION = "3.3.1.1"
7
7
  =======
8
8
  module GnCrossmap
9
- VERSION = "3.3.1"
10
- >>>>>>> 36115cc... better error log:lib/gn_crossmap/version.rb
9
+ VERSION = "4.0.1"
10
+ >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
11
11
 
12
12
  def self.version
13
13
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_list_resolver
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1.1
4
+ version: 4.0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-15 00:00:00.000000000 Z
12
+ date: 2017-09-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: biodiversity
@@ -231,6 +231,7 @@ files:
231
231
  - lib/gn_list_resolver/result_processor.rb
232
232
  - lib/gn_list_resolver/sci_name_collector.rb
233
233
  - lib/gn_list_resolver/stats.rb
234
+ - lib/gn_list_resolver/stats.rb.orig
234
235
  - lib/gn_list_resolver/version.rb
235
236
  - lib/gn_list_resolver/version.rb.orig
236
237
  - lib/gn_list_resolver/writer.rb