gn_crossmap 4.0.0 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 17c1750e71b66ba0d13b76a4acaa110f00b7cdd0
4
- data.tar.gz: 96f3b4d6955c10724f78749c99d9c0414da21278
3
+ metadata.gz: 49f923dfdab693728fe296afcf09fa275ce870ad
4
+ data.tar.gz: da0ac647957f8c28c438af7954b1e35473bcfc28
5
5
  SHA512:
6
- metadata.gz: 874afab0ba3e3c7e847ebcb8a86c439bcdcc56ab84ae86b0d5dc1912cd7dd8dff7fefa5bc0d031a082b9b854c11714879db44568edf79bcc698ba8337a633b9e
7
- data.tar.gz: 4cff3ac93255cd203ef4c2e9083ff1da6babb94a6367afb244c38e743c23be620fb154aad8e7deed818c16ada4f7ad0b78643123b72dd2c54798cafa8f70728e
6
+ metadata.gz: 37e8d0183db3322a0bf627c336737a6dc42ae4471c4f52554ea761cfcdcc71210c6d0a793c948623a647cfbc97f7d782538049f4a62bdb97de6bded8ac2998ff
7
+ data.tar.gz: db2ccccf36fd9bfe04f77e3d75eca76286d82b4cc3f8e79d78fb54c443b6f991f2a235ba396a7bc4c41ec997a15aaec283eb676e577ac52358cb39017154d7d3
@@ -1,5 +1,9 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 4.0.1
4
+
5
+ * @dimus - Clean up resolution stats
6
+
3
7
  ## 4.0.0
4
8
 
5
9
  * @dimus - Better estimation for time left for resolver and speed
@@ -31,7 +31,7 @@ module GnCrossmap
31
31
  private
32
32
 
33
33
  def wrap_up
34
- @stats.stats[:resolution_stop] = Time.now
34
+ @stats.stats[:resolution][:stop_time] = Time.now
35
35
  @stats.stats[:status] = :finish
36
36
  @processor.writer.close
37
37
  end
@@ -52,7 +52,7 @@ module GnCrossmap
52
52
 
53
53
  def resolution_stats(records_num)
54
54
  @stats.stats[:total_records] = records_num
55
- @stats.stats[:resolution_start] = Time.now
55
+ @stats.stats[:resolution][:start_time] = Time.now
56
56
  @stats.stats[:status] = :resolution
57
57
  end
58
58
 
@@ -85,20 +85,6 @@ module GnCrossmap
85
85
  end
86
86
  end
87
87
 
88
- # rubocop:disable Metrics/AbcSize
89
- def update_stats(job_stats)
90
- s = @stats.stats
91
- s[:current_speed] = job_stats.stats[:current_speed]
92
- s[:speed] = s[:current_speed] * @threads unless s[:speed]
93
- s[:speed] = s[:speed] * (1 - @smoothing) +
94
- s[:current_speed] * @smoothing * @threads
95
- s[:resolution_span] = Time.now - s[:resolution_start]
96
- s[:resolved_records] += job_stats.stats[:resolved_records]
97
- s[:eta] = (s[:total_records] - s[:resolved_records]) / s[:speed]
98
- s[:matches][7] += job_stats.stats[:matches][7]
99
- end
100
- # rubocop:enable all
101
-
102
88
  def create_job(batch)
103
89
  names, batch_data = collect_names(batch)
104
90
  rb = ResolverJob.new(names, batch_data, @resolver_url, @ds_id)
@@ -120,19 +106,34 @@ module GnCrossmap
120
106
  batch_data[id] = row[:original]
121
107
  @processor.input[id] = { rank: row[:rank] }
122
108
  str << "#{id}|#{row[:name]}"
123
- end.join("\n")
109
+ end
124
110
  [names, batch_data]
125
111
  end
126
112
 
113
+ # rubocop:disable Metrics/AbcSize
114
+ def update_stats(job_stats)
115
+ s = @stats.stats
116
+ current_speed = job_stats.stats[:current_speed] *
117
+ @stats.penalty(@threads)
118
+
119
+ s[:resolution][:completed_records] +=
120
+ job_stats.stats[:resolution][:completed_records]
121
+ @stats.update_eta(current_speed)
122
+ s[:matches][7] += job_stats.stats[:matches][7]
123
+ end
124
+
127
125
  def with_log
128
126
  yield
129
127
  s = @count + 1
130
128
  @count += @batch
131
129
  e = [@count, @stats.stats[:total_records]].min
132
- msg = format("Resolve %s-%s/%s records %d rec/s; eta: %d", s, e,
133
- @stats.stats[:total_records], @stats.stats[:speed],
134
- @stats.stats[:eta].to_i + Time.now.to_i)
130
+ eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
131
+ msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
132
+ @stats.stats[:total_records],
133
+ @stats.stats[:resolution][:speed].to_i,
134
+ Time.at(eta))
135
135
  GnCrossmap.log(msg)
136
136
  end
137
137
  end
138
138
  end
139
+ # rubocop:enable all
@@ -4,7 +4,8 @@ module GnCrossmap
4
4
  # Remote resolution for parallel jobs
5
5
  class ResolverJob
6
6
  def initialize(names, batch_data, resolver_url, ds_id)
7
- @names = names
7
+ @names_size = names.size
8
+ @names = names.join("\n")
8
9
  @batch_data = batch_data
9
10
  @resolver_url = resolver_url
10
11
  @ds_id = ds_id
@@ -26,7 +27,7 @@ module GnCrossmap
26
27
  rescue RestClient::Exception
27
28
  single_remote_resolve(names)
28
29
  ensure
29
- stats_add_batch_time(batch_start)
30
+ update_stats(batch_start)
30
31
  end
31
32
 
32
33
  def single_remote_resolve(names)
@@ -49,12 +50,12 @@ module GnCrossmap
49
50
 
50
51
  def process_resolver_error(err, name)
51
52
  @stats.stats[:matches][7] += 1
52
- @stats.stats[:resolved_records] += 1
53
53
  GnCrossmap.logger.error("Resolver broke on '#{name}': #{err.message}")
54
54
  end
55
55
 
56
- def stats_add_batch_time(batch_start)
56
+ def update_stats(batch_start)
57
57
  @stats.stats[:current_speed] = @batch_data.size / (Time.now - batch_start)
58
+ @stats.stats[:resolution][:completed_records] = @names_size
58
59
  end
59
60
  end
60
61
  end
@@ -31,7 +31,6 @@ module GnCrossmap
31
31
 
32
32
  def write_empty_result(datum)
33
33
  @stats.stats[:matches][0] += 1
34
- @stats.stats[:resolved_records] += 1
35
34
  res = compile_empty_result(datum)
36
35
  @writer.write(res)
37
36
  end
@@ -56,7 +55,6 @@ module GnCrossmap
56
55
  def collect_stats(datum)
57
56
  match_num = datum[:results].map { |d| d[:match_type] }.min
58
57
  @stats.stats[:matches][match_num] += 1
59
- @stats.stats[:resolved_records] += 1
60
58
  end
61
59
 
62
60
  def compile_result(datum, result, match_size)
@@ -7,20 +7,43 @@ module GnCrossmap
7
7
 
8
8
  def initialize
9
9
  @stats = { status: :init, total_records: 0, ingested_records: 0,
10
- resolved_records: 0, ingestion_span: nil,
11
- resolution_span: nil, ingestion_start: nil,
12
- resolution_start: nil, resolution_stop: nil,
13
- speed: nil, current_speed: nil, eta: nil,
10
+ ingestion_span: nil, ingestion_start: nil,
11
+ resolution: eta_struct,
14
12
  matches: match_types, errors: [] }
13
+ @smooth = 0.05
14
+ end
15
+
16
+ def penalty(threads)
17
+ pnlt = 0.7
18
+ penalty_adj(threads.to_i, 1, pnlt)
19
+ end
20
+
21
+ def update_eta(current_speed)
22
+ eta = @stats[:resolution]
23
+ eta[:speed] = current_speed if eta[:speed].nil?
24
+ eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
25
+ eta[:eta] = (@stats[:total_records] -
26
+ @stats[:resolution][:completed_records]) /
27
+ eta[:speed]
15
28
  end
16
29
 
17
30
  private
18
31
 
32
+ def eta_struct
33
+ { start_time: nil, completed_records: 0,
34
+ speed: nil, eta: nil, stop_time: nil }
35
+ end
36
+
19
37
  def match_types
20
38
  matches = GnCrossmap::MATCH_TYPES.keys
21
39
  matches.each_with_object({}) do |key, obj|
22
40
  obj[key] = 0
23
41
  end
24
42
  end
43
+
44
+ def penalty_adj(threads, val, pnlt)
45
+ return val if threads < 2
46
+ val + penalty_adj(threads - 1, (val * pnlt), pnlt)
47
+ end
25
48
  end
26
49
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnCrossmap
5
- VERSION = "4.0.0"
5
+ VERSION = "4.0.1"
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-13 00:00:00.000000000 Z
11
+ date: 2017-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity