gn_crossmap 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 17c1750e71b66ba0d13b76a4acaa110f00b7cdd0
4
- data.tar.gz: 96f3b4d6955c10724f78749c99d9c0414da21278
3
+ metadata.gz: 49f923dfdab693728fe296afcf09fa275ce870ad
4
+ data.tar.gz: da0ac647957f8c28c438af7954b1e35473bcfc28
5
5
  SHA512:
6
- metadata.gz: 874afab0ba3e3c7e847ebcb8a86c439bcdcc56ab84ae86b0d5dc1912cd7dd8dff7fefa5bc0d031a082b9b854c11714879db44568edf79bcc698ba8337a633b9e
7
- data.tar.gz: 4cff3ac93255cd203ef4c2e9083ff1da6babb94a6367afb244c38e743c23be620fb154aad8e7deed818c16ada4f7ad0b78643123b72dd2c54798cafa8f70728e
6
+ metadata.gz: 37e8d0183db3322a0bf627c336737a6dc42ae4471c4f52554ea761cfcdcc71210c6d0a793c948623a647cfbc97f7d782538049f4a62bdb97de6bded8ac2998ff
7
+ data.tar.gz: db2ccccf36fd9bfe04f77e3d75eca76286d82b4cc3f8e79d78fb54c443b6f991f2a235ba396a7bc4c41ec997a15aaec283eb676e577ac52358cb39017154d7d3
@@ -1,5 +1,9 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 4.0.1
4
+
5
+ * @dimus - Clean up resolution stats
6
+
3
7
  ## 4.0.0
4
8
 
5
9
  * @dimus - Better estimation for time left for resolver and speed
@@ -31,7 +31,7 @@ module GnCrossmap
31
31
  private
32
32
 
33
33
  def wrap_up
34
- @stats.stats[:resolution_stop] = Time.now
34
+ @stats.stats[:resolution][:stop_time] = Time.now
35
35
  @stats.stats[:status] = :finish
36
36
  @processor.writer.close
37
37
  end
@@ -52,7 +52,7 @@ module GnCrossmap
52
52
 
53
53
  def resolution_stats(records_num)
54
54
  @stats.stats[:total_records] = records_num
55
- @stats.stats[:resolution_start] = Time.now
55
+ @stats.stats[:resolution][:start_time] = Time.now
56
56
  @stats.stats[:status] = :resolution
57
57
  end
58
58
 
@@ -85,20 +85,6 @@ module GnCrossmap
85
85
  end
86
86
  end
87
87
 
88
- # rubocop:disable Metrics/AbcSize
89
- def update_stats(job_stats)
90
- s = @stats.stats
91
- s[:current_speed] = job_stats.stats[:current_speed]
92
- s[:speed] = s[:current_speed] * @threads unless s[:speed]
93
- s[:speed] = s[:speed] * (1 - @smoothing) +
94
- s[:current_speed] * @smoothing * @threads
95
- s[:resolution_span] = Time.now - s[:resolution_start]
96
- s[:resolved_records] += job_stats.stats[:resolved_records]
97
- s[:eta] = (s[:total_records] - s[:resolved_records]) / s[:speed]
98
- s[:matches][7] += job_stats.stats[:matches][7]
99
- end
100
- # rubocop:enable all
101
-
102
88
  def create_job(batch)
103
89
  names, batch_data = collect_names(batch)
104
90
  rb = ResolverJob.new(names, batch_data, @resolver_url, @ds_id)
@@ -120,19 +106,34 @@ module GnCrossmap
120
106
  batch_data[id] = row[:original]
121
107
  @processor.input[id] = { rank: row[:rank] }
122
108
  str << "#{id}|#{row[:name]}"
123
- end.join("\n")
109
+ end
124
110
  [names, batch_data]
125
111
  end
126
112
 
113
+ # rubocop:disable Metrics/AbcSize
114
+ def update_stats(job_stats)
115
+ s = @stats.stats
116
+ current_speed = job_stats.stats[:current_speed] *
117
+ @stats.penalty(@threads)
118
+
119
+ s[:resolution][:completed_records] +=
120
+ job_stats.stats[:resolution][:completed_records]
121
+ @stats.update_eta(current_speed)
122
+ s[:matches][7] += job_stats.stats[:matches][7]
123
+ end
124
+
127
125
  def with_log
128
126
  yield
129
127
  s = @count + 1
130
128
  @count += @batch
131
129
  e = [@count, @stats.stats[:total_records]].min
132
- msg = format("Resolve %s-%s/%s records %d rec/s; eta: %d", s, e,
133
- @stats.stats[:total_records], @stats.stats[:speed],
134
- @stats.stats[:eta].to_i + Time.now.to_i)
130
+ eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
131
+ msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
132
+ @stats.stats[:total_records],
133
+ @stats.stats[:resolution][:speed].to_i,
134
+ Time.at(eta))
135
135
  GnCrossmap.log(msg)
136
136
  end
137
137
  end
138
138
  end
139
+ # rubocop:enable all
@@ -4,7 +4,8 @@ module GnCrossmap
4
4
  # Remote resolution for parallel jobs
5
5
  class ResolverJob
6
6
  def initialize(names, batch_data, resolver_url, ds_id)
7
- @names = names
7
+ @names_size = names.size
8
+ @names = names.join("\n")
8
9
  @batch_data = batch_data
9
10
  @resolver_url = resolver_url
10
11
  @ds_id = ds_id
@@ -26,7 +27,7 @@ module GnCrossmap
26
27
  rescue RestClient::Exception
27
28
  single_remote_resolve(names)
28
29
  ensure
29
- stats_add_batch_time(batch_start)
30
+ update_stats(batch_start)
30
31
  end
31
32
 
32
33
  def single_remote_resolve(names)
@@ -49,12 +50,12 @@ module GnCrossmap
49
50
 
50
51
  def process_resolver_error(err, name)
51
52
  @stats.stats[:matches][7] += 1
52
- @stats.stats[:resolved_records] += 1
53
53
  GnCrossmap.logger.error("Resolver broke on '#{name}': #{err.message}")
54
54
  end
55
55
 
56
- def stats_add_batch_time(batch_start)
56
+ def update_stats(batch_start)
57
57
  @stats.stats[:current_speed] = @batch_data.size / (Time.now - batch_start)
58
+ @stats.stats[:resolution][:completed_records] = @names_size
58
59
  end
59
60
  end
60
61
  end
@@ -31,7 +31,6 @@ module GnCrossmap
31
31
 
32
32
  def write_empty_result(datum)
33
33
  @stats.stats[:matches][0] += 1
34
- @stats.stats[:resolved_records] += 1
35
34
  res = compile_empty_result(datum)
36
35
  @writer.write(res)
37
36
  end
@@ -56,7 +55,6 @@ module GnCrossmap
56
55
  def collect_stats(datum)
57
56
  match_num = datum[:results].map { |d| d[:match_type] }.min
58
57
  @stats.stats[:matches][match_num] += 1
59
- @stats.stats[:resolved_records] += 1
60
58
  end
61
59
 
62
60
  def compile_result(datum, result, match_size)
@@ -7,20 +7,43 @@ module GnCrossmap
7
7
 
8
8
  def initialize
9
9
  @stats = { status: :init, total_records: 0, ingested_records: 0,
10
- resolved_records: 0, ingestion_span: nil,
11
- resolution_span: nil, ingestion_start: nil,
12
- resolution_start: nil, resolution_stop: nil,
13
- speed: nil, current_speed: nil, eta: nil,
10
+ ingestion_span: nil, ingestion_start: nil,
11
+ resolution: eta_struct,
14
12
  matches: match_types, errors: [] }
13
+ @smooth = 0.05
14
+ end
15
+
16
+ def penalty(threads)
17
+ pnlt = 0.7
18
+ penalty_adj(threads.to_i, 1, pnlt)
19
+ end
20
+
21
+ def update_eta(current_speed)
22
+ eta = @stats[:resolution]
23
+ eta[:speed] = current_speed if eta[:speed].nil?
24
+ eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
25
+ eta[:eta] = (@stats[:total_records] -
26
+ @stats[:resolution][:completed_records]) /
27
+ eta[:speed]
15
28
  end
16
29
 
17
30
  private
18
31
 
32
+ def eta_struct
33
+ { start_time: nil, completed_records: 0,
34
+ speed: nil, eta: nil, stop_time: nil }
35
+ end
36
+
19
37
  def match_types
20
38
  matches = GnCrossmap::MATCH_TYPES.keys
21
39
  matches.each_with_object({}) do |key, obj|
22
40
  obj[key] = 0
23
41
  end
24
42
  end
43
+
44
+ def penalty_adj(threads, val, pnlt)
45
+ return val if threads < 2
46
+ val + penalty_adj(threads - 1, (val * pnlt), pnlt)
47
+ end
25
48
  end
26
49
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnCrossmap
5
- VERSION = "4.0.0"
5
+ VERSION = "4.0.1"
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-13 00:00:00.000000000 Z
11
+ date: 2017-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity