gn_list_resolver 4.0.1.0 → 4.0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47abe84b5c8bd829c01d9c50978fe71b3f14aef6
4
- data.tar.gz: 7852f24f3dc3ba40e3175c92275bcde26eda0463
3
+ metadata.gz: 0d3bfe28b67d56d76bed6c1a4a5be00e89bfad04
4
+ data.tar.gz: 5dd7d097a80376c9fcc574787980af55f956d766
5
5
  SHA512:
6
- metadata.gz: e97f1949f12a97c814adb4e4a4b88223ac00bc5bc6095a553ed22461304f4986a2d32cc7071f8871b83a7957ba76c1fe102e6fd776cfea52a33278e9e1909584
7
- data.tar.gz: f458552148b3981f6f9c5017ee1b8c9b90bed16e332ef06f5340f88815050905e51277e472be23089be94032c84032f770e3e338dcf7cf00b7dccb64e181e109
6
+ metadata.gz: 3bf460b69cc2fa211ec8a9d4099ef79c7c7ff25f5acd63c61bbada96609cab45d59967ddbcb454c75a067445f3acb41c0ec4b5ebb21d9155f9109c6c64ef3d55
7
+ data.tar.gz: 1496680694c8920362025e76327d4185638c1742108d628c1af4279c1c54228d40864b9106db3bb844591ba1592d478cf7799ee5ab59e39886e8051f18cc6168
@@ -25,6 +25,10 @@
25
25
  @dimus, @alexander-myltsev - sync with `gn_crossmap`'s 3.2.0, current name is
26
26
  still a bit broken
27
27
 
28
+ ## 4.0.2
29
+
30
+ * @dimus - Add resolution `time_span` to stats
31
+
28
32
  ## 4.0.1
29
33
 
30
34
  * @dimus - Clean up resolution stats
@@ -44,7 +44,7 @@ Gem::Specification.new do |gem|
44
44
  gem.add_development_dependency "coveralls", "~> 0.8"
45
45
  gem.add_development_dependency "rake", "~> 12.0"
46
46
  gem.add_development_dependency "rspec", "~> 3.2"
47
- gem.add_development_dependency "rubocop", "~> 0.49"
47
+ gem.add_development_dependency "rubocop", "~> 0.50"
48
48
  end
49
49
 
50
50
  # rubocop:enable Metrics/BlockLength:
@@ -75,7 +75,7 @@ module GnListResolver
75
75
 
76
76
  def process_headers(row)
77
77
  @original_fields = headers(row)
78
- row = produce_alt_headers(row) if @alt_headers && !@alt_headers.empty?
78
+ row = produce_alt_headers(row) unless @alt_headers&.empty?
79
79
  row
80
80
  end
81
81
 
@@ -104,7 +104,7 @@ module GnListResolver
104
104
 
105
105
  def taxon_id_header(hdrs)
106
106
  hdrs.each do |h|
107
- return [h] if h && h.match?(/taxonid\s*$/i)
107
+ return [h] if h&.match?(/taxonid\s*$/i)
108
108
  end
109
109
  []
110
110
  end
@@ -119,6 +119,7 @@ module GnListResolver
119
119
  s[:resolution][:completed_records] +=
120
120
  job_stats.stats[:resolution][:completed_records]
121
121
  @stats.update_eta(current_speed)
122
+ s[:resolution][:time_span] = Time.now - s[:resolution][:start_time]
122
123
  end
123
124
 
124
125
  def with_log
@@ -34,7 +34,7 @@ module GnListResolver
34
34
  end
35
35
 
36
36
  def eta_struct
37
- { start_time: nil, completed_records: 0,
37
+ { start_time: nil, completed_records: 0, time_span: 0,
38
38
  speed: nil, eta: nil, stop_time: nil }
39
39
  end
40
40
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnListResolver
5
- VERSION = "4.0.1.0"
5
+ VERSION = "4.0.2.0"
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_list_resolver
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.1.0
4
+ version: 4.0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-17 00:00:00.000000000 Z
12
+ date: 2017-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: biodiversity
@@ -185,14 +185,14 @@ dependencies:
185
185
  requirements:
186
186
  - - "~>"
187
187
  - !ruby/object:Gem::Version
188
- version: '0.49'
188
+ version: '0.50'
189
189
  type: :development
190
190
  prerelease: false
191
191
  version_requirements: !ruby/object:Gem::Requirement
192
192
  requirements:
193
193
  - - "~>"
194
194
  - !ruby/object:Gem::Version
195
- version: '0.49'
195
+ version: '0.50'
196
196
  description: Gem uses a checklist in a comma-separated format as an input, and returns
197
197
  back a new comma-separated resolved list of scientific names to one of the data
198
198
  sources from http://resolver.globalnames.org
@@ -226,14 +226,11 @@ files:
226
226
  - lib/gn_list_resolver/graphql.rb
227
227
  - lib/gn_list_resolver/reader.rb
228
228
  - lib/gn_list_resolver/resolver.rb
229
- - lib/gn_list_resolver/resolver.rb.orig
230
229
  - lib/gn_list_resolver/resolver_job.rb
231
230
  - lib/gn_list_resolver/result_processor.rb
232
231
  - lib/gn_list_resolver/sci_name_collector.rb
233
232
  - lib/gn_list_resolver/stats.rb
234
- - lib/gn_list_resolver/stats.rb.orig
235
233
  - lib/gn_list_resolver/version.rb
236
- - lib/gn_list_resolver/version.rb.orig
237
234
  - lib/gn_list_resolver/writer.rb
238
235
  homepage: https://github.com/GlobalNamesArchitecture/gn_list_resolver
239
236
  licenses:
@@ -1,166 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # rubocop:disable Metrics/ClassLength
4
-
5
- module GnListResolver
6
- # Sends data to GN Resolver and collects results
7
- class Resolver
8
- GRAPHQL = GnGraphQL.new
9
- QUERY = GRAPHQL.client.parse(GRAPHQL.query)
10
- attr_reader :stats
11
-
12
- def initialize(writer, opts)
13
- instance_vars_from_opts(opts)
14
- @processor = GnListResolver::ResultProcessor.
15
- new(writer, @stats, @with_classification)
16
- @count = 0
17
- @jobs = []
18
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
19
- @batch = 1000
20
- =======
21
- @batch = 200
22
- @smoothing = 0.05
23
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
24
- end
25
-
26
- def resolve(data)
27
- resolution_stats(data.size)
28
- @threads.times do
29
- batch = data.shift(@batch)
30
- add_job(batch)
31
- end
32
- block_given? ? traverse_jobs(data, &Proc.new) : traverse_jobs(data)
33
- wrap_up
34
- block_given? ? yield(@stats.stats) : @stats.stats
35
- end
36
-
37
- private
38
-
39
- def wrap_up
40
- @stats.stats[:resolution][:stop_time] = Time.now
41
- @stats.stats[:status] = :finish
42
- @processor.writer.close
43
- end
44
-
45
- def add_job(batch)
46
- job = batch.empty? ? nil : create_job(batch)
47
- @jobs << job
48
- end
49
-
50
- def traverse_jobs(data)
51
- until data.empty? && @jobs.compact.empty?
52
- process_results(data)
53
- cmd = yield(@stats.stats) if block_given?
54
- break if cmd == "STOP"
55
- sleep(0.5)
56
- end
57
- end
58
-
59
- def resolution_stats(records_num)
60
- @stats.stats[:total_records] = records_num
61
- @stats.stats[:resolution][:start_time] = Time.now
62
- @stats.stats[:status] = :resolution
63
- end
64
-
65
- def process_results(data)
66
- indices = []
67
- @jobs.each_with_index do |job, i|
68
- next if job.nil? || !job.complete?
69
- with_log do
70
- process_job(job)
71
- indices << i
72
- end
73
- end
74
- add_jobs(indices, data) unless indices.empty?
75
- end
76
-
77
- def add_jobs(indices, data)
78
- indices.each do |i|
79
- batch = data.shift(@batch)
80
- @jobs[i] = batch.empty? ? nil : create_job(batch)
81
- end
82
- end
83
-
84
- def process_job(job)
85
- if job.fulfilled?
86
- results, current_data, stats = job.value
87
- update_stats(stats)
88
- @processor.process(results, current_data)
89
- else
90
- GnResolver.logger.error(job.reason.message)
91
- end
92
- end
93
-
94
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
95
- def update_stats(job_stats)
96
- s = @stats.stats
97
- s[:last_batches_time].shift if s[:last_batches_time].size > 2
98
- s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
99
- s[:resolution_span] = Time.now - s[:resolution_start]
100
- end
101
-
102
- =======
103
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
104
- def create_job(batch)
105
- batch_data = collect_names(batch)
106
- rb = ResolverJob.new(batch, batch_data, @ds_id)
107
- Concurrent::Future.execute { rb.run }
108
- end
109
-
110
- def instance_vars_from_opts(opts)
111
- @stats = opts.stats
112
- @with_classification = opts.with_classification.freeze
113
- @ds_id = opts.data_source_id.freeze
114
- @threads = opts.threads
115
- end
116
-
117
- def collect_names(batch)
118
- batch_data = {}
119
- batch.each do |row|
120
- id = row[:id].strip
121
- batch_data[id] = row[:original]
122
- @processor.input[id] = { rank: row[:rank] }
123
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
124
- end
125
- batch_data
126
- =======
127
- str << "#{id}|#{row[:name]}"
128
- end
129
- [names, batch_data]
130
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
131
- end
132
-
133
- # rubocop:disable Metrics/AbcSize
134
- def update_stats(job_stats)
135
- s = @stats.stats
136
- current_speed = job_stats.stats[:current_speed] *
137
- @stats.penalty(@threads)
138
-
139
- s[:resolution][:completed_records] +=
140
- job_stats.stats[:resolution][:completed_records]
141
- @stats.update_eta(current_speed)
142
- s[:matches][7] += job_stats.stats[:matches][7]
143
- end
144
-
145
- def with_log
146
- yield
147
- s = @count + 1
148
- @count += @batch
149
- e = [@count, @stats.stats[:total_records]].min
150
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
151
- GnListResolver.log("Resolve #{s}-#{e} out of " \
152
- "#{@stats.stats[:total_records]} records at " \
153
- "#{RESOLVER_URL}")
154
- yield
155
- =======
156
- eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
157
- msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
158
- @stats.stats[:total_records],
159
- @stats.stats[:resolution][:speed].to_i,
160
- Time.at(eta))
161
- GnCrossmap.log(msg)
162
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
163
- end
164
- end
165
- end
166
- # rubocop:enable all
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module GnListResolver
4
- # Collects statistics about list resolving process
5
- class Stats
6
- attr_accessor :stats
7
-
8
- def initialize
9
- @stats = { status: :init, total_records: 0, ingested_records: 0,
10
- <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
11
- resolved_records: 0, ingestion_span: nil,
12
- resolution_span: nil, ingestion_start: nil,
13
- resolution_start: nil, resolution_stop: nil,
14
- last_batches_time: [], matches: init_matches,
15
- errors: [] }
16
- =======
17
- ingestion_span: nil, ingestion_start: nil,
18
- resolution: eta_struct,
19
- matches: match_types, errors: [] }
20
- @smooth = 0.05
21
- end
22
-
23
- def penalty(threads)
24
- pnlt = 0.7
25
- penalty_adj(threads.to_i, 1, pnlt)
26
- end
27
-
28
- def update_eta(current_speed)
29
- eta = @stats[:resolution]
30
- eta[:speed] = current_speed if eta[:speed].nil?
31
- eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
32
- eta[:eta] = (@stats[:total_records] -
33
- @stats[:resolution][:completed_records]) /
34
- eta[:speed]
35
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
36
- end
37
-
38
- private
39
-
40
- <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
41
- def init_matches
42
- MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
43
- =======
44
- def eta_struct
45
- { start_time: nil, completed_records: 0,
46
- speed: nil, eta: nil, stop_time: nil }
47
- end
48
-
49
- def match_types
50
- matches = GnCrossmap::MATCH_TYPES.keys
51
- matches.each_with_object({}) do |key, obj|
52
- obj[key] = 0
53
- end
54
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
55
- end
56
-
57
- def penalty_adj(threads, val, pnlt)
58
- return val if threads < 2
59
- val + penalty_adj(threads - 1, (val * pnlt), pnlt)
60
- end
61
- end
62
- end
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Namespace module for crossmapping checklists to GN sources
4
- <<<<<<< HEAD:lib/gn_list_resolver/version.rb
5
- module GnListResolver
6
- VERSION = "3.3.1.1"
7
- =======
8
- module GnCrossmap
9
- VERSION = "4.0.1"
10
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
11
-
12
- def self.version
13
- VERSION
14
- end
15
- end