gn_list_resolver 4.0.1.0 → 4.0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47abe84b5c8bd829c01d9c50978fe71b3f14aef6
4
- data.tar.gz: 7852f24f3dc3ba40e3175c92275bcde26eda0463
3
+ metadata.gz: 0d3bfe28b67d56d76bed6c1a4a5be00e89bfad04
4
+ data.tar.gz: 5dd7d097a80376c9fcc574787980af55f956d766
5
5
  SHA512:
6
- metadata.gz: e97f1949f12a97c814adb4e4a4b88223ac00bc5bc6095a553ed22461304f4986a2d32cc7071f8871b83a7957ba76c1fe102e6fd776cfea52a33278e9e1909584
7
- data.tar.gz: f458552148b3981f6f9c5017ee1b8c9b90bed16e332ef06f5340f88815050905e51277e472be23089be94032c84032f770e3e338dcf7cf00b7dccb64e181e109
6
+ metadata.gz: 3bf460b69cc2fa211ec8a9d4099ef79c7c7ff25f5acd63c61bbada96609cab45d59967ddbcb454c75a067445f3acb41c0ec4b5ebb21d9155f9109c6c64ef3d55
7
+ data.tar.gz: 1496680694c8920362025e76327d4185638c1742108d628c1af4279c1c54228d40864b9106db3bb844591ba1592d478cf7799ee5ab59e39886e8051f18cc6168
@@ -25,6 +25,10 @@
25
25
  @dimus, @alexander-myltsev - sync with `gn_crossmap`'s 3.2.0, current name is
26
26
  still a bit broken
27
27
 
28
+ ## 4.0.2
29
+
30
+ * @dimus - Add resolution span_time to stats
31
+
28
32
  ## 4.0.1
29
33
 
30
34
  * @dimus - Clean up resolution stats
@@ -44,7 +44,7 @@ Gem::Specification.new do |gem|
44
44
  gem.add_development_dependency "coveralls", "~> 0.8"
45
45
  gem.add_development_dependency "rake", "~> 12.0"
46
46
  gem.add_development_dependency "rspec", "~> 3.2"
47
- gem.add_development_dependency "rubocop", "~> 0.49"
47
+ gem.add_development_dependency "rubocop", "~> 0.50"
48
48
  end
49
49
 
50
50
  # rubocop:enable Metrics/BlockLength:
@@ -75,7 +75,7 @@ module GnListResolver
75
75
 
76
76
  def process_headers(row)
77
77
  @original_fields = headers(row)
78
- row = produce_alt_headers(row) if @alt_headers && !@alt_headers.empty?
78
+ row = produce_alt_headers(row) unless @alt_headers&.empty?
79
79
  row
80
80
  end
81
81
 
@@ -104,7 +104,7 @@ module GnListResolver
104
104
 
105
105
  def taxon_id_header(hdrs)
106
106
  hdrs.each do |h|
107
- return [h] if h && h.match?(/taxonid\s*$/i)
107
+ return [h] if h&.match?(/taxonid\s*$/i)
108
108
  end
109
109
  []
110
110
  end
@@ -119,6 +119,7 @@ module GnListResolver
119
119
  s[:resolution][:completed_records] +=
120
120
  job_stats.stats[:resolution][:completed_records]
121
121
  @stats.update_eta(current_speed)
122
+ s[:resolution][:time_span] = Time.now - s[:resolution][:start_time]
122
123
  end
123
124
 
124
125
  def with_log
@@ -34,7 +34,7 @@ module GnListResolver
34
34
  end
35
35
 
36
36
  def eta_struct
37
- { start_time: nil, completed_records: 0,
37
+ { start_time: nil, completed_records: 0, time_span: 0,
38
38
  speed: nil, eta: nil, stop_time: nil }
39
39
  end
40
40
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnListResolver
5
- VERSION = "4.0.1.0"
5
+ VERSION = "4.0.2.0"
6
6
 
7
7
  def self.version
8
8
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_list_resolver
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.1.0
4
+ version: 4.0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-17 00:00:00.000000000 Z
12
+ date: 2017-09-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: biodiversity
@@ -185,14 +185,14 @@ dependencies:
185
185
  requirements:
186
186
  - - "~>"
187
187
  - !ruby/object:Gem::Version
188
- version: '0.49'
188
+ version: '0.50'
189
189
  type: :development
190
190
  prerelease: false
191
191
  version_requirements: !ruby/object:Gem::Requirement
192
192
  requirements:
193
193
  - - "~>"
194
194
  - !ruby/object:Gem::Version
195
- version: '0.49'
195
+ version: '0.50'
196
196
  description: Gem uses a checklist in a comma-separated format as an input, and returns
197
197
  back a new comma-separated resolved list of scientific names to one of the data
198
198
  sources from http://resolver.globalnames.org
@@ -226,14 +226,11 @@ files:
226
226
  - lib/gn_list_resolver/graphql.rb
227
227
  - lib/gn_list_resolver/reader.rb
228
228
  - lib/gn_list_resolver/resolver.rb
229
- - lib/gn_list_resolver/resolver.rb.orig
230
229
  - lib/gn_list_resolver/resolver_job.rb
231
230
  - lib/gn_list_resolver/result_processor.rb
232
231
  - lib/gn_list_resolver/sci_name_collector.rb
233
232
  - lib/gn_list_resolver/stats.rb
234
- - lib/gn_list_resolver/stats.rb.orig
235
233
  - lib/gn_list_resolver/version.rb
236
- - lib/gn_list_resolver/version.rb.orig
237
234
  - lib/gn_list_resolver/writer.rb
238
235
  homepage: https://github.com/GlobalNamesArchitecture/gn_list_resolver
239
236
  licenses:
@@ -1,166 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # rubocop:disable Metrics/ClassLength
4
-
5
- module GnListResolver
6
- # Sends data to GN Resolver and collects results
7
- class Resolver
8
- GRAPHQL = GnGraphQL.new
9
- QUERY = GRAPHQL.client.parse(GRAPHQL.query)
10
- attr_reader :stats
11
-
12
- def initialize(writer, opts)
13
- instance_vars_from_opts(opts)
14
- @processor = GnListResolver::ResultProcessor.
15
- new(writer, @stats, @with_classification)
16
- @count = 0
17
- @jobs = []
18
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
19
- @batch = 1000
20
- =======
21
- @batch = 200
22
- @smoothing = 0.05
23
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
24
- end
25
-
26
- def resolve(data)
27
- resolution_stats(data.size)
28
- @threads.times do
29
- batch = data.shift(@batch)
30
- add_job(batch)
31
- end
32
- block_given? ? traverse_jobs(data, &Proc.new) : traverse_jobs(data)
33
- wrap_up
34
- block_given? ? yield(@stats.stats) : @stats.stats
35
- end
36
-
37
- private
38
-
39
- def wrap_up
40
- @stats.stats[:resolution][:stop_time] = Time.now
41
- @stats.stats[:status] = :finish
42
- @processor.writer.close
43
- end
44
-
45
- def add_job(batch)
46
- job = batch.empty? ? nil : create_job(batch)
47
- @jobs << job
48
- end
49
-
50
- def traverse_jobs(data)
51
- until data.empty? && @jobs.compact.empty?
52
- process_results(data)
53
- cmd = yield(@stats.stats) if block_given?
54
- break if cmd == "STOP"
55
- sleep(0.5)
56
- end
57
- end
58
-
59
- def resolution_stats(records_num)
60
- @stats.stats[:total_records] = records_num
61
- @stats.stats[:resolution][:start_time] = Time.now
62
- @stats.stats[:status] = :resolution
63
- end
64
-
65
- def process_results(data)
66
- indices = []
67
- @jobs.each_with_index do |job, i|
68
- next if job.nil? || !job.complete?
69
- with_log do
70
- process_job(job)
71
- indices << i
72
- end
73
- end
74
- add_jobs(indices, data) unless indices.empty?
75
- end
76
-
77
- def add_jobs(indices, data)
78
- indices.each do |i|
79
- batch = data.shift(@batch)
80
- @jobs[i] = batch.empty? ? nil : create_job(batch)
81
- end
82
- end
83
-
84
- def process_job(job)
85
- if job.fulfilled?
86
- results, current_data, stats = job.value
87
- update_stats(stats)
88
- @processor.process(results, current_data)
89
- else
90
- GnResolver.logger.error(job.reason.message)
91
- end
92
- end
93
-
94
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
95
- def update_stats(job_stats)
96
- s = @stats.stats
97
- s[:last_batches_time].shift if s[:last_batches_time].size > 2
98
- s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
99
- s[:resolution_span] = Time.now - s[:resolution_start]
100
- end
101
-
102
- =======
103
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
104
- def create_job(batch)
105
- batch_data = collect_names(batch)
106
- rb = ResolverJob.new(batch, batch_data, @ds_id)
107
- Concurrent::Future.execute { rb.run }
108
- end
109
-
110
- def instance_vars_from_opts(opts)
111
- @stats = opts.stats
112
- @with_classification = opts.with_classification.freeze
113
- @ds_id = opts.data_source_id.freeze
114
- @threads = opts.threads
115
- end
116
-
117
- def collect_names(batch)
118
- batch_data = {}
119
- batch.each do |row|
120
- id = row[:id].strip
121
- batch_data[id] = row[:original]
122
- @processor.input[id] = { rank: row[:rank] }
123
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
124
- end
125
- batch_data
126
- =======
127
- str << "#{id}|#{row[:name]}"
128
- end
129
- [names, batch_data]
130
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
131
- end
132
-
133
- # rubocop:disable Metrics/AbcSize
134
- def update_stats(job_stats)
135
- s = @stats.stats
136
- current_speed = job_stats.stats[:current_speed] *
137
- @stats.penalty(@threads)
138
-
139
- s[:resolution][:completed_records] +=
140
- job_stats.stats[:resolution][:completed_records]
141
- @stats.update_eta(current_speed)
142
- s[:matches][7] += job_stats.stats[:matches][7]
143
- end
144
-
145
- def with_log
146
- yield
147
- s = @count + 1
148
- @count += @batch
149
- e = [@count, @stats.stats[:total_records]].min
150
- <<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
151
- GnListResolver.log("Resolve #{s}-#{e} out of " \
152
- "#{@stats.stats[:total_records]} records at " \
153
- "#{RESOLVER_URL}")
154
- yield
155
- =======
156
- eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
157
- msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
158
- @stats.stats[:total_records],
159
- @stats.stats[:resolution][:speed].to_i,
160
- Time.at(eta))
161
- GnCrossmap.log(msg)
162
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
163
- end
164
- end
165
- end
166
- # rubocop:enable all
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module GnListResolver
4
- # Collects statistics about list resolving process
5
- class Stats
6
- attr_accessor :stats
7
-
8
- def initialize
9
- @stats = { status: :init, total_records: 0, ingested_records: 0,
10
- <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
11
- resolved_records: 0, ingestion_span: nil,
12
- resolution_span: nil, ingestion_start: nil,
13
- resolution_start: nil, resolution_stop: nil,
14
- last_batches_time: [], matches: init_matches,
15
- errors: [] }
16
- =======
17
- ingestion_span: nil, ingestion_start: nil,
18
- resolution: eta_struct,
19
- matches: match_types, errors: [] }
20
- @smooth = 0.05
21
- end
22
-
23
- def penalty(threads)
24
- pnlt = 0.7
25
- penalty_adj(threads.to_i, 1, pnlt)
26
- end
27
-
28
- def update_eta(current_speed)
29
- eta = @stats[:resolution]
30
- eta[:speed] = current_speed if eta[:speed].nil?
31
- eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
32
- eta[:eta] = (@stats[:total_records] -
33
- @stats[:resolution][:completed_records]) /
34
- eta[:speed]
35
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
36
- end
37
-
38
- private
39
-
40
- <<<<<<< HEAD:lib/gn_list_resolver/stats.rb
41
- def init_matches
42
- MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
43
- =======
44
- def eta_struct
45
- { start_time: nil, completed_records: 0,
46
- speed: nil, eta: nil, stop_time: nil }
47
- end
48
-
49
- def match_types
50
- matches = GnCrossmap::MATCH_TYPES.keys
51
- matches.each_with_object({}) do |key, obj|
52
- obj[key] = 0
53
- end
54
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
55
- end
56
-
57
- def penalty_adj(threads, val, pnlt)
58
- return val if threads < 2
59
- val + penalty_adj(threads - 1, (val * pnlt), pnlt)
60
- end
61
- end
62
- end
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Namespace module for crossmapping checklists to GN sources
4
- <<<<<<< HEAD:lib/gn_list_resolver/version.rb
5
- module GnListResolver
6
- VERSION = "3.3.1.1"
7
- =======
8
- module GnCrossmap
9
- VERSION = "4.0.1"
10
- >>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
11
-
12
- def self.version
13
- VERSION
14
- end
15
- end