gn_list_resolver 4.0.1.0 → 4.0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/gn_list_resolver.gemspec +1 -1
- data/lib/gn_list_resolver/reader.rb +2 -2
- data/lib/gn_list_resolver/resolver.rb +1 -0
- data/lib/gn_list_resolver/stats.rb +1 -1
- data/lib/gn_list_resolver/version.rb +1 -1
- metadata +4 -7
- data/lib/gn_list_resolver/resolver.rb.orig +0 -166
- data/lib/gn_list_resolver/stats.rb.orig +0 -62
- data/lib/gn_list_resolver/version.rb.orig +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d3bfe28b67d56d76bed6c1a4a5be00e89bfad04
|
4
|
+
data.tar.gz: 5dd7d097a80376c9fcc574787980af55f956d766
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bf460b69cc2fa211ec8a9d4099ef79c7c7ff25f5acd63c61bbada96609cab45d59967ddbcb454c75a067445f3acb41c0ec4b5ebb21d9155f9109c6c64ef3d55
|
7
|
+
data.tar.gz: 1496680694c8920362025e76327d4185638c1742108d628c1af4279c1c54228d40864b9106db3bb844591ba1592d478cf7799ee5ab59e39886e8051f18cc6168
|
data/CHANGELOG.md
CHANGED
data/gn_list_resolver.gemspec
CHANGED
@@ -44,7 +44,7 @@ Gem::Specification.new do |gem|
|
|
44
44
|
gem.add_development_dependency "coveralls", "~> 0.8"
|
45
45
|
gem.add_development_dependency "rake", "~> 12.0"
|
46
46
|
gem.add_development_dependency "rspec", "~> 3.2"
|
47
|
-
gem.add_development_dependency "rubocop", "~> 0.
|
47
|
+
gem.add_development_dependency "rubocop", "~> 0.50"
|
48
48
|
end
|
49
49
|
|
50
50
|
# rubocop:enable Metrics/BlockLength:
|
data/lib/gn_list_resolver/reader.rb
CHANGED
@@ -75,7 +75,7 @@ module GnListResolver
|
|
75
75
|
|
76
76
|
def process_headers(row)
|
77
77
|
@original_fields = headers(row)
|
78
|
-
row = produce_alt_headers(row)
|
78
|
+
row = produce_alt_headers(row) unless @alt_headers&.empty?
|
79
79
|
row
|
80
80
|
end
|
81
81
|
|
@@ -104,7 +104,7 @@ module GnListResolver
|
|
104
104
|
|
105
105
|
def taxon_id_header(hdrs)
|
106
106
|
hdrs.each do |h|
|
107
|
-
return [h] if h
|
107
|
+
return [h] if h&.match?(/taxonid\s*$/i)
|
108
108
|
end
|
109
109
|
[]
|
110
110
|
end
|
data/lib/gn_list_resolver/resolver.rb
CHANGED
@@ -119,6 +119,7 @@ module GnListResolver
|
|
119
119
|
s[:resolution][:completed_records] +=
|
120
120
|
job_stats.stats[:resolution][:completed_records]
|
121
121
|
@stats.update_eta(current_speed)
|
122
|
+
s[:resolution][:time_span] = Time.now - s[:resolution][:start_time]
|
122
123
|
end
|
123
124
|
|
124
125
|
def with_log
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_list_resolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.1.0
|
4
|
+
version: 4.0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: biodiversity
|
@@ -185,14 +185,14 @@ dependencies:
|
|
185
185
|
requirements:
|
186
186
|
- - "~>"
|
187
187
|
- !ruby/object:Gem::Version
|
188
|
-
version: '0.
|
188
|
+
version: '0.50'
|
189
189
|
type: :development
|
190
190
|
prerelease: false
|
191
191
|
version_requirements: !ruby/object:Gem::Requirement
|
192
192
|
requirements:
|
193
193
|
- - "~>"
|
194
194
|
- !ruby/object:Gem::Version
|
195
|
-
version: '0.
|
195
|
+
version: '0.50'
|
196
196
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
197
197
|
back a new comma-separated resolved list of scientific names to one of the data
|
198
198
|
sources from http://resolver.globalnames.org
|
@@ -226,14 +226,11 @@ files:
|
|
226
226
|
- lib/gn_list_resolver/graphql.rb
|
227
227
|
- lib/gn_list_resolver/reader.rb
|
228
228
|
- lib/gn_list_resolver/resolver.rb
|
229
|
-
- lib/gn_list_resolver/resolver.rb.orig
|
230
229
|
- lib/gn_list_resolver/resolver_job.rb
|
231
230
|
- lib/gn_list_resolver/result_processor.rb
|
232
231
|
- lib/gn_list_resolver/sci_name_collector.rb
|
233
232
|
- lib/gn_list_resolver/stats.rb
|
234
|
-
- lib/gn_list_resolver/stats.rb.orig
|
235
233
|
- lib/gn_list_resolver/version.rb
|
236
|
-
- lib/gn_list_resolver/version.rb.orig
|
237
234
|
- lib/gn_list_resolver/writer.rb
|
238
235
|
homepage: https://github.com/GlobalNamesArchitecture/gn_list_resolver
|
239
236
|
licenses:
|
data/lib/gn_list_resolver/resolver.rb.orig
DELETED
@@ -1,166 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# rubocop:disable Metrics/ClassLength
|
4
|
-
|
5
|
-
module GnListResolver
|
6
|
-
# Sends data to GN Resolver and collects results
|
7
|
-
class Resolver
|
8
|
-
GRAPHQL = GnGraphQL.new
|
9
|
-
QUERY = GRAPHQL.client.parse(GRAPHQL.query)
|
10
|
-
attr_reader :stats
|
11
|
-
|
12
|
-
def initialize(writer, opts)
|
13
|
-
instance_vars_from_opts(opts)
|
14
|
-
@processor = GnListResolver::ResultProcessor.
|
15
|
-
new(writer, @stats, @with_classification)
|
16
|
-
@count = 0
|
17
|
-
@jobs = []
|
18
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
19
|
-
@batch = 1000
|
20
|
-
=======
|
21
|
-
@batch = 200
|
22
|
-
@smoothing = 0.05
|
23
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
24
|
-
end
|
25
|
-
|
26
|
-
def resolve(data)
|
27
|
-
resolution_stats(data.size)
|
28
|
-
@threads.times do
|
29
|
-
batch = data.shift(@batch)
|
30
|
-
add_job(batch)
|
31
|
-
end
|
32
|
-
block_given? ? traverse_jobs(data, &Proc.new) : traverse_jobs(data)
|
33
|
-
wrap_up
|
34
|
-
block_given? ? yield(@stats.stats) : @stats.stats
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
def wrap_up
|
40
|
-
@stats.stats[:resolution][:stop_time] = Time.now
|
41
|
-
@stats.stats[:status] = :finish
|
42
|
-
@processor.writer.close
|
43
|
-
end
|
44
|
-
|
45
|
-
def add_job(batch)
|
46
|
-
job = batch.empty? ? nil : create_job(batch)
|
47
|
-
@jobs << job
|
48
|
-
end
|
49
|
-
|
50
|
-
def traverse_jobs(data)
|
51
|
-
until data.empty? && @jobs.compact.empty?
|
52
|
-
process_results(data)
|
53
|
-
cmd = yield(@stats.stats) if block_given?
|
54
|
-
break if cmd == "STOP"
|
55
|
-
sleep(0.5)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def resolution_stats(records_num)
|
60
|
-
@stats.stats[:total_records] = records_num
|
61
|
-
@stats.stats[:resolution][:start_time] = Time.now
|
62
|
-
@stats.stats[:status] = :resolution
|
63
|
-
end
|
64
|
-
|
65
|
-
def process_results(data)
|
66
|
-
indices = []
|
67
|
-
@jobs.each_with_index do |job, i|
|
68
|
-
next if job.nil? || !job.complete?
|
69
|
-
with_log do
|
70
|
-
process_job(job)
|
71
|
-
indices << i
|
72
|
-
end
|
73
|
-
end
|
74
|
-
add_jobs(indices, data) unless indices.empty?
|
75
|
-
end
|
76
|
-
|
77
|
-
def add_jobs(indices, data)
|
78
|
-
indices.each do |i|
|
79
|
-
batch = data.shift(@batch)
|
80
|
-
@jobs[i] = batch.empty? ? nil : create_job(batch)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def process_job(job)
|
85
|
-
if job.fulfilled?
|
86
|
-
results, current_data, stats = job.value
|
87
|
-
update_stats(stats)
|
88
|
-
@processor.process(results, current_data)
|
89
|
-
else
|
90
|
-
GnResolver.logger.error(job.reason.message)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
95
|
-
def update_stats(job_stats)
|
96
|
-
s = @stats.stats
|
97
|
-
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
98
|
-
s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
|
99
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
100
|
-
end
|
101
|
-
|
102
|
-
=======
|
103
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
104
|
-
def create_job(batch)
|
105
|
-
batch_data = collect_names(batch)
|
106
|
-
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
107
|
-
Concurrent::Future.execute { rb.run }
|
108
|
-
end
|
109
|
-
|
110
|
-
def instance_vars_from_opts(opts)
|
111
|
-
@stats = opts.stats
|
112
|
-
@with_classification = opts.with_classification.freeze
|
113
|
-
@ds_id = opts.data_source_id.freeze
|
114
|
-
@threads = opts.threads
|
115
|
-
end
|
116
|
-
|
117
|
-
def collect_names(batch)
|
118
|
-
batch_data = {}
|
119
|
-
batch.each do |row|
|
120
|
-
id = row[:id].strip
|
121
|
-
batch_data[id] = row[:original]
|
122
|
-
@processor.input[id] = { rank: row[:rank] }
|
123
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
124
|
-
end
|
125
|
-
batch_data
|
126
|
-
=======
|
127
|
-
str << "#{id}|#{row[:name]}"
|
128
|
-
end
|
129
|
-
[names, batch_data]
|
130
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
|
-
end
|
132
|
-
|
133
|
-
# rubocop:disable Metrics/AbcSize
|
134
|
-
def update_stats(job_stats)
|
135
|
-
s = @stats.stats
|
136
|
-
current_speed = job_stats.stats[:current_speed] *
|
137
|
-
@stats.penalty(@threads)
|
138
|
-
|
139
|
-
s[:resolution][:completed_records] +=
|
140
|
-
job_stats.stats[:resolution][:completed_records]
|
141
|
-
@stats.update_eta(current_speed)
|
142
|
-
s[:matches][7] += job_stats.stats[:matches][7]
|
143
|
-
end
|
144
|
-
|
145
|
-
def with_log
|
146
|
-
yield
|
147
|
-
s = @count + 1
|
148
|
-
@count += @batch
|
149
|
-
e = [@count, @stats.stats[:total_records]].min
|
150
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
151
|
-
GnListResolver.log("Resolve #{s}-#{e} out of " \
|
152
|
-
"#{@stats.stats[:total_records]} records at " \
|
153
|
-
"#{RESOLVER_URL}")
|
154
|
-
yield
|
155
|
-
=======
|
156
|
-
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
157
|
-
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
158
|
-
@stats.stats[:total_records],
|
159
|
-
@stats.stats[:resolution][:speed].to_i,
|
160
|
-
Time.at(eta))
|
161
|
-
GnCrossmap.log(msg)
|
162
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
|
-
# rubocop:enable all
|
data/lib/gn_list_resolver/stats.rb.orig
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module GnListResolver
|
4
|
-
# Collects statistics about list resolving process
|
5
|
-
class Stats
|
6
|
-
attr_accessor :stats
|
7
|
-
|
8
|
-
def initialize
|
9
|
-
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
11
|
-
resolved_records: 0, ingestion_span: nil,
|
12
|
-
resolution_span: nil, ingestion_start: nil,
|
13
|
-
resolution_start: nil, resolution_stop: nil,
|
14
|
-
last_batches_time: [], matches: init_matches,
|
15
|
-
errors: [] }
|
16
|
-
=======
|
17
|
-
ingestion_span: nil, ingestion_start: nil,
|
18
|
-
resolution: eta_struct,
|
19
|
-
matches: match_types, errors: [] }
|
20
|
-
@smooth = 0.05
|
21
|
-
end
|
22
|
-
|
23
|
-
def penalty(threads)
|
24
|
-
pnlt = 0.7
|
25
|
-
penalty_adj(threads.to_i, 1, pnlt)
|
26
|
-
end
|
27
|
-
|
28
|
-
def update_eta(current_speed)
|
29
|
-
eta = @stats[:resolution]
|
30
|
-
eta[:speed] = current_speed if eta[:speed].nil?
|
31
|
-
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
32
|
-
eta[:eta] = (@stats[:total_records] -
|
33
|
-
@stats[:resolution][:completed_records]) /
|
34
|
-
eta[:speed]
|
35
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
41
|
-
def init_matches
|
42
|
-
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
43
|
-
=======
|
44
|
-
def eta_struct
|
45
|
-
{ start_time: nil, completed_records: 0,
|
46
|
-
speed: nil, eta: nil, stop_time: nil }
|
47
|
-
end
|
48
|
-
|
49
|
-
def match_types
|
50
|
-
matches = GnCrossmap::MATCH_TYPES.keys
|
51
|
-
matches.each_with_object({}) do |key, obj|
|
52
|
-
obj[key] = 0
|
53
|
-
end
|
54
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
55
|
-
end
|
56
|
-
|
57
|
-
def penalty_adj(threads, val, pnlt)
|
58
|
-
return val if threads < 2
|
59
|
-
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/lib/gn_list_resolver/version.rb.orig
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Namespace module for crossmapping checklists to GN sources
|
4
|
-
<<<<<<< HEAD:lib/gn_list_resolver/version.rb
|
5
|
-
module GnListResolver
|
6
|
-
VERSION = "3.3.1.1"
|
7
|
-
=======
|
8
|
-
module GnCrossmap
|
9
|
-
VERSION = "4.0.1"
|
10
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
|
11
|
-
|
12
|
-
def self.version
|
13
|
-
VERSION
|
14
|
-
end
|
15
|
-
end
|