gn_list_resolver 4.0.1.0 → 4.0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/gn_list_resolver.gemspec +1 -1
- data/lib/gn_list_resolver/reader.rb +2 -2
- data/lib/gn_list_resolver/resolver.rb +1 -0
- data/lib/gn_list_resolver/stats.rb +1 -1
- data/lib/gn_list_resolver/version.rb +1 -1
- metadata +4 -7
- data/lib/gn_list_resolver/resolver.rb.orig +0 -166
- data/lib/gn_list_resolver/stats.rb.orig +0 -62
- data/lib/gn_list_resolver/version.rb.orig +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d3bfe28b67d56d76bed6c1a4a5be00e89bfad04
|
4
|
+
data.tar.gz: 5dd7d097a80376c9fcc574787980af55f956d766
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bf460b69cc2fa211ec8a9d4099ef79c7c7ff25f5acd63c61bbada96609cab45d59967ddbcb454c75a067445f3acb41c0ec4b5ebb21d9155f9109c6c64ef3d55
|
7
|
+
data.tar.gz: 1496680694c8920362025e76327d4185638c1742108d628c1af4279c1c54228d40864b9106db3bb844591ba1592d478cf7799ee5ab59e39886e8051f18cc6168
|
data/CHANGELOG.md
CHANGED
data/gn_list_resolver.gemspec
CHANGED
@@ -44,7 +44,7 @@ Gem::Specification.new do |gem|
|
|
44
44
|
gem.add_development_dependency "coveralls", "~> 0.8"
|
45
45
|
gem.add_development_dependency "rake", "~> 12.0"
|
46
46
|
gem.add_development_dependency "rspec", "~> 3.2"
|
47
|
-
gem.add_development_dependency "rubocop", "~> 0.
|
47
|
+
gem.add_development_dependency "rubocop", "~> 0.50"
|
48
48
|
end
|
49
49
|
|
50
50
|
# rubocop:enable Metrics/BlockLength:
|
@@ -75,7 +75,7 @@ module GnListResolver
|
|
75
75
|
|
76
76
|
def process_headers(row)
|
77
77
|
@original_fields = headers(row)
|
78
|
-
row = produce_alt_headers(row)
|
78
|
+
row = produce_alt_headers(row) unless @alt_headers&.empty?
|
79
79
|
row
|
80
80
|
end
|
81
81
|
|
@@ -104,7 +104,7 @@ module GnListResolver
|
|
104
104
|
|
105
105
|
def taxon_id_header(hdrs)
|
106
106
|
hdrs.each do |h|
|
107
|
-
return [h] if h
|
107
|
+
return [h] if h&.match?(/taxonid\s*$/i)
|
108
108
|
end
|
109
109
|
[]
|
110
110
|
end
|
@@ -119,6 +119,7 @@ module GnListResolver
|
|
119
119
|
s[:resolution][:completed_records] +=
|
120
120
|
job_stats.stats[:resolution][:completed_records]
|
121
121
|
@stats.update_eta(current_speed)
|
122
|
+
s[:resolution][:time_span] = Time.now - s[:resolution][:start_time]
|
122
123
|
end
|
123
124
|
|
124
125
|
def with_log
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_list_resolver
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.
|
4
|
+
version: 4.0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: biodiversity
|
@@ -185,14 +185,14 @@ dependencies:
|
|
185
185
|
requirements:
|
186
186
|
- - "~>"
|
187
187
|
- !ruby/object:Gem::Version
|
188
|
-
version: '0.
|
188
|
+
version: '0.50'
|
189
189
|
type: :development
|
190
190
|
prerelease: false
|
191
191
|
version_requirements: !ruby/object:Gem::Requirement
|
192
192
|
requirements:
|
193
193
|
- - "~>"
|
194
194
|
- !ruby/object:Gem::Version
|
195
|
-
version: '0.
|
195
|
+
version: '0.50'
|
196
196
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
197
197
|
back a new comma-separated resolved list of scientific names to one of the data
|
198
198
|
sources from http://resolver.globalnames.org
|
@@ -226,14 +226,11 @@ files:
|
|
226
226
|
- lib/gn_list_resolver/graphql.rb
|
227
227
|
- lib/gn_list_resolver/reader.rb
|
228
228
|
- lib/gn_list_resolver/resolver.rb
|
229
|
-
- lib/gn_list_resolver/resolver.rb.orig
|
230
229
|
- lib/gn_list_resolver/resolver_job.rb
|
231
230
|
- lib/gn_list_resolver/result_processor.rb
|
232
231
|
- lib/gn_list_resolver/sci_name_collector.rb
|
233
232
|
- lib/gn_list_resolver/stats.rb
|
234
|
-
- lib/gn_list_resolver/stats.rb.orig
|
235
233
|
- lib/gn_list_resolver/version.rb
|
236
|
-
- lib/gn_list_resolver/version.rb.orig
|
237
234
|
- lib/gn_list_resolver/writer.rb
|
238
235
|
homepage: https://github.com/GlobalNamesArchitecture/gn_list_resolver
|
239
236
|
licenses:
|
@@ -1,166 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# rubocop:disable Metrics/ClassLength
|
4
|
-
|
5
|
-
module GnListResolver
|
6
|
-
# Sends data to GN Resolver and collects results
|
7
|
-
class Resolver
|
8
|
-
GRAPHQL = GnGraphQL.new
|
9
|
-
QUERY = GRAPHQL.client.parse(GRAPHQL.query)
|
10
|
-
attr_reader :stats
|
11
|
-
|
12
|
-
def initialize(writer, opts)
|
13
|
-
instance_vars_from_opts(opts)
|
14
|
-
@processor = GnListResolver::ResultProcessor.
|
15
|
-
new(writer, @stats, @with_classification)
|
16
|
-
@count = 0
|
17
|
-
@jobs = []
|
18
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
19
|
-
@batch = 1000
|
20
|
-
=======
|
21
|
-
@batch = 200
|
22
|
-
@smoothing = 0.05
|
23
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
24
|
-
end
|
25
|
-
|
26
|
-
def resolve(data)
|
27
|
-
resolution_stats(data.size)
|
28
|
-
@threads.times do
|
29
|
-
batch = data.shift(@batch)
|
30
|
-
add_job(batch)
|
31
|
-
end
|
32
|
-
block_given? ? traverse_jobs(data, &Proc.new) : traverse_jobs(data)
|
33
|
-
wrap_up
|
34
|
-
block_given? ? yield(@stats.stats) : @stats.stats
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
def wrap_up
|
40
|
-
@stats.stats[:resolution][:stop_time] = Time.now
|
41
|
-
@stats.stats[:status] = :finish
|
42
|
-
@processor.writer.close
|
43
|
-
end
|
44
|
-
|
45
|
-
def add_job(batch)
|
46
|
-
job = batch.empty? ? nil : create_job(batch)
|
47
|
-
@jobs << job
|
48
|
-
end
|
49
|
-
|
50
|
-
def traverse_jobs(data)
|
51
|
-
until data.empty? && @jobs.compact.empty?
|
52
|
-
process_results(data)
|
53
|
-
cmd = yield(@stats.stats) if block_given?
|
54
|
-
break if cmd == "STOP"
|
55
|
-
sleep(0.5)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def resolution_stats(records_num)
|
60
|
-
@stats.stats[:total_records] = records_num
|
61
|
-
@stats.stats[:resolution][:start_time] = Time.now
|
62
|
-
@stats.stats[:status] = :resolution
|
63
|
-
end
|
64
|
-
|
65
|
-
def process_results(data)
|
66
|
-
indices = []
|
67
|
-
@jobs.each_with_index do |job, i|
|
68
|
-
next if job.nil? || !job.complete?
|
69
|
-
with_log do
|
70
|
-
process_job(job)
|
71
|
-
indices << i
|
72
|
-
end
|
73
|
-
end
|
74
|
-
add_jobs(indices, data) unless indices.empty?
|
75
|
-
end
|
76
|
-
|
77
|
-
def add_jobs(indices, data)
|
78
|
-
indices.each do |i|
|
79
|
-
batch = data.shift(@batch)
|
80
|
-
@jobs[i] = batch.empty? ? nil : create_job(batch)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def process_job(job)
|
85
|
-
if job.fulfilled?
|
86
|
-
results, current_data, stats = job.value
|
87
|
-
update_stats(stats)
|
88
|
-
@processor.process(results, current_data)
|
89
|
-
else
|
90
|
-
GnResolver.logger.error(job.reason.message)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
95
|
-
def update_stats(job_stats)
|
96
|
-
s = @stats.stats
|
97
|
-
s[:last_batches_time].shift if s[:last_batches_time].size > 2
|
98
|
-
s[:last_batches_time] << job_stats.stats[:last_batches_time][0]
|
99
|
-
s[:resolution_span] = Time.now - s[:resolution_start]
|
100
|
-
end
|
101
|
-
|
102
|
-
=======
|
103
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
104
|
-
def create_job(batch)
|
105
|
-
batch_data = collect_names(batch)
|
106
|
-
rb = ResolverJob.new(batch, batch_data, @ds_id)
|
107
|
-
Concurrent::Future.execute { rb.run }
|
108
|
-
end
|
109
|
-
|
110
|
-
def instance_vars_from_opts(opts)
|
111
|
-
@stats = opts.stats
|
112
|
-
@with_classification = opts.with_classification.freeze
|
113
|
-
@ds_id = opts.data_source_id.freeze
|
114
|
-
@threads = opts.threads
|
115
|
-
end
|
116
|
-
|
117
|
-
def collect_names(batch)
|
118
|
-
batch_data = {}
|
119
|
-
batch.each do |row|
|
120
|
-
id = row[:id].strip
|
121
|
-
batch_data[id] = row[:original]
|
122
|
-
@processor.input[id] = { rank: row[:rank] }
|
123
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
124
|
-
end
|
125
|
-
batch_data
|
126
|
-
=======
|
127
|
-
str << "#{id}|#{row[:name]}"
|
128
|
-
end
|
129
|
-
[names, batch_data]
|
130
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
131
|
-
end
|
132
|
-
|
133
|
-
# rubocop:disable Metrics/AbcSize
|
134
|
-
def update_stats(job_stats)
|
135
|
-
s = @stats.stats
|
136
|
-
current_speed = job_stats.stats[:current_speed] *
|
137
|
-
@stats.penalty(@threads)
|
138
|
-
|
139
|
-
s[:resolution][:completed_records] +=
|
140
|
-
job_stats.stats[:resolution][:completed_records]
|
141
|
-
@stats.update_eta(current_speed)
|
142
|
-
s[:matches][7] += job_stats.stats[:matches][7]
|
143
|
-
end
|
144
|
-
|
145
|
-
def with_log
|
146
|
-
yield
|
147
|
-
s = @count + 1
|
148
|
-
@count += @batch
|
149
|
-
e = [@count, @stats.stats[:total_records]].min
|
150
|
-
<<<<<<< HEAD:lib/gn_list_resolver/resolver.rb
|
151
|
-
GnListResolver.log("Resolve #{s}-#{e} out of " \
|
152
|
-
"#{@stats.stats[:total_records]} records at " \
|
153
|
-
"#{RESOLVER_URL}")
|
154
|
-
yield
|
155
|
-
=======
|
156
|
-
eta = @stats.stats[:resolution][:eta].to_i + Time.now.to_i
|
157
|
-
msg = format("Resolve %s-%s/%s records %d rec/s; eta: %s", s, e,
|
158
|
-
@stats.stats[:total_records],
|
159
|
-
@stats.stats[:resolution][:speed].to_i,
|
160
|
-
Time.at(eta))
|
161
|
-
GnCrossmap.log(msg)
|
162
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/resolver.rb
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
|
-
# rubocop:enable all
|
@@ -1,62 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module GnListResolver
|
4
|
-
# Collects statistics about list resolving process
|
5
|
-
class Stats
|
6
|
-
attr_accessor :stats
|
7
|
-
|
8
|
-
def initialize
|
9
|
-
@stats = { status: :init, total_records: 0, ingested_records: 0,
|
10
|
-
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
11
|
-
resolved_records: 0, ingestion_span: nil,
|
12
|
-
resolution_span: nil, ingestion_start: nil,
|
13
|
-
resolution_start: nil, resolution_stop: nil,
|
14
|
-
last_batches_time: [], matches: init_matches,
|
15
|
-
errors: [] }
|
16
|
-
=======
|
17
|
-
ingestion_span: nil, ingestion_start: nil,
|
18
|
-
resolution: eta_struct,
|
19
|
-
matches: match_types, errors: [] }
|
20
|
-
@smooth = 0.05
|
21
|
-
end
|
22
|
-
|
23
|
-
def penalty(threads)
|
24
|
-
pnlt = 0.7
|
25
|
-
penalty_adj(threads.to_i, 1, pnlt)
|
26
|
-
end
|
27
|
-
|
28
|
-
def update_eta(current_speed)
|
29
|
-
eta = @stats[:resolution]
|
30
|
-
eta[:speed] = current_speed if eta[:speed].nil?
|
31
|
-
eta[:speed] = eta[:speed] * (1 - @smooth) + current_speed * @smooth
|
32
|
-
eta[:eta] = (@stats[:total_records] -
|
33
|
-
@stats[:resolution][:completed_records]) /
|
34
|
-
eta[:speed]
|
35
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
<<<<<<< HEAD:lib/gn_list_resolver/stats.rb
|
41
|
-
def init_matches
|
42
|
-
MATCH_TYPES.keys.each_with_object({}) { |t, h| h[t] = 0 }
|
43
|
-
=======
|
44
|
-
def eta_struct
|
45
|
-
{ start_time: nil, completed_records: 0,
|
46
|
-
speed: nil, eta: nil, stop_time: nil }
|
47
|
-
end
|
48
|
-
|
49
|
-
def match_types
|
50
|
-
matches = GnCrossmap::MATCH_TYPES.keys
|
51
|
-
matches.each_with_object({}) do |key, obj|
|
52
|
-
obj[key] = 0
|
53
|
-
end
|
54
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/stats.rb
|
55
|
-
end
|
56
|
-
|
57
|
-
def penalty_adj(threads, val, pnlt)
|
58
|
-
return val if threads < 2
|
59
|
-
val + penalty_adj(threads - 1, (val * pnlt), pnlt)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Namespace module for crossmapping checklists to GN sources
|
4
|
-
<<<<<<< HEAD:lib/gn_list_resolver/version.rb
|
5
|
-
module GnListResolver
|
6
|
-
VERSION = "3.3.1.1"
|
7
|
-
=======
|
8
|
-
module GnCrossmap
|
9
|
-
VERSION = "4.0.1"
|
10
|
-
>>>>>>> 2a4afb8... Fix #42 - better speed estimation:lib/gn_crossmap/version.rb
|
11
|
-
|
12
|
-
def self.version
|
13
|
-
VERSION
|
14
|
-
end
|
15
|
-
end
|