biodiversity 3.5.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -6
- data/.ruby-version +1 -1
- data/.travis.yml +1 -6
- data/CHANGELOG +3 -0
- data/Gemfile +2 -0
- data/README.md +37 -178
- data/Rakefile +15 -48
- data/biodiversity.gemspec +18 -21
- data/clib/linux/libgnparser.h +93 -0
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +93 -0
- data/clib/mac/libgnparser.so +0 -0
- data/lib/biodiversity.rb +4 -9
- data/lib/biodiversity/parser.rb +65 -281
- data/lib/biodiversity/version.rb +8 -1
- data/spec/lib/biodiversity_spec.rb +9 -0
- data/spec/lib/parser_spec.rb +38 -0
- data/spec/spec_helper.rb +4 -81
- metadata +27 -102
- data/.byebug_history +0 -18
- data/.document +0 -5
- data/examples/socket_client.rb +0 -25
- data/lib/biodiversity/guid.rb +0 -1
- data/lib/biodiversity/guid/lsid.rb +0 -16
- data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -528
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +0 -120
- data/lib/biodiversity/parser/scientific_name_clean.rb +0 -8991
- data/lib/biodiversity/parser/scientific_name_clean.treetop +0 -1632
- data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1298
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +0 -264
- data/spec/biodiversity_spec.rb +0 -11
- data/spec/files/test_data.txt +0 -490
- data/spec/files/todo.txt +0 -55
- data/spec/guid/lsid.spec.rb +0 -15
- data/spec/parser/scientific_name_canonical_spec.rb +0 -36
- data/spec/parser/scientific_name_clean_spec.rb +0 -1137
- data/spec/parser/scientific_name_dirty_spec.rb +0 -165
- data/spec/parser/scientific_name_spec.rb +0 -193
data/lib/biodiversity/version.rb
CHANGED
data/spec/lib/parser_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# s frozen_string_literal: true
|
4
|
+
|
5
|
+
describe Biodiversity::Parser do
|
6
|
+
describe('parse') do
|
7
|
+
it 'parses name in simple format' do
|
8
|
+
parsed = subject.parse('Homo sapiens Linn.', true)
|
9
|
+
expect(parsed[:canonicalName][:simple]).to eq 'Homo sapiens'
|
10
|
+
expect(parsed[:normalized]).to be_nil
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'parsed name in full format' do
|
14
|
+
parsed = subject.parse('Homo sapiens Linn.')
|
15
|
+
expect(parsed[:canonicalName][:simple]).to eq 'Homo sapiens'
|
16
|
+
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe('parse_ary') do
|
21
|
+
it 'parses names in simple format' do
|
22
|
+
parsed = subject.parse_ary(['Homo sapiens Linn.', 'Pardosa moesta'], true)
|
23
|
+
expect(parsed[0][:canonicalName][:simple]).to eq 'Homo sapiens'
|
24
|
+
expect(parsed[1][:canonicalName][:simple]).to eq 'Pardosa moesta'
|
25
|
+
expect(parsed[0][:normalized]).to be_nil
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'parsed name in full format' do
|
29
|
+
parsed = subject.parse_ary(
|
30
|
+
['Homo sapiens Linn.', 'Tobacco Mosaic Virus']
|
31
|
+
)
|
32
|
+
expect(parsed[0][:canonicalName][:simple]).to eq 'Homo sapiens'
|
33
|
+
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
34
|
+
expect(parsed[1][:parsed]).to be false
|
35
|
+
expect(parsed[1][:virus]).to be true
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,82 +1,5 @@
|
|
1
|
-
|
2
|
-
require "treetop"
|
3
|
-
require "json"
|
4
|
-
require "biodiversity"
|
5
|
-
require "webmock/rspec"
|
6
|
-
|
7
|
-
|
8
|
-
RSpec.configure do |c|
|
9
|
-
c.mock_with :rr
|
10
|
-
end
|
11
|
-
|
12
|
-
dir = File.dirname(__FILE__)
|
13
|
-
Treetop.load(File.expand_path(File.join(dir,
|
14
|
-
"../lib/biodiversity/parser/scientific_name_clean")))
|
15
|
-
Treetop.load(File.expand_path(File.join(dir,
|
16
|
-
"../lib/biodiversity/parser/scientific_name_dirty")))
|
17
|
-
Treetop.load(File.expand_path(File.join(dir,
|
18
|
-
"../lib/biodiversity/parser/scientific_name_canonical")))
|
19
|
-
|
20
|
-
PARSER_TEST_VERSION = "test_version"
|
21
|
-
|
22
|
-
def set_parser(parser)
|
23
|
-
@parser = parser
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse(input)
|
27
|
-
@parser.parse(input)
|
28
|
-
end
|
29
|
-
|
30
|
-
def value(input)
|
31
|
-
parse(input).value
|
32
|
-
end
|
33
|
-
|
34
|
-
def canonical(input)
|
35
|
-
parse(input).canonical
|
36
|
-
end
|
37
|
-
|
38
|
-
def details(input)
|
39
|
-
parse(input).details
|
40
|
-
end
|
41
|
-
|
42
|
-
def pos(input)
|
43
|
-
parse(input).pos
|
44
|
-
end
|
45
|
-
|
46
|
-
def json(input)
|
47
|
-
parse(input).
|
48
|
-
to_json.gsub(/"parser_version":"[^"]*"/,
|
49
|
-
%Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
50
|
-
end
|
51
|
-
|
52
|
-
def debug(input)
|
53
|
-
res = parse(input)
|
54
|
-
puts "<pre>"
|
55
|
-
if res
|
56
|
-
puts "success!"
|
57
|
-
puts res.inspect
|
58
|
-
else
|
59
|
-
puts input
|
60
|
-
val = @parser.failure_reason.to_s.match(/column [0-9]*/).
|
61
|
-
to_s.gsub(/column /, "").to_i
|
62
|
-
print ("-" * (val - 1))
|
63
|
-
print "^ Computer says 'ni'!\n"
|
64
|
-
puts @parser.failure_reason
|
65
|
-
puts @parser.to_yaml
|
66
|
-
end
|
67
|
-
puts "</pre>"
|
68
|
-
end
|
69
|
-
|
70
|
-
def read_test_file
|
71
|
-
f = open(File.expand_path("../files/test_data.txt", __FILE__))
|
72
|
-
f.each do |line|
|
73
|
-
name, jsn = line.split("|")
|
74
|
-
if line.match(/^\s*#/) == nil && name && jsn
|
75
|
-
yield({ name: name, jsn: jsn })
|
76
|
-
else
|
77
|
-
yield({ comment: line })
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
1
|
+
# frozen_string_literal: true
|
82
2
|
|
3
|
+
require 'rspec'
|
4
|
+
require 'json'
|
5
|
+
require 'biodiversity'
|
metadata
CHANGED
@@ -1,165 +1,105 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.5.1
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: ffi
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.11'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: parallel
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.12'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.12'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: unicode_utils
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '1.4'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '1.4'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: gn_uuid
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0.5'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0.5'
|
26
|
+
version: '1.11'
|
69
27
|
- !ruby/object:Gem::Dependency
|
70
28
|
name: bundler
|
71
29
|
requirement: !ruby/object:Gem::Requirement
|
72
30
|
requirements:
|
73
31
|
- - "~>"
|
74
32
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '1.16'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rake
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '12.3'
|
33
|
+
version: '2.0'
|
90
34
|
type: :development
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
38
|
- - "~>"
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
40
|
+
version: '2.0'
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: byebug
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
45
|
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
47
|
+
version: '11.0'
|
104
48
|
type: :development
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
52
|
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
54
|
+
version: '11.0'
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
56
|
+
name: rake
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
114
58
|
requirements:
|
115
59
|
- - "~>"
|
116
60
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
61
|
+
version: '13.0'
|
118
62
|
type: :development
|
119
63
|
prerelease: false
|
120
64
|
version_requirements: !ruby/object:Gem::Requirement
|
121
65
|
requirements:
|
122
66
|
- - "~>"
|
123
67
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
68
|
+
version: '13.0'
|
125
69
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
70
|
+
name: rspec
|
127
71
|
requirement: !ruby/object:Gem::Requirement
|
128
72
|
requirements:
|
129
73
|
- - "~>"
|
130
74
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
75
|
+
version: '3.9'
|
132
76
|
type: :development
|
133
77
|
prerelease: false
|
134
78
|
version_requirements: !ruby/object:Gem::Requirement
|
135
79
|
requirements:
|
136
80
|
- - "~>"
|
137
81
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
82
|
+
version: '3.9'
|
139
83
|
- !ruby/object:Gem::Dependency
|
140
84
|
name: rubocop
|
141
85
|
requirement: !ruby/object:Gem::Requirement
|
142
86
|
requirements:
|
143
87
|
- - "~>"
|
144
88
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0.
|
89
|
+
version: '0.76'
|
146
90
|
type: :development
|
147
91
|
prerelease: false
|
148
92
|
version_requirements: !ruby/object:Gem::Requirement
|
149
93
|
requirements:
|
150
94
|
- - "~>"
|
151
95
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0.
|
153
|
-
description:
|
96
|
+
version: '0.76'
|
97
|
+
description: Parsing tool for biodiversity informatics
|
154
98
|
email: dmozzherin@gmail.com
|
155
|
-
executables:
|
156
|
-
- nnparse
|
157
|
-
- parserver
|
99
|
+
executables: []
|
158
100
|
extensions: []
|
159
101
|
extra_rdoc_files: []
|
160
102
|
files:
|
161
|
-
- ".byebug_history"
|
162
|
-
- ".document"
|
163
103
|
- ".gitignore"
|
164
104
|
- ".rspec"
|
165
105
|
- ".rubocop.yml"
|
@@ -170,31 +110,16 @@ files:
|
|
170
110
|
- LICENSE
|
171
111
|
- README.md
|
172
112
|
- Rakefile
|
173
|
-
- bin/nnparse
|
174
|
-
- bin/parserver
|
175
113
|
- biodiversity.gemspec
|
176
|
-
-
|
114
|
+
- clib/linux/libgnparser.h
|
115
|
+
- clib/linux/libgnparser.so
|
116
|
+
- clib/mac/libgnparser.h
|
117
|
+
- clib/mac/libgnparser.so
|
177
118
|
- lib/biodiversity.rb
|
178
|
-
- lib/biodiversity/guid.rb
|
179
|
-
- lib/biodiversity/guid/lsid.rb
|
180
119
|
- lib/biodiversity/parser.rb
|
181
|
-
- lib/biodiversity/parser/scientific_name_canonical.rb
|
182
|
-
- lib/biodiversity/parser/scientific_name_canonical.treetop
|
183
|
-
- lib/biodiversity/parser/scientific_name_clean.rb
|
184
|
-
- lib/biodiversity/parser/scientific_name_clean.treetop
|
185
|
-
- lib/biodiversity/parser/scientific_name_dirty.rb
|
186
|
-
- lib/biodiversity/parser/scientific_name_dirty.treetop
|
187
120
|
- lib/biodiversity/version.rb
|
188
|
-
-
|
189
|
-
- spec/
|
190
|
-
- spec/files/lsid.xml
|
191
|
-
- spec/files/test_data.txt
|
192
|
-
- spec/files/todo.txt
|
193
|
-
- spec/guid/lsid.spec.rb
|
194
|
-
- spec/parser/scientific_name_canonical_spec.rb
|
195
|
-
- spec/parser/scientific_name_clean_spec.rb
|
196
|
-
- spec/parser/scientific_name_dirty_spec.rb
|
197
|
-
- spec/parser/scientific_name_spec.rb
|
121
|
+
- spec/lib/biodiversity_spec.rb
|
122
|
+
- spec/lib/parser_spec.rb
|
198
123
|
- spec/spec_helper.rb
|
199
124
|
homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
200
125
|
licenses:
|
@@ -216,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
141
|
version: '0'
|
217
142
|
requirements: []
|
218
143
|
rubyforge_project:
|
219
|
-
rubygems_version: 2.6.
|
144
|
+
rubygems_version: 2.7.6.2
|
220
145
|
signing_key:
|
221
146
|
specification_version: 4
|
222
147
|
summary: Parser of scientific names
|
data/.byebug_history
DELETED
data/.document
DELETED
data/examples/socket_client.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'socket'
|
3
|
-
|
4
|
-
host = 'localhost'
|
5
|
-
port = 4334
|
6
|
-
|
7
|
-
f = open('10000_names.txt')
|
8
|
-
w = open('output.txt', 'w')
|
9
|
-
s = TCPSocket.open(host, port)
|
10
|
-
|
11
|
-
f.each_with_index do |line, i|
|
12
|
-
puts i if i % 1000 == 0
|
13
|
-
line = line.strip
|
14
|
-
s.puts(line.strip)
|
15
|
-
res = s.gets
|
16
|
-
if res && res.split(" ").size > 3
|
17
|
-
res = res.strip
|
18
|
-
w.write(line + "\n")
|
19
|
-
w.write(res + "\n")
|
20
|
-
w.write("\n")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
s.close
|
25
|
-
|
data/lib/biodiversity/guid.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require_relative 'guid/lsid'
|
data/lib/biodiversity/guid/lsid.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class LsidResolver
|
2
|
-
def self.resolve(lsid)
|
3
|
-
http_get_rdf(lsid)
|
4
|
-
end
|
5
|
-
|
6
|
-
protected
|
7
|
-
def self.http_get_rdf(lsid)
|
8
|
-
rdf = ''
|
9
|
-
open(Biodiversity::LSID_RESOLVER_URL + lsid) do |f|
|
10
|
-
f.each do |line|
|
11
|
-
rdf += line if !line.strip.blank?
|
12
|
-
end
|
13
|
-
end
|
14
|
-
rdf
|
15
|
-
end
|
16
|
-
end
|