biodiversity 3.5.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -6
- data/.ruby-version +1 -1
- data/.travis.yml +1 -6
- data/CHANGELOG +3 -0
- data/Gemfile +2 -0
- data/README.md +37 -178
- data/Rakefile +15 -48
- data/biodiversity.gemspec +18 -21
- data/clib/linux/libgnparser.h +93 -0
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +93 -0
- data/clib/mac/libgnparser.so +0 -0
- data/lib/biodiversity.rb +4 -9
- data/lib/biodiversity/parser.rb +65 -281
- data/lib/biodiversity/version.rb +8 -1
- data/spec/lib/biodiversity_spec.rb +9 -0
- data/spec/lib/parser_spec.rb +38 -0
- data/spec/spec_helper.rb +4 -81
- metadata +27 -102
- data/.byebug_history +0 -18
- data/.document +0 -5
- data/examples/socket_client.rb +0 -25
- data/lib/biodiversity/guid.rb +0 -1
- data/lib/biodiversity/guid/lsid.rb +0 -16
- data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -528
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +0 -120
- data/lib/biodiversity/parser/scientific_name_clean.rb +0 -8991
- data/lib/biodiversity/parser/scientific_name_clean.treetop +0 -1632
- data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1298
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +0 -264
- data/spec/biodiversity_spec.rb +0 -11
- data/spec/files/test_data.txt +0 -490
- data/spec/files/todo.txt +0 -55
- data/spec/guid/lsid.spec.rb +0 -15
- data/spec/parser/scientific_name_canonical_spec.rb +0 -36
- data/spec/parser/scientific_name_clean_spec.rb +0 -1137
- data/spec/parser/scientific_name_dirty_spec.rb +0 -165
- data/spec/parser/scientific_name_spec.rb +0 -193
data/lib/biodiversity/version.rb
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# s frozen_string_literal: true
|
4
|
+
|
5
|
+
describe Biodiversity::Parser do
|
6
|
+
describe('parse') do
|
7
|
+
it 'parses name in simple format' do
|
8
|
+
parsed = subject.parse('Homo sapiens Linn.', true)
|
9
|
+
expect(parsed[:canonicalName][:simple]).to eq 'Homo sapiens'
|
10
|
+
expect(parsed[:normalized]).to be_nil
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'parsed name in full format' do
|
14
|
+
parsed = subject.parse('Homo sapiens Linn.')
|
15
|
+
expect(parsed[:canonicalName][:simple]).to eq 'Homo sapiens'
|
16
|
+
expect(parsed[:normalized]).to eq 'Homo sapiens Linn.'
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe('parse_ary') do
|
21
|
+
it 'parses names in simple format' do
|
22
|
+
parsed = subject.parse_ary(['Homo sapiens Linn.', 'Pardosa moesta'], true)
|
23
|
+
expect(parsed[0][:canonicalName][:simple]).to eq 'Homo sapiens'
|
24
|
+
expect(parsed[1][:canonicalName][:simple]).to eq 'Pardosa moesta'
|
25
|
+
expect(parsed[0][:normalized]).to be_nil
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'parsed name in full format' do
|
29
|
+
parsed = subject.parse_ary(
|
30
|
+
['Homo sapiens Linn.', 'Tobacco Mosaic Virus']
|
31
|
+
)
|
32
|
+
expect(parsed[0][:canonicalName][:simple]).to eq 'Homo sapiens'
|
33
|
+
expect(parsed[0][:normalized]).to eq 'Homo sapiens Linn.'
|
34
|
+
expect(parsed[1][:parsed]).to be false
|
35
|
+
expect(parsed[1][:virus]).to be true
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,82 +1,5 @@
|
|
1
|
-
|
2
|
-
require "treetop"
|
3
|
-
require "json"
|
4
|
-
require "biodiversity"
|
5
|
-
require "webmock/rspec"
|
6
|
-
|
7
|
-
|
8
|
-
RSpec.configure do |c|
|
9
|
-
c.mock_with :rr
|
10
|
-
end
|
11
|
-
|
12
|
-
dir = File.dirname(__FILE__)
|
13
|
-
Treetop.load(File.expand_path(File.join(dir,
|
14
|
-
"../lib/biodiversity/parser/scientific_name_clean")))
|
15
|
-
Treetop.load(File.expand_path(File.join(dir,
|
16
|
-
"../lib/biodiversity/parser/scientific_name_dirty")))
|
17
|
-
Treetop.load(File.expand_path(File.join(dir,
|
18
|
-
"../lib/biodiversity/parser/scientific_name_canonical")))
|
19
|
-
|
20
|
-
PARSER_TEST_VERSION = "test_version"
|
21
|
-
|
22
|
-
def set_parser(parser)
|
23
|
-
@parser = parser
|
24
|
-
end
|
25
|
-
|
26
|
-
def parse(input)
|
27
|
-
@parser.parse(input)
|
28
|
-
end
|
29
|
-
|
30
|
-
def value(input)
|
31
|
-
parse(input).value
|
32
|
-
end
|
33
|
-
|
34
|
-
def canonical(input)
|
35
|
-
parse(input).canonical
|
36
|
-
end
|
37
|
-
|
38
|
-
def details(input)
|
39
|
-
parse(input).details
|
40
|
-
end
|
41
|
-
|
42
|
-
def pos(input)
|
43
|
-
parse(input).pos
|
44
|
-
end
|
45
|
-
|
46
|
-
def json(input)
|
47
|
-
parse(input).
|
48
|
-
to_json.gsub(/"parser_version":"[^"]*"/,
|
49
|
-
%Q["parser_version":"#{PARSER_TEST_VERSION}"])
|
50
|
-
end
|
51
|
-
|
52
|
-
def debug(input)
|
53
|
-
res = parse(input)
|
54
|
-
puts "<pre>"
|
55
|
-
if res
|
56
|
-
puts "success!"
|
57
|
-
puts res.inspect
|
58
|
-
else
|
59
|
-
puts input
|
60
|
-
val = @parser.failure_reason.to_s.match(/column [0-9]*/).
|
61
|
-
to_s.gsub(/column /, "").to_i
|
62
|
-
print ("-" * (val - 1))
|
63
|
-
print "^ Computer says 'ni'!\n"
|
64
|
-
puts @parser.failure_reason
|
65
|
-
puts @parser.to_yaml
|
66
|
-
end
|
67
|
-
puts "</pre>"
|
68
|
-
end
|
69
|
-
|
70
|
-
def read_test_file
|
71
|
-
f = open(File.expand_path("../files/test_data.txt", __FILE__))
|
72
|
-
f.each do |line|
|
73
|
-
name, jsn = line.split("|")
|
74
|
-
if line.match(/^\s*#/) == nil && name && jsn
|
75
|
-
yield({ name: name, jsn: jsn })
|
76
|
-
else
|
77
|
-
yield({ comment: line })
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
1
|
+
# frozen_string_literal: true
|
82
2
|
|
3
|
+
require 'rspec'
|
4
|
+
require 'json'
|
5
|
+
require 'biodiversity'
|
metadata
CHANGED
@@ -1,165 +1,105 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: ffi
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.11'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: parallel
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.12'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.12'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: unicode_utils
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '1.4'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '1.4'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: gn_uuid
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0.5'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0.5'
|
26
|
+
version: '1.11'
|
69
27
|
- !ruby/object:Gem::Dependency
|
70
28
|
name: bundler
|
71
29
|
requirement: !ruby/object:Gem::Requirement
|
72
30
|
requirements:
|
73
31
|
- - "~>"
|
74
32
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '1.16'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rake
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '12.3'
|
33
|
+
version: '2.0'
|
90
34
|
type: :development
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
38
|
- - "~>"
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
40
|
+
version: '2.0'
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: byebug
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
45
|
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
47
|
+
version: '11.0'
|
104
48
|
type: :development
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
52
|
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
54
|
+
version: '11.0'
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
56
|
+
name: rake
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
114
58
|
requirements:
|
115
59
|
- - "~>"
|
116
60
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
61
|
+
version: '13.0'
|
118
62
|
type: :development
|
119
63
|
prerelease: false
|
120
64
|
version_requirements: !ruby/object:Gem::Requirement
|
121
65
|
requirements:
|
122
66
|
- - "~>"
|
123
67
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
68
|
+
version: '13.0'
|
125
69
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
70
|
+
name: rspec
|
127
71
|
requirement: !ruby/object:Gem::Requirement
|
128
72
|
requirements:
|
129
73
|
- - "~>"
|
130
74
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
75
|
+
version: '3.9'
|
132
76
|
type: :development
|
133
77
|
prerelease: false
|
134
78
|
version_requirements: !ruby/object:Gem::Requirement
|
135
79
|
requirements:
|
136
80
|
- - "~>"
|
137
81
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
82
|
+
version: '3.9'
|
139
83
|
- !ruby/object:Gem::Dependency
|
140
84
|
name: rubocop
|
141
85
|
requirement: !ruby/object:Gem::Requirement
|
142
86
|
requirements:
|
143
87
|
- - "~>"
|
144
88
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0.
|
89
|
+
version: '0.76'
|
146
90
|
type: :development
|
147
91
|
prerelease: false
|
148
92
|
version_requirements: !ruby/object:Gem::Requirement
|
149
93
|
requirements:
|
150
94
|
- - "~>"
|
151
95
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0.
|
153
|
-
description:
|
96
|
+
version: '0.76'
|
97
|
+
description: Parsing tool for biodiversity informatics
|
154
98
|
email: dmozzherin@gmail.com
|
155
|
-
executables:
|
156
|
-
- nnparse
|
157
|
-
- parserver
|
99
|
+
executables: []
|
158
100
|
extensions: []
|
159
101
|
extra_rdoc_files: []
|
160
102
|
files:
|
161
|
-
- ".byebug_history"
|
162
|
-
- ".document"
|
163
103
|
- ".gitignore"
|
164
104
|
- ".rspec"
|
165
105
|
- ".rubocop.yml"
|
@@ -170,31 +110,16 @@ files:
|
|
170
110
|
- LICENSE
|
171
111
|
- README.md
|
172
112
|
- Rakefile
|
173
|
-
- bin/nnparse
|
174
|
-
- bin/parserver
|
175
113
|
- biodiversity.gemspec
|
176
|
-
-
|
114
|
+
- clib/linux/libgnparser.h
|
115
|
+
- clib/linux/libgnparser.so
|
116
|
+
- clib/mac/libgnparser.h
|
117
|
+
- clib/mac/libgnparser.so
|
177
118
|
- lib/biodiversity.rb
|
178
|
-
- lib/biodiversity/guid.rb
|
179
|
-
- lib/biodiversity/guid/lsid.rb
|
180
119
|
- lib/biodiversity/parser.rb
|
181
|
-
- lib/biodiversity/parser/scientific_name_canonical.rb
|
182
|
-
- lib/biodiversity/parser/scientific_name_canonical.treetop
|
183
|
-
- lib/biodiversity/parser/scientific_name_clean.rb
|
184
|
-
- lib/biodiversity/parser/scientific_name_clean.treetop
|
185
|
-
- lib/biodiversity/parser/scientific_name_dirty.rb
|
186
|
-
- lib/biodiversity/parser/scientific_name_dirty.treetop
|
187
120
|
- lib/biodiversity/version.rb
|
188
|
-
-
|
189
|
-
- spec/
|
190
|
-
- spec/files/lsid.xml
|
191
|
-
- spec/files/test_data.txt
|
192
|
-
- spec/files/todo.txt
|
193
|
-
- spec/guid/lsid.spec.rb
|
194
|
-
- spec/parser/scientific_name_canonical_spec.rb
|
195
|
-
- spec/parser/scientific_name_clean_spec.rb
|
196
|
-
- spec/parser/scientific_name_dirty_spec.rb
|
197
|
-
- spec/parser/scientific_name_spec.rb
|
121
|
+
- spec/lib/biodiversity_spec.rb
|
122
|
+
- spec/lib/parser_spec.rb
|
198
123
|
- spec/spec_helper.rb
|
199
124
|
homepage: https://github.com/GlobalNamesArchitecture/biodiversity
|
200
125
|
licenses:
|
@@ -216,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
141
|
version: '0'
|
217
142
|
requirements: []
|
218
143
|
rubyforge_project:
|
219
|
-
rubygems_version: 2.6.
|
144
|
+
rubygems_version: 2.7.6.2
|
220
145
|
signing_key:
|
221
146
|
specification_version: 4
|
222
147
|
summary: Parser of scientific names
|
data/.byebug_history
DELETED
data/.document
DELETED
data/examples/socket_client.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'socket'
|
3
|
-
|
4
|
-
host = 'localhost'
|
5
|
-
port = 4334
|
6
|
-
|
7
|
-
f = open('10000_names.txt')
|
8
|
-
w = open('output.txt', 'w')
|
9
|
-
s = TCPSocket.open(host, port)
|
10
|
-
|
11
|
-
f.each_with_index do |line, i|
|
12
|
-
puts i if i % 1000 == 0
|
13
|
-
line = line.strip
|
14
|
-
s.puts(line.strip)
|
15
|
-
res = s.gets
|
16
|
-
if res && res.split(" ").size > 3
|
17
|
-
res = res.strip
|
18
|
-
w.write(line + "\n")
|
19
|
-
w.write(res + "\n")
|
20
|
-
w.write("\n")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
s.close
|
25
|
-
|
data/lib/biodiversity/guid.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require_relative 'guid/lsid'
|
@@ -1,16 +0,0 @@
|
|
1
|
-
class LsidResolver
|
2
|
-
def self.resolve(lsid)
|
3
|
-
http_get_rdf(lsid)
|
4
|
-
end
|
5
|
-
|
6
|
-
protected
|
7
|
-
def self.http_get_rdf(lsid)
|
8
|
-
rdf = ''
|
9
|
-
open(Biodiversity::LSID_RESOLVER_URL + lsid) do |f|
|
10
|
-
f.each do |line|
|
11
|
-
rdf += line if !line.strip.blank?
|
12
|
-
end
|
13
|
-
end
|
14
|
-
rdf
|
15
|
-
end
|
16
|
-
end
|