biodiversity 3.1.5 → 3.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -0
- data/.ruby-version +1 -1
- data/CHANGELOG +2 -0
- data/LICENSE +1 -1
- data/README.md +3 -3
- data/Rakefile +14 -14
- data/biodiversity.gemspec +19 -19
- data/lib/biodiversity/parser.rb +1 -1
- data/lib/biodiversity/parser/scientific_name_canonical.rb +13 -13
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +17 -17
- data/lib/biodiversity/parser/scientific_name_clean.rb +163 -131
- data/lib/biodiversity/parser/scientific_name_clean.treetop +62 -56
- data/lib/biodiversity/parser/scientific_name_dirty.rb +36 -36
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +50 -53
- data/lib/biodiversity/version.rb +2 -2
- data/spec/files/test_data.txt +6 -1
- data/spec/parser/scientific_name_canonical_spec.rb +21 -21
- data/spec/parser/scientific_name_clean_spec.rb +750 -500
- data/spec/parser/scientific_name_dirty_spec.rb +91 -90
- data/spec/parser/scientific_name_spec.rb +3 -1
- data/spec/spec_helper.rb +21 -21
- metadata +17 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 791e6108615cca8282576b68d748ae798b18e227
|
4
|
+
data.tar.gz: d69dfab6af39a13692fe54d64130110d261e9f10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fef3958839ec08379c59c2dea9312cd19f72948442acfb19f4351d33ee39745009504a820b27b09ad54c74ce46512818f96ab57917ad9c9d3376d75a462995c0
|
7
|
+
data.tar.gz: 7ac18251f5353abf3c9aee0bf9595f75400b74cf95fbadc71f2a262c95f9a8a83b5c20840bb02e1fc576bc659d422ff58bf2b16625ada72aa77d7f9592c2cd7b
|
data/.rubocop.yml
ADDED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.1.
|
1
|
+
2.1.5
|
data/CHANGELOG
CHANGED
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2008-
|
3
|
+
Copyright (c) 2008-2015 Marine Biological Laboratory
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -12,7 +12,7 @@ Parses taxonomic scientific name and breaks it into semantic elements.
|
|
12
12
|
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
13
13
|
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
14
14
|
|
15
|
-
biodiversity19 is now deprecated and will be phased out in
|
15
|
+
biodiversity19 is now deprecated and will be phased out in 2015.
|
16
16
|
You are strongly encouraged to change your dependencies from
|
17
17
|
biodiversity19 to biodiversity
|
18
18
|
|
@@ -184,9 +184,9 @@ It should fix the problem.
|
|
184
184
|
Copyright
|
185
185
|
---------
|
186
186
|
|
187
|
-
Authors: [Dmitry Mozzherin][10]
|
187
|
+
Authors: [Dmitry Mozzherin][10]
|
188
188
|
|
189
|
-
Copyright (c) 2008-
|
189
|
+
Copyright (c) 2008-2015 Marine Biological Laboratory. See LICENSE for
|
190
190
|
further details.
|
191
191
|
|
192
192
|
[1]: https://badge.fury.io/rb/biodiversity.png
|
data/Rakefile
CHANGED
@@ -1,45 +1,45 @@
|
|
1
|
-
require
|
1
|
+
require "bundler"
|
2
2
|
Bundler::GemHelper.install_tasks
|
3
3
|
|
4
4
|
begin
|
5
5
|
Bundler.setup(:default, :development)
|
6
6
|
rescue Bundler::BundlerError => e
|
7
7
|
$stderr.puts e.message
|
8
|
-
$stderr.puts
|
8
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
9
9
|
exit e.status_code
|
10
10
|
end
|
11
11
|
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
12
|
+
require "rspec/core"
|
13
|
+
require "rspec/core/rake_task"
|
14
|
+
require "rake/dsl_definition"
|
15
|
+
require "rake"
|
16
|
+
require "rspec"
|
17
|
+
require "rspec/core/rake_task"
|
18
18
|
|
19
19
|
|
20
20
|
task :default => :spec
|
21
21
|
|
22
22
|
RSpec::Core::RakeTask.new do |t|
|
23
|
-
t.pattern =
|
23
|
+
t.pattern = "spec/**/*spec.rb"
|
24
24
|
end
|
25
25
|
|
26
26
|
task :tt do
|
27
27
|
dir = File.dirname(__FILE__)
|
28
|
-
[
|
29
|
-
|
30
|
-
|
28
|
+
["scientific_name_clean",
|
29
|
+
"scientific_name_dirty",
|
30
|
+
"scientific_name_canonical"].each do |f|
|
31
31
|
file = "#{dir}/lib/biodiversity/parser/#{f}"
|
32
32
|
FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
|
33
33
|
system("tt #{file}.treetop")
|
34
34
|
rf = "#{file}.rb"
|
35
|
-
rfn = open(rf +
|
35
|
+
rfn = open(rf + ".tmp", "w")
|
36
36
|
skip_head = false
|
37
37
|
f = open(rf)
|
38
38
|
# getting around a bug in treetop which prevents setting
|
39
39
|
# UTF-8 encoding in ruby19
|
40
40
|
f.each_with_index do |l, i|
|
41
41
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
42
|
-
if skip_head && (l.strip ==
|
42
|
+
if skip_head && (l.strip == "" || l.match(/^# Autogenerated/))
|
43
43
|
next
|
44
44
|
else
|
45
45
|
skip_head = false
|
data/biodiversity.gemspec
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
$:.push File.expand_path("../lib", __FILE__)
|
2
2
|
|
3
|
-
require
|
3
|
+
require "biodiversity/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
|
-
gem.name =
|
6
|
+
gem.name = "biodiversity"
|
7
7
|
gem.version = Biodiversity::VERSION
|
8
|
-
gem.homepage =
|
9
|
-
gem.license =
|
10
|
-
gem.summary =
|
11
|
-
gem.description =
|
12
|
-
gem.authors = [
|
13
|
-
gem.email =
|
8
|
+
gem.homepage = "https://github.com/GlobalNamesArchitecture/biodiversity"
|
9
|
+
gem.license = "MIT"
|
10
|
+
gem.summary = "Parser of scientific names"
|
11
|
+
gem.description = "Tools for biodiversity informatics"
|
12
|
+
gem.authors = ["Dmitry Mozzherin"]
|
13
|
+
gem.email = "dmozzherin@gmail.com"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split("\n")
|
16
|
-
gem.executables = [
|
17
|
-
gem.require_paths = [
|
16
|
+
gem.executables = ["nnparse", "parserver"]
|
17
|
+
gem.require_paths = ["lib"]
|
18
18
|
|
19
|
-
gem.add_runtime_dependency
|
20
|
-
gem.add_runtime_dependency
|
21
|
-
gem.add_runtime_dependency
|
19
|
+
gem.add_runtime_dependency "treetop", "~> 1.5"
|
20
|
+
gem.add_runtime_dependency "parallel", "~> 1.4"
|
21
|
+
gem.add_runtime_dependency "unicode_utils", "~> 1.4"
|
22
22
|
|
23
|
-
gem.add_development_dependency
|
24
|
-
gem.add_development_dependency
|
25
|
-
gem.add_development_dependency
|
26
|
-
gem.add_development_dependency
|
27
|
-
gem.add_development_dependency
|
28
|
-
gem.add_development_dependency
|
23
|
+
gem.add_development_dependency "bundler", "~> 1.8"
|
24
|
+
gem.add_development_dependency "rake", "~> 10.4"
|
25
|
+
gem.add_development_dependency "rspec", "~> 3.2"
|
26
|
+
gem.add_development_dependency "webmock", "~> 1.20"
|
27
|
+
gem.add_development_dependency "rr", "~> 1.1"
|
28
|
+
gem.add_development_dependency "rubocop", "~> 0.29"
|
29
29
|
end
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -251,7 +251,7 @@ class ScientificNameParser
|
|
251
251
|
surrogate1 = /BOLD:|[\d]{5,}/i
|
252
252
|
surrogate2 = /\b(spp|sp|nr|cf)[\.]?[\s]*$/i
|
253
253
|
is_surrogate = false
|
254
|
-
|
254
|
+
|
255
255
|
ai_index = pos.index('annotation_identification')
|
256
256
|
if ai_index
|
257
257
|
ai = name[pos[ai_index - 1]..pos[ai_index + 1]]
|
@@ -29,7 +29,7 @@ module ScientificNameCanonical
|
|
29
29
|
def hybrid
|
30
30
|
false
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def details
|
34
34
|
[super]
|
35
35
|
end
|
@@ -103,15 +103,15 @@ module ScientificNameCanonical
|
|
103
103
|
def value
|
104
104
|
a.value + " " + b.value + " " + c.value
|
105
105
|
end
|
106
|
-
|
106
|
+
|
107
107
|
def canonical
|
108
108
|
a.canonical + " " + c.canonical
|
109
109
|
end
|
110
|
-
|
110
|
+
|
111
111
|
def pos
|
112
112
|
a.pos.merge(b.pos).merge(c.pos)
|
113
113
|
end
|
114
|
-
|
114
|
+
|
115
115
|
def details
|
116
116
|
a.details.merge(b.details).merge(c.details)
|
117
117
|
end
|
@@ -139,15 +139,15 @@ module ScientificNameCanonical
|
|
139
139
|
def value
|
140
140
|
a.value + " " + b.value
|
141
141
|
end
|
142
|
-
|
142
|
+
|
143
143
|
def canonical
|
144
144
|
a.canonical
|
145
145
|
end
|
146
|
-
|
146
|
+
|
147
147
|
def pos
|
148
148
|
a.pos.merge(b.pos)
|
149
149
|
end
|
150
|
-
|
150
|
+
|
151
151
|
def details
|
152
152
|
a.details.merge(b.details)
|
153
153
|
end
|
@@ -175,15 +175,15 @@ module ScientificNameCanonical
|
|
175
175
|
def value
|
176
176
|
a.value + " " + b.value
|
177
177
|
end
|
178
|
-
|
178
|
+
|
179
179
|
def canonical
|
180
180
|
a.canonical + " " + b.canonical
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
def pos
|
184
184
|
a.pos.merge(b.pos)
|
185
185
|
end
|
186
|
-
|
186
|
+
|
187
187
|
def details
|
188
188
|
a.details.merge(b.details)
|
189
189
|
end
|
@@ -315,15 +315,15 @@ module ScientificNameCanonical
|
|
315
315
|
def value
|
316
316
|
a.value
|
317
317
|
end
|
318
|
-
|
318
|
+
|
319
319
|
def canonical
|
320
320
|
a.canonical
|
321
321
|
end
|
322
|
-
|
322
|
+
|
323
323
|
def pos
|
324
324
|
a.pos
|
325
325
|
end
|
326
|
-
|
326
|
+
|
327
327
|
def details
|
328
328
|
{:uninomial => a.details[:uninomial]}
|
329
329
|
end
|
@@ -21,7 +21,7 @@ grammar ScientificNameCanonical
|
|
21
21
|
def hybrid
|
22
22
|
false
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def details
|
26
26
|
[super]
|
27
27
|
end
|
@@ -31,22 +31,22 @@ grammar ScientificNameCanonical
|
|
31
31
|
end
|
32
32
|
}
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
35
|
rule multinomial_with_garbage
|
36
|
-
|
36
|
+
|
37
37
|
a:genus space b:infragenus space c:species garbage {
|
38
38
|
def value
|
39
39
|
a.value + " " + b.value + " " + c.value
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
42
|
def canonical
|
43
43
|
a.canonical + " " + c.canonical
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
def pos
|
47
47
|
a.pos.merge(b.pos).merge(c.pos)
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
def details
|
51
51
|
a.details.merge(b.details).merge(c.details)
|
52
52
|
end
|
@@ -56,15 +56,15 @@ grammar ScientificNameCanonical
|
|
56
56
|
def value
|
57
57
|
a.value + " " + b.value
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
def canonical
|
61
61
|
a.canonical
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
def pos
|
65
65
|
a.pos.merge(b.pos)
|
66
66
|
end
|
67
|
-
|
67
|
+
|
68
68
|
def details
|
69
69
|
a.details.merge(b.details)
|
70
70
|
end
|
@@ -74,41 +74,41 @@ grammar ScientificNameCanonical
|
|
74
74
|
def value
|
75
75
|
a.value + " " + b.value
|
76
76
|
end
|
77
|
-
|
77
|
+
|
78
78
|
def canonical
|
79
79
|
a.canonical + " " + b.canonical
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
def pos
|
83
83
|
a.pos.merge(b.pos)
|
84
84
|
end
|
85
|
-
|
85
|
+
|
86
86
|
def details
|
87
87
|
a.details.merge(b.details)
|
88
88
|
end
|
89
89
|
}
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
rule uninomial_with_garbage
|
93
93
|
a:uninomial_string b:garbage {
|
94
94
|
def value
|
95
95
|
a.value
|
96
96
|
end
|
97
|
-
|
97
|
+
|
98
98
|
def canonical
|
99
99
|
a.canonical
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
def pos
|
103
103
|
a.pos
|
104
104
|
end
|
105
|
-
|
105
|
+
|
106
106
|
def details
|
107
107
|
{:uninomial => a.details[:uninomial]}
|
108
108
|
end
|
109
109
|
}
|
110
110
|
end
|
111
|
-
|
111
|
+
|
112
112
|
rule garbage
|
113
113
|
space "$$g@rbg3$$"
|
114
114
|
/
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# Autogenerated from a Treetop grammar. Edits may be lost.
|
3
3
|
|
4
4
|
|
5
|
-
require
|
5
|
+
require "unicode_utils"
|
6
6
|
|
7
7
|
module ScientificNameClean
|
8
8
|
include Treetop::Runtime
|
@@ -27,11 +27,11 @@ module ScientificNameClean
|
|
27
27
|
|
28
28
|
module Root1
|
29
29
|
def value
|
30
|
-
a.value.gsub(/\s{2,}/,
|
30
|
+
a.value.gsub(/\s{2,}/, " ").strip
|
31
31
|
end
|
32
32
|
|
33
33
|
def canonical
|
34
|
-
a.canonical.gsub(/\s{2,}/,
|
34
|
+
a.canonical.gsub(/\s{2,}/, " ").strip
|
35
35
|
end
|
36
36
|
|
37
37
|
def pos
|
@@ -797,7 +797,7 @@ module ScientificNameClean
|
|
797
797
|
module Unparsed1
|
798
798
|
|
799
799
|
def value
|
800
|
-
|
800
|
+
""
|
801
801
|
end
|
802
802
|
|
803
803
|
def hybrid
|
@@ -805,11 +805,11 @@ module ScientificNameClean
|
|
805
805
|
end
|
806
806
|
|
807
807
|
def canonical
|
808
|
-
|
808
|
+
""
|
809
809
|
end
|
810
810
|
|
811
811
|
def pos
|
812
|
-
{interval.begin => [
|
812
|
+
{interval.begin => ["unparsed", interval.end]}
|
813
813
|
end
|
814
814
|
|
815
815
|
def details
|
@@ -1837,14 +1837,14 @@ module ScientificNameClean
|
|
1837
1837
|
|
1838
1838
|
def pos
|
1839
1839
|
def a.pos
|
1840
|
-
{interval.begin => [
|
1840
|
+
{interval.begin => ["infraspecies", a.interval.end]}
|
1841
1841
|
end
|
1842
1842
|
aid.pos(a)
|
1843
1843
|
end
|
1844
1844
|
|
1845
1845
|
def details
|
1846
1846
|
def a.details
|
1847
|
-
{:infraspecies => {:string => value, :rank =>
|
1847
|
+
{:infraspecies => {:string => value, :rank => "n/a"}}
|
1848
1848
|
end
|
1849
1849
|
aid.details(a)
|
1850
1850
|
end
|
@@ -1867,11 +1867,11 @@ module ScientificNameClean
|
|
1867
1867
|
end
|
1868
1868
|
|
1869
1869
|
def pos
|
1870
|
-
{interval.begin => [
|
1870
|
+
{interval.begin => ["infraspecies", interval.end]}
|
1871
1871
|
end
|
1872
1872
|
|
1873
1873
|
def details
|
1874
|
-
{:infraspecies => {:string => value, :rank =>
|
1874
|
+
{:infraspecies => {:string => value, :rank => "n/a"}}
|
1875
1875
|
end
|
1876
1876
|
end
|
1877
1877
|
|
@@ -2065,8 +2065,8 @@ module ScientificNameClean
|
|
2065
2065
|
end
|
2066
2066
|
|
2067
2067
|
def pos(a)
|
2068
|
-
interval_end = text_value[-1] ==
|
2069
|
-
{interval.begin => [
|
2068
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
2069
|
+
{interval.begin => ["infraspecific_type", interval_end], a.interval.begin => ["infraspecies", a.interval.end]}
|
2070
2070
|
end
|
2071
2071
|
|
2072
2072
|
def details(a = nil)
|
@@ -2326,11 +2326,11 @@ module ScientificNameClean
|
|
2326
2326
|
r0 = r20
|
2327
2327
|
r0.extend(Rank0)
|
2328
2328
|
else
|
2329
|
-
if (match_len = has_terminal?("forma
|
2329
|
+
if (match_len = has_terminal?("forma.", false, index))
|
2330
2330
|
r21 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2331
2331
|
@index += match_len
|
2332
2332
|
else
|
2333
|
-
terminal_parse_failure("forma
|
2333
|
+
terminal_parse_failure("forma.")
|
2334
2334
|
r21 = nil
|
2335
2335
|
end
|
2336
2336
|
if r21
|
@@ -2338,11 +2338,11 @@ module ScientificNameClean
|
|
2338
2338
|
r0 = r21
|
2339
2339
|
r0.extend(Rank0)
|
2340
2340
|
else
|
2341
|
-
if (match_len = has_terminal?("
|
2341
|
+
if (match_len = has_terminal?("forma ", false, index))
|
2342
2342
|
r22 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2343
2343
|
@index += match_len
|
2344
2344
|
else
|
2345
|
-
terminal_parse_failure("
|
2345
|
+
terminal_parse_failure("forma ")
|
2346
2346
|
r22 = nil
|
2347
2347
|
end
|
2348
2348
|
if r22
|
@@ -2350,11 +2350,11 @@ module ScientificNameClean
|
|
2350
2350
|
r0 = r22
|
2351
2351
|
r0.extend(Rank0)
|
2352
2352
|
else
|
2353
|
-
if (match_len = has_terminal?("fma
|
2353
|
+
if (match_len = has_terminal?("fma.", false, index))
|
2354
2354
|
r23 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2355
2355
|
@index += match_len
|
2356
2356
|
else
|
2357
|
-
terminal_parse_failure("fma
|
2357
|
+
terminal_parse_failure("fma.")
|
2358
2358
|
r23 = nil
|
2359
2359
|
end
|
2360
2360
|
if r23
|
@@ -2362,11 +2362,11 @@ module ScientificNameClean
|
|
2362
2362
|
r0 = r23
|
2363
2363
|
r0.extend(Rank0)
|
2364
2364
|
else
|
2365
|
-
if (match_len = has_terminal?("
|
2365
|
+
if (match_len = has_terminal?("fma ", false, index))
|
2366
2366
|
r24 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2367
2367
|
@index += match_len
|
2368
2368
|
else
|
2369
|
-
terminal_parse_failure("
|
2369
|
+
terminal_parse_failure("fma ")
|
2370
2370
|
r24 = nil
|
2371
2371
|
end
|
2372
2372
|
if r24
|
@@ -2374,11 +2374,11 @@ module ScientificNameClean
|
|
2374
2374
|
r0 = r24
|
2375
2375
|
r0.extend(Rank0)
|
2376
2376
|
else
|
2377
|
-
if (match_len = has_terminal?("form
|
2377
|
+
if (match_len = has_terminal?("form.", false, index))
|
2378
2378
|
r25 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2379
2379
|
@index += match_len
|
2380
2380
|
else
|
2381
|
-
terminal_parse_failure("form
|
2381
|
+
terminal_parse_failure("form.")
|
2382
2382
|
r25 = nil
|
2383
2383
|
end
|
2384
2384
|
if r25
|
@@ -2386,11 +2386,11 @@ module ScientificNameClean
|
|
2386
2386
|
r0 = r25
|
2387
2387
|
r0.extend(Rank0)
|
2388
2388
|
else
|
2389
|
-
if (match_len = has_terminal?("
|
2389
|
+
if (match_len = has_terminal?("form ", false, index))
|
2390
2390
|
r26 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2391
2391
|
@index += match_len
|
2392
2392
|
else
|
2393
|
-
terminal_parse_failure("
|
2393
|
+
terminal_parse_failure("form ")
|
2394
2394
|
r26 = nil
|
2395
2395
|
end
|
2396
2396
|
if r26
|
@@ -2398,11 +2398,11 @@ module ScientificNameClean
|
|
2398
2398
|
r0 = r26
|
2399
2399
|
r0.extend(Rank0)
|
2400
2400
|
else
|
2401
|
-
if (match_len = has_terminal?("fo
|
2401
|
+
if (match_len = has_terminal?("fo.", false, index))
|
2402
2402
|
r27 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2403
2403
|
@index += match_len
|
2404
2404
|
else
|
2405
|
-
terminal_parse_failure("fo
|
2405
|
+
terminal_parse_failure("fo.")
|
2406
2406
|
r27 = nil
|
2407
2407
|
end
|
2408
2408
|
if r27
|
@@ -2410,11 +2410,11 @@ module ScientificNameClean
|
|
2410
2410
|
r0 = r27
|
2411
2411
|
r0.extend(Rank0)
|
2412
2412
|
else
|
2413
|
-
if (match_len = has_terminal?("
|
2413
|
+
if (match_len = has_terminal?("fo ", false, index))
|
2414
2414
|
r28 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2415
2415
|
@index += match_len
|
2416
2416
|
else
|
2417
|
-
terminal_parse_failure("
|
2417
|
+
terminal_parse_failure("fo ")
|
2418
2418
|
r28 = nil
|
2419
2419
|
end
|
2420
2420
|
if r28
|
@@ -2422,11 +2422,11 @@ module ScientificNameClean
|
|
2422
2422
|
r0 = r28
|
2423
2423
|
r0.extend(Rank0)
|
2424
2424
|
else
|
2425
|
-
if (match_len = has_terminal?("
|
2426
|
-
r29 =
|
2425
|
+
if (match_len = has_terminal?("f.", false, index))
|
2426
|
+
r29 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2427
2427
|
@index += match_len
|
2428
2428
|
else
|
2429
|
-
terminal_parse_failure("
|
2429
|
+
terminal_parse_failure("f.")
|
2430
2430
|
r29 = nil
|
2431
2431
|
end
|
2432
2432
|
if r29
|
@@ -2434,11 +2434,11 @@ module ScientificNameClean
|
|
2434
2434
|
r0 = r29
|
2435
2435
|
r0.extend(Rank0)
|
2436
2436
|
else
|
2437
|
-
if (match_len = has_terminal?("
|
2438
|
-
r30 =
|
2437
|
+
if (match_len = has_terminal?("α", false, index))
|
2438
|
+
r30 = true
|
2439
2439
|
@index += match_len
|
2440
2440
|
else
|
2441
|
-
terminal_parse_failure("
|
2441
|
+
terminal_parse_failure("α")
|
2442
2442
|
r30 = nil
|
2443
2443
|
end
|
2444
2444
|
if r30
|
@@ -2446,11 +2446,11 @@ module ScientificNameClean
|
|
2446
2446
|
r0 = r30
|
2447
2447
|
r0.extend(Rank0)
|
2448
2448
|
else
|
2449
|
-
if (match_len = has_terminal?("
|
2450
|
-
r31 =
|
2449
|
+
if (match_len = has_terminal?("ββ", false, index))
|
2450
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2451
2451
|
@index += match_len
|
2452
2452
|
else
|
2453
|
-
terminal_parse_failure("
|
2453
|
+
terminal_parse_failure("ββ")
|
2454
2454
|
r31 = nil
|
2455
2455
|
end
|
2456
2456
|
if r31
|
@@ -2458,11 +2458,11 @@ module ScientificNameClean
|
|
2458
2458
|
r0 = r31
|
2459
2459
|
r0.extend(Rank0)
|
2460
2460
|
else
|
2461
|
-
if (match_len = has_terminal?("
|
2461
|
+
if (match_len = has_terminal?("β", false, index))
|
2462
2462
|
r32 = true
|
2463
2463
|
@index += match_len
|
2464
2464
|
else
|
2465
|
-
terminal_parse_failure("
|
2465
|
+
terminal_parse_failure("β")
|
2466
2466
|
r32 = nil
|
2467
2467
|
end
|
2468
2468
|
if r32
|
@@ -2470,11 +2470,11 @@ module ScientificNameClean
|
|
2470
2470
|
r0 = r32
|
2471
2471
|
r0.extend(Rank0)
|
2472
2472
|
else
|
2473
|
-
if (match_len = has_terminal?("
|
2473
|
+
if (match_len = has_terminal?("γ", false, index))
|
2474
2474
|
r33 = true
|
2475
2475
|
@index += match_len
|
2476
2476
|
else
|
2477
|
-
terminal_parse_failure("
|
2477
|
+
terminal_parse_failure("γ")
|
2478
2478
|
r33 = nil
|
2479
2479
|
end
|
2480
2480
|
if r33
|
@@ -2482,11 +2482,11 @@ module ScientificNameClean
|
|
2482
2482
|
r0 = r33
|
2483
2483
|
r0.extend(Rank0)
|
2484
2484
|
else
|
2485
|
-
if (match_len = has_terminal?("
|
2485
|
+
if (match_len = has_terminal?("δ", false, index))
|
2486
2486
|
r34 = true
|
2487
2487
|
@index += match_len
|
2488
2488
|
else
|
2489
|
-
terminal_parse_failure("
|
2489
|
+
terminal_parse_failure("δ")
|
2490
2490
|
r34 = nil
|
2491
2491
|
end
|
2492
2492
|
if r34
|
@@ -2494,11 +2494,11 @@ module ScientificNameClean
|
|
2494
2494
|
r0 = r34
|
2495
2495
|
r0.extend(Rank0)
|
2496
2496
|
else
|
2497
|
-
if (match_len = has_terminal?("
|
2497
|
+
if (match_len = has_terminal?("ε", false, index))
|
2498
2498
|
r35 = true
|
2499
2499
|
@index += match_len
|
2500
2500
|
else
|
2501
|
-
terminal_parse_failure("
|
2501
|
+
terminal_parse_failure("ε")
|
2502
2502
|
r35 = nil
|
2503
2503
|
end
|
2504
2504
|
if r35
|
@@ -2506,11 +2506,11 @@ module ScientificNameClean
|
|
2506
2506
|
r0 = r35
|
2507
2507
|
r0.extend(Rank0)
|
2508
2508
|
else
|
2509
|
-
if (match_len = has_terminal?("
|
2509
|
+
if (match_len = has_terminal?("φ", false, index))
|
2510
2510
|
r36 = true
|
2511
2511
|
@index += match_len
|
2512
2512
|
else
|
2513
|
-
terminal_parse_failure("
|
2513
|
+
terminal_parse_failure("φ")
|
2514
2514
|
r36 = nil
|
2515
2515
|
end
|
2516
2516
|
if r36
|
@@ -2518,11 +2518,11 @@ module ScientificNameClean
|
|
2518
2518
|
r0 = r36
|
2519
2519
|
r0.extend(Rank0)
|
2520
2520
|
else
|
2521
|
-
if (match_len = has_terminal?("
|
2521
|
+
if (match_len = has_terminal?("θ", false, index))
|
2522
2522
|
r37 = true
|
2523
2523
|
@index += match_len
|
2524
2524
|
else
|
2525
|
-
terminal_parse_failure("
|
2525
|
+
terminal_parse_failure("θ")
|
2526
2526
|
r37 = nil
|
2527
2527
|
end
|
2528
2528
|
if r37
|
@@ -2530,11 +2530,11 @@ module ScientificNameClean
|
|
2530
2530
|
r0 = r37
|
2531
2531
|
r0.extend(Rank0)
|
2532
2532
|
else
|
2533
|
-
if (match_len = has_terminal?("
|
2534
|
-
r38 =
|
2533
|
+
if (match_len = has_terminal?("μ", false, index))
|
2534
|
+
r38 = true
|
2535
2535
|
@index += match_len
|
2536
2536
|
else
|
2537
|
-
terminal_parse_failure("
|
2537
|
+
terminal_parse_failure("μ")
|
2538
2538
|
r38 = nil
|
2539
2539
|
end
|
2540
2540
|
if r38
|
@@ -2542,11 +2542,11 @@ module ScientificNameClean
|
|
2542
2542
|
r0 = r38
|
2543
2543
|
r0.extend(Rank0)
|
2544
2544
|
else
|
2545
|
-
if (match_len = has_terminal?("
|
2545
|
+
if (match_len = has_terminal?("a.", false, index))
|
2546
2546
|
r39 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2547
2547
|
@index += match_len
|
2548
2548
|
else
|
2549
|
-
terminal_parse_failure("
|
2549
|
+
terminal_parse_failure("a.")
|
2550
2550
|
r39 = nil
|
2551
2551
|
end
|
2552
2552
|
if r39
|
@@ -2554,11 +2554,11 @@ module ScientificNameClean
|
|
2554
2554
|
r0 = r39
|
2555
2555
|
r0.extend(Rank0)
|
2556
2556
|
else
|
2557
|
-
if (match_len = has_terminal?("
|
2557
|
+
if (match_len = has_terminal?("b.", false, index))
|
2558
2558
|
r40 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2559
2559
|
@index += match_len
|
2560
2560
|
else
|
2561
|
-
terminal_parse_failure("
|
2561
|
+
terminal_parse_failure("b.")
|
2562
2562
|
r40 = nil
|
2563
2563
|
end
|
2564
2564
|
if r40
|
@@ -2566,11 +2566,11 @@ module ScientificNameClean
|
|
2566
2566
|
r0 = r40
|
2567
2567
|
r0.extend(Rank0)
|
2568
2568
|
else
|
2569
|
-
if (match_len = has_terminal?("
|
2569
|
+
if (match_len = has_terminal?("c.", false, index))
|
2570
2570
|
r41 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2571
2571
|
@index += match_len
|
2572
2572
|
else
|
2573
|
-
terminal_parse_failure("
|
2573
|
+
terminal_parse_failure("c.")
|
2574
2574
|
r41 = nil
|
2575
2575
|
end
|
2576
2576
|
if r41
|
@@ -2578,11 +2578,11 @@ module ScientificNameClean
|
|
2578
2578
|
r0 = r41
|
2579
2579
|
r0.extend(Rank0)
|
2580
2580
|
else
|
2581
|
-
if (match_len = has_terminal?("
|
2581
|
+
if (match_len = has_terminal?("d.", false, index))
|
2582
2582
|
r42 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2583
2583
|
@index += match_len
|
2584
2584
|
else
|
2585
|
-
terminal_parse_failure("
|
2585
|
+
terminal_parse_failure("d.")
|
2586
2586
|
r42 = nil
|
2587
2587
|
end
|
2588
2588
|
if r42
|
@@ -2590,11 +2590,11 @@ module ScientificNameClean
|
|
2590
2590
|
r0 = r42
|
2591
2591
|
r0.extend(Rank0)
|
2592
2592
|
else
|
2593
|
-
if (match_len = has_terminal?("
|
2593
|
+
if (match_len = has_terminal?("e.", false, index))
|
2594
2594
|
r43 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2595
2595
|
@index += match_len
|
2596
2596
|
else
|
2597
|
-
terminal_parse_failure("
|
2597
|
+
terminal_parse_failure("e.")
|
2598
2598
|
r43 = nil
|
2599
2599
|
end
|
2600
2600
|
if r43
|
@@ -2602,11 +2602,11 @@ module ScientificNameClean
|
|
2602
2602
|
r0 = r43
|
2603
2603
|
r0.extend(Rank0)
|
2604
2604
|
else
|
2605
|
-
if (match_len = has_terminal?("
|
2605
|
+
if (match_len = has_terminal?("g.", false, index))
|
2606
2606
|
r44 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2607
2607
|
@index += match_len
|
2608
2608
|
else
|
2609
|
-
terminal_parse_failure("
|
2609
|
+
terminal_parse_failure("g.")
|
2610
2610
|
r44 = nil
|
2611
2611
|
end
|
2612
2612
|
if r44
|
@@ -2614,11 +2614,11 @@ module ScientificNameClean
|
|
2614
2614
|
r0 = r44
|
2615
2615
|
r0.extend(Rank0)
|
2616
2616
|
else
|
2617
|
-
if (match_len = has_terminal?("
|
2617
|
+
if (match_len = has_terminal?("k.", false, index))
|
2618
2618
|
r45 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2619
2619
|
@index += match_len
|
2620
2620
|
else
|
2621
|
-
terminal_parse_failure("
|
2621
|
+
terminal_parse_failure("k.")
|
2622
2622
|
r45 = nil
|
2623
2623
|
end
|
2624
2624
|
if r45
|
@@ -2626,11 +2626,11 @@ module ScientificNameClean
|
|
2626
2626
|
r0 = r45
|
2627
2627
|
r0.extend(Rank0)
|
2628
2628
|
else
|
2629
|
-
if (match_len = has_terminal?("
|
2629
|
+
if (match_len = has_terminal?("****", false, index))
|
2630
2630
|
r46 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2631
2631
|
@index += match_len
|
2632
2632
|
else
|
2633
|
-
terminal_parse_failure("
|
2633
|
+
terminal_parse_failure("****")
|
2634
2634
|
r46 = nil
|
2635
2635
|
end
|
2636
2636
|
if r46
|
@@ -2638,11 +2638,11 @@ module ScientificNameClean
|
|
2638
2638
|
r0 = r46
|
2639
2639
|
r0.extend(Rank0)
|
2640
2640
|
else
|
2641
|
-
if (match_len = has_terminal?("
|
2642
|
-
r47 =
|
2641
|
+
if (match_len = has_terminal?("**", false, index))
|
2642
|
+
r47 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2643
2643
|
@index += match_len
|
2644
2644
|
else
|
2645
|
-
terminal_parse_failure("
|
2645
|
+
terminal_parse_failure("**")
|
2646
2646
|
r47 = nil
|
2647
2647
|
end
|
2648
2648
|
if r47
|
@@ -2650,8 +2650,21 @@ module ScientificNameClean
|
|
2650
2650
|
r0 = r47
|
2651
2651
|
r0.extend(Rank0)
|
2652
2652
|
else
|
2653
|
-
|
2654
|
-
|
2653
|
+
if (match_len = has_terminal?("*", false, index))
|
2654
|
+
r48 = true
|
2655
|
+
@index += match_len
|
2656
|
+
else
|
2657
|
+
terminal_parse_failure("*")
|
2658
|
+
r48 = nil
|
2659
|
+
end
|
2660
|
+
if r48
|
2661
|
+
r48 = SyntaxNode.new(input, (index-1)...index) if r48 == true
|
2662
|
+
r0 = r48
|
2663
|
+
r0.extend(Rank0)
|
2664
|
+
else
|
2665
|
+
@index = i0
|
2666
|
+
r0 = nil
|
2667
|
+
end
|
2655
2668
|
end
|
2656
2669
|
end
|
2657
2670
|
end
|
@@ -2711,7 +2724,7 @@ module ScientificNameClean
|
|
2711
2724
|
end
|
2712
2725
|
|
2713
2726
|
def pos(uni)
|
2714
|
-
{interval.begin => [
|
2727
|
+
{interval.begin => ["rank_uninomial", interval.end], uni.interval.begin => ["uninomial", uni.interval.end]}
|
2715
2728
|
end
|
2716
2729
|
|
2717
2730
|
def details(uni)
|
@@ -3065,7 +3078,7 @@ module ScientificNameClean
|
|
3065
3078
|
end
|
3066
3079
|
|
3067
3080
|
def pos
|
3068
|
-
{interval.begin => [
|
3081
|
+
{interval.begin => ["species", interval.end]}
|
3069
3082
|
end
|
3070
3083
|
|
3071
3084
|
def hybrid
|
@@ -3142,7 +3155,7 @@ module ScientificNameClean
|
|
3142
3155
|
end
|
3143
3156
|
|
3144
3157
|
def pos
|
3145
|
-
{a.interval.begin => [
|
3158
|
+
{a.interval.begin => ["infragenus", a.interval.end]}
|
3146
3159
|
end
|
3147
3160
|
|
3148
3161
|
def details
|
@@ -3239,7 +3252,7 @@ module ScientificNameClean
|
|
3239
3252
|
end
|
3240
3253
|
|
3241
3254
|
def pos
|
3242
|
-
{a.interval.begin => [
|
3255
|
+
{a.interval.begin => ["genus", a.interval.end]}
|
3243
3256
|
end
|
3244
3257
|
|
3245
3258
|
def canonical
|
@@ -3655,7 +3668,7 @@ module ScientificNameClean
|
|
3655
3668
|
end
|
3656
3669
|
|
3657
3670
|
def pos
|
3658
|
-
{interval.begin => [
|
3671
|
+
{interval.begin => ["uninomial", interval.end]}
|
3659
3672
|
end
|
3660
3673
|
|
3661
3674
|
def hybrid
|
@@ -4103,11 +4116,11 @@ module ScientificNameClean
|
|
4103
4116
|
end
|
4104
4117
|
|
4105
4118
|
def pos
|
4106
|
-
{a.interval.begin => [
|
4119
|
+
{a.interval.begin => ["unknown_author", a.interval.end]}
|
4107
4120
|
end
|
4108
4121
|
|
4109
4122
|
def details
|
4110
|
-
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => [
|
4123
|
+
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ["?"]}}
|
4111
4124
|
end
|
4112
4125
|
end
|
4113
4126
|
|
@@ -4435,7 +4448,7 @@ module ScientificNameClean
|
|
4435
4448
|
details_with_arg(:basionymAuthorTeam)
|
4436
4449
|
end
|
4437
4450
|
|
4438
|
-
def details_with_arg(authorTeamType =
|
4451
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4439
4452
|
{ :authorship => text_value,
|
4440
4453
|
authorTeamType.to_sym => {
|
4441
4454
|
:authorTeam => a.text_value.strip
|
@@ -4475,7 +4488,7 @@ module ScientificNameClean
|
|
4475
4488
|
details_with_arg(:basionymAuthorTeam)
|
4476
4489
|
end
|
4477
4490
|
|
4478
|
-
def details_with_arg(authorTeamType =
|
4491
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4479
4492
|
{ :authorship => text_value,
|
4480
4493
|
authorTeamType.to_sym => {
|
4481
4494
|
:authorTeam => a.text_value.strip
|
@@ -4491,7 +4504,7 @@ module ScientificNameClean
|
|
4491
4504
|
details
|
4492
4505
|
end
|
4493
4506
|
|
4494
|
-
def details_with_arg(authorTeamType =
|
4507
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4495
4508
|
{ :authorship => text_value,
|
4496
4509
|
authorTeamType.to_sym => {
|
4497
4510
|
:authorTeam => text_value,
|
@@ -4791,7 +4804,7 @@ module ScientificNameClean
|
|
4791
4804
|
end
|
4792
4805
|
|
4793
4806
|
def pos
|
4794
|
-
{interval.begin => [
|
4807
|
+
{interval.begin => ["unknown_author", interval.end]}
|
4795
4808
|
end
|
4796
4809
|
|
4797
4810
|
def details
|
@@ -5135,7 +5148,7 @@ module ScientificNameClean
|
|
5135
5148
|
|
5136
5149
|
module AuthorName1
|
5137
5150
|
def value
|
5138
|
-
a.value +
|
5151
|
+
a.value + " " + b.value
|
5139
5152
|
end
|
5140
5153
|
|
5141
5154
|
def pos
|
@@ -5360,7 +5373,7 @@ module ScientificNameClean
|
|
5360
5373
|
end
|
5361
5374
|
|
5362
5375
|
def pos
|
5363
|
-
{interval.begin => [
|
5376
|
+
{interval.begin => ["author_word", 1], (interval.begin + 2) => ["author_word", 2], (interval.begin + 5) => ["author_word", 2]}
|
5364
5377
|
end
|
5365
5378
|
|
5366
5379
|
def details
|
@@ -5375,7 +5388,7 @@ module ScientificNameClean
|
|
5375
5388
|
|
5376
5389
|
def pos
|
5377
5390
|
#cheating because there are several words in some of them
|
5378
|
-
{interval.begin => [
|
5391
|
+
{interval.begin => ["author_word", interval.end]}
|
5379
5392
|
end
|
5380
5393
|
|
5381
5394
|
def details
|
@@ -5394,7 +5407,7 @@ module ScientificNameClean
|
|
5394
5407
|
end
|
5395
5408
|
|
5396
5409
|
def pos
|
5397
|
-
{interval.begin => [
|
5410
|
+
{interval.begin => ["author_word", interval.end]}
|
5398
5411
|
end
|
5399
5412
|
|
5400
5413
|
def details
|
@@ -5411,7 +5424,7 @@ module ScientificNameClean
|
|
5411
5424
|
end
|
5412
5425
|
|
5413
5426
|
def pos
|
5414
|
-
{interval.begin => [
|
5427
|
+
{interval.begin => ["author_word", interval.end]}
|
5415
5428
|
end
|
5416
5429
|
|
5417
5430
|
def details
|
@@ -5783,7 +5796,7 @@ module ScientificNameClean
|
|
5783
5796
|
|
5784
5797
|
def pos
|
5785
5798
|
#cheating because there are several words in some of them
|
5786
|
-
{interval.begin => [
|
5799
|
+
{interval.begin => ["author_word", interval.end]}
|
5787
5800
|
end
|
5788
5801
|
end
|
5789
5802
|
|
@@ -6030,7 +6043,7 @@ module ScientificNameClean
|
|
6030
6043
|
end
|
6031
6044
|
|
6032
6045
|
def pos
|
6033
|
-
{interval.begin => [
|
6046
|
+
{interval.begin => ["author_word", interval.end]}
|
6034
6047
|
end
|
6035
6048
|
end
|
6036
6049
|
|
@@ -6183,7 +6196,7 @@ module ScientificNameClean
|
|
6183
6196
|
|
6184
6197
|
module CapLatinWord5
|
6185
6198
|
def value
|
6186
|
-
a.text_value[0..0] +
|
6199
|
+
a.text_value[0..0] + "e" + b.value
|
6187
6200
|
end
|
6188
6201
|
end
|
6189
6202
|
|
@@ -6685,7 +6698,7 @@ module ScientificNameClean
|
|
6685
6698
|
end
|
6686
6699
|
|
6687
6700
|
def pos
|
6688
|
-
{b.interval.begin => [
|
6701
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6689
6702
|
end
|
6690
6703
|
|
6691
6704
|
def details
|
@@ -6721,7 +6734,7 @@ module ScientificNameClean
|
|
6721
6734
|
end
|
6722
6735
|
|
6723
6736
|
def pos
|
6724
|
-
{b.interval.begin => [
|
6737
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6725
6738
|
end
|
6726
6739
|
|
6727
6740
|
def details
|
@@ -6757,7 +6770,7 @@ module ScientificNameClean
|
|
6757
6770
|
end
|
6758
6771
|
|
6759
6772
|
def pos
|
6760
|
-
{b.interval.begin => [
|
6773
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6761
6774
|
end
|
6762
6775
|
|
6763
6776
|
def details
|
@@ -6876,16 +6889,16 @@ module ScientificNameClean
|
|
6876
6889
|
end
|
6877
6890
|
|
6878
6891
|
def apply(sp)
|
6879
|
-
|
6892
|
+
""
|
6880
6893
|
end
|
6881
6894
|
|
6882
6895
|
def canonical(sp)
|
6883
|
-
|
6896
|
+
""
|
6884
6897
|
end
|
6885
6898
|
|
6886
6899
|
def pos(sp)
|
6887
|
-
interval_end = text_value[-1] ==
|
6888
|
-
{interval.begin => [
|
6900
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
6901
|
+
{interval.begin => ["annotation_identification", interval.end]}
|
6889
6902
|
end
|
6890
6903
|
|
6891
6904
|
def details(sp)
|
@@ -6899,16 +6912,16 @@ module ScientificNameClean
|
|
6899
6912
|
end
|
6900
6913
|
|
6901
6914
|
def apply(sp)
|
6902
|
-
|
6915
|
+
" " + value + " " + sp.value
|
6903
6916
|
end
|
6904
6917
|
|
6905
6918
|
def canonical(sp)
|
6906
|
-
|
6919
|
+
" " + sp.canonical
|
6907
6920
|
end
|
6908
6921
|
|
6909
6922
|
def pos(sp)
|
6910
|
-
interval_end = text_value[-1] ==
|
6911
|
-
{interval.begin => [
|
6923
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
6924
|
+
{interval.begin => ["annotation_identification", interval.end]}.merge(sp.pos)
|
6912
6925
|
end
|
6913
6926
|
|
6914
6927
|
def details(sp)
|
@@ -7295,6 +7308,12 @@ module ScientificNameClean
|
|
7295
7308
|
end
|
7296
7309
|
|
7297
7310
|
module LatinWord3
|
7311
|
+
def value
|
7312
|
+
"oneili"
|
7313
|
+
end
|
7314
|
+
end
|
7315
|
+
|
7316
|
+
module LatinWord4
|
7298
7317
|
def a
|
7299
7318
|
elements[0]
|
7300
7319
|
end
|
@@ -7304,7 +7323,7 @@ module ScientificNameClean
|
|
7304
7323
|
end
|
7305
7324
|
end
|
7306
7325
|
|
7307
|
-
module
|
7326
|
+
module LatinWord5
|
7308
7327
|
def value
|
7309
7328
|
a.value + b.value
|
7310
7329
|
end
|
@@ -7351,39 +7370,52 @@ module ScientificNameClean
|
|
7351
7370
|
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
7352
7371
|
r0 = r1
|
7353
7372
|
else
|
7354
|
-
if (match_len = has_terminal?("o
|
7373
|
+
if (match_len = has_terminal?("o'donelli", false, index))
|
7355
7374
|
r5 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
7356
7375
|
r5.extend(LatinWord2)
|
7357
7376
|
@index += match_len
|
7358
7377
|
else
|
7359
|
-
terminal_parse_failure("o
|
7378
|
+
terminal_parse_failure("o'donelli")
|
7360
7379
|
r5 = nil
|
7361
7380
|
end
|
7362
7381
|
if r5
|
7363
7382
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
7364
7383
|
r0 = r5
|
7365
7384
|
else
|
7366
|
-
|
7367
|
-
|
7368
|
-
s6 << r7
|
7369
|
-
if r7
|
7370
|
-
r8 = _nt_valid_name_letters
|
7371
|
-
s6 << r8
|
7372
|
-
end
|
7373
|
-
if s6.last
|
7374
|
-
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
|
7385
|
+
if (match_len = has_terminal?("o'neili", false, index))
|
7386
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
7375
7387
|
r6.extend(LatinWord3)
|
7376
|
-
|
7388
|
+
@index += match_len
|
7377
7389
|
else
|
7378
|
-
|
7390
|
+
terminal_parse_failure("o'neili")
|
7379
7391
|
r6 = nil
|
7380
7392
|
end
|
7381
7393
|
if r6
|
7382
7394
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
7383
7395
|
r0 = r6
|
7384
7396
|
else
|
7385
|
-
|
7386
|
-
|
7397
|
+
i7, s7 = index, []
|
7398
|
+
r8 = _nt_valid_name_letter
|
7399
|
+
s7 << r8
|
7400
|
+
if r8
|
7401
|
+
r9 = _nt_valid_name_letters
|
7402
|
+
s7 << r9
|
7403
|
+
end
|
7404
|
+
if s7.last
|
7405
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
7406
|
+
r7.extend(LatinWord4)
|
7407
|
+
r7.extend(LatinWord5)
|
7408
|
+
else
|
7409
|
+
@index = i7
|
7410
|
+
r7 = nil
|
7411
|
+
end
|
7412
|
+
if r7
|
7413
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
7414
|
+
r0 = r7
|
7415
|
+
else
|
7416
|
+
@index = i0
|
7417
|
+
r0 = nil
|
7418
|
+
end
|
7387
7419
|
end
|
7388
7420
|
end
|
7389
7421
|
end
|
@@ -7395,13 +7427,13 @@ module ScientificNameClean
|
|
7395
7427
|
|
7396
7428
|
module ValidNameLetters0
|
7397
7429
|
def value
|
7398
|
-
res =
|
7399
|
-
text_value.split(
|
7400
|
-
l =
|
7401
|
-
l =
|
7430
|
+
res = ""
|
7431
|
+
text_value.split("").each do |l|
|
7432
|
+
l = "ae" if l == "æ"
|
7433
|
+
l = "oe" if l == "œ"
|
7402
7434
|
# We normalize ë as well. It is legal in botanical code, but it
|
7403
7435
|
# is beneficial to normalize it for the reconsiliation purposes
|
7404
|
-
l =
|
7436
|
+
l = "e" if l == "ë"
|
7405
7437
|
res << l
|
7406
7438
|
end
|
7407
7439
|
res
|
@@ -7450,9 +7482,9 @@ module ScientificNameClean
|
|
7450
7482
|
module ValidNameLetter0
|
7451
7483
|
def value
|
7452
7484
|
res = text_value
|
7453
|
-
res =
|
7454
|
-
res =
|
7455
|
-
res =
|
7485
|
+
res = "ae" if res == "æ"
|
7486
|
+
res = "oe" if res == "œ"
|
7487
|
+
res = "e" if res == "ë"
|
7456
7488
|
res
|
7457
7489
|
end
|
7458
7490
|
end
|
@@ -7484,13 +7516,13 @@ module ScientificNameClean
|
|
7484
7516
|
|
7485
7517
|
module CapDigraph0
|
7486
7518
|
def value
|
7487
|
-
|
7519
|
+
"Ae"
|
7488
7520
|
end
|
7489
7521
|
end
|
7490
7522
|
|
7491
7523
|
module CapDigraph1
|
7492
7524
|
def value
|
7493
|
-
|
7525
|
+
"Oe"
|
7494
7526
|
end
|
7495
7527
|
end
|
7496
7528
|
|
@@ -7667,7 +7699,7 @@ module ScientificNameClean
|
|
7667
7699
|
end
|
7668
7700
|
|
7669
7701
|
def pos
|
7670
|
-
{interval.begin => [
|
7702
|
+
{interval.begin => ["year", interval.end]}
|
7671
7703
|
end
|
7672
7704
|
|
7673
7705
|
def details
|
@@ -7725,7 +7757,7 @@ module ScientificNameClean
|
|
7725
7757
|
end
|
7726
7758
|
|
7727
7759
|
def pos
|
7728
|
-
{interval.begin => [
|
7760
|
+
{interval.begin => ["year", interval.end]}
|
7729
7761
|
end
|
7730
7762
|
|
7731
7763
|
def details
|