biodiversity19 3.1.5 → 3.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -0
- data/.ruby-version +1 -1
- data/CHANGELOG +2 -0
- data/LICENSE +1 -1
- data/README.md +3 -3
- data/Rakefile +14 -14
- data/biodiversity.gemspec +19 -19
- data/lib/biodiversity/parser.rb +1 -1
- data/lib/biodiversity/parser/scientific_name_canonical.rb +13 -13
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +17 -17
- data/lib/biodiversity/parser/scientific_name_clean.rb +163 -131
- data/lib/biodiversity/parser/scientific_name_clean.treetop +62 -56
- data/lib/biodiversity/parser/scientific_name_dirty.rb +36 -36
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +50 -53
- data/lib/biodiversity/version.rb +2 -2
- data/spec/files/test_data.txt +6 -1
- data/spec/parser/scientific_name_canonical_spec.rb +21 -21
- data/spec/parser/scientific_name_clean_spec.rb +750 -500
- data/spec/parser/scientific_name_dirty_spec.rb +91 -90
- data/spec/parser/scientific_name_spec.rb +3 -1
- data/spec/spec_helper.rb +21 -21
- metadata +17 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f636f36cbd937fd4b58cfc0b0fe746c0262d69f9
|
4
|
+
data.tar.gz: cf68c3b5fbf0330a0aed13e14ec91b3b04dc5001
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8db8204fd36157acb65dac7d420614545c17f05f0c9d71295c8c393753a6d03f0ed733c08a2364b3018dc618b98f7b4082ed4a669837677ad3bd157b05ff49b6
|
7
|
+
data.tar.gz: 4d7c2a060d63d70692a1d6831f2ee6b612028d9cd21ae4b936b48494f66d2a3a548e42f3961743de1c47cd119960f63c6478baff3f03f67b1aa6b96dc90e8ac1
|
data/.rubocop.yml
ADDED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.1.
|
1
|
+
2.1.5
|
data/CHANGELOG
CHANGED
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2008-
|
3
|
+
Copyright (c) 2008-2015 Marine Biological Laboratory
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -12,7 +12,7 @@ Parses taxonomic scientific name and breaks it into semantic elements.
|
|
12
12
|
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
13
13
|
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
14
14
|
|
15
|
-
biodiversity19 is now deprecated and will be phased out in
|
15
|
+
biodiversity19 is now deprecated and will be phased out in 2015.
|
16
16
|
You are strongly encouraged to change your dependencies from
|
17
17
|
biodiversity19 to biodiversity
|
18
18
|
|
@@ -184,9 +184,9 @@ It should fix the problem.
|
|
184
184
|
Copyright
|
185
185
|
---------
|
186
186
|
|
187
|
-
Authors: [Dmitry Mozzherin][10]
|
187
|
+
Authors: [Dmitry Mozzherin][10]
|
188
188
|
|
189
|
-
Copyright (c) 2008-
|
189
|
+
Copyright (c) 2008-2015 Marine Biological Laboratory. See LICENSE for
|
190
190
|
further details.
|
191
191
|
|
192
192
|
[1]: https://badge.fury.io/rb/biodiversity.png
|
data/Rakefile
CHANGED
@@ -1,45 +1,45 @@
|
|
1
|
-
require
|
1
|
+
require "bundler"
|
2
2
|
Bundler::GemHelper.install_tasks
|
3
3
|
|
4
4
|
begin
|
5
5
|
Bundler.setup(:default, :development)
|
6
6
|
rescue Bundler::BundlerError => e
|
7
7
|
$stderr.puts e.message
|
8
|
-
$stderr.puts
|
8
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
9
9
|
exit e.status_code
|
10
10
|
end
|
11
11
|
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
12
|
+
require "rspec/core"
|
13
|
+
require "rspec/core/rake_task"
|
14
|
+
require "rake/dsl_definition"
|
15
|
+
require "rake"
|
16
|
+
require "rspec"
|
17
|
+
require "rspec/core/rake_task"
|
18
18
|
|
19
19
|
|
20
20
|
task :default => :spec
|
21
21
|
|
22
22
|
RSpec::Core::RakeTask.new do |t|
|
23
|
-
t.pattern =
|
23
|
+
t.pattern = "spec/**/*spec.rb"
|
24
24
|
end
|
25
25
|
|
26
26
|
task :tt do
|
27
27
|
dir = File.dirname(__FILE__)
|
28
|
-
[
|
29
|
-
|
30
|
-
|
28
|
+
["scientific_name_clean",
|
29
|
+
"scientific_name_dirty",
|
30
|
+
"scientific_name_canonical"].each do |f|
|
31
31
|
file = "#{dir}/lib/biodiversity/parser/#{f}"
|
32
32
|
FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
|
33
33
|
system("tt #{file}.treetop")
|
34
34
|
rf = "#{file}.rb"
|
35
|
-
rfn = open(rf +
|
35
|
+
rfn = open(rf + ".tmp", "w")
|
36
36
|
skip_head = false
|
37
37
|
f = open(rf)
|
38
38
|
# getting around a bug in treetop which prevents setting
|
39
39
|
# UTF-8 encoding in ruby19
|
40
40
|
f.each_with_index do |l, i|
|
41
41
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
42
|
-
if skip_head && (l.strip ==
|
42
|
+
if skip_head && (l.strip == "" || l.match(/^# Autogenerated/))
|
43
43
|
next
|
44
44
|
else
|
45
45
|
skip_head = false
|
data/biodiversity.gemspec
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
$:.push File.expand_path("../lib", __FILE__)
|
2
2
|
|
3
|
-
require
|
3
|
+
require "biodiversity/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
|
-
gem.name =
|
6
|
+
gem.name = "biodiversity19"
|
7
7
|
gem.version = Biodiversity::VERSION
|
8
|
-
gem.homepage =
|
9
|
-
gem.license =
|
10
|
-
gem.summary =
|
11
|
-
gem.description =
|
12
|
-
gem.authors = [
|
13
|
-
gem.email =
|
8
|
+
gem.homepage = "https://github.com/GlobalNamesArchitecture/biodiversity"
|
9
|
+
gem.license = "MIT"
|
10
|
+
gem.summary = "Parser of scientific names"
|
11
|
+
gem.description = "Tools for biodiversity informatics"
|
12
|
+
gem.authors = ["Dmitry Mozzherin"]
|
13
|
+
gem.email = "dmozzherin@gmail.com"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split("\n")
|
16
|
-
gem.executables = [
|
17
|
-
gem.require_paths = [
|
16
|
+
gem.executables = ["nnparse", "parserver"]
|
17
|
+
gem.require_paths = ["lib"]
|
18
18
|
|
19
|
-
gem.add_runtime_dependency
|
20
|
-
gem.add_runtime_dependency
|
21
|
-
gem.add_runtime_dependency
|
19
|
+
gem.add_runtime_dependency "treetop", "~> 1.5"
|
20
|
+
gem.add_runtime_dependency "parallel", "~> 1.4"
|
21
|
+
gem.add_runtime_dependency "unicode_utils", "~> 1.4"
|
22
22
|
|
23
|
-
gem.add_development_dependency
|
24
|
-
gem.add_development_dependency
|
25
|
-
gem.add_development_dependency
|
26
|
-
gem.add_development_dependency
|
27
|
-
gem.add_development_dependency
|
28
|
-
gem.add_development_dependency
|
23
|
+
gem.add_development_dependency "bundler", "~> 1.8"
|
24
|
+
gem.add_development_dependency "rake", "~> 10.4"
|
25
|
+
gem.add_development_dependency "rspec", "~> 3.2"
|
26
|
+
gem.add_development_dependency "webmock", "~> 1.20"
|
27
|
+
gem.add_development_dependency "rr", "~> 1.1"
|
28
|
+
gem.add_development_dependency "rubocop", "~> 0.29"
|
29
29
|
end
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -251,7 +251,7 @@ class ScientificNameParser
|
|
251
251
|
surrogate1 = /BOLD:|[\d]{5,}/i
|
252
252
|
surrogate2 = /\b(spp|sp|nr|cf)[\.]?[\s]*$/i
|
253
253
|
is_surrogate = false
|
254
|
-
|
254
|
+
|
255
255
|
ai_index = pos.index('annotation_identification')
|
256
256
|
if ai_index
|
257
257
|
ai = name[pos[ai_index - 1]..pos[ai_index + 1]]
|
@@ -29,7 +29,7 @@ module ScientificNameCanonical
|
|
29
29
|
def hybrid
|
30
30
|
false
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def details
|
34
34
|
[super]
|
35
35
|
end
|
@@ -103,15 +103,15 @@ module ScientificNameCanonical
|
|
103
103
|
def value
|
104
104
|
a.value + " " + b.value + " " + c.value
|
105
105
|
end
|
106
|
-
|
106
|
+
|
107
107
|
def canonical
|
108
108
|
a.canonical + " " + c.canonical
|
109
109
|
end
|
110
|
-
|
110
|
+
|
111
111
|
def pos
|
112
112
|
a.pos.merge(b.pos).merge(c.pos)
|
113
113
|
end
|
114
|
-
|
114
|
+
|
115
115
|
def details
|
116
116
|
a.details.merge(b.details).merge(c.details)
|
117
117
|
end
|
@@ -139,15 +139,15 @@ module ScientificNameCanonical
|
|
139
139
|
def value
|
140
140
|
a.value + " " + b.value
|
141
141
|
end
|
142
|
-
|
142
|
+
|
143
143
|
def canonical
|
144
144
|
a.canonical
|
145
145
|
end
|
146
|
-
|
146
|
+
|
147
147
|
def pos
|
148
148
|
a.pos.merge(b.pos)
|
149
149
|
end
|
150
|
-
|
150
|
+
|
151
151
|
def details
|
152
152
|
a.details.merge(b.details)
|
153
153
|
end
|
@@ -175,15 +175,15 @@ module ScientificNameCanonical
|
|
175
175
|
def value
|
176
176
|
a.value + " " + b.value
|
177
177
|
end
|
178
|
-
|
178
|
+
|
179
179
|
def canonical
|
180
180
|
a.canonical + " " + b.canonical
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
def pos
|
184
184
|
a.pos.merge(b.pos)
|
185
185
|
end
|
186
|
-
|
186
|
+
|
187
187
|
def details
|
188
188
|
a.details.merge(b.details)
|
189
189
|
end
|
@@ -315,15 +315,15 @@ module ScientificNameCanonical
|
|
315
315
|
def value
|
316
316
|
a.value
|
317
317
|
end
|
318
|
-
|
318
|
+
|
319
319
|
def canonical
|
320
320
|
a.canonical
|
321
321
|
end
|
322
|
-
|
322
|
+
|
323
323
|
def pos
|
324
324
|
a.pos
|
325
325
|
end
|
326
|
-
|
326
|
+
|
327
327
|
def details
|
328
328
|
{:uninomial => a.details[:uninomial]}
|
329
329
|
end
|
@@ -21,7 +21,7 @@ grammar ScientificNameCanonical
|
|
21
21
|
def hybrid
|
22
22
|
false
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def details
|
26
26
|
[super]
|
27
27
|
end
|
@@ -31,22 +31,22 @@ grammar ScientificNameCanonical
|
|
31
31
|
end
|
32
32
|
}
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
35
|
rule multinomial_with_garbage
|
36
|
-
|
36
|
+
|
37
37
|
a:genus space b:infragenus space c:species garbage {
|
38
38
|
def value
|
39
39
|
a.value + " " + b.value + " " + c.value
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
42
|
def canonical
|
43
43
|
a.canonical + " " + c.canonical
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
def pos
|
47
47
|
a.pos.merge(b.pos).merge(c.pos)
|
48
48
|
end
|
49
|
-
|
49
|
+
|
50
50
|
def details
|
51
51
|
a.details.merge(b.details).merge(c.details)
|
52
52
|
end
|
@@ -56,15 +56,15 @@ grammar ScientificNameCanonical
|
|
56
56
|
def value
|
57
57
|
a.value + " " + b.value
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
def canonical
|
61
61
|
a.canonical
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
def pos
|
65
65
|
a.pos.merge(b.pos)
|
66
66
|
end
|
67
|
-
|
67
|
+
|
68
68
|
def details
|
69
69
|
a.details.merge(b.details)
|
70
70
|
end
|
@@ -74,41 +74,41 @@ grammar ScientificNameCanonical
|
|
74
74
|
def value
|
75
75
|
a.value + " " + b.value
|
76
76
|
end
|
77
|
-
|
77
|
+
|
78
78
|
def canonical
|
79
79
|
a.canonical + " " + b.canonical
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
def pos
|
83
83
|
a.pos.merge(b.pos)
|
84
84
|
end
|
85
|
-
|
85
|
+
|
86
86
|
def details
|
87
87
|
a.details.merge(b.details)
|
88
88
|
end
|
89
89
|
}
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
rule uninomial_with_garbage
|
93
93
|
a:uninomial_string b:garbage {
|
94
94
|
def value
|
95
95
|
a.value
|
96
96
|
end
|
97
|
-
|
97
|
+
|
98
98
|
def canonical
|
99
99
|
a.canonical
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
def pos
|
103
103
|
a.pos
|
104
104
|
end
|
105
|
-
|
105
|
+
|
106
106
|
def details
|
107
107
|
{:uninomial => a.details[:uninomial]}
|
108
108
|
end
|
109
109
|
}
|
110
110
|
end
|
111
|
-
|
111
|
+
|
112
112
|
rule garbage
|
113
113
|
space "$$g@rbg3$$"
|
114
114
|
/
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# Autogenerated from a Treetop grammar. Edits may be lost.
|
3
3
|
|
4
4
|
|
5
|
-
require
|
5
|
+
require "unicode_utils"
|
6
6
|
|
7
7
|
module ScientificNameClean
|
8
8
|
include Treetop::Runtime
|
@@ -27,11 +27,11 @@ module ScientificNameClean
|
|
27
27
|
|
28
28
|
module Root1
|
29
29
|
def value
|
30
|
-
a.value.gsub(/\s{2,}/,
|
30
|
+
a.value.gsub(/\s{2,}/, " ").strip
|
31
31
|
end
|
32
32
|
|
33
33
|
def canonical
|
34
|
-
a.canonical.gsub(/\s{2,}/,
|
34
|
+
a.canonical.gsub(/\s{2,}/, " ").strip
|
35
35
|
end
|
36
36
|
|
37
37
|
def pos
|
@@ -797,7 +797,7 @@ module ScientificNameClean
|
|
797
797
|
module Unparsed1
|
798
798
|
|
799
799
|
def value
|
800
|
-
|
800
|
+
""
|
801
801
|
end
|
802
802
|
|
803
803
|
def hybrid
|
@@ -805,11 +805,11 @@ module ScientificNameClean
|
|
805
805
|
end
|
806
806
|
|
807
807
|
def canonical
|
808
|
-
|
808
|
+
""
|
809
809
|
end
|
810
810
|
|
811
811
|
def pos
|
812
|
-
{interval.begin => [
|
812
|
+
{interval.begin => ["unparsed", interval.end]}
|
813
813
|
end
|
814
814
|
|
815
815
|
def details
|
@@ -1837,14 +1837,14 @@ module ScientificNameClean
|
|
1837
1837
|
|
1838
1838
|
def pos
|
1839
1839
|
def a.pos
|
1840
|
-
{interval.begin => [
|
1840
|
+
{interval.begin => ["infraspecies", a.interval.end]}
|
1841
1841
|
end
|
1842
1842
|
aid.pos(a)
|
1843
1843
|
end
|
1844
1844
|
|
1845
1845
|
def details
|
1846
1846
|
def a.details
|
1847
|
-
{:infraspecies => {:string => value, :rank =>
|
1847
|
+
{:infraspecies => {:string => value, :rank => "n/a"}}
|
1848
1848
|
end
|
1849
1849
|
aid.details(a)
|
1850
1850
|
end
|
@@ -1867,11 +1867,11 @@ module ScientificNameClean
|
|
1867
1867
|
end
|
1868
1868
|
|
1869
1869
|
def pos
|
1870
|
-
{interval.begin => [
|
1870
|
+
{interval.begin => ["infraspecies", interval.end]}
|
1871
1871
|
end
|
1872
1872
|
|
1873
1873
|
def details
|
1874
|
-
{:infraspecies => {:string => value, :rank =>
|
1874
|
+
{:infraspecies => {:string => value, :rank => "n/a"}}
|
1875
1875
|
end
|
1876
1876
|
end
|
1877
1877
|
|
@@ -2065,8 +2065,8 @@ module ScientificNameClean
|
|
2065
2065
|
end
|
2066
2066
|
|
2067
2067
|
def pos(a)
|
2068
|
-
interval_end = text_value[-1] ==
|
2069
|
-
{interval.begin => [
|
2068
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
2069
|
+
{interval.begin => ["infraspecific_type", interval_end], a.interval.begin => ["infraspecies", a.interval.end]}
|
2070
2070
|
end
|
2071
2071
|
|
2072
2072
|
def details(a = nil)
|
@@ -2326,11 +2326,11 @@ module ScientificNameClean
|
|
2326
2326
|
r0 = r20
|
2327
2327
|
r0.extend(Rank0)
|
2328
2328
|
else
|
2329
|
-
if (match_len = has_terminal?("forma
|
2329
|
+
if (match_len = has_terminal?("forma.", false, index))
|
2330
2330
|
r21 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2331
2331
|
@index += match_len
|
2332
2332
|
else
|
2333
|
-
terminal_parse_failure("forma
|
2333
|
+
terminal_parse_failure("forma.")
|
2334
2334
|
r21 = nil
|
2335
2335
|
end
|
2336
2336
|
if r21
|
@@ -2338,11 +2338,11 @@ module ScientificNameClean
|
|
2338
2338
|
r0 = r21
|
2339
2339
|
r0.extend(Rank0)
|
2340
2340
|
else
|
2341
|
-
if (match_len = has_terminal?("
|
2341
|
+
if (match_len = has_terminal?("forma ", false, index))
|
2342
2342
|
r22 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2343
2343
|
@index += match_len
|
2344
2344
|
else
|
2345
|
-
terminal_parse_failure("
|
2345
|
+
terminal_parse_failure("forma ")
|
2346
2346
|
r22 = nil
|
2347
2347
|
end
|
2348
2348
|
if r22
|
@@ -2350,11 +2350,11 @@ module ScientificNameClean
|
|
2350
2350
|
r0 = r22
|
2351
2351
|
r0.extend(Rank0)
|
2352
2352
|
else
|
2353
|
-
if (match_len = has_terminal?("fma
|
2353
|
+
if (match_len = has_terminal?("fma.", false, index))
|
2354
2354
|
r23 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2355
2355
|
@index += match_len
|
2356
2356
|
else
|
2357
|
-
terminal_parse_failure("fma
|
2357
|
+
terminal_parse_failure("fma.")
|
2358
2358
|
r23 = nil
|
2359
2359
|
end
|
2360
2360
|
if r23
|
@@ -2362,11 +2362,11 @@ module ScientificNameClean
|
|
2362
2362
|
r0 = r23
|
2363
2363
|
r0.extend(Rank0)
|
2364
2364
|
else
|
2365
|
-
if (match_len = has_terminal?("
|
2365
|
+
if (match_len = has_terminal?("fma ", false, index))
|
2366
2366
|
r24 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2367
2367
|
@index += match_len
|
2368
2368
|
else
|
2369
|
-
terminal_parse_failure("
|
2369
|
+
terminal_parse_failure("fma ")
|
2370
2370
|
r24 = nil
|
2371
2371
|
end
|
2372
2372
|
if r24
|
@@ -2374,11 +2374,11 @@ module ScientificNameClean
|
|
2374
2374
|
r0 = r24
|
2375
2375
|
r0.extend(Rank0)
|
2376
2376
|
else
|
2377
|
-
if (match_len = has_terminal?("form
|
2377
|
+
if (match_len = has_terminal?("form.", false, index))
|
2378
2378
|
r25 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2379
2379
|
@index += match_len
|
2380
2380
|
else
|
2381
|
-
terminal_parse_failure("form
|
2381
|
+
terminal_parse_failure("form.")
|
2382
2382
|
r25 = nil
|
2383
2383
|
end
|
2384
2384
|
if r25
|
@@ -2386,11 +2386,11 @@ module ScientificNameClean
|
|
2386
2386
|
r0 = r25
|
2387
2387
|
r0.extend(Rank0)
|
2388
2388
|
else
|
2389
|
-
if (match_len = has_terminal?("
|
2389
|
+
if (match_len = has_terminal?("form ", false, index))
|
2390
2390
|
r26 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2391
2391
|
@index += match_len
|
2392
2392
|
else
|
2393
|
-
terminal_parse_failure("
|
2393
|
+
terminal_parse_failure("form ")
|
2394
2394
|
r26 = nil
|
2395
2395
|
end
|
2396
2396
|
if r26
|
@@ -2398,11 +2398,11 @@ module ScientificNameClean
|
|
2398
2398
|
r0 = r26
|
2399
2399
|
r0.extend(Rank0)
|
2400
2400
|
else
|
2401
|
-
if (match_len = has_terminal?("fo
|
2401
|
+
if (match_len = has_terminal?("fo.", false, index))
|
2402
2402
|
r27 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2403
2403
|
@index += match_len
|
2404
2404
|
else
|
2405
|
-
terminal_parse_failure("fo
|
2405
|
+
terminal_parse_failure("fo.")
|
2406
2406
|
r27 = nil
|
2407
2407
|
end
|
2408
2408
|
if r27
|
@@ -2410,11 +2410,11 @@ module ScientificNameClean
|
|
2410
2410
|
r0 = r27
|
2411
2411
|
r0.extend(Rank0)
|
2412
2412
|
else
|
2413
|
-
if (match_len = has_terminal?("
|
2413
|
+
if (match_len = has_terminal?("fo ", false, index))
|
2414
2414
|
r28 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2415
2415
|
@index += match_len
|
2416
2416
|
else
|
2417
|
-
terminal_parse_failure("
|
2417
|
+
terminal_parse_failure("fo ")
|
2418
2418
|
r28 = nil
|
2419
2419
|
end
|
2420
2420
|
if r28
|
@@ -2422,11 +2422,11 @@ module ScientificNameClean
|
|
2422
2422
|
r0 = r28
|
2423
2423
|
r0.extend(Rank0)
|
2424
2424
|
else
|
2425
|
-
if (match_len = has_terminal?("
|
2426
|
-
r29 =
|
2425
|
+
if (match_len = has_terminal?("f.", false, index))
|
2426
|
+
r29 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2427
2427
|
@index += match_len
|
2428
2428
|
else
|
2429
|
-
terminal_parse_failure("
|
2429
|
+
terminal_parse_failure("f.")
|
2430
2430
|
r29 = nil
|
2431
2431
|
end
|
2432
2432
|
if r29
|
@@ -2434,11 +2434,11 @@ module ScientificNameClean
|
|
2434
2434
|
r0 = r29
|
2435
2435
|
r0.extend(Rank0)
|
2436
2436
|
else
|
2437
|
-
if (match_len = has_terminal?("
|
2438
|
-
r30 =
|
2437
|
+
if (match_len = has_terminal?("α", false, index))
|
2438
|
+
r30 = true
|
2439
2439
|
@index += match_len
|
2440
2440
|
else
|
2441
|
-
terminal_parse_failure("
|
2441
|
+
terminal_parse_failure("α")
|
2442
2442
|
r30 = nil
|
2443
2443
|
end
|
2444
2444
|
if r30
|
@@ -2446,11 +2446,11 @@ module ScientificNameClean
|
|
2446
2446
|
r0 = r30
|
2447
2447
|
r0.extend(Rank0)
|
2448
2448
|
else
|
2449
|
-
if (match_len = has_terminal?("
|
2450
|
-
r31 =
|
2449
|
+
if (match_len = has_terminal?("ββ", false, index))
|
2450
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2451
2451
|
@index += match_len
|
2452
2452
|
else
|
2453
|
-
terminal_parse_failure("
|
2453
|
+
terminal_parse_failure("ββ")
|
2454
2454
|
r31 = nil
|
2455
2455
|
end
|
2456
2456
|
if r31
|
@@ -2458,11 +2458,11 @@ module ScientificNameClean
|
|
2458
2458
|
r0 = r31
|
2459
2459
|
r0.extend(Rank0)
|
2460
2460
|
else
|
2461
|
-
if (match_len = has_terminal?("
|
2461
|
+
if (match_len = has_terminal?("β", false, index))
|
2462
2462
|
r32 = true
|
2463
2463
|
@index += match_len
|
2464
2464
|
else
|
2465
|
-
terminal_parse_failure("
|
2465
|
+
terminal_parse_failure("β")
|
2466
2466
|
r32 = nil
|
2467
2467
|
end
|
2468
2468
|
if r32
|
@@ -2470,11 +2470,11 @@ module ScientificNameClean
|
|
2470
2470
|
r0 = r32
|
2471
2471
|
r0.extend(Rank0)
|
2472
2472
|
else
|
2473
|
-
if (match_len = has_terminal?("
|
2473
|
+
if (match_len = has_terminal?("γ", false, index))
|
2474
2474
|
r33 = true
|
2475
2475
|
@index += match_len
|
2476
2476
|
else
|
2477
|
-
terminal_parse_failure("
|
2477
|
+
terminal_parse_failure("γ")
|
2478
2478
|
r33 = nil
|
2479
2479
|
end
|
2480
2480
|
if r33
|
@@ -2482,11 +2482,11 @@ module ScientificNameClean
|
|
2482
2482
|
r0 = r33
|
2483
2483
|
r0.extend(Rank0)
|
2484
2484
|
else
|
2485
|
-
if (match_len = has_terminal?("
|
2485
|
+
if (match_len = has_terminal?("δ", false, index))
|
2486
2486
|
r34 = true
|
2487
2487
|
@index += match_len
|
2488
2488
|
else
|
2489
|
-
terminal_parse_failure("
|
2489
|
+
terminal_parse_failure("δ")
|
2490
2490
|
r34 = nil
|
2491
2491
|
end
|
2492
2492
|
if r34
|
@@ -2494,11 +2494,11 @@ module ScientificNameClean
|
|
2494
2494
|
r0 = r34
|
2495
2495
|
r0.extend(Rank0)
|
2496
2496
|
else
|
2497
|
-
if (match_len = has_terminal?("
|
2497
|
+
if (match_len = has_terminal?("ε", false, index))
|
2498
2498
|
r35 = true
|
2499
2499
|
@index += match_len
|
2500
2500
|
else
|
2501
|
-
terminal_parse_failure("
|
2501
|
+
terminal_parse_failure("ε")
|
2502
2502
|
r35 = nil
|
2503
2503
|
end
|
2504
2504
|
if r35
|
@@ -2506,11 +2506,11 @@ module ScientificNameClean
|
|
2506
2506
|
r0 = r35
|
2507
2507
|
r0.extend(Rank0)
|
2508
2508
|
else
|
2509
|
-
if (match_len = has_terminal?("
|
2509
|
+
if (match_len = has_terminal?("φ", false, index))
|
2510
2510
|
r36 = true
|
2511
2511
|
@index += match_len
|
2512
2512
|
else
|
2513
|
-
terminal_parse_failure("
|
2513
|
+
terminal_parse_failure("φ")
|
2514
2514
|
r36 = nil
|
2515
2515
|
end
|
2516
2516
|
if r36
|
@@ -2518,11 +2518,11 @@ module ScientificNameClean
|
|
2518
2518
|
r0 = r36
|
2519
2519
|
r0.extend(Rank0)
|
2520
2520
|
else
|
2521
|
-
if (match_len = has_terminal?("
|
2521
|
+
if (match_len = has_terminal?("θ", false, index))
|
2522
2522
|
r37 = true
|
2523
2523
|
@index += match_len
|
2524
2524
|
else
|
2525
|
-
terminal_parse_failure("
|
2525
|
+
terminal_parse_failure("θ")
|
2526
2526
|
r37 = nil
|
2527
2527
|
end
|
2528
2528
|
if r37
|
@@ -2530,11 +2530,11 @@ module ScientificNameClean
|
|
2530
2530
|
r0 = r37
|
2531
2531
|
r0.extend(Rank0)
|
2532
2532
|
else
|
2533
|
-
if (match_len = has_terminal?("
|
2534
|
-
r38 =
|
2533
|
+
if (match_len = has_terminal?("μ", false, index))
|
2534
|
+
r38 = true
|
2535
2535
|
@index += match_len
|
2536
2536
|
else
|
2537
|
-
terminal_parse_failure("
|
2537
|
+
terminal_parse_failure("μ")
|
2538
2538
|
r38 = nil
|
2539
2539
|
end
|
2540
2540
|
if r38
|
@@ -2542,11 +2542,11 @@ module ScientificNameClean
|
|
2542
2542
|
r0 = r38
|
2543
2543
|
r0.extend(Rank0)
|
2544
2544
|
else
|
2545
|
-
if (match_len = has_terminal?("
|
2545
|
+
if (match_len = has_terminal?("a.", false, index))
|
2546
2546
|
r39 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2547
2547
|
@index += match_len
|
2548
2548
|
else
|
2549
|
-
terminal_parse_failure("
|
2549
|
+
terminal_parse_failure("a.")
|
2550
2550
|
r39 = nil
|
2551
2551
|
end
|
2552
2552
|
if r39
|
@@ -2554,11 +2554,11 @@ module ScientificNameClean
|
|
2554
2554
|
r0 = r39
|
2555
2555
|
r0.extend(Rank0)
|
2556
2556
|
else
|
2557
|
-
if (match_len = has_terminal?("
|
2557
|
+
if (match_len = has_terminal?("b.", false, index))
|
2558
2558
|
r40 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2559
2559
|
@index += match_len
|
2560
2560
|
else
|
2561
|
-
terminal_parse_failure("
|
2561
|
+
terminal_parse_failure("b.")
|
2562
2562
|
r40 = nil
|
2563
2563
|
end
|
2564
2564
|
if r40
|
@@ -2566,11 +2566,11 @@ module ScientificNameClean
|
|
2566
2566
|
r0 = r40
|
2567
2567
|
r0.extend(Rank0)
|
2568
2568
|
else
|
2569
|
-
if (match_len = has_terminal?("
|
2569
|
+
if (match_len = has_terminal?("c.", false, index))
|
2570
2570
|
r41 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2571
2571
|
@index += match_len
|
2572
2572
|
else
|
2573
|
-
terminal_parse_failure("
|
2573
|
+
terminal_parse_failure("c.")
|
2574
2574
|
r41 = nil
|
2575
2575
|
end
|
2576
2576
|
if r41
|
@@ -2578,11 +2578,11 @@ module ScientificNameClean
|
|
2578
2578
|
r0 = r41
|
2579
2579
|
r0.extend(Rank0)
|
2580
2580
|
else
|
2581
|
-
if (match_len = has_terminal?("
|
2581
|
+
if (match_len = has_terminal?("d.", false, index))
|
2582
2582
|
r42 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2583
2583
|
@index += match_len
|
2584
2584
|
else
|
2585
|
-
terminal_parse_failure("
|
2585
|
+
terminal_parse_failure("d.")
|
2586
2586
|
r42 = nil
|
2587
2587
|
end
|
2588
2588
|
if r42
|
@@ -2590,11 +2590,11 @@ module ScientificNameClean
|
|
2590
2590
|
r0 = r42
|
2591
2591
|
r0.extend(Rank0)
|
2592
2592
|
else
|
2593
|
-
if (match_len = has_terminal?("
|
2593
|
+
if (match_len = has_terminal?("e.", false, index))
|
2594
2594
|
r43 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2595
2595
|
@index += match_len
|
2596
2596
|
else
|
2597
|
-
terminal_parse_failure("
|
2597
|
+
terminal_parse_failure("e.")
|
2598
2598
|
r43 = nil
|
2599
2599
|
end
|
2600
2600
|
if r43
|
@@ -2602,11 +2602,11 @@ module ScientificNameClean
|
|
2602
2602
|
r0 = r43
|
2603
2603
|
r0.extend(Rank0)
|
2604
2604
|
else
|
2605
|
-
if (match_len = has_terminal?("
|
2605
|
+
if (match_len = has_terminal?("g.", false, index))
|
2606
2606
|
r44 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2607
2607
|
@index += match_len
|
2608
2608
|
else
|
2609
|
-
terminal_parse_failure("
|
2609
|
+
terminal_parse_failure("g.")
|
2610
2610
|
r44 = nil
|
2611
2611
|
end
|
2612
2612
|
if r44
|
@@ -2614,11 +2614,11 @@ module ScientificNameClean
|
|
2614
2614
|
r0 = r44
|
2615
2615
|
r0.extend(Rank0)
|
2616
2616
|
else
|
2617
|
-
if (match_len = has_terminal?("
|
2617
|
+
if (match_len = has_terminal?("k.", false, index))
|
2618
2618
|
r45 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2619
2619
|
@index += match_len
|
2620
2620
|
else
|
2621
|
-
terminal_parse_failure("
|
2621
|
+
terminal_parse_failure("k.")
|
2622
2622
|
r45 = nil
|
2623
2623
|
end
|
2624
2624
|
if r45
|
@@ -2626,11 +2626,11 @@ module ScientificNameClean
|
|
2626
2626
|
r0 = r45
|
2627
2627
|
r0.extend(Rank0)
|
2628
2628
|
else
|
2629
|
-
if (match_len = has_terminal?("
|
2629
|
+
if (match_len = has_terminal?("****", false, index))
|
2630
2630
|
r46 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2631
2631
|
@index += match_len
|
2632
2632
|
else
|
2633
|
-
terminal_parse_failure("
|
2633
|
+
terminal_parse_failure("****")
|
2634
2634
|
r46 = nil
|
2635
2635
|
end
|
2636
2636
|
if r46
|
@@ -2638,11 +2638,11 @@ module ScientificNameClean
|
|
2638
2638
|
r0 = r46
|
2639
2639
|
r0.extend(Rank0)
|
2640
2640
|
else
|
2641
|
-
if (match_len = has_terminal?("
|
2642
|
-
r47 =
|
2641
|
+
if (match_len = has_terminal?("**", false, index))
|
2642
|
+
r47 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
2643
2643
|
@index += match_len
|
2644
2644
|
else
|
2645
|
-
terminal_parse_failure("
|
2645
|
+
terminal_parse_failure("**")
|
2646
2646
|
r47 = nil
|
2647
2647
|
end
|
2648
2648
|
if r47
|
@@ -2650,8 +2650,21 @@ module ScientificNameClean
|
|
2650
2650
|
r0 = r47
|
2651
2651
|
r0.extend(Rank0)
|
2652
2652
|
else
|
2653
|
-
|
2654
|
-
|
2653
|
+
if (match_len = has_terminal?("*", false, index))
|
2654
|
+
r48 = true
|
2655
|
+
@index += match_len
|
2656
|
+
else
|
2657
|
+
terminal_parse_failure("*")
|
2658
|
+
r48 = nil
|
2659
|
+
end
|
2660
|
+
if r48
|
2661
|
+
r48 = SyntaxNode.new(input, (index-1)...index) if r48 == true
|
2662
|
+
r0 = r48
|
2663
|
+
r0.extend(Rank0)
|
2664
|
+
else
|
2665
|
+
@index = i0
|
2666
|
+
r0 = nil
|
2667
|
+
end
|
2655
2668
|
end
|
2656
2669
|
end
|
2657
2670
|
end
|
@@ -2711,7 +2724,7 @@ module ScientificNameClean
|
|
2711
2724
|
end
|
2712
2725
|
|
2713
2726
|
def pos(uni)
|
2714
|
-
{interval.begin => [
|
2727
|
+
{interval.begin => ["rank_uninomial", interval.end], uni.interval.begin => ["uninomial", uni.interval.end]}
|
2715
2728
|
end
|
2716
2729
|
|
2717
2730
|
def details(uni)
|
@@ -3065,7 +3078,7 @@ module ScientificNameClean
|
|
3065
3078
|
end
|
3066
3079
|
|
3067
3080
|
def pos
|
3068
|
-
{interval.begin => [
|
3081
|
+
{interval.begin => ["species", interval.end]}
|
3069
3082
|
end
|
3070
3083
|
|
3071
3084
|
def hybrid
|
@@ -3142,7 +3155,7 @@ module ScientificNameClean
|
|
3142
3155
|
end
|
3143
3156
|
|
3144
3157
|
def pos
|
3145
|
-
{a.interval.begin => [
|
3158
|
+
{a.interval.begin => ["infragenus", a.interval.end]}
|
3146
3159
|
end
|
3147
3160
|
|
3148
3161
|
def details
|
@@ -3239,7 +3252,7 @@ module ScientificNameClean
|
|
3239
3252
|
end
|
3240
3253
|
|
3241
3254
|
def pos
|
3242
|
-
{a.interval.begin => [
|
3255
|
+
{a.interval.begin => ["genus", a.interval.end]}
|
3243
3256
|
end
|
3244
3257
|
|
3245
3258
|
def canonical
|
@@ -3655,7 +3668,7 @@ module ScientificNameClean
|
|
3655
3668
|
end
|
3656
3669
|
|
3657
3670
|
def pos
|
3658
|
-
{interval.begin => [
|
3671
|
+
{interval.begin => ["uninomial", interval.end]}
|
3659
3672
|
end
|
3660
3673
|
|
3661
3674
|
def hybrid
|
@@ -4103,11 +4116,11 @@ module ScientificNameClean
|
|
4103
4116
|
end
|
4104
4117
|
|
4105
4118
|
def pos
|
4106
|
-
{a.interval.begin => [
|
4119
|
+
{a.interval.begin => ["unknown_author", a.interval.end]}
|
4107
4120
|
end
|
4108
4121
|
|
4109
4122
|
def details
|
4110
|
-
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => [
|
4123
|
+
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ["?"]}}
|
4111
4124
|
end
|
4112
4125
|
end
|
4113
4126
|
|
@@ -4435,7 +4448,7 @@ module ScientificNameClean
|
|
4435
4448
|
details_with_arg(:basionymAuthorTeam)
|
4436
4449
|
end
|
4437
4450
|
|
4438
|
-
def details_with_arg(authorTeamType =
|
4451
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4439
4452
|
{ :authorship => text_value,
|
4440
4453
|
authorTeamType.to_sym => {
|
4441
4454
|
:authorTeam => a.text_value.strip
|
@@ -4475,7 +4488,7 @@ module ScientificNameClean
|
|
4475
4488
|
details_with_arg(:basionymAuthorTeam)
|
4476
4489
|
end
|
4477
4490
|
|
4478
|
-
def details_with_arg(authorTeamType =
|
4491
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4479
4492
|
{ :authorship => text_value,
|
4480
4493
|
authorTeamType.to_sym => {
|
4481
4494
|
:authorTeam => a.text_value.strip
|
@@ -4491,7 +4504,7 @@ module ScientificNameClean
|
|
4491
4504
|
details
|
4492
4505
|
end
|
4493
4506
|
|
4494
|
-
def details_with_arg(authorTeamType =
|
4507
|
+
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
4495
4508
|
{ :authorship => text_value,
|
4496
4509
|
authorTeamType.to_sym => {
|
4497
4510
|
:authorTeam => text_value,
|
@@ -4791,7 +4804,7 @@ module ScientificNameClean
|
|
4791
4804
|
end
|
4792
4805
|
|
4793
4806
|
def pos
|
4794
|
-
{interval.begin => [
|
4807
|
+
{interval.begin => ["unknown_author", interval.end]}
|
4795
4808
|
end
|
4796
4809
|
|
4797
4810
|
def details
|
@@ -5135,7 +5148,7 @@ module ScientificNameClean
|
|
5135
5148
|
|
5136
5149
|
module AuthorName1
|
5137
5150
|
def value
|
5138
|
-
a.value +
|
5151
|
+
a.value + " " + b.value
|
5139
5152
|
end
|
5140
5153
|
|
5141
5154
|
def pos
|
@@ -5360,7 +5373,7 @@ module ScientificNameClean
|
|
5360
5373
|
end
|
5361
5374
|
|
5362
5375
|
def pos
|
5363
|
-
{interval.begin => [
|
5376
|
+
{interval.begin => ["author_word", 1], (interval.begin + 2) => ["author_word", 2], (interval.begin + 5) => ["author_word", 2]}
|
5364
5377
|
end
|
5365
5378
|
|
5366
5379
|
def details
|
@@ -5375,7 +5388,7 @@ module ScientificNameClean
|
|
5375
5388
|
|
5376
5389
|
def pos
|
5377
5390
|
#cheating because there are several words in some of them
|
5378
|
-
{interval.begin => [
|
5391
|
+
{interval.begin => ["author_word", interval.end]}
|
5379
5392
|
end
|
5380
5393
|
|
5381
5394
|
def details
|
@@ -5394,7 +5407,7 @@ module ScientificNameClean
|
|
5394
5407
|
end
|
5395
5408
|
|
5396
5409
|
def pos
|
5397
|
-
{interval.begin => [
|
5410
|
+
{interval.begin => ["author_word", interval.end]}
|
5398
5411
|
end
|
5399
5412
|
|
5400
5413
|
def details
|
@@ -5411,7 +5424,7 @@ module ScientificNameClean
|
|
5411
5424
|
end
|
5412
5425
|
|
5413
5426
|
def pos
|
5414
|
-
{interval.begin => [
|
5427
|
+
{interval.begin => ["author_word", interval.end]}
|
5415
5428
|
end
|
5416
5429
|
|
5417
5430
|
def details
|
@@ -5783,7 +5796,7 @@ module ScientificNameClean
|
|
5783
5796
|
|
5784
5797
|
def pos
|
5785
5798
|
#cheating because there are several words in some of them
|
5786
|
-
{interval.begin => [
|
5799
|
+
{interval.begin => ["author_word", interval.end]}
|
5787
5800
|
end
|
5788
5801
|
end
|
5789
5802
|
|
@@ -6030,7 +6043,7 @@ module ScientificNameClean
|
|
6030
6043
|
end
|
6031
6044
|
|
6032
6045
|
def pos
|
6033
|
-
{interval.begin => [
|
6046
|
+
{interval.begin => ["author_word", interval.end]}
|
6034
6047
|
end
|
6035
6048
|
end
|
6036
6049
|
|
@@ -6183,7 +6196,7 @@ module ScientificNameClean
|
|
6183
6196
|
|
6184
6197
|
module CapLatinWord5
|
6185
6198
|
def value
|
6186
|
-
a.text_value[0..0] +
|
6199
|
+
a.text_value[0..0] + "e" + b.value
|
6187
6200
|
end
|
6188
6201
|
end
|
6189
6202
|
|
@@ -6685,7 +6698,7 @@ module ScientificNameClean
|
|
6685
6698
|
end
|
6686
6699
|
|
6687
6700
|
def pos
|
6688
|
-
{b.interval.begin => [
|
6701
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6689
6702
|
end
|
6690
6703
|
|
6691
6704
|
def details
|
@@ -6721,7 +6734,7 @@ module ScientificNameClean
|
|
6721
6734
|
end
|
6722
6735
|
|
6723
6736
|
def pos
|
6724
|
-
{b.interval.begin => [
|
6737
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6725
6738
|
end
|
6726
6739
|
|
6727
6740
|
def details
|
@@ -6757,7 +6770,7 @@ module ScientificNameClean
|
|
6757
6770
|
end
|
6758
6771
|
|
6759
6772
|
def pos
|
6760
|
-
{b.interval.begin => [
|
6773
|
+
{b.interval.begin => ["species", b.interval.end]}
|
6761
6774
|
end
|
6762
6775
|
|
6763
6776
|
def details
|
@@ -6876,16 +6889,16 @@ module ScientificNameClean
|
|
6876
6889
|
end
|
6877
6890
|
|
6878
6891
|
def apply(sp)
|
6879
|
-
|
6892
|
+
""
|
6880
6893
|
end
|
6881
6894
|
|
6882
6895
|
def canonical(sp)
|
6883
|
-
|
6896
|
+
""
|
6884
6897
|
end
|
6885
6898
|
|
6886
6899
|
def pos(sp)
|
6887
|
-
interval_end = text_value[-1] ==
|
6888
|
-
{interval.begin => [
|
6900
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
6901
|
+
{interval.begin => ["annotation_identification", interval.end]}
|
6889
6902
|
end
|
6890
6903
|
|
6891
6904
|
def details(sp)
|
@@ -6899,16 +6912,16 @@ module ScientificNameClean
|
|
6899
6912
|
end
|
6900
6913
|
|
6901
6914
|
def apply(sp)
|
6902
|
-
|
6915
|
+
" " + value + " " + sp.value
|
6903
6916
|
end
|
6904
6917
|
|
6905
6918
|
def canonical(sp)
|
6906
|
-
|
6919
|
+
" " + sp.canonical
|
6907
6920
|
end
|
6908
6921
|
|
6909
6922
|
def pos(sp)
|
6910
|
-
interval_end = text_value[-1] ==
|
6911
|
-
{interval.begin => [
|
6923
|
+
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
6924
|
+
{interval.begin => ["annotation_identification", interval.end]}.merge(sp.pos)
|
6912
6925
|
end
|
6913
6926
|
|
6914
6927
|
def details(sp)
|
@@ -7295,6 +7308,12 @@ module ScientificNameClean
|
|
7295
7308
|
end
|
7296
7309
|
|
7297
7310
|
module LatinWord3
|
7311
|
+
def value
|
7312
|
+
"oneili"
|
7313
|
+
end
|
7314
|
+
end
|
7315
|
+
|
7316
|
+
module LatinWord4
|
7298
7317
|
def a
|
7299
7318
|
elements[0]
|
7300
7319
|
end
|
@@ -7304,7 +7323,7 @@ module ScientificNameClean
|
|
7304
7323
|
end
|
7305
7324
|
end
|
7306
7325
|
|
7307
|
-
module
|
7326
|
+
module LatinWord5
|
7308
7327
|
def value
|
7309
7328
|
a.value + b.value
|
7310
7329
|
end
|
@@ -7351,39 +7370,52 @@ module ScientificNameClean
|
|
7351
7370
|
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
7352
7371
|
r0 = r1
|
7353
7372
|
else
|
7354
|
-
if (match_len = has_terminal?("o
|
7373
|
+
if (match_len = has_terminal?("o'donelli", false, index))
|
7355
7374
|
r5 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
7356
7375
|
r5.extend(LatinWord2)
|
7357
7376
|
@index += match_len
|
7358
7377
|
else
|
7359
|
-
terminal_parse_failure("o
|
7378
|
+
terminal_parse_failure("o'donelli")
|
7360
7379
|
r5 = nil
|
7361
7380
|
end
|
7362
7381
|
if r5
|
7363
7382
|
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
7364
7383
|
r0 = r5
|
7365
7384
|
else
|
7366
|
-
|
7367
|
-
|
7368
|
-
s6 << r7
|
7369
|
-
if r7
|
7370
|
-
r8 = _nt_valid_name_letters
|
7371
|
-
s6 << r8
|
7372
|
-
end
|
7373
|
-
if s6.last
|
7374
|
-
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
|
7385
|
+
if (match_len = has_terminal?("o'neili", false, index))
|
7386
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
7375
7387
|
r6.extend(LatinWord3)
|
7376
|
-
|
7388
|
+
@index += match_len
|
7377
7389
|
else
|
7378
|
-
|
7390
|
+
terminal_parse_failure("o'neili")
|
7379
7391
|
r6 = nil
|
7380
7392
|
end
|
7381
7393
|
if r6
|
7382
7394
|
r6 = SyntaxNode.new(input, (index-1)...index) if r6 == true
|
7383
7395
|
r0 = r6
|
7384
7396
|
else
|
7385
|
-
|
7386
|
-
|
7397
|
+
i7, s7 = index, []
|
7398
|
+
r8 = _nt_valid_name_letter
|
7399
|
+
s7 << r8
|
7400
|
+
if r8
|
7401
|
+
r9 = _nt_valid_name_letters
|
7402
|
+
s7 << r9
|
7403
|
+
end
|
7404
|
+
if s7.last
|
7405
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
7406
|
+
r7.extend(LatinWord4)
|
7407
|
+
r7.extend(LatinWord5)
|
7408
|
+
else
|
7409
|
+
@index = i7
|
7410
|
+
r7 = nil
|
7411
|
+
end
|
7412
|
+
if r7
|
7413
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
7414
|
+
r0 = r7
|
7415
|
+
else
|
7416
|
+
@index = i0
|
7417
|
+
r0 = nil
|
7418
|
+
end
|
7387
7419
|
end
|
7388
7420
|
end
|
7389
7421
|
end
|
@@ -7395,13 +7427,13 @@ module ScientificNameClean
|
|
7395
7427
|
|
7396
7428
|
module ValidNameLetters0
|
7397
7429
|
def value
|
7398
|
-
res =
|
7399
|
-
text_value.split(
|
7400
|
-
l =
|
7401
|
-
l =
|
7430
|
+
res = ""
|
7431
|
+
text_value.split("").each do |l|
|
7432
|
+
l = "ae" if l == "æ"
|
7433
|
+
l = "oe" if l == "œ"
|
7402
7434
|
# We normalize ë as well. It is legal in botanical code, but it
|
7403
7435
|
# is beneficial to normalize it for the reconsiliation purposes
|
7404
|
-
l =
|
7436
|
+
l = "e" if l == "ë"
|
7405
7437
|
res << l
|
7406
7438
|
end
|
7407
7439
|
res
|
@@ -7450,9 +7482,9 @@ module ScientificNameClean
|
|
7450
7482
|
module ValidNameLetter0
|
7451
7483
|
def value
|
7452
7484
|
res = text_value
|
7453
|
-
res =
|
7454
|
-
res =
|
7455
|
-
res =
|
7485
|
+
res = "ae" if res == "æ"
|
7486
|
+
res = "oe" if res == "œ"
|
7487
|
+
res = "e" if res == "ë"
|
7456
7488
|
res
|
7457
7489
|
end
|
7458
7490
|
end
|
@@ -7484,13 +7516,13 @@ module ScientificNameClean
|
|
7484
7516
|
|
7485
7517
|
module CapDigraph0
|
7486
7518
|
def value
|
7487
|
-
|
7519
|
+
"Ae"
|
7488
7520
|
end
|
7489
7521
|
end
|
7490
7522
|
|
7491
7523
|
module CapDigraph1
|
7492
7524
|
def value
|
7493
|
-
|
7525
|
+
"Oe"
|
7494
7526
|
end
|
7495
7527
|
end
|
7496
7528
|
|
@@ -7667,7 +7699,7 @@ module ScientificNameClean
|
|
7667
7699
|
end
|
7668
7700
|
|
7669
7701
|
def pos
|
7670
|
-
{interval.begin => [
|
7702
|
+
{interval.begin => ["year", interval.end]}
|
7671
7703
|
end
|
7672
7704
|
|
7673
7705
|
def details
|
@@ -7725,7 +7757,7 @@ module ScientificNameClean
|
|
7725
7757
|
end
|
7726
7758
|
|
7727
7759
|
def pos
|
7728
|
-
{interval.begin => [
|
7760
|
+
{interval.begin => ["year", interval.end]}
|
7729
7761
|
end
|
7730
7762
|
|
7731
7763
|
def details
|