biodiversity19 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -0
- data/CHANGELOG +4 -0
- data/README.md +27 -6
- data/Rakefile +16 -10
- data/VERSION +1 -1
- data/lib/biodiversity/parser/scientific_name_clean.treetop +223 -223
- data/spec/parser/test_data.txt +8 -2
- metadata +3 -3
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
3.0.0 -- removing support for ruby 1.8.7, making biodiversity gem be the same
|
2
|
+
as biodiversity19, deprecating biodiversity19. A few newly discovered bugs
|
3
|
+
are fixed.
|
4
|
+
|
1
5
|
2.1.0 -- added ScientificNameParser.version method
|
2
6
|
|
3
7
|
2.0.0 -- backward incompatible change in parserver, therefore new major number.
|
data/README.md
CHANGED
@@ -8,14 +8,18 @@ Biodiversity
|
|
8
8
|
|
9
9
|
Parses taxonomic scientific name and breaks it into semantic elements.
|
10
10
|
|
11
|
+
*WARNING, IMPORTANT!:*
|
12
|
+
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
13
|
+
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
14
|
+
|
15
|
+
biodiversity19 is now deprecated and will be phased out in a couple of years.
|
16
|
+
You are strongly encouraged to change your dependencies from
|
17
|
+
biodiversity19 to biodiversity
|
18
|
+
|
11
19
|
Installation
|
12
20
|
------------
|
13
21
|
|
14
|
-
|
15
|
-
biodiversity gem for Ruby 1.8.7 is not getting updated anymore
|
16
|
-
|
17
|
-
sudo gem install biodiversity19 #for ruby 1.9.x
|
18
|
-
sudo gem install biodiversity #for ruby 1.8.x
|
22
|
+
sudo gem install biodiversity
|
19
23
|
|
20
24
|
Example usage
|
21
25
|
-------------
|
@@ -25,7 +29,12 @@ Example usage
|
|
25
29
|
You can parse a file with taxonomic names from the command line.
|
26
30
|
File should contain one scientific name per line
|
27
31
|
|
28
|
-
|
32
|
+
nnparse file_with_names
|
33
|
+
|
34
|
+
The results will be put into the parsed.json file in the current directory.
|
35
|
+
To save results into a different file:
|
36
|
+
|
37
|
+
nnparse file_with_names output_file
|
29
38
|
|
30
39
|
### As a socket server
|
31
40
|
|
@@ -112,6 +121,18 @@ You can use it as a library in Ruby, JRuby etc.
|
|
112
121
|
# to get detailed information about elements of the name
|
113
122
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
114
123
|
|
124
|
+
The returned result is not always linear if the name is complex. To get a simple linear
|
125
|
+
representation of the name you can use:
|
126
|
+
|
127
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:position]
|
128
|
+
# returns {0=>["genus", 16], 17=>["species", 26],
|
129
|
+
# 28=>["author_word", 32], 33=>["author_word", 40],
|
130
|
+
# 42=>["author_word", 44], 45=>["author_word", 50],
|
131
|
+
# 53=>["author_word", 58], 59=>["year", 63]}
|
132
|
+
# where the key is the char index of the start of
|
133
|
+
# a word, first element of the value is a semantic meaning
|
134
|
+
# of the word, second element of the value is the character index
|
135
|
+
# of end of the word
|
115
136
|
|
116
137
|
To parse using several CPUs (4 seem to be optimal)
|
117
138
|
|
data/Rakefile
CHANGED
@@ -20,35 +20,41 @@ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
|
|
20
20
|
begin
|
21
21
|
require 'jeweler'
|
22
22
|
Jeweler::Tasks.new do |gem|
|
23
|
-
gem.name =
|
23
|
+
gem.name = 'biodiversity19'
|
24
|
+
#To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
|
24
25
|
gem.summary = 'Parser of scientific names'
|
25
26
|
gem.description = 'Tools for biodiversity informatics'
|
26
|
-
gem.email =
|
27
|
-
gem.homepage =
|
28
|
-
gem.authors = [
|
27
|
+
gem.email = 'dmozzherin@gmail.com'
|
28
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
|
29
|
+
gem.authors = ['Dmitry Mozzherin']
|
29
30
|
gem.has_rdoc = false
|
30
31
|
gem.bindir = 'bin'
|
31
32
|
gem.executables = ['nnparse', 'parserver']
|
32
33
|
gem.add_dependency('treetop')
|
33
34
|
gem.add_dependency('parallel')
|
34
|
-
gem.add_dependency('json') if ruby_version < 19
|
35
|
+
# gem.add_dependency('json') if ruby_version < 19
|
35
36
|
gem.add_development_dependency "rspec"
|
36
|
-
# gem is a Gem::Specification...
|
37
|
+
# gem is a Gem::Specification...
|
38
|
+
# see http://www.rubygems.org/read/chapter/20 for additional settings
|
37
39
|
end
|
38
40
|
rescue LoadError
|
39
|
-
puts
|
41
|
+
puts 'Jeweler (or a dependency) not available. ' +
|
42
|
+
'Install it with: sudo gem install jeweler'
|
40
43
|
end
|
41
44
|
|
42
45
|
task :tt do
|
43
|
-
['scientific_name_clean',
|
46
|
+
['scientific_name_clean',
|
47
|
+
'scientific_name_dirty',
|
48
|
+
'scientific_name_canonical'].each do |f|
|
44
49
|
file = "#{dir}/lib/biodiversity/parser/#{f}"
|
45
50
|
FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
|
46
51
|
system("tt #{file}.treetop")
|
47
52
|
rf = "#{file}.rb"
|
48
|
-
rfn = open(rf +
|
53
|
+
rfn = open(rf + '.tmp', 'w')
|
49
54
|
skip_head = false
|
50
55
|
f = open(rf)
|
51
|
-
#getting around a bug in treetop which prevents setting
|
56
|
+
# getting around a bug in treetop which prevents setting
|
57
|
+
# UTF-8 encoding in ruby19
|
52
58
|
f.each_with_index do |l, i|
|
53
59
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
54
60
|
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
@@ -8,19 +8,19 @@ grammar ScientificNameClean
|
|
8
8
|
def value
|
9
9
|
a.value.gsub(/\s{2,}/, ' ').strip
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def canonical
|
13
13
|
a.canonical.gsub(/\s{2,}/, ' ').strip
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def pos
|
17
17
|
a.pos
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def hybrid
|
21
21
|
a.hybrid
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def details
|
25
25
|
a.details.class == Array ? a.details : [a.details]
|
26
26
|
end
|
@@ -30,25 +30,25 @@ grammar ScientificNameClean
|
|
30
30
|
end
|
31
31
|
}
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
rule scientific_name_5
|
35
35
|
a:multinomial_name space_hard hybrid_character space_hard b:species {
|
36
36
|
def value
|
37
37
|
a.value + " × " + b.value
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def canonical
|
41
41
|
a.canonical + " × " + b.canonical
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
def pos
|
45
45
|
a.pos.merge(b.pos)
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
def hybrid
|
49
49
|
true
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
def details
|
53
53
|
[a.details, b.details.merge({:genus => a.details[:genus]})]
|
54
54
|
end
|
@@ -58,19 +58,19 @@ grammar ScientificNameClean
|
|
58
58
|
def value
|
59
59
|
a.value + " " + b.apply(c)
|
60
60
|
end
|
61
|
-
|
61
|
+
|
62
62
|
def canonical
|
63
63
|
a.canonical
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
def pos
|
67
67
|
a.pos.merge(c.pos)
|
68
68
|
end
|
69
|
-
|
69
|
+
|
70
70
|
def hybrid
|
71
71
|
a.hybrid
|
72
72
|
end
|
73
|
-
|
73
|
+
|
74
74
|
def details
|
75
75
|
a.details.merge(b.details(c))
|
76
76
|
end
|
@@ -78,25 +78,25 @@ grammar ScientificNameClean
|
|
78
78
|
/
|
79
79
|
scientific_name_4
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
rule scientific_name_4
|
83
83
|
a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
|
84
84
|
def value
|
85
85
|
a.value + " × " + b.value
|
86
86
|
end
|
87
|
-
|
87
|
+
|
88
88
|
def canonical
|
89
89
|
a.canonical + " × " + b.canonical
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
def pos
|
93
93
|
a.pos.merge(b.pos)
|
94
94
|
end
|
95
|
-
|
95
|
+
|
96
96
|
def hybrid
|
97
97
|
true
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
def details
|
101
101
|
[a.details, b.details]
|
102
102
|
end
|
@@ -106,19 +106,19 @@ grammar ScientificNameClean
|
|
106
106
|
def value
|
107
107
|
a.value + " × ?"
|
108
108
|
end
|
109
|
-
|
109
|
+
|
110
110
|
def canonical
|
111
111
|
a.canonical
|
112
112
|
end
|
113
|
-
|
113
|
+
|
114
114
|
def pos
|
115
115
|
a.pos
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
def hybrid
|
119
119
|
true
|
120
120
|
end
|
121
|
-
|
121
|
+
|
122
122
|
def details
|
123
123
|
[a.details, "?"]
|
124
124
|
end
|
@@ -126,25 +126,25 @@ grammar ScientificNameClean
|
|
126
126
|
/
|
127
127
|
scientific_name_3
|
128
128
|
end
|
129
|
-
|
129
|
+
|
130
130
|
rule scientific_name_3
|
131
131
|
a:hybrid_character space b:scientific_name_2 {
|
132
132
|
def value
|
133
133
|
a.value + " " + b.value
|
134
134
|
end
|
135
|
-
|
135
|
+
|
136
136
|
def canonical
|
137
137
|
b.canonical
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
def pos
|
141
141
|
b.pos
|
142
142
|
end
|
143
|
-
|
143
|
+
|
144
144
|
def hybrid
|
145
145
|
true
|
146
146
|
end
|
147
|
-
|
147
|
+
|
148
148
|
def details
|
149
149
|
b.details
|
150
150
|
end
|
@@ -152,25 +152,25 @@ grammar ScientificNameClean
|
|
152
152
|
/
|
153
153
|
scientific_name_2
|
154
154
|
end
|
155
|
-
|
155
|
+
|
156
156
|
rule scientific_name_2
|
157
157
|
a:scientific_name_1 space b:status_part {
|
158
158
|
def value
|
159
159
|
a.value + " " + b.value
|
160
160
|
end
|
161
|
-
|
161
|
+
|
162
162
|
def canonical
|
163
163
|
a.canonical
|
164
164
|
end
|
165
|
-
|
165
|
+
|
166
166
|
def pos
|
167
167
|
a.pos
|
168
168
|
end
|
169
|
-
|
169
|
+
|
170
170
|
def hybrid
|
171
171
|
a.hybrid rescue false
|
172
172
|
end
|
173
|
-
|
173
|
+
|
174
174
|
def details
|
175
175
|
a.details.merge(b.details)
|
176
176
|
end
|
@@ -184,10 +184,10 @@ grammar ScientificNameClean
|
|
184
184
|
/
|
185
185
|
multinomial_name
|
186
186
|
/
|
187
|
-
uninomial_name
|
187
|
+
uninomial_name
|
188
188
|
end
|
189
|
-
|
190
|
-
|
189
|
+
|
190
|
+
|
191
191
|
rule status_part
|
192
192
|
a:status_word space b:status_part {
|
193
193
|
def value
|
@@ -200,7 +200,7 @@ grammar ScientificNameClean
|
|
200
200
|
/
|
201
201
|
status_word
|
202
202
|
end
|
203
|
-
|
203
|
+
|
204
204
|
rule status_word
|
205
205
|
latin_word [\.] {
|
206
206
|
def value
|
@@ -216,7 +216,7 @@ grammar ScientificNameClean
|
|
216
216
|
|
217
217
|
rule unparsed
|
218
218
|
.+ space {
|
219
|
-
|
219
|
+
|
220
220
|
def value
|
221
221
|
''
|
222
222
|
end
|
@@ -238,52 +238,52 @@ grammar ScientificNameClean
|
|
238
238
|
end
|
239
239
|
}
|
240
240
|
end
|
241
|
-
|
241
|
+
|
242
242
|
rule multinomial_name
|
243
243
|
a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
|
244
244
|
def value
|
245
245
|
a.value + " " + b.value + " " + c.value + " " + d.value
|
246
246
|
end
|
247
|
-
|
247
|
+
|
248
248
|
def canonical
|
249
249
|
a.canonical + " " + c.canonical + " " + d.canonical
|
250
250
|
end
|
251
|
-
|
251
|
+
|
252
252
|
def pos
|
253
253
|
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
254
254
|
end
|
255
|
-
|
255
|
+
|
256
256
|
def hybrid
|
257
257
|
c.hybrid rescue false
|
258
258
|
end
|
259
|
-
|
259
|
+
|
260
260
|
def details
|
261
261
|
a.details.merge(b.details).merge(c.details).merge(d.details)
|
262
262
|
end
|
263
263
|
}
|
264
|
-
/
|
264
|
+
/
|
265
265
|
a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
|
266
266
|
def value
|
267
267
|
a.value + " " + b.value + " " + c.value + " " + d.value
|
268
268
|
end
|
269
|
-
|
269
|
+
|
270
270
|
def canonical
|
271
271
|
a.canonical + " " + c.canonical + " " + d.canonical
|
272
272
|
end
|
273
|
-
|
273
|
+
|
274
274
|
def pos
|
275
275
|
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
276
276
|
end
|
277
|
-
|
277
|
+
|
278
278
|
def hybrid
|
279
279
|
c.hybrid rescue false
|
280
280
|
end
|
281
|
-
|
281
|
+
|
282
282
|
def details
|
283
283
|
a.details.merge(b.details).merge(c.details).merge(d.details)
|
284
284
|
end
|
285
285
|
}
|
286
|
-
/
|
286
|
+
/
|
287
287
|
a:genus space b:infragenus space aid:annotation_identification? space c:species {
|
288
288
|
def value
|
289
289
|
if defined? aid.apply
|
@@ -292,7 +292,7 @@ grammar ScientificNameClean
|
|
292
292
|
a.value + " " + b.value + " " + c.value
|
293
293
|
end
|
294
294
|
end
|
295
|
-
|
295
|
+
|
296
296
|
def canonical
|
297
297
|
if defined? aid.apply
|
298
298
|
a.canonical + aid.canonical(c)
|
@@ -300,7 +300,7 @@ grammar ScientificNameClean
|
|
300
300
|
a.canonical + " " + c.canonical
|
301
301
|
end
|
302
302
|
end
|
303
|
-
|
303
|
+
|
304
304
|
def pos
|
305
305
|
if defined? aid.apply
|
306
306
|
a.pos.merge(b.pos).merge(aid.pos(c))
|
@@ -308,11 +308,11 @@ grammar ScientificNameClean
|
|
308
308
|
a.pos.merge(b.pos).merge(c.pos)
|
309
309
|
end
|
310
310
|
end
|
311
|
-
|
311
|
+
|
312
312
|
def hybrid
|
313
313
|
c.hybrid rescue false
|
314
314
|
end
|
315
|
-
|
315
|
+
|
316
316
|
def details
|
317
317
|
if defined? aid.apply
|
318
318
|
a.details.merge(b.details).merge(aid.apply(c))
|
@@ -324,21 +324,21 @@ grammar ScientificNameClean
|
|
324
324
|
/
|
325
325
|
a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
|
326
326
|
def value
|
327
|
-
a.value + " " + b.value + " " + c.value
|
327
|
+
a.value + " " + b.value + " " + c.value
|
328
328
|
end
|
329
329
|
|
330
330
|
def canonical
|
331
331
|
a.canonical + " " + b.canonical + " " + c.canonical
|
332
332
|
end
|
333
|
-
|
333
|
+
|
334
334
|
def pos
|
335
335
|
a.pos.merge(b.pos).merge(c.pos)
|
336
336
|
end
|
337
|
-
|
337
|
+
|
338
338
|
def hybrid
|
339
339
|
b.hybrid rescue false
|
340
340
|
end
|
341
|
-
|
341
|
+
|
342
342
|
def details
|
343
343
|
a.details.merge(b.details).merge(c.details)
|
344
344
|
end
|
@@ -349,7 +349,7 @@ grammar ScientificNameClean
|
|
349
349
|
if defined? aid.apply
|
350
350
|
a.value + aid.apply(b)
|
351
351
|
else
|
352
|
-
a.value + " " + b.value
|
352
|
+
a.value + " " + b.value
|
353
353
|
end
|
354
354
|
end
|
355
355
|
|
@@ -360,7 +360,7 @@ grammar ScientificNameClean
|
|
360
360
|
a.canonical + " " + b.canonical
|
361
361
|
end
|
362
362
|
end
|
363
|
-
|
363
|
+
|
364
364
|
def pos
|
365
365
|
if defined? aid.apply
|
366
366
|
a.pos.merge(aid.pos(b))
|
@@ -368,11 +368,11 @@ grammar ScientificNameClean
|
|
368
368
|
a.pos.merge(b.pos)
|
369
369
|
end
|
370
370
|
end
|
371
|
-
|
371
|
+
|
372
372
|
def hybrid
|
373
373
|
b.hybrid rescue false
|
374
374
|
end
|
375
|
-
|
375
|
+
|
376
376
|
def details
|
377
377
|
if defined? aid.apply
|
378
378
|
a.details.merge(aid.details(b))
|
@@ -390,15 +390,15 @@ grammar ScientificNameClean
|
|
390
390
|
def canonical
|
391
391
|
a.canonical + aid.canonical(b)
|
392
392
|
end
|
393
|
-
|
393
|
+
|
394
394
|
def pos
|
395
395
|
a.pos.merge(aid.pos(b))
|
396
396
|
end
|
397
|
-
|
397
|
+
|
398
398
|
def hybrid
|
399
399
|
false
|
400
400
|
end
|
401
|
-
|
401
|
+
|
402
402
|
def details
|
403
403
|
a.details.merge(aid.details(b))
|
404
404
|
end
|
@@ -408,7 +408,7 @@ grammar ScientificNameClean
|
|
408
408
|
rule multiuninomial_name
|
409
409
|
a:uninomial_name space b:rank_uninomial space c:uninomial_name {
|
410
410
|
|
411
|
-
def value
|
411
|
+
def value
|
412
412
|
a.value + " " + b.value + " " + c.value
|
413
413
|
end
|
414
414
|
|
@@ -429,23 +429,23 @@ grammar ScientificNameClean
|
|
429
429
|
end
|
430
430
|
}
|
431
431
|
end
|
432
|
-
|
432
|
+
|
433
433
|
rule infraspecies_mult
|
434
434
|
a:infraspecies space b:infraspecies_mult {
|
435
435
|
def value
|
436
436
|
a.value + " " + b.value
|
437
437
|
end
|
438
|
-
|
438
|
+
|
439
439
|
def canonical
|
440
440
|
a.canonical + " " + b.canonical
|
441
441
|
end
|
442
|
-
|
442
|
+
|
443
443
|
def pos
|
444
444
|
a.pos.merge(b.pos)
|
445
445
|
end
|
446
|
-
|
446
|
+
|
447
447
|
def details
|
448
|
-
a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
|
448
|
+
a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
|
449
449
|
b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
|
450
450
|
a.details.merge({:infraspecies => a_array + b_array})
|
451
451
|
end
|
@@ -461,42 +461,42 @@ grammar ScientificNameClean
|
|
461
461
|
end
|
462
462
|
}
|
463
463
|
end
|
464
|
-
|
464
|
+
|
465
465
|
rule infraspecies
|
466
466
|
a:infraspecies_string space b:authorship {
|
467
467
|
def value
|
468
468
|
a.value + " " + b.value
|
469
469
|
end
|
470
|
-
|
470
|
+
|
471
471
|
def canonical
|
472
472
|
a.canonical
|
473
473
|
end
|
474
|
-
|
474
|
+
|
475
475
|
def pos
|
476
476
|
a.pos.merge(b.pos)
|
477
477
|
end
|
478
|
-
|
478
|
+
|
479
479
|
def details
|
480
480
|
{:infraspecies => a.details[:infraspecies].merge(b.details)}
|
481
481
|
end
|
482
482
|
}
|
483
483
|
/
|
484
|
-
infraspecies_string
|
484
|
+
infraspecies_string
|
485
485
|
end
|
486
|
-
|
486
|
+
|
487
487
|
rule infraspecies_string
|
488
488
|
sel:rank space a:species_word {
|
489
|
-
def value
|
489
|
+
def value
|
490
490
|
sel.apply(a)
|
491
491
|
end
|
492
492
|
def canonical
|
493
493
|
sel.canonical(a)
|
494
494
|
end
|
495
|
-
|
495
|
+
|
496
496
|
def pos
|
497
497
|
sel.pos(a)
|
498
498
|
end
|
499
|
-
|
499
|
+
|
500
500
|
def details
|
501
501
|
sel.details(a)
|
502
502
|
end
|
@@ -506,18 +506,18 @@ grammar ScientificNameClean
|
|
506
506
|
def value
|
507
507
|
aid.apply(a)
|
508
508
|
end
|
509
|
-
|
509
|
+
|
510
510
|
def canonical
|
511
511
|
aid.canonical(a)
|
512
512
|
end
|
513
|
-
|
513
|
+
|
514
514
|
def pos
|
515
515
|
def a.pos
|
516
516
|
{interval.begin => ['infraspecies', a.interval.end]}
|
517
517
|
end
|
518
518
|
aid.pos(a)
|
519
519
|
end
|
520
|
-
|
520
|
+
|
521
521
|
def details
|
522
522
|
def a.details
|
523
523
|
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
@@ -530,21 +530,21 @@ grammar ScientificNameClean
|
|
530
530
|
def value
|
531
531
|
a.value
|
532
532
|
end
|
533
|
-
|
533
|
+
|
534
534
|
def canonical
|
535
535
|
value
|
536
536
|
end
|
537
|
-
|
537
|
+
|
538
538
|
def pos
|
539
539
|
{interval.begin => ['infraspecies', interval.end]}
|
540
540
|
end
|
541
|
-
|
541
|
+
|
542
542
|
def details
|
543
543
|
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
544
544
|
end
|
545
545
|
}
|
546
546
|
end
|
547
|
-
|
547
|
+
|
548
548
|
rule taxon_concept_rank
|
549
549
|
("sec."/"sensu.") {
|
550
550
|
def value
|
@@ -555,7 +555,7 @@ grammar ScientificNameClean
|
|
555
555
|
end
|
556
556
|
def details(a = nil)
|
557
557
|
{:taxon_concept => a.details}
|
558
|
-
end
|
558
|
+
end
|
559
559
|
}
|
560
560
|
end
|
561
561
|
|
@@ -573,12 +573,12 @@ grammar ScientificNameClean
|
|
573
573
|
def canonical(a)
|
574
574
|
" " + a.value
|
575
575
|
end
|
576
|
-
|
576
|
+
|
577
577
|
def pos(a)
|
578
|
-
interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
|
578
|
+
interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
|
579
579
|
{interval.begin => ['infraspecific_type', interval_end], a.interval.begin => ['infraspecies', a.interval.end]}
|
580
580
|
end
|
581
|
-
|
581
|
+
|
582
582
|
def details(a = nil)
|
583
583
|
{:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
|
584
584
|
end
|
@@ -594,31 +594,31 @@ grammar ScientificNameClean
|
|
594
594
|
def pos(uni)
|
595
595
|
{interval.begin => ['rank_uninomial', interval.end], uni.interval.begin => ['uninomial', uni.interval.end]}
|
596
596
|
end
|
597
|
-
|
597
|
+
|
598
598
|
def details(uni)
|
599
599
|
{:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
|
600
600
|
end
|
601
601
|
}
|
602
602
|
end
|
603
|
-
|
603
|
+
|
604
604
|
rule species
|
605
605
|
a:species_string space b:authorship {
|
606
606
|
def value
|
607
607
|
a.value + " " + b.value
|
608
608
|
end
|
609
|
-
|
609
|
+
|
610
610
|
def canonical
|
611
611
|
a.canonical
|
612
612
|
end
|
613
|
-
|
613
|
+
|
614
614
|
def hybrid
|
615
615
|
a.hybrid rescue false
|
616
616
|
end
|
617
|
-
|
617
|
+
|
618
618
|
def pos
|
619
619
|
a.pos.merge(b.pos)
|
620
620
|
end
|
621
|
-
|
621
|
+
|
622
622
|
def details
|
623
623
|
{:species => a.details[:species].merge(b.details)}
|
624
624
|
end
|
@@ -626,21 +626,21 @@ grammar ScientificNameClean
|
|
626
626
|
/
|
627
627
|
species_string
|
628
628
|
end
|
629
|
-
|
629
|
+
|
630
630
|
rule species_string
|
631
631
|
species_word {
|
632
632
|
def canonical
|
633
633
|
value
|
634
634
|
end
|
635
|
-
|
635
|
+
|
636
636
|
def pos
|
637
637
|
{interval.begin => ['species', interval.end]}
|
638
638
|
end
|
639
|
-
|
639
|
+
|
640
640
|
def hybrid
|
641
641
|
false
|
642
642
|
end
|
643
|
-
|
643
|
+
|
644
644
|
def details
|
645
645
|
{:species => {:string => value}}
|
646
646
|
end
|
@@ -648,41 +648,41 @@ grammar ScientificNameClean
|
|
648
648
|
/
|
649
649
|
species_word_hybrid
|
650
650
|
end
|
651
|
-
|
651
|
+
|
652
652
|
rule infragenus
|
653
653
|
left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
|
654
654
|
def value
|
655
655
|
"(" + a.value + ")"
|
656
656
|
end
|
657
|
-
|
657
|
+
|
658
658
|
def canonical
|
659
659
|
a.value
|
660
660
|
end
|
661
|
-
|
661
|
+
|
662
662
|
def pos
|
663
663
|
{a.interval.begin => ['infragenus', a.interval.end]}
|
664
664
|
end
|
665
|
-
|
665
|
+
|
666
666
|
def details
|
667
667
|
{:infragenus => {:string => a.value}}
|
668
668
|
end
|
669
669
|
}
|
670
670
|
end
|
671
|
-
|
671
|
+
|
672
672
|
rule genus
|
673
673
|
a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
|
674
674
|
def value
|
675
675
|
a.value
|
676
676
|
end
|
677
|
-
|
677
|
+
|
678
678
|
def pos
|
679
679
|
{a.interval.begin => ['genus', a.interval.end]}
|
680
680
|
end
|
681
|
-
|
681
|
+
|
682
682
|
def canonical
|
683
683
|
a.value
|
684
684
|
end
|
685
|
-
|
685
|
+
|
686
686
|
def details
|
687
687
|
{:genus => {:string => a.value}}
|
688
688
|
end
|
@@ -706,27 +706,27 @@ grammar ScientificNameClean
|
|
706
706
|
def details
|
707
707
|
{:abbreviated_genus => {:string => value}}
|
708
708
|
end
|
709
|
-
}
|
709
|
+
}
|
710
710
|
end
|
711
|
-
|
711
|
+
|
712
712
|
rule uninomial_name
|
713
713
|
a:uninomial_string space b:infragenus space c:simple_authorship {
|
714
714
|
def value
|
715
715
|
a.value + " " + b.value + " " + c.value
|
716
716
|
end
|
717
|
-
|
717
|
+
|
718
718
|
def canonical
|
719
719
|
a.canonical
|
720
720
|
end
|
721
|
-
|
721
|
+
|
722
722
|
def pos
|
723
723
|
a.pos.merge(b.pos).merge(c.pos)
|
724
724
|
end
|
725
|
-
|
725
|
+
|
726
726
|
def hybrid
|
727
727
|
false
|
728
728
|
end
|
729
|
-
|
729
|
+
|
730
730
|
def details
|
731
731
|
{:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
|
732
732
|
end
|
@@ -736,19 +736,19 @@ grammar ScientificNameClean
|
|
736
736
|
def value
|
737
737
|
a.value + " " + b.value
|
738
738
|
end
|
739
|
-
|
739
|
+
|
740
740
|
def canonical
|
741
741
|
a.canonical
|
742
742
|
end
|
743
|
-
|
743
|
+
|
744
744
|
def pos
|
745
745
|
a.pos.merge(b.pos)
|
746
746
|
end
|
747
|
-
|
747
|
+
|
748
748
|
def hybrid
|
749
749
|
false
|
750
750
|
end
|
751
|
-
|
751
|
+
|
752
752
|
def details
|
753
753
|
{:uninomial => a.details[:uninomial].merge(b.details)}
|
754
754
|
end
|
@@ -758,19 +758,19 @@ grammar ScientificNameClean
|
|
758
758
|
def value
|
759
759
|
a.value + " " + b.value
|
760
760
|
end
|
761
|
-
|
761
|
+
|
762
762
|
def canonical
|
763
763
|
a.canonical
|
764
764
|
end
|
765
|
-
|
765
|
+
|
766
766
|
def pos
|
767
767
|
a.pos.merge(b.pos)
|
768
768
|
end
|
769
|
-
|
769
|
+
|
770
770
|
def hybrid
|
771
771
|
false
|
772
772
|
end
|
773
|
-
|
773
|
+
|
774
774
|
def details
|
775
775
|
{:uninomial => a.details[:uninomial].merge(b.details)}
|
776
776
|
end
|
@@ -784,31 +784,31 @@ grammar ScientificNameClean
|
|
784
784
|
def canonical
|
785
785
|
value
|
786
786
|
end
|
787
|
-
|
787
|
+
|
788
788
|
def pos
|
789
789
|
{interval.begin => ['uninomial', interval.end]}
|
790
790
|
end
|
791
|
-
|
791
|
+
|
792
792
|
def hybrid
|
793
793
|
false
|
794
794
|
end
|
795
|
-
|
796
|
-
def details
|
795
|
+
|
796
|
+
def details
|
797
797
|
{:uninomial => {:string => value}}
|
798
798
|
end
|
799
799
|
}
|
800
800
|
end
|
801
|
-
|
801
|
+
|
802
802
|
rule authorship
|
803
803
|
a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
|
804
804
|
def value
|
805
805
|
a.value + " " + b.value + " " + c.value
|
806
806
|
end
|
807
|
-
|
807
|
+
|
808
808
|
def pos
|
809
809
|
a.pos.merge(b.pos).merge(c.pos)
|
810
810
|
end
|
811
|
-
|
811
|
+
|
812
812
|
def details
|
813
813
|
val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
814
814
|
val[:combinationAuthorTeam].merge!(c.details)
|
@@ -820,11 +820,11 @@ grammar ScientificNameClean
|
|
820
820
|
def value
|
821
821
|
a.value + " " + b.value
|
822
822
|
end
|
823
|
-
|
823
|
+
|
824
824
|
def pos
|
825
825
|
a.pos.merge(b.pos)
|
826
826
|
end
|
827
|
-
|
827
|
+
|
828
828
|
def details
|
829
829
|
{:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
830
830
|
end
|
@@ -836,11 +836,11 @@ grammar ScientificNameClean
|
|
836
836
|
def value
|
837
837
|
a.value + " " + b.value
|
838
838
|
end
|
839
|
-
|
839
|
+
|
840
840
|
def pos
|
841
841
|
a.pos.merge(b.pos)
|
842
842
|
end
|
843
|
-
|
843
|
+
|
844
844
|
def details
|
845
845
|
val = a.details
|
846
846
|
val[:authorship] = text_value.strip
|
@@ -851,21 +851,21 @@ grammar ScientificNameClean
|
|
851
851
|
/
|
852
852
|
simple_authorship
|
853
853
|
end
|
854
|
-
|
855
|
-
|
854
|
+
|
855
|
+
|
856
856
|
rule basionym_authorship_with_parenthesis
|
857
857
|
left_paren space a:authors_names space right_paren space [,]? space b:year {
|
858
858
|
def value
|
859
859
|
"(" + a.value + " " + b.value + ")"
|
860
860
|
end
|
861
|
-
|
861
|
+
|
862
862
|
def pos
|
863
863
|
a.pos.merge(b.pos)
|
864
|
-
end
|
865
|
-
|
864
|
+
end
|
865
|
+
|
866
866
|
def details
|
867
|
-
{ :authorship => text_value,
|
868
|
-
:basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
|
867
|
+
{ :authorship => text_value,
|
868
|
+
:basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
|
869
869
|
}
|
870
870
|
end
|
871
871
|
}
|
@@ -874,11 +874,11 @@ grammar ScientificNameClean
|
|
874
874
|
def value
|
875
875
|
"(" + a.value + " " + b.value + ")"
|
876
876
|
end
|
877
|
-
|
877
|
+
|
878
878
|
def pos
|
879
879
|
a.pos.merge(b.pos)
|
880
880
|
end
|
881
|
-
|
881
|
+
|
882
882
|
def details
|
883
883
|
val = a.details
|
884
884
|
val[:basionymAuthorTeam].merge!(b.details)
|
@@ -891,15 +891,15 @@ grammar ScientificNameClean
|
|
891
891
|
def value
|
892
892
|
"(" + a.value + ")"
|
893
893
|
end
|
894
|
-
|
894
|
+
|
895
895
|
def pos
|
896
896
|
a.pos
|
897
897
|
end
|
898
|
-
|
898
|
+
|
899
899
|
def details
|
900
900
|
val = a.details
|
901
901
|
val[:authorship] = text_value
|
902
|
-
val
|
902
|
+
val
|
903
903
|
end
|
904
904
|
}
|
905
905
|
/
|
@@ -907,32 +907,32 @@ grammar ScientificNameClean
|
|
907
907
|
def value
|
908
908
|
"(?)"
|
909
909
|
end
|
910
|
-
|
910
|
+
|
911
911
|
def pos
|
912
912
|
{a.interval.begin => ['unknown_author', a.interval.end]}
|
913
913
|
end
|
914
|
-
|
914
|
+
|
915
915
|
def details
|
916
916
|
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
|
917
917
|
end
|
918
918
|
}
|
919
919
|
end
|
920
|
-
|
920
|
+
|
921
921
|
rule ex_authorship
|
922
922
|
ex_sep space b:simple_authorship {
|
923
923
|
def value
|
924
924
|
" ex " + b.value
|
925
925
|
end
|
926
|
-
|
926
|
+
|
927
927
|
def pos
|
928
928
|
b.pos
|
929
929
|
end
|
930
|
-
|
930
|
+
|
931
931
|
def details
|
932
932
|
val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
933
933
|
val
|
934
934
|
end
|
935
|
-
}
|
935
|
+
}
|
936
936
|
end
|
937
937
|
|
938
938
|
rule simple_authorship
|
@@ -940,17 +940,17 @@ grammar ScientificNameClean
|
|
940
940
|
def value
|
941
941
|
a.value + " " + b.value
|
942
942
|
end
|
943
|
-
|
943
|
+
|
944
944
|
def pos
|
945
945
|
a.pos.merge(b.pos)
|
946
946
|
end
|
947
|
-
|
947
|
+
|
948
948
|
def details
|
949
949
|
details_with_arg(:basionymAuthorTeam)
|
950
950
|
end
|
951
|
-
|
951
|
+
|
952
952
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
953
|
-
{ :authorship => text_value,
|
953
|
+
{ :authorship => text_value,
|
954
954
|
authorTeamType.to_sym => {
|
955
955
|
:authorTeam => a.text_value.strip
|
956
956
|
}.merge(a.details).merge(b.details)
|
@@ -962,17 +962,17 @@ grammar ScientificNameClean
|
|
962
962
|
def value
|
963
963
|
a.value + " " + b.value
|
964
964
|
end
|
965
|
-
|
965
|
+
|
966
966
|
def pos
|
967
967
|
a.pos.merge(b.pos)
|
968
968
|
end
|
969
|
-
|
969
|
+
|
970
970
|
def details
|
971
971
|
details_with_arg(:basionymAuthorTeam)
|
972
972
|
end
|
973
|
-
|
973
|
+
|
974
974
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
975
|
-
{ :authorship => text_value,
|
975
|
+
{ :authorship => text_value,
|
976
976
|
authorTeamType.to_sym => {
|
977
977
|
:authorTeam => a.text_value.strip
|
978
978
|
}.merge(a.details).merge(b.details)
|
@@ -986,27 +986,27 @@ grammar ScientificNameClean
|
|
986
986
|
details[:basionymAuthorTeam].merge!(super)
|
987
987
|
details
|
988
988
|
end
|
989
|
-
|
989
|
+
|
990
990
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
991
|
-
{ :authorship => text_value,
|
991
|
+
{ :authorship => text_value,
|
992
992
|
authorTeamType.to_sym => {
|
993
993
|
:authorTeam => text_value,
|
994
994
|
}
|
995
|
-
}
|
995
|
+
}
|
996
996
|
end
|
997
997
|
}
|
998
998
|
end
|
999
|
-
|
999
|
+
|
1000
1000
|
rule authors_names
|
1001
1001
|
a:author_name space sep:author_separator space b:authors_names {
|
1002
1002
|
def value
|
1003
1003
|
sep.apply(a,b)
|
1004
1004
|
end
|
1005
|
-
|
1005
|
+
|
1006
1006
|
def pos
|
1007
1007
|
sep.pos(a,b)
|
1008
1008
|
end
|
1009
|
-
|
1009
|
+
|
1010
1010
|
def details
|
1011
1011
|
sep.details(a,b)
|
1012
1012
|
end
|
@@ -1016,28 +1016,28 @@ grammar ScientificNameClean
|
|
1016
1016
|
/
|
1017
1017
|
unknown_auth
|
1018
1018
|
end
|
1019
|
-
|
1020
|
-
|
1019
|
+
|
1020
|
+
|
1021
1021
|
rule unknown_auth
|
1022
1022
|
("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
|
1023
1023
|
def value
|
1024
1024
|
text_value
|
1025
1025
|
end
|
1026
|
-
|
1026
|
+
|
1027
1027
|
def pos
|
1028
1028
|
{interval.begin => ['unknown_author', interval.end]}
|
1029
1029
|
end
|
1030
|
-
|
1030
|
+
|
1031
1031
|
def details
|
1032
1032
|
{:author => ["unknown"]}
|
1033
1033
|
end
|
1034
1034
|
}
|
1035
1035
|
end
|
1036
|
-
|
1036
|
+
|
1037
1037
|
rule ex_sep
|
1038
1038
|
("ex"/"in") &[\s]
|
1039
1039
|
end
|
1040
|
-
|
1040
|
+
|
1041
1041
|
rule author_separator
|
1042
1042
|
("&"/"&"/","/"and"/"et") {
|
1043
1043
|
def apply(a,b)
|
@@ -1045,11 +1045,11 @@ grammar ScientificNameClean
|
|
1045
1045
|
sep = " &" if ["&", "&","and","et"].include? sep
|
1046
1046
|
a.value + sep + " " + b.value
|
1047
1047
|
end
|
1048
|
-
|
1048
|
+
|
1049
1049
|
def pos(a,b)
|
1050
1050
|
a.pos.merge(b.pos)
|
1051
1051
|
end
|
1052
|
-
|
1052
|
+
|
1053
1053
|
def details(a,b)
|
1054
1054
|
{:author => a.details[:author] + b.details[:author]}
|
1055
1055
|
end
|
@@ -1061,8 +1061,8 @@ grammar ScientificNameClean
|
|
1061
1061
|
def value
|
1062
1062
|
a.value + ' ' + b.value
|
1063
1063
|
end
|
1064
|
-
|
1065
|
-
def pos
|
1064
|
+
|
1065
|
+
def pos
|
1066
1066
|
a.pos.merge(b.pos)
|
1067
1067
|
end
|
1068
1068
|
|
@@ -1073,17 +1073,17 @@ grammar ScientificNameClean
|
|
1073
1073
|
/
|
1074
1074
|
author_name_without_postfix
|
1075
1075
|
end
|
1076
|
-
|
1076
|
+
|
1077
1077
|
rule author_name_without_postfix
|
1078
1078
|
space a:author_prefix_word space b:author_name {
|
1079
1079
|
def value
|
1080
1080
|
a.value + " " + b.value
|
1081
1081
|
end
|
1082
|
-
|
1082
|
+
|
1083
1083
|
def pos
|
1084
1084
|
a.pos.merge(b.pos)
|
1085
1085
|
end
|
1086
|
-
|
1086
|
+
|
1087
1087
|
def details
|
1088
1088
|
{:author => [value]}
|
1089
1089
|
end
|
@@ -1093,11 +1093,11 @@ grammar ScientificNameClean
|
|
1093
1093
|
def value
|
1094
1094
|
a.value + " " + b.value
|
1095
1095
|
end
|
1096
|
-
|
1096
|
+
|
1097
1097
|
def pos
|
1098
1098
|
a.pos.merge(b.pos)
|
1099
1099
|
end
|
1100
|
-
|
1100
|
+
|
1101
1101
|
def details
|
1102
1102
|
{:author => [value]}
|
1103
1103
|
end
|
@@ -1105,17 +1105,17 @@ grammar ScientificNameClean
|
|
1105
1105
|
/
|
1106
1106
|
author_word
|
1107
1107
|
end
|
1108
|
-
|
1108
|
+
|
1109
1109
|
rule author_word
|
1110
1110
|
"A S. Xu" {
|
1111
1111
|
def value
|
1112
1112
|
text_value.strip
|
1113
1113
|
end
|
1114
|
-
|
1114
|
+
|
1115
1115
|
def pos
|
1116
1116
|
{interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
|
1117
1117
|
end
|
1118
|
-
|
1118
|
+
|
1119
1119
|
def details
|
1120
1120
|
{:author => [value]}
|
1121
1121
|
end
|
@@ -1125,28 +1125,28 @@ grammar ScientificNameClean
|
|
1125
1125
|
def value
|
1126
1126
|
text_value.strip
|
1127
1127
|
end
|
1128
|
-
|
1128
|
+
|
1129
1129
|
def pos
|
1130
1130
|
#cheating because there are several words in some of them
|
1131
1131
|
{interval.begin => ['author_word', interval.end]}
|
1132
1132
|
end
|
1133
|
-
|
1133
|
+
|
1134
1134
|
def details
|
1135
1135
|
{:author => [value]}
|
1136
1136
|
end
|
1137
1137
|
}
|
1138
|
-
/
|
1138
|
+
/
|
1139
1139
|
("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
|
1140
1140
|
def value
|
1141
1141
|
text_value.gsub(/([\p{Lu}]{3,})/) do |match|
|
1142
1142
|
UnicodeUtils.titlecase(match)
|
1143
1143
|
end
|
1144
1144
|
end
|
1145
|
-
|
1145
|
+
|
1146
1146
|
def pos
|
1147
1147
|
{interval.begin => ['author_word', interval.end]}
|
1148
1148
|
end
|
1149
|
-
|
1149
|
+
|
1150
1150
|
def details
|
1151
1151
|
{:author => [value]}
|
1152
1152
|
end
|
@@ -1156,11 +1156,11 @@ grammar ScientificNameClean
|
|
1156
1156
|
def value
|
1157
1157
|
text_value
|
1158
1158
|
end
|
1159
|
-
|
1159
|
+
|
1160
1160
|
def pos
|
1161
1161
|
{interval.begin => ['author_word', interval.end]}
|
1162
1162
|
end
|
1163
|
-
|
1163
|
+
|
1164
1164
|
def details
|
1165
1165
|
{:author => [value]}
|
1166
1166
|
end
|
@@ -1168,13 +1168,13 @@ grammar ScientificNameClean
|
|
1168
1168
|
/
|
1169
1169
|
author_prefix_word
|
1170
1170
|
end
|
1171
|
-
|
1171
|
+
|
1172
1172
|
rule author_prefix_word
|
1173
1173
|
space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
|
1174
1174
|
def value
|
1175
1175
|
text_value
|
1176
1176
|
end
|
1177
|
-
|
1177
|
+
|
1178
1178
|
def pos
|
1179
1179
|
#cheating because there are several words in some of them
|
1180
1180
|
{interval.begin => ['author_word', interval.end]}
|
@@ -1184,7 +1184,7 @@ grammar ScientificNameClean
|
|
1184
1184
|
|
1185
1185
|
rule author_postfix_word
|
1186
1186
|
("f."/"filius") {
|
1187
|
-
def value
|
1187
|
+
def value
|
1188
1188
|
text_value.strip
|
1189
1189
|
end
|
1190
1190
|
|
@@ -1193,7 +1193,7 @@ grammar ScientificNameClean
|
|
1193
1193
|
end
|
1194
1194
|
}
|
1195
1195
|
end
|
1196
|
-
|
1196
|
+
|
1197
1197
|
rule cap_latin_word_pair
|
1198
1198
|
a:cap_latin_word "-" b:cap_latin_word {
|
1199
1199
|
def value
|
@@ -1201,7 +1201,7 @@ grammar ScientificNameClean
|
|
1201
1201
|
end
|
1202
1202
|
}
|
1203
1203
|
end
|
1204
|
-
|
1204
|
+
|
1205
1205
|
rule cap_latin_word
|
1206
1206
|
a:([A-Z]/cap_digraph) b:latin_word "?" {
|
1207
1207
|
def value
|
@@ -1241,19 +1241,19 @@ grammar ScientificNameClean
|
|
1241
1241
|
def value
|
1242
1242
|
a.value + " " + b.value
|
1243
1243
|
end
|
1244
|
-
|
1244
|
+
|
1245
1245
|
def canonical
|
1246
1246
|
b.value
|
1247
1247
|
end
|
1248
|
-
|
1248
|
+
|
1249
1249
|
def hybrid
|
1250
1250
|
true
|
1251
1251
|
end
|
1252
|
-
|
1252
|
+
|
1253
1253
|
def pos
|
1254
1254
|
{b.interval.begin => ['species', b.interval.end]}
|
1255
1255
|
end
|
1256
|
-
|
1256
|
+
|
1257
1257
|
def details
|
1258
1258
|
{:species => {:string => b.value}}
|
1259
1259
|
end
|
@@ -1263,19 +1263,19 @@ grammar ScientificNameClean
|
|
1263
1263
|
def value
|
1264
1264
|
"× " + b.value
|
1265
1265
|
end
|
1266
|
-
|
1266
|
+
|
1267
1267
|
def canonical
|
1268
1268
|
b.value
|
1269
1269
|
end
|
1270
|
-
|
1270
|
+
|
1271
1271
|
def hybrid
|
1272
1272
|
true
|
1273
1273
|
end
|
1274
|
-
|
1274
|
+
|
1275
1275
|
def pos
|
1276
1276
|
{b.interval.begin => ['species', b.interval.end]}
|
1277
1277
|
end
|
1278
|
-
|
1278
|
+
|
1279
1279
|
def details
|
1280
1280
|
{:species => {:string => b.value}}
|
1281
1281
|
end
|
@@ -1285,19 +1285,19 @@ grammar ScientificNameClean
|
|
1285
1285
|
def value
|
1286
1286
|
"× " + b.value
|
1287
1287
|
end
|
1288
|
-
|
1288
|
+
|
1289
1289
|
def canonical
|
1290
1290
|
b.value
|
1291
1291
|
end
|
1292
|
-
|
1292
|
+
|
1293
1293
|
def hybrid
|
1294
1294
|
true
|
1295
1295
|
end
|
1296
|
-
|
1296
|
+
|
1297
1297
|
def pos
|
1298
1298
|
{b.interval.begin => ['species', b.interval.end]}
|
1299
1299
|
end
|
1300
|
-
|
1300
|
+
|
1301
1301
|
def details
|
1302
1302
|
{:species => {:string => b.value}}
|
1303
1303
|
end
|
@@ -1305,7 +1305,7 @@ grammar ScientificNameClean
|
|
1305
1305
|
end
|
1306
1306
|
|
1307
1307
|
rule annotation_identification
|
1308
|
-
("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"spp."/"spp "/"aff."/"aff "/"monst."/"?") {
|
1308
|
+
("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
|
1309
1309
|
|
1310
1310
|
def value
|
1311
1311
|
text_value.strip
|
@@ -1392,9 +1392,9 @@ grammar ScientificNameClean
|
|
1392
1392
|
text_value.split('').each do |l|
|
1393
1393
|
l = 'ae' if l == 'æ'
|
1394
1394
|
l = 'oe' if l == 'œ'
|
1395
|
-
# We normalize ë as well. It is legal in botanical code, but it
|
1395
|
+
# We normalize ë as well. It is legal in botanical code, but it
|
1396
1396
|
# is beneficial to normalize it for the reconsiliation purposes
|
1397
|
-
l = 'e' if l == 'ë'
|
1397
|
+
l = 'e' if l == 'ë'
|
1398
1398
|
res << l
|
1399
1399
|
end
|
1400
1400
|
res
|
@@ -1408,7 +1408,7 @@ grammar ScientificNameClean
|
|
1408
1408
|
res = text_value
|
1409
1409
|
res = 'ae' if res == 'æ'
|
1410
1410
|
res = 'oe' if res == 'œ'
|
1411
|
-
res = 'e' if res == 'ë'
|
1411
|
+
res = 'e' if res == 'ë'
|
1412
1412
|
res
|
1413
1413
|
end
|
1414
1414
|
}
|
@@ -1426,7 +1426,7 @@ grammar ScientificNameClean
|
|
1426
1426
|
def value
|
1427
1427
|
'Oe'
|
1428
1428
|
end
|
1429
|
-
}
|
1429
|
+
}
|
1430
1430
|
end
|
1431
1431
|
|
1432
1432
|
rule year
|
@@ -1434,14 +1434,14 @@ grammar ScientificNameClean
|
|
1434
1434
|
def value
|
1435
1435
|
a.value
|
1436
1436
|
end
|
1437
|
-
|
1437
|
+
|
1438
1438
|
def pos
|
1439
1439
|
a.pos
|
1440
1440
|
end
|
1441
|
-
|
1441
|
+
|
1442
1442
|
def details
|
1443
1443
|
a.details
|
1444
|
-
end
|
1444
|
+
end
|
1445
1445
|
}
|
1446
1446
|
/
|
1447
1447
|
year_number_with_character
|
@@ -1464,31 +1464,31 @@ grammar ScientificNameClean
|
|
1464
1464
|
end
|
1465
1465
|
}
|
1466
1466
|
end
|
1467
|
-
|
1467
|
+
|
1468
1468
|
rule year_number
|
1469
1469
|
[12] [7890] [0-9] ([0-9] [\?]?/"?") {
|
1470
1470
|
def value
|
1471
1471
|
text_value
|
1472
1472
|
end
|
1473
|
-
|
1473
|
+
|
1474
1474
|
def pos
|
1475
1475
|
{interval.begin => ['year', interval.end]}
|
1476
1476
|
end
|
1477
|
-
|
1477
|
+
|
1478
1478
|
def details
|
1479
1479
|
{:year => value}
|
1480
1480
|
end
|
1481
1481
|
}
|
1482
1482
|
end
|
1483
|
-
|
1483
|
+
|
1484
1484
|
rule left_paren
|
1485
1485
|
"("
|
1486
1486
|
end
|
1487
|
-
|
1487
|
+
|
1488
1488
|
rule right_paren
|
1489
1489
|
")"
|
1490
1490
|
end
|
1491
|
-
|
1491
|
+
|
1492
1492
|
rule hybrid_character
|
1493
1493
|
("x"/"X") {
|
1494
1494
|
def value
|
@@ -1498,7 +1498,7 @@ grammar ScientificNameClean
|
|
1498
1498
|
/
|
1499
1499
|
multiplication_sign
|
1500
1500
|
end
|
1501
|
-
|
1501
|
+
|
1502
1502
|
rule multiplication_sign
|
1503
1503
|
("×"/"*") {
|
1504
1504
|
def value
|
@@ -1506,7 +1506,7 @@ grammar ScientificNameClean
|
|
1506
1506
|
end
|
1507
1507
|
}
|
1508
1508
|
end
|
1509
|
-
|
1509
|
+
|
1510
1510
|
rule space
|
1511
1511
|
[\s]*
|
1512
1512
|
end
|
@@ -1514,5 +1514,5 @@ grammar ScientificNameClean
|
|
1514
1514
|
rule space_hard
|
1515
1515
|
[\s]+
|
1516
1516
|
end
|
1517
|
-
|
1517
|
+
|
1518
1518
|
end
|