biodiversity19 2.1.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -0
- data/CHANGELOG +4 -0
- data/README.md +27 -6
- data/Rakefile +16 -10
- data/VERSION +1 -1
- data/lib/biodiversity/parser/scientific_name_clean.treetop +223 -223
- data/spec/parser/test_data.txt +8 -2
- metadata +3 -3
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
3.0.0 -- removing support for ruby 1.8.7, making biodiversity gem be the same
|
2
|
+
as biodiversity19, deprecating biodiversity19. A few newly discovered bugs
|
3
|
+
are fixed.
|
4
|
+
|
1
5
|
2.1.0 -- added ScientificNameParser.version method
|
2
6
|
|
3
7
|
2.0.0 -- backward incompatible change in parserver, therefore new major number.
|
data/README.md
CHANGED
@@ -8,14 +8,18 @@ Biodiversity
|
|
8
8
|
|
9
9
|
Parses a taxonomic scientific name and breaks it into semantic elements.
|
10
10
|
|
11
|
+
*WARNING, IMPORTANT!:*
|
12
|
+
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
13
|
+
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
14
|
+
|
15
|
+
biodiversity19 is now deprecated and will be phased out in a couple of years.
|
16
|
+
You are strongly encouraged to change your dependencies from
|
17
|
+
biodiversity19 to biodiversity
|
18
|
+
|
11
19
|
Installation
|
12
20
|
------------
|
13
21
|
|
14
|
-
|
15
|
-
biodiversity gem for Ruby 1.8.7 is not getting updated anymore
|
16
|
-
|
17
|
-
sudo gem install biodiversity19 #for ruby 1.9.x
|
18
|
-
sudo gem install biodiversity #for ruby 1.8.x
|
22
|
+
sudo gem install biodiversity
|
19
23
|
|
20
24
|
Example usage
|
21
25
|
-------------
|
@@ -25,7 +29,12 @@ Example usage
|
|
25
29
|
You can parse a file with taxonomic names from the command line.
|
26
30
|
File should contain one scientific name per line
|
27
31
|
|
28
|
-
|
32
|
+
nnparse file_with_names
|
33
|
+
|
34
|
+
The results will be put into the parsed.json file in the current directory.
|
35
|
+
To save results into a different file:
|
36
|
+
|
37
|
+
nnparse file_with_names output_file
|
29
38
|
|
30
39
|
### As a socket server
|
31
40
|
|
@@ -112,6 +121,18 @@ You can use it as a library in Ruby, JRuby etc.
|
|
112
121
|
# to get detailed information about elements of the name
|
113
122
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
114
123
|
|
124
|
+
The returned result is not always linear if the name is complex. To get a simple linear
|
125
|
+
representation of the name you can use:
|
126
|
+
|
127
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:position]
|
128
|
+
# returns {0=>["genus", 16], 17=>["species", 26],
|
129
|
+
# 28=>["author_word", 32], 33=>["author_word", 40],
|
130
|
+
# 42=>["author_word", 44], 45=>["author_word", 50],
|
131
|
+
# 53=>["author_word", 58], 59=>["year", 63]}
|
132
|
+
# where the key is the char index of the start of
|
133
|
+
# a word, first element of the value is a semantic meaning
|
134
|
+
# of the word, second element of the value is the character index
|
135
|
+
# of end of the word
|
115
136
|
|
116
137
|
To parse using several CPUs (4 seem to be optimal)
|
117
138
|
|
data/Rakefile
CHANGED
@@ -20,35 +20,41 @@ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
|
|
20
20
|
begin
|
21
21
|
require 'jeweler'
|
22
22
|
Jeweler::Tasks.new do |gem|
|
23
|
-
gem.name =
|
23
|
+
gem.name = 'biodiversity19'
|
24
|
+
#To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
|
24
25
|
gem.summary = 'Parser of scientific names'
|
25
26
|
gem.description = 'Tools for biodiversity informatics'
|
26
|
-
gem.email =
|
27
|
-
gem.homepage =
|
28
|
-
gem.authors = [
|
27
|
+
gem.email = 'dmozzherin@gmail.com'
|
28
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
|
29
|
+
gem.authors = ['Dmitry Mozzherin']
|
29
30
|
gem.has_rdoc = false
|
30
31
|
gem.bindir = 'bin'
|
31
32
|
gem.executables = ['nnparse', 'parserver']
|
32
33
|
gem.add_dependency('treetop')
|
33
34
|
gem.add_dependency('parallel')
|
34
|
-
gem.add_dependency('json') if ruby_version < 19
|
35
|
+
# gem.add_dependency('json') if ruby_version < 19
|
35
36
|
gem.add_development_dependency "rspec"
|
36
|
-
# gem is a Gem::Specification...
|
37
|
+
# gem is a Gem::Specification...
|
38
|
+
# see http://www.rubygems.org/read/chapter/20 for additional settings
|
37
39
|
end
|
38
40
|
rescue LoadError
|
39
|
-
puts
|
41
|
+
puts 'Jeweler (or a dependency) not available. ' +
|
42
|
+
'Install it with: sudo gem install jeweler'
|
40
43
|
end
|
41
44
|
|
42
45
|
task :tt do
|
43
|
-
['scientific_name_clean',
|
46
|
+
['scientific_name_clean',
|
47
|
+
'scientific_name_dirty',
|
48
|
+
'scientific_name_canonical'].each do |f|
|
44
49
|
file = "#{dir}/lib/biodiversity/parser/#{f}"
|
45
50
|
FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
|
46
51
|
system("tt #{file}.treetop")
|
47
52
|
rf = "#{file}.rb"
|
48
|
-
rfn = open(rf +
|
53
|
+
rfn = open(rf + '.tmp', 'w')
|
49
54
|
skip_head = false
|
50
55
|
f = open(rf)
|
51
|
-
#getting around a bug in treetop which prevents setting
|
56
|
+
# getting around a bug in treetop which prevents setting
|
57
|
+
# UTF-8 encoding in ruby19
|
52
58
|
f.each_with_index do |l, i|
|
53
59
|
skip_head = l.match(/^# Autogenerated/) if i == 0
|
54
60
|
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
@@ -8,19 +8,19 @@ grammar ScientificNameClean
|
|
8
8
|
def value
|
9
9
|
a.value.gsub(/\s{2,}/, ' ').strip
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def canonical
|
13
13
|
a.canonical.gsub(/\s{2,}/, ' ').strip
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def pos
|
17
17
|
a.pos
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def hybrid
|
21
21
|
a.hybrid
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def details
|
25
25
|
a.details.class == Array ? a.details : [a.details]
|
26
26
|
end
|
@@ -30,25 +30,25 @@ grammar ScientificNameClean
|
|
30
30
|
end
|
31
31
|
}
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
rule scientific_name_5
|
35
35
|
a:multinomial_name space_hard hybrid_character space_hard b:species {
|
36
36
|
def value
|
37
37
|
a.value + " × " + b.value
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def canonical
|
41
41
|
a.canonical + " × " + b.canonical
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
def pos
|
45
45
|
a.pos.merge(b.pos)
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
def hybrid
|
49
49
|
true
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
def details
|
53
53
|
[a.details, b.details.merge({:genus => a.details[:genus]})]
|
54
54
|
end
|
@@ -58,19 +58,19 @@ grammar ScientificNameClean
|
|
58
58
|
def value
|
59
59
|
a.value + " " + b.apply(c)
|
60
60
|
end
|
61
|
-
|
61
|
+
|
62
62
|
def canonical
|
63
63
|
a.canonical
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
def pos
|
67
67
|
a.pos.merge(c.pos)
|
68
68
|
end
|
69
|
-
|
69
|
+
|
70
70
|
def hybrid
|
71
71
|
a.hybrid
|
72
72
|
end
|
73
|
-
|
73
|
+
|
74
74
|
def details
|
75
75
|
a.details.merge(b.details(c))
|
76
76
|
end
|
@@ -78,25 +78,25 @@ grammar ScientificNameClean
|
|
78
78
|
/
|
79
79
|
scientific_name_4
|
80
80
|
end
|
81
|
-
|
81
|
+
|
82
82
|
rule scientific_name_4
|
83
83
|
a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
|
84
84
|
def value
|
85
85
|
a.value + " × " + b.value
|
86
86
|
end
|
87
|
-
|
87
|
+
|
88
88
|
def canonical
|
89
89
|
a.canonical + " × " + b.canonical
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
def pos
|
93
93
|
a.pos.merge(b.pos)
|
94
94
|
end
|
95
|
-
|
95
|
+
|
96
96
|
def hybrid
|
97
97
|
true
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
def details
|
101
101
|
[a.details, b.details]
|
102
102
|
end
|
@@ -106,19 +106,19 @@ grammar ScientificNameClean
|
|
106
106
|
def value
|
107
107
|
a.value + " × ?"
|
108
108
|
end
|
109
|
-
|
109
|
+
|
110
110
|
def canonical
|
111
111
|
a.canonical
|
112
112
|
end
|
113
|
-
|
113
|
+
|
114
114
|
def pos
|
115
115
|
a.pos
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
def hybrid
|
119
119
|
true
|
120
120
|
end
|
121
|
-
|
121
|
+
|
122
122
|
def details
|
123
123
|
[a.details, "?"]
|
124
124
|
end
|
@@ -126,25 +126,25 @@ grammar ScientificNameClean
|
|
126
126
|
/
|
127
127
|
scientific_name_3
|
128
128
|
end
|
129
|
-
|
129
|
+
|
130
130
|
rule scientific_name_3
|
131
131
|
a:hybrid_character space b:scientific_name_2 {
|
132
132
|
def value
|
133
133
|
a.value + " " + b.value
|
134
134
|
end
|
135
|
-
|
135
|
+
|
136
136
|
def canonical
|
137
137
|
b.canonical
|
138
138
|
end
|
139
|
-
|
139
|
+
|
140
140
|
def pos
|
141
141
|
b.pos
|
142
142
|
end
|
143
|
-
|
143
|
+
|
144
144
|
def hybrid
|
145
145
|
true
|
146
146
|
end
|
147
|
-
|
147
|
+
|
148
148
|
def details
|
149
149
|
b.details
|
150
150
|
end
|
@@ -152,25 +152,25 @@ grammar ScientificNameClean
|
|
152
152
|
/
|
153
153
|
scientific_name_2
|
154
154
|
end
|
155
|
-
|
155
|
+
|
156
156
|
rule scientific_name_2
|
157
157
|
a:scientific_name_1 space b:status_part {
|
158
158
|
def value
|
159
159
|
a.value + " " + b.value
|
160
160
|
end
|
161
|
-
|
161
|
+
|
162
162
|
def canonical
|
163
163
|
a.canonical
|
164
164
|
end
|
165
|
-
|
165
|
+
|
166
166
|
def pos
|
167
167
|
a.pos
|
168
168
|
end
|
169
|
-
|
169
|
+
|
170
170
|
def hybrid
|
171
171
|
a.hybrid rescue false
|
172
172
|
end
|
173
|
-
|
173
|
+
|
174
174
|
def details
|
175
175
|
a.details.merge(b.details)
|
176
176
|
end
|
@@ -184,10 +184,10 @@ grammar ScientificNameClean
|
|
184
184
|
/
|
185
185
|
multinomial_name
|
186
186
|
/
|
187
|
-
uninomial_name
|
187
|
+
uninomial_name
|
188
188
|
end
|
189
|
-
|
190
|
-
|
189
|
+
|
190
|
+
|
191
191
|
rule status_part
|
192
192
|
a:status_word space b:status_part {
|
193
193
|
def value
|
@@ -200,7 +200,7 @@ grammar ScientificNameClean
|
|
200
200
|
/
|
201
201
|
status_word
|
202
202
|
end
|
203
|
-
|
203
|
+
|
204
204
|
rule status_word
|
205
205
|
latin_word [\.] {
|
206
206
|
def value
|
@@ -216,7 +216,7 @@ grammar ScientificNameClean
|
|
216
216
|
|
217
217
|
rule unparsed
|
218
218
|
.+ space {
|
219
|
-
|
219
|
+
|
220
220
|
def value
|
221
221
|
''
|
222
222
|
end
|
@@ -238,52 +238,52 @@ grammar ScientificNameClean
|
|
238
238
|
end
|
239
239
|
}
|
240
240
|
end
|
241
|
-
|
241
|
+
|
242
242
|
rule multinomial_name
|
243
243
|
a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
|
244
244
|
def value
|
245
245
|
a.value + " " + b.value + " " + c.value + " " + d.value
|
246
246
|
end
|
247
|
-
|
247
|
+
|
248
248
|
def canonical
|
249
249
|
a.canonical + " " + c.canonical + " " + d.canonical
|
250
250
|
end
|
251
|
-
|
251
|
+
|
252
252
|
def pos
|
253
253
|
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
254
254
|
end
|
255
|
-
|
255
|
+
|
256
256
|
def hybrid
|
257
257
|
c.hybrid rescue false
|
258
258
|
end
|
259
|
-
|
259
|
+
|
260
260
|
def details
|
261
261
|
a.details.merge(b.details).merge(c.details).merge(d.details)
|
262
262
|
end
|
263
263
|
}
|
264
|
-
/
|
264
|
+
/
|
265
265
|
a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
|
266
266
|
def value
|
267
267
|
a.value + " " + b.value + " " + c.value + " " + d.value
|
268
268
|
end
|
269
|
-
|
269
|
+
|
270
270
|
def canonical
|
271
271
|
a.canonical + " " + c.canonical + " " + d.canonical
|
272
272
|
end
|
273
|
-
|
273
|
+
|
274
274
|
def pos
|
275
275
|
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
276
276
|
end
|
277
|
-
|
277
|
+
|
278
278
|
def hybrid
|
279
279
|
c.hybrid rescue false
|
280
280
|
end
|
281
|
-
|
281
|
+
|
282
282
|
def details
|
283
283
|
a.details.merge(b.details).merge(c.details).merge(d.details)
|
284
284
|
end
|
285
285
|
}
|
286
|
-
/
|
286
|
+
/
|
287
287
|
a:genus space b:infragenus space aid:annotation_identification? space c:species {
|
288
288
|
def value
|
289
289
|
if defined? aid.apply
|
@@ -292,7 +292,7 @@ grammar ScientificNameClean
|
|
292
292
|
a.value + " " + b.value + " " + c.value
|
293
293
|
end
|
294
294
|
end
|
295
|
-
|
295
|
+
|
296
296
|
def canonical
|
297
297
|
if defined? aid.apply
|
298
298
|
a.canonical + aid.canonical(c)
|
@@ -300,7 +300,7 @@ grammar ScientificNameClean
|
|
300
300
|
a.canonical + " " + c.canonical
|
301
301
|
end
|
302
302
|
end
|
303
|
-
|
303
|
+
|
304
304
|
def pos
|
305
305
|
if defined? aid.apply
|
306
306
|
a.pos.merge(b.pos).merge(aid.pos(c))
|
@@ -308,11 +308,11 @@ grammar ScientificNameClean
|
|
308
308
|
a.pos.merge(b.pos).merge(c.pos)
|
309
309
|
end
|
310
310
|
end
|
311
|
-
|
311
|
+
|
312
312
|
def hybrid
|
313
313
|
c.hybrid rescue false
|
314
314
|
end
|
315
|
-
|
315
|
+
|
316
316
|
def details
|
317
317
|
if defined? aid.apply
|
318
318
|
a.details.merge(b.details).merge(aid.apply(c))
|
@@ -324,21 +324,21 @@ grammar ScientificNameClean
|
|
324
324
|
/
|
325
325
|
a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
|
326
326
|
def value
|
327
|
-
a.value + " " + b.value + " " + c.value
|
327
|
+
a.value + " " + b.value + " " + c.value
|
328
328
|
end
|
329
329
|
|
330
330
|
def canonical
|
331
331
|
a.canonical + " " + b.canonical + " " + c.canonical
|
332
332
|
end
|
333
|
-
|
333
|
+
|
334
334
|
def pos
|
335
335
|
a.pos.merge(b.pos).merge(c.pos)
|
336
336
|
end
|
337
|
-
|
337
|
+
|
338
338
|
def hybrid
|
339
339
|
b.hybrid rescue false
|
340
340
|
end
|
341
|
-
|
341
|
+
|
342
342
|
def details
|
343
343
|
a.details.merge(b.details).merge(c.details)
|
344
344
|
end
|
@@ -349,7 +349,7 @@ grammar ScientificNameClean
|
|
349
349
|
if defined? aid.apply
|
350
350
|
a.value + aid.apply(b)
|
351
351
|
else
|
352
|
-
a.value + " " + b.value
|
352
|
+
a.value + " " + b.value
|
353
353
|
end
|
354
354
|
end
|
355
355
|
|
@@ -360,7 +360,7 @@ grammar ScientificNameClean
|
|
360
360
|
a.canonical + " " + b.canonical
|
361
361
|
end
|
362
362
|
end
|
363
|
-
|
363
|
+
|
364
364
|
def pos
|
365
365
|
if defined? aid.apply
|
366
366
|
a.pos.merge(aid.pos(b))
|
@@ -368,11 +368,11 @@ grammar ScientificNameClean
|
|
368
368
|
a.pos.merge(b.pos)
|
369
369
|
end
|
370
370
|
end
|
371
|
-
|
371
|
+
|
372
372
|
def hybrid
|
373
373
|
b.hybrid rescue false
|
374
374
|
end
|
375
|
-
|
375
|
+
|
376
376
|
def details
|
377
377
|
if defined? aid.apply
|
378
378
|
a.details.merge(aid.details(b))
|
@@ -390,15 +390,15 @@ grammar ScientificNameClean
|
|
390
390
|
def canonical
|
391
391
|
a.canonical + aid.canonical(b)
|
392
392
|
end
|
393
|
-
|
393
|
+
|
394
394
|
def pos
|
395
395
|
a.pos.merge(aid.pos(b))
|
396
396
|
end
|
397
|
-
|
397
|
+
|
398
398
|
def hybrid
|
399
399
|
false
|
400
400
|
end
|
401
|
-
|
401
|
+
|
402
402
|
def details
|
403
403
|
a.details.merge(aid.details(b))
|
404
404
|
end
|
@@ -408,7 +408,7 @@ grammar ScientificNameClean
|
|
408
408
|
rule multiuninomial_name
|
409
409
|
a:uninomial_name space b:rank_uninomial space c:uninomial_name {
|
410
410
|
|
411
|
-
def value
|
411
|
+
def value
|
412
412
|
a.value + " " + b.value + " " + c.value
|
413
413
|
end
|
414
414
|
|
@@ -429,23 +429,23 @@ grammar ScientificNameClean
|
|
429
429
|
end
|
430
430
|
}
|
431
431
|
end
|
432
|
-
|
432
|
+
|
433
433
|
rule infraspecies_mult
|
434
434
|
a:infraspecies space b:infraspecies_mult {
|
435
435
|
def value
|
436
436
|
a.value + " " + b.value
|
437
437
|
end
|
438
|
-
|
438
|
+
|
439
439
|
def canonical
|
440
440
|
a.canonical + " " + b.canonical
|
441
441
|
end
|
442
|
-
|
442
|
+
|
443
443
|
def pos
|
444
444
|
a.pos.merge(b.pos)
|
445
445
|
end
|
446
|
-
|
446
|
+
|
447
447
|
def details
|
448
|
-
a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
|
448
|
+
a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
|
449
449
|
b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
|
450
450
|
a.details.merge({:infraspecies => a_array + b_array})
|
451
451
|
end
|
@@ -461,42 +461,42 @@ grammar ScientificNameClean
|
|
461
461
|
end
|
462
462
|
}
|
463
463
|
end
|
464
|
-
|
464
|
+
|
465
465
|
rule infraspecies
|
466
466
|
a:infraspecies_string space b:authorship {
|
467
467
|
def value
|
468
468
|
a.value + " " + b.value
|
469
469
|
end
|
470
|
-
|
470
|
+
|
471
471
|
def canonical
|
472
472
|
a.canonical
|
473
473
|
end
|
474
|
-
|
474
|
+
|
475
475
|
def pos
|
476
476
|
a.pos.merge(b.pos)
|
477
477
|
end
|
478
|
-
|
478
|
+
|
479
479
|
def details
|
480
480
|
{:infraspecies => a.details[:infraspecies].merge(b.details)}
|
481
481
|
end
|
482
482
|
}
|
483
483
|
/
|
484
|
-
infraspecies_string
|
484
|
+
infraspecies_string
|
485
485
|
end
|
486
|
-
|
486
|
+
|
487
487
|
rule infraspecies_string
|
488
488
|
sel:rank space a:species_word {
|
489
|
-
def value
|
489
|
+
def value
|
490
490
|
sel.apply(a)
|
491
491
|
end
|
492
492
|
def canonical
|
493
493
|
sel.canonical(a)
|
494
494
|
end
|
495
|
-
|
495
|
+
|
496
496
|
def pos
|
497
497
|
sel.pos(a)
|
498
498
|
end
|
499
|
-
|
499
|
+
|
500
500
|
def details
|
501
501
|
sel.details(a)
|
502
502
|
end
|
@@ -506,18 +506,18 @@ grammar ScientificNameClean
|
|
506
506
|
def value
|
507
507
|
aid.apply(a)
|
508
508
|
end
|
509
|
-
|
509
|
+
|
510
510
|
def canonical
|
511
511
|
aid.canonical(a)
|
512
512
|
end
|
513
|
-
|
513
|
+
|
514
514
|
def pos
|
515
515
|
def a.pos
|
516
516
|
{interval.begin => ['infraspecies', a.interval.end]}
|
517
517
|
end
|
518
518
|
aid.pos(a)
|
519
519
|
end
|
520
|
-
|
520
|
+
|
521
521
|
def details
|
522
522
|
def a.details
|
523
523
|
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
@@ -530,21 +530,21 @@ grammar ScientificNameClean
|
|
530
530
|
def value
|
531
531
|
a.value
|
532
532
|
end
|
533
|
-
|
533
|
+
|
534
534
|
def canonical
|
535
535
|
value
|
536
536
|
end
|
537
|
-
|
537
|
+
|
538
538
|
def pos
|
539
539
|
{interval.begin => ['infraspecies', interval.end]}
|
540
540
|
end
|
541
|
-
|
541
|
+
|
542
542
|
def details
|
543
543
|
{:infraspecies => {:string => value, :rank => 'n/a'}}
|
544
544
|
end
|
545
545
|
}
|
546
546
|
end
|
547
|
-
|
547
|
+
|
548
548
|
rule taxon_concept_rank
|
549
549
|
("sec."/"sensu.") {
|
550
550
|
def value
|
@@ -555,7 +555,7 @@ grammar ScientificNameClean
|
|
555
555
|
end
|
556
556
|
def details(a = nil)
|
557
557
|
{:taxon_concept => a.details}
|
558
|
-
end
|
558
|
+
end
|
559
559
|
}
|
560
560
|
end
|
561
561
|
|
@@ -573,12 +573,12 @@ grammar ScientificNameClean
|
|
573
573
|
def canonical(a)
|
574
574
|
" " + a.value
|
575
575
|
end
|
576
|
-
|
576
|
+
|
577
577
|
def pos(a)
|
578
|
-
interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
|
578
|
+
interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
|
579
579
|
{interval.begin => ['infraspecific_type', interval_end], a.interval.begin => ['infraspecies', a.interval.end]}
|
580
580
|
end
|
581
|
-
|
581
|
+
|
582
582
|
def details(a = nil)
|
583
583
|
{:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
|
584
584
|
end
|
@@ -594,31 +594,31 @@ grammar ScientificNameClean
|
|
594
594
|
def pos(uni)
|
595
595
|
{interval.begin => ['rank_uninomial', interval.end], uni.interval.begin => ['uninomial', uni.interval.end]}
|
596
596
|
end
|
597
|
-
|
597
|
+
|
598
598
|
def details(uni)
|
599
599
|
{:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
|
600
600
|
end
|
601
601
|
}
|
602
602
|
end
|
603
|
-
|
603
|
+
|
604
604
|
rule species
|
605
605
|
a:species_string space b:authorship {
|
606
606
|
def value
|
607
607
|
a.value + " " + b.value
|
608
608
|
end
|
609
|
-
|
609
|
+
|
610
610
|
def canonical
|
611
611
|
a.canonical
|
612
612
|
end
|
613
|
-
|
613
|
+
|
614
614
|
def hybrid
|
615
615
|
a.hybrid rescue false
|
616
616
|
end
|
617
|
-
|
617
|
+
|
618
618
|
def pos
|
619
619
|
a.pos.merge(b.pos)
|
620
620
|
end
|
621
|
-
|
621
|
+
|
622
622
|
def details
|
623
623
|
{:species => a.details[:species].merge(b.details)}
|
624
624
|
end
|
@@ -626,21 +626,21 @@ grammar ScientificNameClean
|
|
626
626
|
/
|
627
627
|
species_string
|
628
628
|
end
|
629
|
-
|
629
|
+
|
630
630
|
rule species_string
|
631
631
|
species_word {
|
632
632
|
def canonical
|
633
633
|
value
|
634
634
|
end
|
635
|
-
|
635
|
+
|
636
636
|
def pos
|
637
637
|
{interval.begin => ['species', interval.end]}
|
638
638
|
end
|
639
|
-
|
639
|
+
|
640
640
|
def hybrid
|
641
641
|
false
|
642
642
|
end
|
643
|
-
|
643
|
+
|
644
644
|
def details
|
645
645
|
{:species => {:string => value}}
|
646
646
|
end
|
@@ -648,41 +648,41 @@ grammar ScientificNameClean
|
|
648
648
|
/
|
649
649
|
species_word_hybrid
|
650
650
|
end
|
651
|
-
|
651
|
+
|
652
652
|
rule infragenus
|
653
653
|
left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
|
654
654
|
def value
|
655
655
|
"(" + a.value + ")"
|
656
656
|
end
|
657
|
-
|
657
|
+
|
658
658
|
def canonical
|
659
659
|
a.value
|
660
660
|
end
|
661
|
-
|
661
|
+
|
662
662
|
def pos
|
663
663
|
{a.interval.begin => ['infragenus', a.interval.end]}
|
664
664
|
end
|
665
|
-
|
665
|
+
|
666
666
|
def details
|
667
667
|
{:infragenus => {:string => a.value}}
|
668
668
|
end
|
669
669
|
}
|
670
670
|
end
|
671
|
-
|
671
|
+
|
672
672
|
rule genus
|
673
673
|
a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
|
674
674
|
def value
|
675
675
|
a.value
|
676
676
|
end
|
677
|
-
|
677
|
+
|
678
678
|
def pos
|
679
679
|
{a.interval.begin => ['genus', a.interval.end]}
|
680
680
|
end
|
681
|
-
|
681
|
+
|
682
682
|
def canonical
|
683
683
|
a.value
|
684
684
|
end
|
685
|
-
|
685
|
+
|
686
686
|
def details
|
687
687
|
{:genus => {:string => a.value}}
|
688
688
|
end
|
@@ -706,27 +706,27 @@ grammar ScientificNameClean
|
|
706
706
|
def details
|
707
707
|
{:abbreviated_genus => {:string => value}}
|
708
708
|
end
|
709
|
-
}
|
709
|
+
}
|
710
710
|
end
|
711
|
-
|
711
|
+
|
712
712
|
rule uninomial_name
|
713
713
|
a:uninomial_string space b:infragenus space c:simple_authorship {
|
714
714
|
def value
|
715
715
|
a.value + " " + b.value + " " + c.value
|
716
716
|
end
|
717
|
-
|
717
|
+
|
718
718
|
def canonical
|
719
719
|
a.canonical
|
720
720
|
end
|
721
|
-
|
721
|
+
|
722
722
|
def pos
|
723
723
|
a.pos.merge(b.pos).merge(c.pos)
|
724
724
|
end
|
725
|
-
|
725
|
+
|
726
726
|
def hybrid
|
727
727
|
false
|
728
728
|
end
|
729
|
-
|
729
|
+
|
730
730
|
def details
|
731
731
|
{:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
|
732
732
|
end
|
@@ -736,19 +736,19 @@ grammar ScientificNameClean
|
|
736
736
|
def value
|
737
737
|
a.value + " " + b.value
|
738
738
|
end
|
739
|
-
|
739
|
+
|
740
740
|
def canonical
|
741
741
|
a.canonical
|
742
742
|
end
|
743
|
-
|
743
|
+
|
744
744
|
def pos
|
745
745
|
a.pos.merge(b.pos)
|
746
746
|
end
|
747
|
-
|
747
|
+
|
748
748
|
def hybrid
|
749
749
|
false
|
750
750
|
end
|
751
|
-
|
751
|
+
|
752
752
|
def details
|
753
753
|
{:uninomial => a.details[:uninomial].merge(b.details)}
|
754
754
|
end
|
@@ -758,19 +758,19 @@ grammar ScientificNameClean
|
|
758
758
|
def value
|
759
759
|
a.value + " " + b.value
|
760
760
|
end
|
761
|
-
|
761
|
+
|
762
762
|
def canonical
|
763
763
|
a.canonical
|
764
764
|
end
|
765
|
-
|
765
|
+
|
766
766
|
def pos
|
767
767
|
a.pos.merge(b.pos)
|
768
768
|
end
|
769
|
-
|
769
|
+
|
770
770
|
def hybrid
|
771
771
|
false
|
772
772
|
end
|
773
|
-
|
773
|
+
|
774
774
|
def details
|
775
775
|
{:uninomial => a.details[:uninomial].merge(b.details)}
|
776
776
|
end
|
@@ -784,31 +784,31 @@ grammar ScientificNameClean
|
|
784
784
|
def canonical
|
785
785
|
value
|
786
786
|
end
|
787
|
-
|
787
|
+
|
788
788
|
def pos
|
789
789
|
{interval.begin => ['uninomial', interval.end]}
|
790
790
|
end
|
791
|
-
|
791
|
+
|
792
792
|
def hybrid
|
793
793
|
false
|
794
794
|
end
|
795
|
-
|
796
|
-
def details
|
795
|
+
|
796
|
+
def details
|
797
797
|
{:uninomial => {:string => value}}
|
798
798
|
end
|
799
799
|
}
|
800
800
|
end
|
801
|
-
|
801
|
+
|
802
802
|
rule authorship
|
803
803
|
a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
|
804
804
|
def value
|
805
805
|
a.value + " " + b.value + " " + c.value
|
806
806
|
end
|
807
|
-
|
807
|
+
|
808
808
|
def pos
|
809
809
|
a.pos.merge(b.pos).merge(c.pos)
|
810
810
|
end
|
811
|
-
|
811
|
+
|
812
812
|
def details
|
813
813
|
val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
814
814
|
val[:combinationAuthorTeam].merge!(c.details)
|
@@ -820,11 +820,11 @@ grammar ScientificNameClean
|
|
820
820
|
def value
|
821
821
|
a.value + " " + b.value
|
822
822
|
end
|
823
|
-
|
823
|
+
|
824
824
|
def pos
|
825
825
|
a.pos.merge(b.pos)
|
826
826
|
end
|
827
|
-
|
827
|
+
|
828
828
|
def details
|
829
829
|
{:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
830
830
|
end
|
@@ -836,11 +836,11 @@ grammar ScientificNameClean
|
|
836
836
|
def value
|
837
837
|
a.value + " " + b.value
|
838
838
|
end
|
839
|
-
|
839
|
+
|
840
840
|
def pos
|
841
841
|
a.pos.merge(b.pos)
|
842
842
|
end
|
843
|
-
|
843
|
+
|
844
844
|
def details
|
845
845
|
val = a.details
|
846
846
|
val[:authorship] = text_value.strip
|
@@ -851,21 +851,21 @@ grammar ScientificNameClean
|
|
851
851
|
/
|
852
852
|
simple_authorship
|
853
853
|
end
|
854
|
-
|
855
|
-
|
854
|
+
|
855
|
+
|
856
856
|
rule basionym_authorship_with_parenthesis
|
857
857
|
left_paren space a:authors_names space right_paren space [,]? space b:year {
|
858
858
|
def value
|
859
859
|
"(" + a.value + " " + b.value + ")"
|
860
860
|
end
|
861
|
-
|
861
|
+
|
862
862
|
def pos
|
863
863
|
a.pos.merge(b.pos)
|
864
|
-
end
|
865
|
-
|
864
|
+
end
|
865
|
+
|
866
866
|
def details
|
867
|
-
{ :authorship => text_value,
|
868
|
-
:basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
|
867
|
+
{ :authorship => text_value,
|
868
|
+
:basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
|
869
869
|
}
|
870
870
|
end
|
871
871
|
}
|
@@ -874,11 +874,11 @@ grammar ScientificNameClean
|
|
874
874
|
def value
|
875
875
|
"(" + a.value + " " + b.value + ")"
|
876
876
|
end
|
877
|
-
|
877
|
+
|
878
878
|
def pos
|
879
879
|
a.pos.merge(b.pos)
|
880
880
|
end
|
881
|
-
|
881
|
+
|
882
882
|
def details
|
883
883
|
val = a.details
|
884
884
|
val[:basionymAuthorTeam].merge!(b.details)
|
@@ -891,15 +891,15 @@ grammar ScientificNameClean
|
|
891
891
|
def value
|
892
892
|
"(" + a.value + ")"
|
893
893
|
end
|
894
|
-
|
894
|
+
|
895
895
|
def pos
|
896
896
|
a.pos
|
897
897
|
end
|
898
|
-
|
898
|
+
|
899
899
|
def details
|
900
900
|
val = a.details
|
901
901
|
val[:authorship] = text_value
|
902
|
-
val
|
902
|
+
val
|
903
903
|
end
|
904
904
|
}
|
905
905
|
/
|
@@ -907,32 +907,32 @@ grammar ScientificNameClean
|
|
907
907
|
def value
|
908
908
|
"(?)"
|
909
909
|
end
|
910
|
-
|
910
|
+
|
911
911
|
def pos
|
912
912
|
{a.interval.begin => ['unknown_author', a.interval.end]}
|
913
913
|
end
|
914
|
-
|
914
|
+
|
915
915
|
def details
|
916
916
|
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
|
917
917
|
end
|
918
918
|
}
|
919
919
|
end
|
920
|
-
|
920
|
+
|
921
921
|
rule ex_authorship
|
922
922
|
ex_sep space b:simple_authorship {
|
923
923
|
def value
|
924
924
|
" ex " + b.value
|
925
925
|
end
|
926
|
-
|
926
|
+
|
927
927
|
def pos
|
928
928
|
b.pos
|
929
929
|
end
|
930
|
-
|
930
|
+
|
931
931
|
def details
|
932
932
|
val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
933
933
|
val
|
934
934
|
end
|
935
|
-
}
|
935
|
+
}
|
936
936
|
end
|
937
937
|
|
938
938
|
rule simple_authorship
|
@@ -940,17 +940,17 @@ grammar ScientificNameClean
|
|
940
940
|
def value
|
941
941
|
a.value + " " + b.value
|
942
942
|
end
|
943
|
-
|
943
|
+
|
944
944
|
def pos
|
945
945
|
a.pos.merge(b.pos)
|
946
946
|
end
|
947
|
-
|
947
|
+
|
948
948
|
def details
|
949
949
|
details_with_arg(:basionymAuthorTeam)
|
950
950
|
end
|
951
|
-
|
951
|
+
|
952
952
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
953
|
-
{ :authorship => text_value,
|
953
|
+
{ :authorship => text_value,
|
954
954
|
authorTeamType.to_sym => {
|
955
955
|
:authorTeam => a.text_value.strip
|
956
956
|
}.merge(a.details).merge(b.details)
|
@@ -962,17 +962,17 @@ grammar ScientificNameClean
|
|
962
962
|
def value
|
963
963
|
a.value + " " + b.value
|
964
964
|
end
|
965
|
-
|
965
|
+
|
966
966
|
def pos
|
967
967
|
a.pos.merge(b.pos)
|
968
968
|
end
|
969
|
-
|
969
|
+
|
970
970
|
def details
|
971
971
|
details_with_arg(:basionymAuthorTeam)
|
972
972
|
end
|
973
|
-
|
973
|
+
|
974
974
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
975
|
-
{ :authorship => text_value,
|
975
|
+
{ :authorship => text_value,
|
976
976
|
authorTeamType.to_sym => {
|
977
977
|
:authorTeam => a.text_value.strip
|
978
978
|
}.merge(a.details).merge(b.details)
|
@@ -986,27 +986,27 @@ grammar ScientificNameClean
|
|
986
986
|
details[:basionymAuthorTeam].merge!(super)
|
987
987
|
details
|
988
988
|
end
|
989
|
-
|
989
|
+
|
990
990
|
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
991
|
-
{ :authorship => text_value,
|
991
|
+
{ :authorship => text_value,
|
992
992
|
authorTeamType.to_sym => {
|
993
993
|
:authorTeam => text_value,
|
994
994
|
}
|
995
|
-
}
|
995
|
+
}
|
996
996
|
end
|
997
997
|
}
|
998
998
|
end
|
999
|
-
|
999
|
+
|
1000
1000
|
rule authors_names
|
1001
1001
|
a:author_name space sep:author_separator space b:authors_names {
|
1002
1002
|
def value
|
1003
1003
|
sep.apply(a,b)
|
1004
1004
|
end
|
1005
|
-
|
1005
|
+
|
1006
1006
|
def pos
|
1007
1007
|
sep.pos(a,b)
|
1008
1008
|
end
|
1009
|
-
|
1009
|
+
|
1010
1010
|
def details
|
1011
1011
|
sep.details(a,b)
|
1012
1012
|
end
|
@@ -1016,28 +1016,28 @@ grammar ScientificNameClean
|
|
1016
1016
|
/
|
1017
1017
|
unknown_auth
|
1018
1018
|
end
|
1019
|
-
|
1020
|
-
|
1019
|
+
|
1020
|
+
|
1021
1021
|
rule unknown_auth
|
1022
1022
|
("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
|
1023
1023
|
def value
|
1024
1024
|
text_value
|
1025
1025
|
end
|
1026
|
-
|
1026
|
+
|
1027
1027
|
def pos
|
1028
1028
|
{interval.begin => ['unknown_author', interval.end]}
|
1029
1029
|
end
|
1030
|
-
|
1030
|
+
|
1031
1031
|
def details
|
1032
1032
|
{:author => ["unknown"]}
|
1033
1033
|
end
|
1034
1034
|
}
|
1035
1035
|
end
|
1036
|
-
|
1036
|
+
|
1037
1037
|
rule ex_sep
|
1038
1038
|
("ex"/"in") &[\s]
|
1039
1039
|
end
|
1040
|
-
|
1040
|
+
|
1041
1041
|
rule author_separator
|
1042
1042
|
("&"/"&"/","/"and"/"et") {
|
1043
1043
|
def apply(a,b)
|
@@ -1045,11 +1045,11 @@ grammar ScientificNameClean
|
|
1045
1045
|
sep = " &" if ["&", "&","and","et"].include? sep
|
1046
1046
|
a.value + sep + " " + b.value
|
1047
1047
|
end
|
1048
|
-
|
1048
|
+
|
1049
1049
|
def pos(a,b)
|
1050
1050
|
a.pos.merge(b.pos)
|
1051
1051
|
end
|
1052
|
-
|
1052
|
+
|
1053
1053
|
def details(a,b)
|
1054
1054
|
{:author => a.details[:author] + b.details[:author]}
|
1055
1055
|
end
|
@@ -1061,8 +1061,8 @@ grammar ScientificNameClean
|
|
1061
1061
|
def value
|
1062
1062
|
a.value + ' ' + b.value
|
1063
1063
|
end
|
1064
|
-
|
1065
|
-
def pos
|
1064
|
+
|
1065
|
+
def pos
|
1066
1066
|
a.pos.merge(b.pos)
|
1067
1067
|
end
|
1068
1068
|
|
@@ -1073,17 +1073,17 @@ grammar ScientificNameClean
|
|
1073
1073
|
/
|
1074
1074
|
author_name_without_postfix
|
1075
1075
|
end
|
1076
|
-
|
1076
|
+
|
1077
1077
|
rule author_name_without_postfix
|
1078
1078
|
space a:author_prefix_word space b:author_name {
|
1079
1079
|
def value
|
1080
1080
|
a.value + " " + b.value
|
1081
1081
|
end
|
1082
|
-
|
1082
|
+
|
1083
1083
|
def pos
|
1084
1084
|
a.pos.merge(b.pos)
|
1085
1085
|
end
|
1086
|
-
|
1086
|
+
|
1087
1087
|
def details
|
1088
1088
|
{:author => [value]}
|
1089
1089
|
end
|
@@ -1093,11 +1093,11 @@ grammar ScientificNameClean
|
|
1093
1093
|
def value
|
1094
1094
|
a.value + " " + b.value
|
1095
1095
|
end
|
1096
|
-
|
1096
|
+
|
1097
1097
|
def pos
|
1098
1098
|
a.pos.merge(b.pos)
|
1099
1099
|
end
|
1100
|
-
|
1100
|
+
|
1101
1101
|
def details
|
1102
1102
|
{:author => [value]}
|
1103
1103
|
end
|
@@ -1105,17 +1105,17 @@ grammar ScientificNameClean
|
|
1105
1105
|
/
|
1106
1106
|
author_word
|
1107
1107
|
end
|
1108
|
-
|
1108
|
+
|
1109
1109
|
rule author_word
|
1110
1110
|
"A S. Xu" {
|
1111
1111
|
def value
|
1112
1112
|
text_value.strip
|
1113
1113
|
end
|
1114
|
-
|
1114
|
+
|
1115
1115
|
def pos
|
1116
1116
|
{interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
|
1117
1117
|
end
|
1118
|
-
|
1118
|
+
|
1119
1119
|
def details
|
1120
1120
|
{:author => [value]}
|
1121
1121
|
end
|
@@ -1125,28 +1125,28 @@ grammar ScientificNameClean
|
|
1125
1125
|
def value
|
1126
1126
|
text_value.strip
|
1127
1127
|
end
|
1128
|
-
|
1128
|
+
|
1129
1129
|
def pos
|
1130
1130
|
#cheating because there are several words in some of them
|
1131
1131
|
{interval.begin => ['author_word', interval.end]}
|
1132
1132
|
end
|
1133
|
-
|
1133
|
+
|
1134
1134
|
def details
|
1135
1135
|
{:author => [value]}
|
1136
1136
|
end
|
1137
1137
|
}
|
1138
|
-
/
|
1138
|
+
/
|
1139
1139
|
("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
|
1140
1140
|
def value
|
1141
1141
|
text_value.gsub(/([\p{Lu}]{3,})/) do |match|
|
1142
1142
|
UnicodeUtils.titlecase(match)
|
1143
1143
|
end
|
1144
1144
|
end
|
1145
|
-
|
1145
|
+
|
1146
1146
|
def pos
|
1147
1147
|
{interval.begin => ['author_word', interval.end]}
|
1148
1148
|
end
|
1149
|
-
|
1149
|
+
|
1150
1150
|
def details
|
1151
1151
|
{:author => [value]}
|
1152
1152
|
end
|
@@ -1156,11 +1156,11 @@ grammar ScientificNameClean
|
|
1156
1156
|
def value
|
1157
1157
|
text_value
|
1158
1158
|
end
|
1159
|
-
|
1159
|
+
|
1160
1160
|
def pos
|
1161
1161
|
{interval.begin => ['author_word', interval.end]}
|
1162
1162
|
end
|
1163
|
-
|
1163
|
+
|
1164
1164
|
def details
|
1165
1165
|
{:author => [value]}
|
1166
1166
|
end
|
@@ -1168,13 +1168,13 @@ grammar ScientificNameClean
|
|
1168
1168
|
/
|
1169
1169
|
author_prefix_word
|
1170
1170
|
end
|
1171
|
-
|
1171
|
+
|
1172
1172
|
rule author_prefix_word
|
1173
1173
|
space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
|
1174
1174
|
def value
|
1175
1175
|
text_value
|
1176
1176
|
end
|
1177
|
-
|
1177
|
+
|
1178
1178
|
def pos
|
1179
1179
|
#cheating because there are several words in some of them
|
1180
1180
|
{interval.begin => ['author_word', interval.end]}
|
@@ -1184,7 +1184,7 @@ grammar ScientificNameClean
|
|
1184
1184
|
|
1185
1185
|
rule author_postfix_word
|
1186
1186
|
("f."/"filius") {
|
1187
|
-
def value
|
1187
|
+
def value
|
1188
1188
|
text_value.strip
|
1189
1189
|
end
|
1190
1190
|
|
@@ -1193,7 +1193,7 @@ grammar ScientificNameClean
|
|
1193
1193
|
end
|
1194
1194
|
}
|
1195
1195
|
end
|
1196
|
-
|
1196
|
+
|
1197
1197
|
rule cap_latin_word_pair
|
1198
1198
|
a:cap_latin_word "-" b:cap_latin_word {
|
1199
1199
|
def value
|
@@ -1201,7 +1201,7 @@ grammar ScientificNameClean
|
|
1201
1201
|
end
|
1202
1202
|
}
|
1203
1203
|
end
|
1204
|
-
|
1204
|
+
|
1205
1205
|
rule cap_latin_word
|
1206
1206
|
a:([A-Z]/cap_digraph) b:latin_word "?" {
|
1207
1207
|
def value
|
@@ -1241,19 +1241,19 @@ grammar ScientificNameClean
|
|
1241
1241
|
def value
|
1242
1242
|
a.value + " " + b.value
|
1243
1243
|
end
|
1244
|
-
|
1244
|
+
|
1245
1245
|
def canonical
|
1246
1246
|
b.value
|
1247
1247
|
end
|
1248
|
-
|
1248
|
+
|
1249
1249
|
def hybrid
|
1250
1250
|
true
|
1251
1251
|
end
|
1252
|
-
|
1252
|
+
|
1253
1253
|
def pos
|
1254
1254
|
{b.interval.begin => ['species', b.interval.end]}
|
1255
1255
|
end
|
1256
|
-
|
1256
|
+
|
1257
1257
|
def details
|
1258
1258
|
{:species => {:string => b.value}}
|
1259
1259
|
end
|
@@ -1263,19 +1263,19 @@ grammar ScientificNameClean
|
|
1263
1263
|
def value
|
1264
1264
|
"× " + b.value
|
1265
1265
|
end
|
1266
|
-
|
1266
|
+
|
1267
1267
|
def canonical
|
1268
1268
|
b.value
|
1269
1269
|
end
|
1270
|
-
|
1270
|
+
|
1271
1271
|
def hybrid
|
1272
1272
|
true
|
1273
1273
|
end
|
1274
|
-
|
1274
|
+
|
1275
1275
|
def pos
|
1276
1276
|
{b.interval.begin => ['species', b.interval.end]}
|
1277
1277
|
end
|
1278
|
-
|
1278
|
+
|
1279
1279
|
def details
|
1280
1280
|
{:species => {:string => b.value}}
|
1281
1281
|
end
|
@@ -1285,19 +1285,19 @@ grammar ScientificNameClean
|
|
1285
1285
|
def value
|
1286
1286
|
"× " + b.value
|
1287
1287
|
end
|
1288
|
-
|
1288
|
+
|
1289
1289
|
def canonical
|
1290
1290
|
b.value
|
1291
1291
|
end
|
1292
|
-
|
1292
|
+
|
1293
1293
|
def hybrid
|
1294
1294
|
true
|
1295
1295
|
end
|
1296
|
-
|
1296
|
+
|
1297
1297
|
def pos
|
1298
1298
|
{b.interval.begin => ['species', b.interval.end]}
|
1299
1299
|
end
|
1300
|
-
|
1300
|
+
|
1301
1301
|
def details
|
1302
1302
|
{:species => {:string => b.value}}
|
1303
1303
|
end
|
@@ -1305,7 +1305,7 @@ grammar ScientificNameClean
|
|
1305
1305
|
end
|
1306
1306
|
|
1307
1307
|
rule annotation_identification
|
1308
|
-
("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"spp."/"spp "/"aff."/"aff "/"monst."/"?") {
|
1308
|
+
("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
|
1309
1309
|
|
1310
1310
|
def value
|
1311
1311
|
text_value.strip
|
@@ -1392,9 +1392,9 @@ grammar ScientificNameClean
|
|
1392
1392
|
text_value.split('').each do |l|
|
1393
1393
|
l = 'ae' if l == 'æ'
|
1394
1394
|
l = 'oe' if l == 'œ'
|
1395
|
-
# We normalize ë as well. It is legal in botanical code, but it
|
1395
|
+
# We normalize ë as well. It is legal in botanical code, but it
|
1396
1396
|
# is beneficial to normalize it for reconciliation purposes
|
1397
|
-
l = 'e' if l == 'ë'
|
1397
|
+
l = 'e' if l == 'ë'
|
1398
1398
|
res << l
|
1399
1399
|
end
|
1400
1400
|
res
|
@@ -1408,7 +1408,7 @@ grammar ScientificNameClean
|
|
1408
1408
|
res = text_value
|
1409
1409
|
res = 'ae' if res == 'æ'
|
1410
1410
|
res = 'oe' if res == 'œ'
|
1411
|
-
res = 'e' if res == 'ë'
|
1411
|
+
res = 'e' if res == 'ë'
|
1412
1412
|
res
|
1413
1413
|
end
|
1414
1414
|
}
|
@@ -1426,7 +1426,7 @@ grammar ScientificNameClean
|
|
1426
1426
|
def value
|
1427
1427
|
'Oe'
|
1428
1428
|
end
|
1429
|
-
}
|
1429
|
+
}
|
1430
1430
|
end
|
1431
1431
|
|
1432
1432
|
rule year
|
@@ -1434,14 +1434,14 @@ grammar ScientificNameClean
|
|
1434
1434
|
def value
|
1435
1435
|
a.value
|
1436
1436
|
end
|
1437
|
-
|
1437
|
+
|
1438
1438
|
def pos
|
1439
1439
|
a.pos
|
1440
1440
|
end
|
1441
|
-
|
1441
|
+
|
1442
1442
|
def details
|
1443
1443
|
a.details
|
1444
|
-
end
|
1444
|
+
end
|
1445
1445
|
}
|
1446
1446
|
/
|
1447
1447
|
year_number_with_character
|
@@ -1464,31 +1464,31 @@ grammar ScientificNameClean
|
|
1464
1464
|
end
|
1465
1465
|
}
|
1466
1466
|
end
|
1467
|
-
|
1467
|
+
|
1468
1468
|
rule year_number
|
1469
1469
|
[12] [7890] [0-9] ([0-9] [\?]?/"?") {
|
1470
1470
|
def value
|
1471
1471
|
text_value
|
1472
1472
|
end
|
1473
|
-
|
1473
|
+
|
1474
1474
|
def pos
|
1475
1475
|
{interval.begin => ['year', interval.end]}
|
1476
1476
|
end
|
1477
|
-
|
1477
|
+
|
1478
1478
|
def details
|
1479
1479
|
{:year => value}
|
1480
1480
|
end
|
1481
1481
|
}
|
1482
1482
|
end
|
1483
|
-
|
1483
|
+
|
1484
1484
|
rule left_paren
|
1485
1485
|
"("
|
1486
1486
|
end
|
1487
|
-
|
1487
|
+
|
1488
1488
|
rule right_paren
|
1489
1489
|
")"
|
1490
1490
|
end
|
1491
|
-
|
1491
|
+
|
1492
1492
|
rule hybrid_character
|
1493
1493
|
("x"/"X") {
|
1494
1494
|
def value
|
@@ -1498,7 +1498,7 @@ grammar ScientificNameClean
|
|
1498
1498
|
/
|
1499
1499
|
multiplication_sign
|
1500
1500
|
end
|
1501
|
-
|
1501
|
+
|
1502
1502
|
rule multiplication_sign
|
1503
1503
|
("×"/"*") {
|
1504
1504
|
def value
|
@@ -1506,7 +1506,7 @@ grammar ScientificNameClean
|
|
1506
1506
|
end
|
1507
1507
|
}
|
1508
1508
|
end
|
1509
|
-
|
1509
|
+
|
1510
1510
|
rule space
|
1511
1511
|
[\s]*
|
1512
1512
|
end
|
@@ -1514,5 +1514,5 @@ grammar ScientificNameClean
|
|
1514
1514
|
rule space_hard
|
1515
1515
|
[\s]+
|
1516
1516
|
end
|
1517
|
-
|
1517
|
+
|
1518
1518
|
end
|