dimus-biodiversity 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/nnparse +12 -12
- data/lib/biodiversity/parser/scientific_name.rb +213 -131
- data/lib/biodiversity/parser/scientific_name.treetop +30 -12
- data/spec/parser/scientific_name.spec.rb +9 -7
- metadata +11 -2
data/bin/nnparse
CHANGED
@@ -1,26 +1,29 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'rubygems'
|
3
|
-
gem 'dimus-biodiversity'
|
3
|
+
gem 'dimus-biodiversity' rescue gem 'biodiversity'
|
4
4
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
6
6
|
require 'biodiversity'
|
7
|
-
require '
|
7
|
+
require 'json/ext'
|
8
8
|
|
9
9
|
if ARGV.empty?
|
10
|
-
puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.
|
10
|
+
puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
|
11
11
|
exit
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
15
|
parser = ScientificNameParser.new
|
16
16
|
|
17
|
-
output = ARGV[1] || 'parsed.
|
17
|
+
output = ARGV[1] || 'parsed.json'
|
18
18
|
o = File.open(output,'w')
|
19
19
|
|
20
20
|
# parse a file with names
|
21
21
|
count = count2 = 0
|
22
22
|
names = []
|
23
|
+
last_result = nil
|
24
|
+
o.write("[\n")
|
23
25
|
IO.foreach(ARGV[0]) do |n|
|
26
|
+
o.write(last_result + ",\n") if last_result
|
24
27
|
name_dict = {}
|
25
28
|
puts 'Parsing names' if count2 == 0
|
26
29
|
count2 += 1
|
@@ -30,19 +33,16 @@ IO.foreach(ARGV[0]) do |n|
|
|
30
33
|
parsed = parser.parse n
|
31
34
|
unless parsed
|
32
35
|
name_dict[:details] = {:parsed => false}
|
33
|
-
|
36
|
+
last_result = JSON.generate name_dict
|
34
37
|
count += 1
|
35
38
|
else
|
36
39
|
name_dict[:output] = parsed.value
|
37
|
-
name_dict[:
|
40
|
+
name_dict[:canononical] = parsed.canonical
|
38
41
|
name_dict[:details] = parsed.details
|
39
42
|
name_dict[:parsed => true]
|
40
|
-
|
43
|
+
last_result = JSON.generate name_dict
|
41
44
|
end
|
42
45
|
end
|
43
|
-
|
44
|
-
|
45
|
-
results = YAML.dump(names)
|
46
|
-
puts "Writing restuls to #{output} file"
|
47
|
-
o.write(results)
|
46
|
+
o.write(last_result + "\n") if last_result
|
47
|
+
o.write("]")
|
48
48
|
puts "Found #{count2} records, #{count} of them could not be parsed."
|
@@ -295,110 +295,115 @@ module ScientificName
|
|
295
295
|
end
|
296
296
|
|
297
297
|
i0 = index
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
298
|
+
r1 = _nt_name_authors_part
|
299
|
+
if r1
|
300
|
+
r0 = r1
|
301
|
+
else
|
302
|
+
i2, s2 = index, []
|
303
|
+
r3 = _nt_space
|
304
|
+
s2 << r3
|
304
305
|
if r3
|
305
|
-
r4 =
|
306
|
-
|
306
|
+
r4 = _nt_name_part
|
307
|
+
s2 << r4
|
307
308
|
if r4
|
308
|
-
r5 =
|
309
|
-
|
309
|
+
r5 = _nt_space
|
310
|
+
s2 << r5
|
310
311
|
if r5
|
311
|
-
r6 =
|
312
|
-
|
312
|
+
r6 = _nt_authors_part
|
313
|
+
s2 << r6
|
313
314
|
if r6
|
314
|
-
r7 =
|
315
|
-
|
315
|
+
r7 = _nt_space
|
316
|
+
s2 << r7
|
316
317
|
if r7
|
317
|
-
r8 =
|
318
|
-
|
318
|
+
r8 = _nt_status_part
|
319
|
+
s2 << r8
|
320
|
+
if r8
|
321
|
+
r9 = _nt_space
|
322
|
+
s2 << r9
|
323
|
+
end
|
319
324
|
end
|
320
325
|
end
|
321
326
|
end
|
322
327
|
end
|
323
328
|
end
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
s9 << r10
|
339
|
-
if r10
|
340
|
-
r11 = _nt_name_part
|
341
|
-
s9 << r11
|
329
|
+
if s2.last
|
330
|
+
r2 = (SyntaxNode).new(input, i2...index, s2)
|
331
|
+
r2.extend(ScientificName0)
|
332
|
+
r2.extend(ScientificName1)
|
333
|
+
else
|
334
|
+
self.index = i2
|
335
|
+
r2 = nil
|
336
|
+
end
|
337
|
+
if r2
|
338
|
+
r0 = r2
|
339
|
+
else
|
340
|
+
i10, s10 = index, []
|
341
|
+
r11 = _nt_space
|
342
|
+
s10 << r11
|
342
343
|
if r11
|
343
|
-
r12 =
|
344
|
-
|
344
|
+
r12 = _nt_name_part
|
345
|
+
s10 << r12
|
345
346
|
if r12
|
346
|
-
r13 =
|
347
|
-
|
347
|
+
r13 = _nt_space
|
348
|
+
s10 << r13
|
348
349
|
if r13
|
349
|
-
r14 =
|
350
|
-
|
350
|
+
r14 = _nt_authors_part
|
351
|
+
s10 << r14
|
352
|
+
if r14
|
353
|
+
r15 = _nt_space
|
354
|
+
s10 << r15
|
355
|
+
end
|
351
356
|
end
|
352
357
|
end
|
353
358
|
end
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
s15 << r16
|
369
|
-
if r16
|
370
|
-
r17 = _nt_name_part
|
371
|
-
s15 << r17
|
359
|
+
if s10.last
|
360
|
+
r10 = (SyntaxNode).new(input, i10...index, s10)
|
361
|
+
r10.extend(ScientificName2)
|
362
|
+
r10.extend(ScientificName3)
|
363
|
+
else
|
364
|
+
self.index = i10
|
365
|
+
r10 = nil
|
366
|
+
end
|
367
|
+
if r10
|
368
|
+
r0 = r10
|
369
|
+
else
|
370
|
+
i16, s16 = index, []
|
371
|
+
r17 = _nt_space
|
372
|
+
s16 << r17
|
372
373
|
if r17
|
373
|
-
r18 =
|
374
|
-
|
374
|
+
r18 = _nt_name_part
|
375
|
+
s16 << r18
|
375
376
|
if r18
|
376
|
-
r19 =
|
377
|
-
|
377
|
+
r19 = _nt_space
|
378
|
+
s16 << r19
|
378
379
|
if r19
|
379
|
-
r20 =
|
380
|
-
|
380
|
+
r20 = _nt_year
|
381
|
+
s16 << r20
|
382
|
+
if r20
|
383
|
+
r21 = _nt_space
|
384
|
+
s16 << r21
|
385
|
+
end
|
381
386
|
end
|
382
387
|
end
|
383
388
|
end
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
r0 = r15
|
395
|
-
else
|
396
|
-
r21 = _nt_name_part
|
397
|
-
if r21
|
398
|
-
r0 = r21
|
389
|
+
if s16.last
|
390
|
+
r16 = (SyntaxNode).new(input, i16...index, s16)
|
391
|
+
r16.extend(ScientificName4)
|
392
|
+
r16.extend(ScientificName5)
|
393
|
+
else
|
394
|
+
self.index = i16
|
395
|
+
r16 = nil
|
396
|
+
end
|
397
|
+
if r16
|
398
|
+
r0 = r16
|
399
399
|
else
|
400
|
-
|
401
|
-
|
400
|
+
r22 = _nt_name_part
|
401
|
+
if r22
|
402
|
+
r0 = r22
|
403
|
+
else
|
404
|
+
self.index = i0
|
405
|
+
r0 = nil
|
406
|
+
end
|
402
407
|
end
|
403
408
|
end
|
404
409
|
end
|
@@ -539,6 +544,81 @@ module ScientificName
|
|
539
544
|
return r0
|
540
545
|
end
|
541
546
|
|
547
|
+
module NameAuthorsPart0
|
548
|
+
def a
|
549
|
+
elements[0]
|
550
|
+
end
|
551
|
+
|
552
|
+
def space
|
553
|
+
elements[1]
|
554
|
+
end
|
555
|
+
|
556
|
+
def b
|
557
|
+
elements[2]
|
558
|
+
end
|
559
|
+
|
560
|
+
def space
|
561
|
+
elements[3]
|
562
|
+
end
|
563
|
+
|
564
|
+
def c
|
565
|
+
elements[4]
|
566
|
+
end
|
567
|
+
end
|
568
|
+
|
569
|
+
module NameAuthorsPart1
|
570
|
+
def value
|
571
|
+
a.value + " " + b.value + " " + c.value
|
572
|
+
end
|
573
|
+
def canonical
|
574
|
+
a.canonical + " " + c.canonical
|
575
|
+
end
|
576
|
+
def details
|
577
|
+
a.details.merge(c.details).merge({:species_authors=>b.details})
|
578
|
+
end
|
579
|
+
end
|
580
|
+
|
581
|
+
def _nt_name_authors_part
|
582
|
+
start_index = index
|
583
|
+
if node_cache[:name_authors_part].has_key?(index)
|
584
|
+
cached = node_cache[:name_authors_part][index]
|
585
|
+
@index = cached.interval.end if cached
|
586
|
+
return cached
|
587
|
+
end
|
588
|
+
|
589
|
+
i0, s0 = index, []
|
590
|
+
r1 = _nt_species_name
|
591
|
+
s0 << r1
|
592
|
+
if r1
|
593
|
+
r2 = _nt_space
|
594
|
+
s0 << r2
|
595
|
+
if r2
|
596
|
+
r3 = _nt_authors_part
|
597
|
+
s0 << r3
|
598
|
+
if r3
|
599
|
+
r4 = _nt_space
|
600
|
+
s0 << r4
|
601
|
+
if r4
|
602
|
+
r5 = _nt_subspecies_name
|
603
|
+
s0 << r5
|
604
|
+
end
|
605
|
+
end
|
606
|
+
end
|
607
|
+
end
|
608
|
+
if s0.last
|
609
|
+
r0 = (SyntaxNode).new(input, i0...index, s0)
|
610
|
+
r0.extend(NameAuthorsPart0)
|
611
|
+
r0.extend(NameAuthorsPart1)
|
612
|
+
else
|
613
|
+
self.index = i0
|
614
|
+
r0 = nil
|
615
|
+
end
|
616
|
+
|
617
|
+
node_cache[:name_authors_part][start_index] = r0
|
618
|
+
|
619
|
+
return r0
|
620
|
+
end
|
621
|
+
|
542
622
|
module AuthorsPart0
|
543
623
|
def a
|
544
624
|
elements[0]
|
@@ -1208,7 +1288,9 @@ module ScientificName
|
|
1208
1288
|
|
1209
1289
|
module AuthorNameSeparator0
|
1210
1290
|
def apply(a,b)
|
1211
|
-
|
1291
|
+
sep = text_value.strip
|
1292
|
+
sep = " " + sep if sep == "&"
|
1293
|
+
a.value + sep + " " + b.value
|
1212
1294
|
end
|
1213
1295
|
|
1214
1296
|
def details(a,b)
|
@@ -1846,7 +1928,7 @@ module ScientificName
|
|
1846
1928
|
end
|
1847
1929
|
|
1848
1930
|
def details
|
1849
|
-
a.details.merge({:subspecies => {:
|
1931
|
+
a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
|
1850
1932
|
end
|
1851
1933
|
end
|
1852
1934
|
|
@@ -1869,7 +1951,7 @@ module ScientificName
|
|
1869
1951
|
r4 = _nt_space
|
1870
1952
|
s1 << r4
|
1871
1953
|
if r4
|
1872
|
-
r5 =
|
1954
|
+
r5 = _nt_rank
|
1873
1955
|
s1 << r5
|
1874
1956
|
if r5
|
1875
1957
|
r6 = _nt_space_hard
|
@@ -2094,7 +2176,7 @@ module ScientificName
|
|
2094
2176
|
end
|
2095
2177
|
|
2096
2178
|
i0, s0 = index, []
|
2097
|
-
r1 =
|
2179
|
+
r1 = _nt_rank
|
2098
2180
|
s0 << r1
|
2099
2181
|
if r1
|
2100
2182
|
r2 = _nt_space_hard
|
@@ -2236,7 +2318,7 @@ module ScientificName
|
|
2236
2318
|
r2 = _nt_space
|
2237
2319
|
s1 << r2
|
2238
2320
|
if r2
|
2239
|
-
r3 =
|
2321
|
+
r3 = _nt_rank
|
2240
2322
|
s1 << r3
|
2241
2323
|
if r3
|
2242
2324
|
r4 = _nt_space
|
@@ -2276,7 +2358,7 @@ module ScientificName
|
|
2276
2358
|
if r1
|
2277
2359
|
r0 = r1
|
2278
2360
|
else
|
2279
|
-
r9 =
|
2361
|
+
r9 = _nt_rank
|
2280
2362
|
if r9
|
2281
2363
|
r0 = r9
|
2282
2364
|
else
|
@@ -2290,7 +2372,7 @@ module ScientificName
|
|
2290
2372
|
return r0
|
2291
2373
|
end
|
2292
2374
|
|
2293
|
-
module
|
2375
|
+
module Rank0
|
2294
2376
|
def value
|
2295
2377
|
text_value.strip
|
2296
2378
|
end
|
@@ -2301,14 +2383,14 @@ module ScientificName
|
|
2301
2383
|
" " + a.value
|
2302
2384
|
end
|
2303
2385
|
def details(a = nil)
|
2304
|
-
{:subspecies => [{:
|
2386
|
+
{:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
|
2305
2387
|
end
|
2306
2388
|
end
|
2307
2389
|
|
2308
|
-
def
|
2390
|
+
def _nt_rank
|
2309
2391
|
start_index = index
|
2310
|
-
if node_cache[:
|
2311
|
-
cached = node_cache[:
|
2392
|
+
if node_cache[:rank].has_key?(index)
|
2393
|
+
cached = node_cache[:rank][index]
|
2312
2394
|
@index = cached.interval.end if cached
|
2313
2395
|
return cached
|
2314
2396
|
end
|
@@ -2323,7 +2405,7 @@ module ScientificName
|
|
2323
2405
|
end
|
2324
2406
|
if r1
|
2325
2407
|
r0 = r1
|
2326
|
-
r0.extend(
|
2408
|
+
r0.extend(Rank0)
|
2327
2409
|
else
|
2328
2410
|
if input.index("f.", index) == index
|
2329
2411
|
r2 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2334,7 +2416,7 @@ module ScientificName
|
|
2334
2416
|
end
|
2335
2417
|
if r2
|
2336
2418
|
r0 = r2
|
2337
|
-
r0.extend(
|
2419
|
+
r0.extend(Rank0)
|
2338
2420
|
else
|
2339
2421
|
if input.index("B", index) == index
|
2340
2422
|
r3 = (SyntaxNode).new(input, index...(index + 1))
|
@@ -2345,7 +2427,7 @@ module ScientificName
|
|
2345
2427
|
end
|
2346
2428
|
if r3
|
2347
2429
|
r0 = r3
|
2348
|
-
r0.extend(
|
2430
|
+
r0.extend(Rank0)
|
2349
2431
|
else
|
2350
2432
|
if input.index("ssp.", index) == index
|
2351
2433
|
r4 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2356,7 +2438,7 @@ module ScientificName
|
|
2356
2438
|
end
|
2357
2439
|
if r4
|
2358
2440
|
r0 = r4
|
2359
|
-
r0.extend(
|
2441
|
+
r0.extend(Rank0)
|
2360
2442
|
else
|
2361
2443
|
if input.index("mut.", index) == index
|
2362
2444
|
r5 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2367,7 +2449,7 @@ module ScientificName
|
|
2367
2449
|
end
|
2368
2450
|
if r5
|
2369
2451
|
r0 = r5
|
2370
|
-
r0.extend(
|
2452
|
+
r0.extend(Rank0)
|
2371
2453
|
else
|
2372
2454
|
if input.index("pseudovar.", index) == index
|
2373
2455
|
r6 = (SyntaxNode).new(input, index...(index + 10))
|
@@ -2378,7 +2460,7 @@ module ScientificName
|
|
2378
2460
|
end
|
2379
2461
|
if r6
|
2380
2462
|
r0 = r6
|
2381
|
-
r0.extend(
|
2463
|
+
r0.extend(Rank0)
|
2382
2464
|
else
|
2383
2465
|
if input.index("sect.", index) == index
|
2384
2466
|
r7 = (SyntaxNode).new(input, index...(index + 5))
|
@@ -2389,7 +2471,7 @@ module ScientificName
|
|
2389
2471
|
end
|
2390
2472
|
if r7
|
2391
2473
|
r0 = r7
|
2392
|
-
r0.extend(
|
2474
|
+
r0.extend(Rank0)
|
2393
2475
|
else
|
2394
2476
|
if input.index("ser.", index) == index
|
2395
2477
|
r8 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2400,7 +2482,7 @@ module ScientificName
|
|
2400
2482
|
end
|
2401
2483
|
if r8
|
2402
2484
|
r0 = r8
|
2403
|
-
r0.extend(
|
2485
|
+
r0.extend(Rank0)
|
2404
2486
|
else
|
2405
2487
|
if input.index("var.", index) == index
|
2406
2488
|
r9 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2411,7 +2493,7 @@ module ScientificName
|
|
2411
2493
|
end
|
2412
2494
|
if r9
|
2413
2495
|
r0 = r9
|
2414
|
-
r0.extend(
|
2496
|
+
r0.extend(Rank0)
|
2415
2497
|
else
|
2416
2498
|
if input.index("subvar.", index) == index
|
2417
2499
|
r10 = (SyntaxNode).new(input, index...(index + 7))
|
@@ -2422,7 +2504,7 @@ module ScientificName
|
|
2422
2504
|
end
|
2423
2505
|
if r10
|
2424
2506
|
r0 = r10
|
2425
|
-
r0.extend(
|
2507
|
+
r0.extend(Rank0)
|
2426
2508
|
else
|
2427
2509
|
if input.index("[var.]", index) == index
|
2428
2510
|
r11 = (SyntaxNode).new(input, index...(index + 6))
|
@@ -2433,7 +2515,7 @@ module ScientificName
|
|
2433
2515
|
end
|
2434
2516
|
if r11
|
2435
2517
|
r0 = r11
|
2436
|
-
r0.extend(
|
2518
|
+
r0.extend(Rank0)
|
2437
2519
|
else
|
2438
2520
|
if input.index("subsp.", index) == index
|
2439
2521
|
r12 = (SyntaxNode).new(input, index...(index + 6))
|
@@ -2444,7 +2526,7 @@ module ScientificName
|
|
2444
2526
|
end
|
2445
2527
|
if r12
|
2446
2528
|
r0 = r12
|
2447
|
-
r0.extend(
|
2529
|
+
r0.extend(Rank0)
|
2448
2530
|
else
|
2449
2531
|
if input.index("subf.", index) == index
|
2450
2532
|
r13 = (SyntaxNode).new(input, index...(index + 5))
|
@@ -2455,7 +2537,7 @@ module ScientificName
|
|
2455
2537
|
end
|
2456
2538
|
if r13
|
2457
2539
|
r0 = r13
|
2458
|
-
r0.extend(
|
2540
|
+
r0.extend(Rank0)
|
2459
2541
|
else
|
2460
2542
|
if input.index("race", index) == index
|
2461
2543
|
r14 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2466,7 +2548,7 @@ module ScientificName
|
|
2466
2548
|
end
|
2467
2549
|
if r14
|
2468
2550
|
r0 = r14
|
2469
|
-
r0.extend(
|
2551
|
+
r0.extend(Rank0)
|
2470
2552
|
else
|
2471
2553
|
if input.index("α", index) == index
|
2472
2554
|
r15 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2477,7 +2559,7 @@ module ScientificName
|
|
2477
2559
|
end
|
2478
2560
|
if r15
|
2479
2561
|
r0 = r15
|
2480
|
-
r0.extend(
|
2562
|
+
r0.extend(Rank0)
|
2481
2563
|
else
|
2482
2564
|
if input.index("ββ", index) == index
|
2483
2565
|
r16 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2488,7 +2570,7 @@ module ScientificName
|
|
2488
2570
|
end
|
2489
2571
|
if r16
|
2490
2572
|
r0 = r16
|
2491
|
-
r0.extend(
|
2573
|
+
r0.extend(Rank0)
|
2492
2574
|
else
|
2493
2575
|
if input.index("β", index) == index
|
2494
2576
|
r17 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2499,7 +2581,7 @@ module ScientificName
|
|
2499
2581
|
end
|
2500
2582
|
if r17
|
2501
2583
|
r0 = r17
|
2502
|
-
r0.extend(
|
2584
|
+
r0.extend(Rank0)
|
2503
2585
|
else
|
2504
2586
|
if input.index("γ", index) == index
|
2505
2587
|
r18 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2510,7 +2592,7 @@ module ScientificName
|
|
2510
2592
|
end
|
2511
2593
|
if r18
|
2512
2594
|
r0 = r18
|
2513
|
-
r0.extend(
|
2595
|
+
r0.extend(Rank0)
|
2514
2596
|
else
|
2515
2597
|
if input.index("δ", index) == index
|
2516
2598
|
r19 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2521,7 +2603,7 @@ module ScientificName
|
|
2521
2603
|
end
|
2522
2604
|
if r19
|
2523
2605
|
r0 = r19
|
2524
|
-
r0.extend(
|
2606
|
+
r0.extend(Rank0)
|
2525
2607
|
else
|
2526
2608
|
if input.index("ε", index) == index
|
2527
2609
|
r20 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2532,7 +2614,7 @@ module ScientificName
|
|
2532
2614
|
end
|
2533
2615
|
if r20
|
2534
2616
|
r0 = r20
|
2535
|
-
r0.extend(
|
2617
|
+
r0.extend(Rank0)
|
2536
2618
|
else
|
2537
2619
|
if input.index("φ", index) == index
|
2538
2620
|
r21 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2543,7 +2625,7 @@ module ScientificName
|
|
2543
2625
|
end
|
2544
2626
|
if r21
|
2545
2627
|
r0 = r21
|
2546
|
-
r0.extend(
|
2628
|
+
r0.extend(Rank0)
|
2547
2629
|
else
|
2548
2630
|
if input.index("θ", index) == index
|
2549
2631
|
r22 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2554,7 +2636,7 @@ module ScientificName
|
|
2554
2636
|
end
|
2555
2637
|
if r22
|
2556
2638
|
r0 = r22
|
2557
|
-
r0.extend(
|
2639
|
+
r0.extend(Rank0)
|
2558
2640
|
else
|
2559
2641
|
if input.index("μ", index) == index
|
2560
2642
|
r23 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2565,7 +2647,7 @@ module ScientificName
|
|
2565
2647
|
end
|
2566
2648
|
if r23
|
2567
2649
|
r0 = r23
|
2568
|
-
r0.extend(
|
2650
|
+
r0.extend(Rank0)
|
2569
2651
|
else
|
2570
2652
|
if input.index("a.", index) == index
|
2571
2653
|
r24 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2576,7 +2658,7 @@ module ScientificName
|
|
2576
2658
|
end
|
2577
2659
|
if r24
|
2578
2660
|
r0 = r24
|
2579
|
-
r0.extend(
|
2661
|
+
r0.extend(Rank0)
|
2580
2662
|
else
|
2581
2663
|
if input.index("b.", index) == index
|
2582
2664
|
r25 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2587,7 +2669,7 @@ module ScientificName
|
|
2587
2669
|
end
|
2588
2670
|
if r25
|
2589
2671
|
r0 = r25
|
2590
|
-
r0.extend(
|
2672
|
+
r0.extend(Rank0)
|
2591
2673
|
else
|
2592
2674
|
if input.index("c.", index) == index
|
2593
2675
|
r26 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2598,7 +2680,7 @@ module ScientificName
|
|
2598
2680
|
end
|
2599
2681
|
if r26
|
2600
2682
|
r0 = r26
|
2601
|
-
r0.extend(
|
2683
|
+
r0.extend(Rank0)
|
2602
2684
|
else
|
2603
2685
|
if input.index("d.", index) == index
|
2604
2686
|
r27 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2609,7 +2691,7 @@ module ScientificName
|
|
2609
2691
|
end
|
2610
2692
|
if r27
|
2611
2693
|
r0 = r27
|
2612
|
-
r0.extend(
|
2694
|
+
r0.extend(Rank0)
|
2613
2695
|
else
|
2614
2696
|
if input.index("e.", index) == index
|
2615
2697
|
r28 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2620,7 +2702,7 @@ module ScientificName
|
|
2620
2702
|
end
|
2621
2703
|
if r28
|
2622
2704
|
r0 = r28
|
2623
|
-
r0.extend(
|
2705
|
+
r0.extend(Rank0)
|
2624
2706
|
else
|
2625
2707
|
if input.index("g.", index) == index
|
2626
2708
|
r29 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2631,7 +2713,7 @@ module ScientificName
|
|
2631
2713
|
end
|
2632
2714
|
if r29
|
2633
2715
|
r0 = r29
|
2634
|
-
r0.extend(
|
2716
|
+
r0.extend(Rank0)
|
2635
2717
|
else
|
2636
2718
|
if input.index("k.", index) == index
|
2637
2719
|
r30 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2642,7 +2724,7 @@ module ScientificName
|
|
2642
2724
|
end
|
2643
2725
|
if r30
|
2644
2726
|
r0 = r30
|
2645
|
-
r0.extend(
|
2727
|
+
r0.extend(Rank0)
|
2646
2728
|
else
|
2647
2729
|
if input.index("****", index) == index
|
2648
2730
|
r31 = (SyntaxNode).new(input, index...(index + 4))
|
@@ -2653,7 +2735,7 @@ module ScientificName
|
|
2653
2735
|
end
|
2654
2736
|
if r31
|
2655
2737
|
r0 = r31
|
2656
|
-
r0.extend(
|
2738
|
+
r0.extend(Rank0)
|
2657
2739
|
else
|
2658
2740
|
if input.index("**", index) == index
|
2659
2741
|
r32 = (SyntaxNode).new(input, index...(index + 2))
|
@@ -2664,7 +2746,7 @@ module ScientificName
|
|
2664
2746
|
end
|
2665
2747
|
if r32
|
2666
2748
|
r0 = r32
|
2667
|
-
r0.extend(
|
2749
|
+
r0.extend(Rank0)
|
2668
2750
|
else
|
2669
2751
|
if input.index("*", index) == index
|
2670
2752
|
r33 = (SyntaxNode).new(input, index...(index + 1))
|
@@ -2675,7 +2757,7 @@ module ScientificName
|
|
2675
2757
|
end
|
2676
2758
|
if r33
|
2677
2759
|
r0 = r33
|
2678
|
-
r0.extend(
|
2760
|
+
r0.extend(Rank0)
|
2679
2761
|
else
|
2680
2762
|
self.index = i0
|
2681
2763
|
r0 = nil
|
@@ -2713,7 +2795,7 @@ module ScientificName
|
|
2713
2795
|
end
|
2714
2796
|
end
|
2715
2797
|
|
2716
|
-
node_cache[:
|
2798
|
+
node_cache[:rank][start_index] = r0
|
2717
2799
|
|
2718
2800
|
return r0
|
2719
2801
|
end
|
@@ -2817,7 +2899,7 @@ module ScientificName
|
|
2817
2899
|
elements[0]
|
2818
2900
|
end
|
2819
2901
|
|
2820
|
-
def
|
2902
|
+
def space
|
2821
2903
|
elements[1]
|
2822
2904
|
end
|
2823
2905
|
|
@@ -2825,7 +2907,7 @@ module ScientificName
|
|
2825
2907
|
elements[2]
|
2826
2908
|
end
|
2827
2909
|
|
2828
|
-
def
|
2910
|
+
def space
|
2829
2911
|
elements[3]
|
2830
2912
|
end
|
2831
2913
|
|
@@ -2968,13 +3050,13 @@ module ScientificName
|
|
2968
3050
|
r18 = _nt_cap_latin_word
|
2969
3051
|
s17 << r18
|
2970
3052
|
if r18
|
2971
|
-
r19 =
|
3053
|
+
r19 = _nt_space
|
2972
3054
|
s17 << r19
|
2973
3055
|
if r19
|
2974
3056
|
r20 = _nt_subgenus
|
2975
3057
|
s17 << r20
|
2976
3058
|
if r20
|
2977
|
-
r21 =
|
3059
|
+
r21 = _nt_space
|
2978
3060
|
s17 << r21
|
2979
3061
|
if r21
|
2980
3062
|
r22 = _nt_latin_word
|
@@ -3181,7 +3263,7 @@ module ScientificName
|
|
3181
3263
|
end
|
3182
3264
|
|
3183
3265
|
def details
|
3184
|
-
{:
|
3266
|
+
{:uninomial => value}
|
3185
3267
|
end
|
3186
3268
|
end
|
3187
3269
|
|
@@ -31,6 +31,8 @@ grammar ScientificName
|
|
31
31
|
end
|
32
32
|
|
33
33
|
rule scientific_name
|
34
|
+
name_authors_part
|
35
|
+
/
|
34
36
|
space a:name_part space b:authors_part space c:status_part space {
|
35
37
|
def value
|
36
38
|
a.value + " " + b.value + " " + c.value
|
@@ -98,6 +100,20 @@ grammar ScientificName
|
|
98
100
|
latin_word
|
99
101
|
end
|
100
102
|
|
103
|
+
rule name_authors_part
|
104
|
+
a:species_name space b:authors_part space c:subspecies_name {
|
105
|
+
def value
|
106
|
+
a.value + " " + b.value + " " + c.value
|
107
|
+
end
|
108
|
+
def canonical
|
109
|
+
a.canonical + " " + c.canonical
|
110
|
+
end
|
111
|
+
def details
|
112
|
+
a.details.merge(c.details).merge({:species_authors=>b.details})
|
113
|
+
end
|
114
|
+
}
|
115
|
+
end
|
116
|
+
|
101
117
|
rule authors_part
|
102
118
|
a:original_authors_revised_name space b:authors_revised_name {
|
103
119
|
def value
|
@@ -214,7 +230,9 @@ grammar ScientificName
|
|
214
230
|
rule author_name_separator
|
215
231
|
("&"/",") {
|
216
232
|
def apply(a,b)
|
217
|
-
|
233
|
+
sep = text_value.strip
|
234
|
+
sep = " " + sep if sep == "&"
|
235
|
+
a.value + sep + " " + b.value
|
218
236
|
end
|
219
237
|
|
220
238
|
def details(a,b)
|
@@ -260,7 +278,7 @@ grammar ScientificName
|
|
260
278
|
end
|
261
279
|
|
262
280
|
rule name_part
|
263
|
-
space a:species_name space b:
|
281
|
+
space a:species_name space b:rank space_hard c:editorials_full {
|
264
282
|
def value
|
265
283
|
a.value + " " + b.value + " " + c.value
|
266
284
|
end
|
@@ -295,7 +313,7 @@ grammar ScientificName
|
|
295
313
|
end
|
296
314
|
|
297
315
|
def details
|
298
|
-
a.details.merge({:subspecies => {:
|
316
|
+
a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
|
299
317
|
end
|
300
318
|
}
|
301
319
|
/
|
@@ -324,7 +342,7 @@ grammar ScientificName
|
|
324
342
|
end
|
325
343
|
|
326
344
|
rule subspecies_name
|
327
|
-
sel:
|
345
|
+
sel:rank space_hard a:latin_word {
|
328
346
|
def value
|
329
347
|
sel.apply(a)
|
330
348
|
end
|
@@ -352,16 +370,16 @@ grammar ScientificName
|
|
352
370
|
end
|
353
371
|
|
354
372
|
rule editorials
|
355
|
-
space a:
|
373
|
+
space a:rank space [&]? space b:editorials {
|
356
374
|
def value
|
357
375
|
a.value + b.value
|
358
376
|
end
|
359
377
|
}
|
360
378
|
/
|
361
|
-
|
379
|
+
rank
|
362
380
|
end
|
363
381
|
|
364
|
-
rule
|
382
|
+
rule rank
|
365
383
|
("f.sp."/"f."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
|
366
384
|
/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
367
385
|
{
|
@@ -375,13 +393,13 @@ grammar ScientificName
|
|
375
393
|
" " + a.value
|
376
394
|
end
|
377
395
|
def details(a = nil)
|
378
|
-
{:subspecies => [{:
|
396
|
+
{:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
|
379
397
|
end
|
380
398
|
}
|
381
399
|
end
|
382
400
|
|
383
401
|
rule species_name
|
384
|
-
hybrid_separator space_hard
|
402
|
+
hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
|
385
403
|
def value
|
386
404
|
"× " + a.value + " " + b.value
|
387
405
|
end
|
@@ -393,7 +411,7 @@ grammar ScientificName
|
|
393
411
|
end
|
394
412
|
}
|
395
413
|
/
|
396
|
-
hybrid_separator space_hard
|
414
|
+
hybrid_separator space_hard a:cap_latin_word {
|
397
415
|
def value
|
398
416
|
"× " + a.value
|
399
417
|
end
|
@@ -417,7 +435,7 @@ grammar ScientificName
|
|
417
435
|
end
|
418
436
|
}
|
419
437
|
/
|
420
|
-
a:cap_latin_word
|
438
|
+
a:cap_latin_word space b:subgenus space c:latin_word {
|
421
439
|
def value
|
422
440
|
a.value + " " + b.value + " " + c.value
|
423
441
|
end
|
@@ -473,7 +491,7 @@ grammar ScientificName
|
|
473
491
|
end
|
474
492
|
|
475
493
|
def details
|
476
|
-
{:
|
494
|
+
{:uninomial => value}
|
477
495
|
end
|
478
496
|
}
|
479
497
|
end
|
@@ -31,7 +31,7 @@ describe ScientificName do
|
|
31
31
|
parse(sn).should_not be_nil
|
32
32
|
value(sn).should == 'Pseudocercospora'
|
33
33
|
canonical(sn).should == 'Pseudocercospora'
|
34
|
-
details(sn).should == {:uninomial=>"Pseudocercospora"
|
34
|
+
details(sn).should == {:uninomial=>"Pseudocercospora"}
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'should parse canonical' do
|
@@ -51,7 +51,8 @@ describe ScientificName do
|
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'should parse species autonym for complex subspecies authorships' do
|
54
|
-
|
54
|
+
parse("Aus bus Linn. var. bus").should_not be_nil
|
55
|
+
details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
|
55
56
|
# aus genus, bus species, Linn. author, var. rank, bus infraspecific epithet
|
56
57
|
end
|
57
58
|
|
@@ -91,6 +92,7 @@ describe ScientificName do
|
|
91
92
|
|
92
93
|
parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
|
93
94
|
parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
|
95
|
+
value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981"
|
94
96
|
parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
|
95
97
|
parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
|
96
98
|
parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
|
@@ -135,7 +137,7 @@ describe ScientificName do
|
|
135
137
|
parse("Sphaerotheca fuliginea f. dahliae Movss. 1967").should_not be_nil
|
136
138
|
value(" Sphaerotheca fuliginea f. dahliae Movss. 1967 ").should == "Sphaerotheca fuliginea f. dahliae Movss. 1967"
|
137
139
|
canonical("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == "Sphaerotheca fuliginea dahliae"
|
138
|
-
details("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == {:subspecies=>[{:
|
140
|
+
details("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == {:subspecies=>[{:rank=>"f.", :value=>"dahliae"}], :authors=>{:year=>"1967", :names=>["Movss."]}, :species=>"fuliginea", :genus=>"Sphaerotheca"}
|
139
141
|
end
|
140
142
|
|
141
143
|
it "should parse name with var." do
|
@@ -147,7 +149,7 @@ describe ScientificName do
|
|
147
149
|
it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
|
148
150
|
parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
|
149
151
|
value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
150
|
-
details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:
|
152
|
+
details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:rank=>"var.", :value=>"zonatum"}, {:rank=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
|
151
153
|
end
|
152
154
|
|
153
155
|
it "should parse status BOTANICAL RARE" do
|
@@ -215,12 +217,12 @@ describe ScientificName do
|
|
215
217
|
|
216
218
|
|
217
219
|
|
218
|
-
it "should parse name with subspecies without rank
|
220
|
+
it "should parse name with subspecies without rank NOT BOTANICAL" do
|
219
221
|
name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
220
222
|
parse(name).should_not be_nil
|
221
223
|
value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
|
222
224
|
canonical(name).should == "Hydnellum scrobiculatum zonatum"
|
223
|
-
details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:
|
225
|
+
details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:rank=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
|
224
226
|
end
|
225
227
|
|
226
228
|
it "should not parse utf-8 chars in name part" do
|
@@ -234,7 +236,7 @@ describe ScientificName do
|
|
234
236
|
value("Agaricus acris var. (b.)").should == "Agaricus acris var. (b.)"
|
235
237
|
parse("Agaricus acris var. (b.)").should_not be_nil
|
236
238
|
value("Agaricus acris var. (b.&c.)").should == "Agaricus acris var. (b.c.)"
|
237
|
-
details("Agaricus acris var. (b.&c.)").should == {:editorial_markup=>"(b.c.)", :subspecies=>[{:
|
239
|
+
details("Agaricus acris var. (b.&c.)").should == {:editorial_markup=>"(b.c.)", :subspecies=>[{:rank=>"var.", :value=>nil}], :species=>"acris", :genus=>"Agaricus", :is_valid=>false}
|
238
240
|
|
239
241
|
end
|
240
242
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -21,6 +21,15 @@ dependencies:
|
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 1.2.4
|
23
23
|
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: json
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 1.1.3
|
32
|
+
version:
|
24
33
|
description: Biodiversity library provides a parser tool for scientific species names
|
25
34
|
email: dmozzherin {et} eol {dt} org
|
26
35
|
executables:
|
@@ -67,6 +76,6 @@ rubyforge_project:
|
|
67
76
|
rubygems_version: 1.2.0
|
68
77
|
signing_key:
|
69
78
|
specification_version: 2
|
70
|
-
summary: scientific species name parser
|
79
|
+
summary: scientific species name parser Executable is nnparse
|
71
80
|
test_files: []
|
72
81
|
|