dimus-biodiversity 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
data/bin/nnparse CHANGED
@@ -1,26 +1,29 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'rubygems'
3
- gem 'dimus-biodiversity'
3
+ gem 'dimus-biodiversity' rescue gem 'biodiversity'
4
4
 
5
5
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
6
  require 'biodiversity'
7
- require 'yaml'
7
+ require 'json/ext'
8
8
 
9
9
  if ARGV.empty?
10
- puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.yml\n\n"
10
+ puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
11
11
  exit
12
12
  end
13
13
 
14
14
 
15
15
  parser = ScientificNameParser.new
16
16
 
17
- output = ARGV[1] || 'parsed.yml'
17
+ output = ARGV[1] || 'parsed.json'
18
18
  o = File.open(output,'w')
19
19
 
20
20
  # parse a file with names
21
21
  count = count2 = 0
22
22
  names = []
23
+ last_result = nil
24
+ o.write("[\n")
23
25
  IO.foreach(ARGV[0]) do |n|
26
+ o.write(last_result + ",\n") if last_result
24
27
  name_dict = {}
25
28
  puts 'Parsing names' if count2 == 0
26
29
  count2 += 1
@@ -30,19 +33,16 @@ IO.foreach(ARGV[0]) do |n|
30
33
  parsed = parser.parse n
31
34
  unless parsed
32
35
  name_dict[:details] = {:parsed => false}
33
- names << name_dict
36
+ last_result = JSON.generate name_dict
34
37
  count += 1
35
38
  else
36
39
  name_dict[:output] = parsed.value
37
- name_dict[:caononical] = parsed.canonical
40
+ name_dict[:canononical] = parsed.canonical
38
41
  name_dict[:details] = parsed.details
39
42
  name_dict[:parsed => true]
40
- names << name_dict
43
+ last_result = JSON.generate name_dict
41
44
  end
42
45
  end
43
- $KCODE = 'UTF8'
44
- puts "Converting results to YAML"
45
- results = YAML.dump(names)
46
- puts "Writing restuls to #{output} file"
47
- o.write(results)
46
+ o.write(last_result + "\n") if last_result
47
+ o.write("]")
48
48
  puts "Found #{count2} records, #{count} of them could not be parsed."
@@ -295,110 +295,115 @@ module ScientificName
295
295
  end
296
296
 
297
297
  i0 = index
298
- i1, s1 = index, []
299
- r2 = _nt_space
300
- s1 << r2
301
- if r2
302
- r3 = _nt_name_part
303
- s1 << r3
298
+ r1 = _nt_name_authors_part
299
+ if r1
300
+ r0 = r1
301
+ else
302
+ i2, s2 = index, []
303
+ r3 = _nt_space
304
+ s2 << r3
304
305
  if r3
305
- r4 = _nt_space
306
- s1 << r4
306
+ r4 = _nt_name_part
307
+ s2 << r4
307
308
  if r4
308
- r5 = _nt_authors_part
309
- s1 << r5
309
+ r5 = _nt_space
310
+ s2 << r5
310
311
  if r5
311
- r6 = _nt_space
312
- s1 << r6
312
+ r6 = _nt_authors_part
313
+ s2 << r6
313
314
  if r6
314
- r7 = _nt_status_part
315
- s1 << r7
315
+ r7 = _nt_space
316
+ s2 << r7
316
317
  if r7
317
- r8 = _nt_space
318
- s1 << r8
318
+ r8 = _nt_status_part
319
+ s2 << r8
320
+ if r8
321
+ r9 = _nt_space
322
+ s2 << r9
323
+ end
319
324
  end
320
325
  end
321
326
  end
322
327
  end
323
328
  end
324
- end
325
- if s1.last
326
- r1 = (SyntaxNode).new(input, i1...index, s1)
327
- r1.extend(ScientificName0)
328
- r1.extend(ScientificName1)
329
- else
330
- self.index = i1
331
- r1 = nil
332
- end
333
- if r1
334
- r0 = r1
335
- else
336
- i9, s9 = index, []
337
- r10 = _nt_space
338
- s9 << r10
339
- if r10
340
- r11 = _nt_name_part
341
- s9 << r11
329
+ if s2.last
330
+ r2 = (SyntaxNode).new(input, i2...index, s2)
331
+ r2.extend(ScientificName0)
332
+ r2.extend(ScientificName1)
333
+ else
334
+ self.index = i2
335
+ r2 = nil
336
+ end
337
+ if r2
338
+ r0 = r2
339
+ else
340
+ i10, s10 = index, []
341
+ r11 = _nt_space
342
+ s10 << r11
342
343
  if r11
343
- r12 = _nt_space
344
- s9 << r12
344
+ r12 = _nt_name_part
345
+ s10 << r12
345
346
  if r12
346
- r13 = _nt_authors_part
347
- s9 << r13
347
+ r13 = _nt_space
348
+ s10 << r13
348
349
  if r13
349
- r14 = _nt_space
350
- s9 << r14
350
+ r14 = _nt_authors_part
351
+ s10 << r14
352
+ if r14
353
+ r15 = _nt_space
354
+ s10 << r15
355
+ end
351
356
  end
352
357
  end
353
358
  end
354
- end
355
- if s9.last
356
- r9 = (SyntaxNode).new(input, i9...index, s9)
357
- r9.extend(ScientificName2)
358
- r9.extend(ScientificName3)
359
- else
360
- self.index = i9
361
- r9 = nil
362
- end
363
- if r9
364
- r0 = r9
365
- else
366
- i15, s15 = index, []
367
- r16 = _nt_space
368
- s15 << r16
369
- if r16
370
- r17 = _nt_name_part
371
- s15 << r17
359
+ if s10.last
360
+ r10 = (SyntaxNode).new(input, i10...index, s10)
361
+ r10.extend(ScientificName2)
362
+ r10.extend(ScientificName3)
363
+ else
364
+ self.index = i10
365
+ r10 = nil
366
+ end
367
+ if r10
368
+ r0 = r10
369
+ else
370
+ i16, s16 = index, []
371
+ r17 = _nt_space
372
+ s16 << r17
372
373
  if r17
373
- r18 = _nt_space
374
- s15 << r18
374
+ r18 = _nt_name_part
375
+ s16 << r18
375
376
  if r18
376
- r19 = _nt_year
377
- s15 << r19
377
+ r19 = _nt_space
378
+ s16 << r19
378
379
  if r19
379
- r20 = _nt_space
380
- s15 << r20
380
+ r20 = _nt_year
381
+ s16 << r20
382
+ if r20
383
+ r21 = _nt_space
384
+ s16 << r21
385
+ end
381
386
  end
382
387
  end
383
388
  end
384
- end
385
- if s15.last
386
- r15 = (SyntaxNode).new(input, i15...index, s15)
387
- r15.extend(ScientificName4)
388
- r15.extend(ScientificName5)
389
- else
390
- self.index = i15
391
- r15 = nil
392
- end
393
- if r15
394
- r0 = r15
395
- else
396
- r21 = _nt_name_part
397
- if r21
398
- r0 = r21
389
+ if s16.last
390
+ r16 = (SyntaxNode).new(input, i16...index, s16)
391
+ r16.extend(ScientificName4)
392
+ r16.extend(ScientificName5)
393
+ else
394
+ self.index = i16
395
+ r16 = nil
396
+ end
397
+ if r16
398
+ r0 = r16
399
399
  else
400
- self.index = i0
401
- r0 = nil
400
+ r22 = _nt_name_part
401
+ if r22
402
+ r0 = r22
403
+ else
404
+ self.index = i0
405
+ r0 = nil
406
+ end
402
407
  end
403
408
  end
404
409
  end
@@ -539,6 +544,81 @@ module ScientificName
539
544
  return r0
540
545
  end
541
546
 
547
+ module NameAuthorsPart0
548
+ def a
549
+ elements[0]
550
+ end
551
+
552
+ def space
553
+ elements[1]
554
+ end
555
+
556
+ def b
557
+ elements[2]
558
+ end
559
+
560
+ def space
561
+ elements[3]
562
+ end
563
+
564
+ def c
565
+ elements[4]
566
+ end
567
+ end
568
+
569
+ module NameAuthorsPart1
570
+ def value
571
+ a.value + " " + b.value + " " + c.value
572
+ end
573
+ def canonical
574
+ a.canonical + " " + c.canonical
575
+ end
576
+ def details
577
+ a.details.merge(c.details).merge({:species_authors=>b.details})
578
+ end
579
+ end
580
+
581
+ def _nt_name_authors_part
582
+ start_index = index
583
+ if node_cache[:name_authors_part].has_key?(index)
584
+ cached = node_cache[:name_authors_part][index]
585
+ @index = cached.interval.end if cached
586
+ return cached
587
+ end
588
+
589
+ i0, s0 = index, []
590
+ r1 = _nt_species_name
591
+ s0 << r1
592
+ if r1
593
+ r2 = _nt_space
594
+ s0 << r2
595
+ if r2
596
+ r3 = _nt_authors_part
597
+ s0 << r3
598
+ if r3
599
+ r4 = _nt_space
600
+ s0 << r4
601
+ if r4
602
+ r5 = _nt_subspecies_name
603
+ s0 << r5
604
+ end
605
+ end
606
+ end
607
+ end
608
+ if s0.last
609
+ r0 = (SyntaxNode).new(input, i0...index, s0)
610
+ r0.extend(NameAuthorsPart0)
611
+ r0.extend(NameAuthorsPart1)
612
+ else
613
+ self.index = i0
614
+ r0 = nil
615
+ end
616
+
617
+ node_cache[:name_authors_part][start_index] = r0
618
+
619
+ return r0
620
+ end
621
+
542
622
  module AuthorsPart0
543
623
  def a
544
624
  elements[0]
@@ -1208,7 +1288,9 @@ module ScientificName
1208
1288
 
1209
1289
  module AuthorNameSeparator0
1210
1290
  def apply(a,b)
1211
- a.value + " " + text_value + " " + b.value
1291
+ sep = text_value.strip
1292
+ sep = " " + sep if sep == "&"
1293
+ a.value + sep + " " + b.value
1212
1294
  end
1213
1295
 
1214
1296
  def details(a,b)
@@ -1846,7 +1928,7 @@ module ScientificName
1846
1928
  end
1847
1929
 
1848
1930
  def details
1849
- a.details.merge({:subspecies => {:type => "n/a", :value =>b.value}})
1931
+ a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
1850
1932
  end
1851
1933
  end
1852
1934
 
@@ -1869,7 +1951,7 @@ module ScientificName
1869
1951
  r4 = _nt_space
1870
1952
  s1 << r4
1871
1953
  if r4
1872
- r5 = _nt_selector
1954
+ r5 = _nt_rank
1873
1955
  s1 << r5
1874
1956
  if r5
1875
1957
  r6 = _nt_space_hard
@@ -2094,7 +2176,7 @@ module ScientificName
2094
2176
  end
2095
2177
 
2096
2178
  i0, s0 = index, []
2097
- r1 = _nt_selector
2179
+ r1 = _nt_rank
2098
2180
  s0 << r1
2099
2181
  if r1
2100
2182
  r2 = _nt_space_hard
@@ -2236,7 +2318,7 @@ module ScientificName
2236
2318
  r2 = _nt_space
2237
2319
  s1 << r2
2238
2320
  if r2
2239
- r3 = _nt_selector
2321
+ r3 = _nt_rank
2240
2322
  s1 << r3
2241
2323
  if r3
2242
2324
  r4 = _nt_space
@@ -2276,7 +2358,7 @@ module ScientificName
2276
2358
  if r1
2277
2359
  r0 = r1
2278
2360
  else
2279
- r9 = _nt_selector
2361
+ r9 = _nt_rank
2280
2362
  if r9
2281
2363
  r0 = r9
2282
2364
  else
@@ -2290,7 +2372,7 @@ module ScientificName
2290
2372
  return r0
2291
2373
  end
2292
2374
 
2293
- module Selector0
2375
+ module Rank0
2294
2376
  def value
2295
2377
  text_value.strip
2296
2378
  end
@@ -2301,14 +2383,14 @@ module ScientificName
2301
2383
  " " + a.value
2302
2384
  end
2303
2385
  def details(a = nil)
2304
- {:subspecies => [{:type => text_value, :value => (a.value rescue nil)}]}
2386
+ {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
2305
2387
  end
2306
2388
  end
2307
2389
 
2308
- def _nt_selector
2390
+ def _nt_rank
2309
2391
  start_index = index
2310
- if node_cache[:selector].has_key?(index)
2311
- cached = node_cache[:selector][index]
2392
+ if node_cache[:rank].has_key?(index)
2393
+ cached = node_cache[:rank][index]
2312
2394
  @index = cached.interval.end if cached
2313
2395
  return cached
2314
2396
  end
@@ -2323,7 +2405,7 @@ module ScientificName
2323
2405
  end
2324
2406
  if r1
2325
2407
  r0 = r1
2326
- r0.extend(Selector0)
2408
+ r0.extend(Rank0)
2327
2409
  else
2328
2410
  if input.index("f.", index) == index
2329
2411
  r2 = (SyntaxNode).new(input, index...(index + 2))
@@ -2334,7 +2416,7 @@ module ScientificName
2334
2416
  end
2335
2417
  if r2
2336
2418
  r0 = r2
2337
- r0.extend(Selector0)
2419
+ r0.extend(Rank0)
2338
2420
  else
2339
2421
  if input.index("B", index) == index
2340
2422
  r3 = (SyntaxNode).new(input, index...(index + 1))
@@ -2345,7 +2427,7 @@ module ScientificName
2345
2427
  end
2346
2428
  if r3
2347
2429
  r0 = r3
2348
- r0.extend(Selector0)
2430
+ r0.extend(Rank0)
2349
2431
  else
2350
2432
  if input.index("ssp.", index) == index
2351
2433
  r4 = (SyntaxNode).new(input, index...(index + 4))
@@ -2356,7 +2438,7 @@ module ScientificName
2356
2438
  end
2357
2439
  if r4
2358
2440
  r0 = r4
2359
- r0.extend(Selector0)
2441
+ r0.extend(Rank0)
2360
2442
  else
2361
2443
  if input.index("mut.", index) == index
2362
2444
  r5 = (SyntaxNode).new(input, index...(index + 4))
@@ -2367,7 +2449,7 @@ module ScientificName
2367
2449
  end
2368
2450
  if r5
2369
2451
  r0 = r5
2370
- r0.extend(Selector0)
2452
+ r0.extend(Rank0)
2371
2453
  else
2372
2454
  if input.index("pseudovar.", index) == index
2373
2455
  r6 = (SyntaxNode).new(input, index...(index + 10))
@@ -2378,7 +2460,7 @@ module ScientificName
2378
2460
  end
2379
2461
  if r6
2380
2462
  r0 = r6
2381
- r0.extend(Selector0)
2463
+ r0.extend(Rank0)
2382
2464
  else
2383
2465
  if input.index("sect.", index) == index
2384
2466
  r7 = (SyntaxNode).new(input, index...(index + 5))
@@ -2389,7 +2471,7 @@ module ScientificName
2389
2471
  end
2390
2472
  if r7
2391
2473
  r0 = r7
2392
- r0.extend(Selector0)
2474
+ r0.extend(Rank0)
2393
2475
  else
2394
2476
  if input.index("ser.", index) == index
2395
2477
  r8 = (SyntaxNode).new(input, index...(index + 4))
@@ -2400,7 +2482,7 @@ module ScientificName
2400
2482
  end
2401
2483
  if r8
2402
2484
  r0 = r8
2403
- r0.extend(Selector0)
2485
+ r0.extend(Rank0)
2404
2486
  else
2405
2487
  if input.index("var.", index) == index
2406
2488
  r9 = (SyntaxNode).new(input, index...(index + 4))
@@ -2411,7 +2493,7 @@ module ScientificName
2411
2493
  end
2412
2494
  if r9
2413
2495
  r0 = r9
2414
- r0.extend(Selector0)
2496
+ r0.extend(Rank0)
2415
2497
  else
2416
2498
  if input.index("subvar.", index) == index
2417
2499
  r10 = (SyntaxNode).new(input, index...(index + 7))
@@ -2422,7 +2504,7 @@ module ScientificName
2422
2504
  end
2423
2505
  if r10
2424
2506
  r0 = r10
2425
- r0.extend(Selector0)
2507
+ r0.extend(Rank0)
2426
2508
  else
2427
2509
  if input.index("[var.]", index) == index
2428
2510
  r11 = (SyntaxNode).new(input, index...(index + 6))
@@ -2433,7 +2515,7 @@ module ScientificName
2433
2515
  end
2434
2516
  if r11
2435
2517
  r0 = r11
2436
- r0.extend(Selector0)
2518
+ r0.extend(Rank0)
2437
2519
  else
2438
2520
  if input.index("subsp.", index) == index
2439
2521
  r12 = (SyntaxNode).new(input, index...(index + 6))
@@ -2444,7 +2526,7 @@ module ScientificName
2444
2526
  end
2445
2527
  if r12
2446
2528
  r0 = r12
2447
- r0.extend(Selector0)
2529
+ r0.extend(Rank0)
2448
2530
  else
2449
2531
  if input.index("subf.", index) == index
2450
2532
  r13 = (SyntaxNode).new(input, index...(index + 5))
@@ -2455,7 +2537,7 @@ module ScientificName
2455
2537
  end
2456
2538
  if r13
2457
2539
  r0 = r13
2458
- r0.extend(Selector0)
2540
+ r0.extend(Rank0)
2459
2541
  else
2460
2542
  if input.index("race", index) == index
2461
2543
  r14 = (SyntaxNode).new(input, index...(index + 4))
@@ -2466,7 +2548,7 @@ module ScientificName
2466
2548
  end
2467
2549
  if r14
2468
2550
  r0 = r14
2469
- r0.extend(Selector0)
2551
+ r0.extend(Rank0)
2470
2552
  else
2471
2553
  if input.index("α", index) == index
2472
2554
  r15 = (SyntaxNode).new(input, index...(index + 2))
@@ -2477,7 +2559,7 @@ module ScientificName
2477
2559
  end
2478
2560
  if r15
2479
2561
  r0 = r15
2480
- r0.extend(Selector0)
2562
+ r0.extend(Rank0)
2481
2563
  else
2482
2564
  if input.index("ββ", index) == index
2483
2565
  r16 = (SyntaxNode).new(input, index...(index + 4))
@@ -2488,7 +2570,7 @@ module ScientificName
2488
2570
  end
2489
2571
  if r16
2490
2572
  r0 = r16
2491
- r0.extend(Selector0)
2573
+ r0.extend(Rank0)
2492
2574
  else
2493
2575
  if input.index("β", index) == index
2494
2576
  r17 = (SyntaxNode).new(input, index...(index + 2))
@@ -2499,7 +2581,7 @@ module ScientificName
2499
2581
  end
2500
2582
  if r17
2501
2583
  r0 = r17
2502
- r0.extend(Selector0)
2584
+ r0.extend(Rank0)
2503
2585
  else
2504
2586
  if input.index("γ", index) == index
2505
2587
  r18 = (SyntaxNode).new(input, index...(index + 2))
@@ -2510,7 +2592,7 @@ module ScientificName
2510
2592
  end
2511
2593
  if r18
2512
2594
  r0 = r18
2513
- r0.extend(Selector0)
2595
+ r0.extend(Rank0)
2514
2596
  else
2515
2597
  if input.index("δ", index) == index
2516
2598
  r19 = (SyntaxNode).new(input, index...(index + 2))
@@ -2521,7 +2603,7 @@ module ScientificName
2521
2603
  end
2522
2604
  if r19
2523
2605
  r0 = r19
2524
- r0.extend(Selector0)
2606
+ r0.extend(Rank0)
2525
2607
  else
2526
2608
  if input.index("ε", index) == index
2527
2609
  r20 = (SyntaxNode).new(input, index...(index + 2))
@@ -2532,7 +2614,7 @@ module ScientificName
2532
2614
  end
2533
2615
  if r20
2534
2616
  r0 = r20
2535
- r0.extend(Selector0)
2617
+ r0.extend(Rank0)
2536
2618
  else
2537
2619
  if input.index("φ", index) == index
2538
2620
  r21 = (SyntaxNode).new(input, index...(index + 2))
@@ -2543,7 +2625,7 @@ module ScientificName
2543
2625
  end
2544
2626
  if r21
2545
2627
  r0 = r21
2546
- r0.extend(Selector0)
2628
+ r0.extend(Rank0)
2547
2629
  else
2548
2630
  if input.index("θ", index) == index
2549
2631
  r22 = (SyntaxNode).new(input, index...(index + 2))
@@ -2554,7 +2636,7 @@ module ScientificName
2554
2636
  end
2555
2637
  if r22
2556
2638
  r0 = r22
2557
- r0.extend(Selector0)
2639
+ r0.extend(Rank0)
2558
2640
  else
2559
2641
  if input.index("μ", index) == index
2560
2642
  r23 = (SyntaxNode).new(input, index...(index + 2))
@@ -2565,7 +2647,7 @@ module ScientificName
2565
2647
  end
2566
2648
  if r23
2567
2649
  r0 = r23
2568
- r0.extend(Selector0)
2650
+ r0.extend(Rank0)
2569
2651
  else
2570
2652
  if input.index("a.", index) == index
2571
2653
  r24 = (SyntaxNode).new(input, index...(index + 2))
@@ -2576,7 +2658,7 @@ module ScientificName
2576
2658
  end
2577
2659
  if r24
2578
2660
  r0 = r24
2579
- r0.extend(Selector0)
2661
+ r0.extend(Rank0)
2580
2662
  else
2581
2663
  if input.index("b.", index) == index
2582
2664
  r25 = (SyntaxNode).new(input, index...(index + 2))
@@ -2587,7 +2669,7 @@ module ScientificName
2587
2669
  end
2588
2670
  if r25
2589
2671
  r0 = r25
2590
- r0.extend(Selector0)
2672
+ r0.extend(Rank0)
2591
2673
  else
2592
2674
  if input.index("c.", index) == index
2593
2675
  r26 = (SyntaxNode).new(input, index...(index + 2))
@@ -2598,7 +2680,7 @@ module ScientificName
2598
2680
  end
2599
2681
  if r26
2600
2682
  r0 = r26
2601
- r0.extend(Selector0)
2683
+ r0.extend(Rank0)
2602
2684
  else
2603
2685
  if input.index("d.", index) == index
2604
2686
  r27 = (SyntaxNode).new(input, index...(index + 2))
@@ -2609,7 +2691,7 @@ module ScientificName
2609
2691
  end
2610
2692
  if r27
2611
2693
  r0 = r27
2612
- r0.extend(Selector0)
2694
+ r0.extend(Rank0)
2613
2695
  else
2614
2696
  if input.index("e.", index) == index
2615
2697
  r28 = (SyntaxNode).new(input, index...(index + 2))
@@ -2620,7 +2702,7 @@ module ScientificName
2620
2702
  end
2621
2703
  if r28
2622
2704
  r0 = r28
2623
- r0.extend(Selector0)
2705
+ r0.extend(Rank0)
2624
2706
  else
2625
2707
  if input.index("g.", index) == index
2626
2708
  r29 = (SyntaxNode).new(input, index...(index + 2))
@@ -2631,7 +2713,7 @@ module ScientificName
2631
2713
  end
2632
2714
  if r29
2633
2715
  r0 = r29
2634
- r0.extend(Selector0)
2716
+ r0.extend(Rank0)
2635
2717
  else
2636
2718
  if input.index("k.", index) == index
2637
2719
  r30 = (SyntaxNode).new(input, index...(index + 2))
@@ -2642,7 +2724,7 @@ module ScientificName
2642
2724
  end
2643
2725
  if r30
2644
2726
  r0 = r30
2645
- r0.extend(Selector0)
2727
+ r0.extend(Rank0)
2646
2728
  else
2647
2729
  if input.index("****", index) == index
2648
2730
  r31 = (SyntaxNode).new(input, index...(index + 4))
@@ -2653,7 +2735,7 @@ module ScientificName
2653
2735
  end
2654
2736
  if r31
2655
2737
  r0 = r31
2656
- r0.extend(Selector0)
2738
+ r0.extend(Rank0)
2657
2739
  else
2658
2740
  if input.index("**", index) == index
2659
2741
  r32 = (SyntaxNode).new(input, index...(index + 2))
@@ -2664,7 +2746,7 @@ module ScientificName
2664
2746
  end
2665
2747
  if r32
2666
2748
  r0 = r32
2667
- r0.extend(Selector0)
2749
+ r0.extend(Rank0)
2668
2750
  else
2669
2751
  if input.index("*", index) == index
2670
2752
  r33 = (SyntaxNode).new(input, index...(index + 1))
@@ -2675,7 +2757,7 @@ module ScientificName
2675
2757
  end
2676
2758
  if r33
2677
2759
  r0 = r33
2678
- r0.extend(Selector0)
2760
+ r0.extend(Rank0)
2679
2761
  else
2680
2762
  self.index = i0
2681
2763
  r0 = nil
@@ -2713,7 +2795,7 @@ module ScientificName
2713
2795
  end
2714
2796
  end
2715
2797
 
2716
- node_cache[:selector][start_index] = r0
2798
+ node_cache[:rank][start_index] = r0
2717
2799
 
2718
2800
  return r0
2719
2801
  end
@@ -2817,7 +2899,7 @@ module ScientificName
2817
2899
  elements[0]
2818
2900
  end
2819
2901
 
2820
- def space_hard
2902
+ def space
2821
2903
  elements[1]
2822
2904
  end
2823
2905
 
@@ -2825,7 +2907,7 @@ module ScientificName
2825
2907
  elements[2]
2826
2908
  end
2827
2909
 
2828
- def space_hard
2910
+ def space
2829
2911
  elements[3]
2830
2912
  end
2831
2913
 
@@ -2968,13 +3050,13 @@ module ScientificName
2968
3050
  r18 = _nt_cap_latin_word
2969
3051
  s17 << r18
2970
3052
  if r18
2971
- r19 = _nt_space_hard
3053
+ r19 = _nt_space
2972
3054
  s17 << r19
2973
3055
  if r19
2974
3056
  r20 = _nt_subgenus
2975
3057
  s17 << r20
2976
3058
  if r20
2977
- r21 = _nt_space_hard
3059
+ r21 = _nt_space
2978
3060
  s17 << r21
2979
3061
  if r21
2980
3062
  r22 = _nt_latin_word
@@ -3181,7 +3263,7 @@ module ScientificName
3181
3263
  end
3182
3264
 
3183
3265
  def details
3184
- {:name_type => "Uninomial", :uninomial => value}
3266
+ {:uninomial => value}
3185
3267
  end
3186
3268
  end
3187
3269
 
@@ -31,6 +31,8 @@ grammar ScientificName
31
31
  end
32
32
 
33
33
  rule scientific_name
34
+ name_authors_part
35
+ /
34
36
  space a:name_part space b:authors_part space c:status_part space {
35
37
  def value
36
38
  a.value + " " + b.value + " " + c.value
@@ -98,6 +100,20 @@ grammar ScientificName
98
100
  latin_word
99
101
  end
100
102
 
103
+ rule name_authors_part
104
+ a:species_name space b:authors_part space c:subspecies_name {
105
+ def value
106
+ a.value + " " + b.value + " " + c.value
107
+ end
108
+ def canonical
109
+ a.canonical + " " + c.canonical
110
+ end
111
+ def details
112
+ a.details.merge(c.details).merge({:species_authors=>b.details})
113
+ end
114
+ }
115
+ end
116
+
101
117
  rule authors_part
102
118
  a:original_authors_revised_name space b:authors_revised_name {
103
119
  def value
@@ -214,7 +230,9 @@ grammar ScientificName
214
230
  rule author_name_separator
215
231
  ("&"/",") {
216
232
  def apply(a,b)
217
- a.value + " " + text_value + " " + b.value
233
+ sep = text_value.strip
234
+ sep = " " + sep if sep == "&"
235
+ a.value + sep + " " + b.value
218
236
  end
219
237
 
220
238
  def details(a,b)
@@ -260,7 +278,7 @@ grammar ScientificName
260
278
  end
261
279
 
262
280
  rule name_part
263
- space a:species_name space b:selector space_hard c:editorials_full {
281
+ space a:species_name space b:rank space_hard c:editorials_full {
264
282
  def value
265
283
  a.value + " " + b.value + " " + c.value
266
284
  end
@@ -295,7 +313,7 @@ grammar ScientificName
295
313
  end
296
314
 
297
315
  def details
298
- a.details.merge({:subspecies => {:type => "n/a", :value =>b.value}})
316
+ a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
299
317
  end
300
318
  }
301
319
  /
@@ -324,7 +342,7 @@ grammar ScientificName
324
342
  end
325
343
 
326
344
  rule subspecies_name
327
- sel:selector space_hard a:latin_word {
345
+ sel:rank space_hard a:latin_word {
328
346
  def value
329
347
  sel.apply(a)
330
348
  end
@@ -352,16 +370,16 @@ grammar ScientificName
352
370
  end
353
371
 
354
372
  rule editorials
355
- space a:selector space [&]? space b:editorials {
373
+ space a:rank space [&]? space b:editorials {
356
374
  def value
357
375
  a.value + b.value
358
376
  end
359
377
  }
360
378
  /
361
- selector
379
+ rank
362
380
  end
363
381
 
364
- rule selector
382
+ rule rank
365
383
  ("f.sp."/"f."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
366
384
  /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
367
385
  {
@@ -375,13 +393,13 @@ grammar ScientificName
375
393
  " " + a.value
376
394
  end
377
395
  def details(a = nil)
378
- {:subspecies => [{:type => text_value, :value => (a.value rescue nil)}]}
396
+ {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
379
397
  end
380
398
  }
381
399
  end
382
400
 
383
401
  rule species_name
384
- hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
402
+ hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
385
403
  def value
386
404
  "× " + a.value + " " + b.value
387
405
  end
@@ -393,7 +411,7 @@ grammar ScientificName
393
411
  end
394
412
  }
395
413
  /
396
- hybrid_separator space_hard a:cap_latin_word {
414
+ hybrid_separator space_hard a:cap_latin_word {
397
415
  def value
398
416
  "× " + a.value
399
417
  end
@@ -417,7 +435,7 @@ grammar ScientificName
417
435
  end
418
436
  }
419
437
  /
420
- a:cap_latin_word space_hard b:subgenus space_hard c:latin_word {
438
+ a:cap_latin_word space b:subgenus space c:latin_word {
421
439
  def value
422
440
  a.value + " " + b.value + " " + c.value
423
441
  end
@@ -473,7 +491,7 @@ grammar ScientificName
473
491
  end
474
492
 
475
493
  def details
476
- {:name_type => "Uninomial", :uninomial => value}
494
+ {:uninomial => value}
477
495
  end
478
496
  }
479
497
  end
@@ -31,7 +31,7 @@ describe ScientificName do
31
31
  parse(sn).should_not be_nil
32
32
  value(sn).should == 'Pseudocercospora'
33
33
  canonical(sn).should == 'Pseudocercospora'
34
- details(sn).should == {:uninomial=>"Pseudocercospora", :name_type=>"Uninomial"}
34
+ details(sn).should == {:uninomial=>"Pseudocercospora"}
35
35
  end
36
36
 
37
37
  it 'should parse canonical' do
@@ -51,7 +51,8 @@ describe ScientificName do
51
51
  end
52
52
 
53
53
  it 'should parse species autonym for complex subspecies authorships' do
54
- #parse("Aus bus Linn. var. bus").should_not be_nil
54
+ parse("Aus bus Linn. var. bus").should_not be_nil
55
+ details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
55
56
  # aus genus, bus species, Linn. author, var. rank, bus infraspecific epithet
56
57
  end
57
58
 
@@ -91,6 +92,7 @@ describe ScientificName do
91
92
 
92
93
  parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
93
94
  parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
95
+ value("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should == "Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981"
94
96
  parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
95
97
  parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
96
98
  parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
@@ -135,7 +137,7 @@ describe ScientificName do
135
137
  parse("Sphaerotheca fuliginea f. dahliae Movss. 1967").should_not be_nil
136
138
  value(" Sphaerotheca fuliginea f. dahliae Movss. 1967 ").should == "Sphaerotheca fuliginea f. dahliae Movss. 1967"
137
139
  canonical("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == "Sphaerotheca fuliginea dahliae"
138
- details("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == {:subspecies=>[{:type=>"f.", :value=>"dahliae"}], :authors=>{:year=>"1967", :names=>["Movss."]}, :species=>"fuliginea", :genus=>"Sphaerotheca"}
140
+ details("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == {:subspecies=>[{:rank=>"f.", :value=>"dahliae"}], :authors=>{:year=>"1967", :names=>["Movss."]}, :species=>"fuliginea", :genus=>"Sphaerotheca"}
139
141
  end
140
142
 
141
143
  it "should parse name with var." do
@@ -147,7 +149,7 @@ describe ScientificName do
147
149
  it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
148
150
  parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
149
151
  value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
150
- details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:type=>"var.", :value=>"zonatum"}, {:type=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
152
+ details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:rank=>"var.", :value=>"zonatum"}, {:rank=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
151
153
  end
152
154
 
153
155
  it "should parse status BOTANICAL RARE" do
@@ -215,12 +217,12 @@ describe ScientificName do
215
217
 
216
218
 
217
219
 
218
- it "should parse name with subspecies without rank selector NOT BOTANICAL" do
220
+ it "should parse name with subspecies without rank NOT BOTANICAL" do
219
221
  name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
220
222
  parse(name).should_not be_nil
221
223
  value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
222
224
  canonical(name).should == "Hydnellum scrobiculatum zonatum"
223
- details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:type=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
225
+ details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:rank=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
224
226
  end
225
227
 
226
228
  it "should not parse utf-8 chars in name part" do
@@ -234,7 +236,7 @@ describe ScientificName do
234
236
  value("Agaricus acris var. (b.)").should == "Agaricus acris var. (b.)"
235
237
  parse("Agaricus acris var. (b.)").should_not be_nil
236
238
  value("Agaricus acris var. (b.&c.)").should == "Agaricus acris var. (b.c.)"
237
- details("Agaricus acris var. (b.&c.)").should == {:editorial_markup=>"(b.c.)", :subspecies=>[{:type=>"var.", :value=>nil}], :species=>"acris", :genus=>"Agaricus", :is_valid=>false}
239
+ details("Agaricus acris var. (b.&c.)").should == {:editorial_markup=>"(b.c.)", :subspecies=>[{:rank=>"var.", :value=>nil}], :species=>"acris", :genus=>"Agaricus", :is_valid=>false}
238
240
 
239
241
  end
240
242
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -21,6 +21,15 @@ dependencies:
21
21
  - !ruby/object:Gem::Version
22
22
  version: 1.2.4
23
23
  version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: json
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: 1.1.3
32
+ version:
24
33
  description: Biodiversity library provides a parser tool for scientific species names
25
34
  email: dmozzherin {et} eol {dt} org
26
35
  executables:
@@ -67,6 +76,6 @@ rubyforge_project:
67
76
  rubygems_version: 1.2.0
68
77
  signing_key:
69
78
  specification_version: 2
70
- summary: scientific species name parser
79
+ summary: scientific species name parser Executable is nnparse
71
80
  test_files: []
72
81