biodiversity 0.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +13 -0
- data/LICENSE +20 -0
- data/README.rdoc +44 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/bin/nnparse +43 -0
- data/bin/parserver +14 -0
- data/biodiversity.gemspec +88 -0
- data/conf/environment.rb +3 -0
- data/lib/biodiversity.rb +9 -0
- data/lib/biodiversity/guid.rb +2 -0
- data/lib/biodiversity/guid/lsid.rb +18 -0
- data/lib/biodiversity/parser.rb +57 -0
- data/lib/biodiversity/parser/scientific_name_canonical.rb +462 -0
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +111 -0
- data/lib/biodiversity/parser/scientific_name_clean.rb +5991 -0
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1195 -0
- data/lib/biodiversity/parser/scientific_name_dirty.rb +1056 -0
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +211 -0
- data/spec/biodiversity_spec.rb +0 -0
- data/spec/guid/lsid.spec.rb +12 -0
- data/spec/parser/scientific_name.spec.rb +35 -0
- data/spec/parser/scientific_name_canonical.spec.rb +27 -0
- data/spec/parser/scientific_name_clean.spec.rb +504 -0
- data/spec/parser/scientific_name_dirty.spec.rb +90 -0
- data/spec/parser/spec_helper.rb +69 -0
- data/spec/parser/test_data.txt +235 -0
- data/spec/spec_helper.rb +0 -0
- metadata +134 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2009 Dmitry Mozzherin
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
= Biodiversity
|
|
2
|
+
|
|
3
|
+
Parses a species' scientific name and breaks it into its elements.
|
|
4
|
+
|
|
5
|
+
== Installation
|
|
6
|
+
|
|
7
|
+
To install the gem you need RubyGems >= 1.2.0
|
|
8
|
+
|
|
9
|
+
$ gem sources -a http://gems.github.com (you only have to do this once)
|
|
10
|
+
$ sudo gem install dimus-biodiversity
|
|
11
|
+
|
|
12
|
+
== Example usage
|
|
13
|
+
|
|
14
|
+
You can parse a file of species names from the command line. The file should contain one scientific name per line.
|
|
15
|
+
|
|
16
|
+
nnparse file_with_names
|
|
17
|
+
|
|
18
|
+
You can use it as a library
|
|
19
|
+
|
|
20
|
+
require 'biodiversity'
|
|
21
|
+
|
|
22
|
+
parser = ScientificNameParser.new
|
|
23
|
+
|
|
24
|
+
# to parse a scientific name into a ruby hash
|
|
25
|
+
parser.parse("Plantago major")
|
|
26
|
+
|
|
27
|
+
#to get json representation
|
|
28
|
+
parser.parse("Plantago").to_json
|
|
29
|
+
#or
|
|
30
|
+
parser.parse("Plantago")
|
|
31
|
+
parser.parsed.all_json
|
|
32
|
+
|
|
33
|
+
# to clean name up
|
|
34
|
+
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
|
35
|
+
|
|
36
|
+
# to get only cleaned up latin part of the name
|
|
37
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:canonical]
|
|
38
|
+
|
|
39
|
+
# to get detailed information about elements of the name
|
|
40
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
|
|
41
|
+
|
|
42
|
+
# to resolve lsid and get back RDF file
|
|
43
|
+
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
|
44
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
dir = File.dirname(__FILE__)
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'rake'
|
|
4
|
+
#$LOAD_PATH.unshift(File.join(dir, 'vendor', 'rspec', 'lib'))
|
|
5
|
+
require 'spec/rake/spectask'
|
|
6
|
+
|
|
7
|
+
#Gem::manage_gems
|
|
8
|
+
#require 'rake/gempackagetask'
|
|
9
|
+
|
|
10
|
+
task :default => :spec
|
|
11
|
+
|
|
12
|
+
Spec::Rake::SpecTask.new do |t|
|
|
13
|
+
t.pattern = 'spec/**/*spec.rb'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
begin
|
|
18
|
+
require 'jeweler'
|
|
19
|
+
Jeweler::Tasks.new do |gem|
|
|
20
|
+
gem.name = "biodiversity"
|
|
21
|
+
gem.summary = 'Parser of scientific names'
|
|
22
|
+
gem.description = 'Tools for biodiversity informatics'
|
|
23
|
+
gem.email = "dmozzherin@gmail.com"
|
|
24
|
+
gem.homepage = "http://github.com/dimus/biodiversity"
|
|
25
|
+
gem.authors = ["Dmitry Mozzherin"]
|
|
26
|
+
gem.has_rdoc = false
|
|
27
|
+
gem.bindir = 'bin'
|
|
28
|
+
gem.executables = ['nnparse']
|
|
29
|
+
gem.add_dependency('treetop')
|
|
30
|
+
gem.add_dependency('json') if RUBY_VERSION.split(".")[0..1].join('').to_i < 19
|
|
31
|
+
gem.add_development_dependency "rspec"
|
|
32
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
33
|
+
end
|
|
34
|
+
rescue LoadError
|
|
35
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Runs the Treetop compiler (`tt`) on each of the three grammar files under
# lib/biodiversity/parser, in the order clean, dirty, canonical.
task :tt do
  %w[clean dirty canonical].each do |grammar|
    system("tt #{dir}/lib/biodiversity/parser/scientific_name_#{grammar}.treetop")
  end
end
|
|
43
|
+
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.5.14
|
data/bin/nnparse
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
gem 'dimus-biodiversity' rescue gem 'biodiversity' rescue nil
|
|
4
|
+
|
|
5
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
6
|
+
require 'biodiversity'
|
|
7
|
+
require 'json'
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Command-line driver: reads one scientific name per line from the input file,
# parses each name and writes one JSON document per line to the output file.
#
# Usage: nnparse file_with_scientific_names [output_file]
if ARGV.empty?
  puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
  exit
end

input = ARGV[0]
output = ARGV[1] || 'parsed.json'

# First two version components joined into an integer, e.g. "1.8.7" -> 18.
# Used below to decide whether $KCODE has to be switched off around parsing.
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i

# Named `parser` rather than `p` so that Kernel#p is not shadowed.
parser = ScientificNameParser.new
count = 0
puts 'Parsing...'
# The block form guarantees the output file is flushed and closed, even when
# reading the input or writing a record raises.
File.open(output, 'w') do |out|
  IO.foreach(input) do |line|
    count += 1
    puts("%s lines parsed" % count) if count % 10000 == 0
    # Drop an optional leading run of digits (e.g. a record id) and the
    # surrounding whitespace.
    name = line.gsub(/^[\d]*\s*/, '').strip
    begin
      if ruby_min_version < 19
        old_kcode = $KCODE
        $KCODE = 'NONE'
      end
      parser.parse(name)
      parsed_data = parser.parsed.all_json
    rescue
      # Any failure while parsing or serializing is reported as an unparsed record.
      parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
    ensure
      # Restore $KCODE on every path, including the failure path.
      $KCODE = old_kcode if ruby_min_version < 19
    end
    out.write parsed_data + "\n"
  end
end
|
|
43
|
+
|
data/bin/parserver
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'socket'
|
|
4
|
+
require 'biodiversity' # Provides ScientificNameParser
|
|
5
|
+
# Line-oriented TCP service: every line a client sends is parsed as a
# scientific name and answered with one line of JSON. A line consisting of
# "end", "exit", "q" or "." ends the session.
parser = ScientificNameParser.new
server = TCPServer.open(4334)  # Socket to listen on port 4334
loop do                        # Servers run forever
  client = server.accept       # Wait for a client to connect
  begin
    # IO#gets returns nil at end of stream (IO#readline would raise EOFError),
    # so a client that simply disconnects ends the inner loop cleanly.
    while (line = client.gets)
      # Stop before replying: writing to an already closed socket raises IOError.
      break if ['end', 'exit', 'q', '.'].include?(line.strip)
      client.puts parser.parse(line).to_json
    end
  rescue IOError, SystemCallError
    # The connection broke mid-session; drop this client and keep serving.
  ensure
    client.close unless client.closed?
  end
end
|
|
14
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Generated by jeweler
|
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
|
4
|
+
# -*- encoding: utf-8 -*-
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |s|
|
|
7
|
+
s.name = %q{biodiversity}
|
|
8
|
+
s.version = "0.5.14"
|
|
9
|
+
|
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
|
+
s.authors = ["Dmitry Mozzherin"]
|
|
12
|
+
s.date = %q{2010-03-19}
|
|
13
|
+
s.default_executable = %q{nnparse}
|
|
14
|
+
s.description = %q{Tools for biodiversity informatics}
|
|
15
|
+
s.email = %q{dmozzherin@gmail.com}
|
|
16
|
+
s.executables = ["nnparse"]
|
|
17
|
+
s.extra_rdoc_files = [
|
|
18
|
+
"LICENSE",
|
|
19
|
+
"README.rdoc"
|
|
20
|
+
]
|
|
21
|
+
s.files = [
|
|
22
|
+
".document",
|
|
23
|
+
".gitignore",
|
|
24
|
+
"LICENSE",
|
|
25
|
+
"README.rdoc",
|
|
26
|
+
"Rakefile",
|
|
27
|
+
"VERSION",
|
|
28
|
+
"bin/nnparse",
|
|
29
|
+
"bin/parserver",
|
|
30
|
+
"biodiversity.gemspec",
|
|
31
|
+
"conf/environment.rb",
|
|
32
|
+
"lib/biodiversity.rb",
|
|
33
|
+
"lib/biodiversity/guid.rb",
|
|
34
|
+
"lib/biodiversity/guid/lsid.rb",
|
|
35
|
+
"lib/biodiversity/parser.rb",
|
|
36
|
+
"lib/biodiversity/parser/scientific_name_canonical.rb",
|
|
37
|
+
"lib/biodiversity/parser/scientific_name_canonical.treetop",
|
|
38
|
+
"lib/biodiversity/parser/scientific_name_clean.rb",
|
|
39
|
+
"lib/biodiversity/parser/scientific_name_clean.treetop",
|
|
40
|
+
"lib/biodiversity/parser/scientific_name_dirty.rb",
|
|
41
|
+
"lib/biodiversity/parser/scientific_name_dirty.treetop",
|
|
42
|
+
"pkg/.gitignore",
|
|
43
|
+
"spec/biodiversity_spec.rb",
|
|
44
|
+
"spec/guid/lsid.spec.rb",
|
|
45
|
+
"spec/parser/scientific_name.spec.rb",
|
|
46
|
+
"spec/parser/scientific_name_canonical.spec.rb",
|
|
47
|
+
"spec/parser/scientific_name_clean.spec.rb",
|
|
48
|
+
"spec/parser/scientific_name_dirty.spec.rb",
|
|
49
|
+
"spec/parser/spec_helper.rb",
|
|
50
|
+
"spec/parser/test_data.txt",
|
|
51
|
+
"spec/spec_helper.rb"
|
|
52
|
+
]
|
|
53
|
+
s.homepage = %q{http://github.com/dimus/biodiversity}
|
|
54
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
|
55
|
+
s.require_paths = ["lib"]
|
|
56
|
+
s.rubygems_version = %q{1.3.6}
|
|
57
|
+
s.summary = %q{Parser of scientific names}
|
|
58
|
+
s.test_files = [
|
|
59
|
+
"spec/biodiversity_spec.rb",
|
|
60
|
+
"spec/guid/lsid.spec.rb",
|
|
61
|
+
"spec/parser/scientific_name.spec.rb",
|
|
62
|
+
"spec/parser/scientific_name_canonical.spec.rb",
|
|
63
|
+
"spec/parser/scientific_name_clean.spec.rb",
|
|
64
|
+
"spec/parser/scientific_name_dirty.spec.rb",
|
|
65
|
+
"spec/parser/spec_helper.rb",
|
|
66
|
+
"spec/spec_helper.rb"
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
if s.respond_to? :specification_version then
|
|
70
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
71
|
+
s.specification_version = 3
|
|
72
|
+
|
|
73
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
74
|
+
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
|
75
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
|
76
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
|
77
|
+
else
|
|
78
|
+
s.add_dependency(%q<treetop>, [">= 0"])
|
|
79
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
80
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
|
81
|
+
end
|
|
82
|
+
else
|
|
83
|
+
s.add_dependency(%q<treetop>, [">= 0"])
|
|
84
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
85
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
data/conf/environment.rb
ADDED
data/lib/biodiversity.rb
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'treetop'
|
|
3
|
+
|
|
4
|
+
dir = File.dirname(__FILE__)
|
|
5
|
+
|
|
6
|
+
BIODIVERSITY_ROOT = File.join(dir, 'biodiversity')
|
|
7
|
+
require File.join(dir, "/../conf/environment")
|
|
8
|
+
require File.join(BIODIVERSITY_ROOT, "parser")
|
|
9
|
+
require File.join(BIODIVERSITY_ROOT, "guid")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
|
|
3
|
+
# Fetches the document published for an LSID by appending the identifier to
# LSID_RESOLVER_URL (a constant defined outside this file) and reading the
# result.
class LsidResolver
  # Returns the resolved document for +lsid+ as a String.
  def self.resolve(lsid)
    http_get_rdf(lsid)
  end

  # Opens LSID_RESOLVER_URL + lsid and returns its content with every
  # whitespace-only line removed.
  #
  # This method stays public: the `protected` keyword that used to precede it
  # only affects instance methods and never applied to `def self.` methods.
  def self.http_get_rdf(lsid)
    url = LSID_RESOLVER_URL + lsid
    rdf = ''
    # String#empty? is core Ruby; String#blank? is an ActiveSupport extension
    # that this library does not load.
    collect = lambda do |io|
      io.each { |line| rdf << line unless line.strip.empty? }
    end
    # open-uri exposes URI.open on newer Rubies, where Kernel#open no longer
    # opens URLs; older Rubies only have the Kernel#open hook.
    if URI.respond_to?(:open)
      URI.open(url, &collect)
    else
      open(url, &collect)
    end
    rdf
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
dir = File.dirname(__FILE__)
|
|
3
|
+
require File.join(dir, *%w[parser scientific_name_clean])
|
|
4
|
+
require File.join(dir, *%w[parser scientific_name_dirty])
|
|
5
|
+
require File.join(dir, *%w[parser scientific_name_canonical])
|
|
6
|
+
require 'rubygems'
|
|
7
|
+
require 'json'
|
|
8
|
+
|
|
9
|
+
# Front end for the three generated grammars: a name is offered to the clean,
# dirty and canonical parsers in that order and the first successful result is
# kept. When none of them accepts the name, a plain Hash holding the verbatim
# string stands in for the parse result.
class ScientificNameParser

  # Reporting methods mixed into the result of every #parse call, whether that
  # result is a syntax node or the fallback Hash.
  module Report
    # Builds {:scientificName => {...}}. :parsed is false only for the
    # fallback Hash, whose own keys are then merged into the report.
    def all
      succeeded = !instance_of?(Hash)
      report = {:parsed => succeeded}
      if succeeded
        # Nodes without a usable #hybrid are reported as non-hybrid.
        is_hybrid = begin
          self.hybrid
        rescue
          false
        end
        report.merge!(
          :verbatim   => text_value,
          :normalized => value,
          :canonical  => canonical,
          :hybrid     => is_hybrid,
          :details    => details,
          :positions  => pos
        )
      else
        report.merge!(self)
      end
      {:scientificName => report}
    end

    # JSON of the position map, or '' when it cannot be produced.
    def pos_json
      pos.to_json
    rescue
      ''
    end

    # JSON of #all, or '' when it cannot be produced.
    def all_json
      all.to_json
    rescue
      ''
    end
  end

  # Result of the most recent #parse call (nil before the first call).
  attr_reader :parsed

  def initialize
    @verbatim  = ''
    @clean     = ScientificNameCleanParser.new
    @dirty     = ScientificNameDirtyParser.new
    @canonical = ScientificNameCanonicalParser.new
    @parsed    = nil
  end

  # Parses +a_string+ and returns the report Hash built by Report#all.
  # The underlying result stays available through #parsed.
  def parse(a_string)
    @verbatim = a_string
    node = nil
    [@clean, @dirty, @canonical].each do |grammar|
      node = grammar.parse(a_string)
      break if node
    end
    @parsed = node || {:verbatim => a_string}
    @parsed.extend(Report)
    @parsed.all
  end
end
|
|
57
|
+
|
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
module ScientificNameCanonical
|
|
3
|
+
include Treetop::Runtime
|
|
4
|
+
|
|
5
|
+
def root
|
|
6
|
+
@root || :root
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
include ScientificNameClean
|
|
10
|
+
|
|
11
|
+
include ScientificNameDirty
|
|
12
|
+
|
|
13
|
+
module Root0
|
|
14
|
+
def hybrid
|
|
15
|
+
false
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def details
|
|
19
|
+
[super]
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
module Root1
|
|
24
|
+
def hybrid
|
|
25
|
+
false
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def details
|
|
29
|
+
[super]
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Matches the root rule at the current index: first
# multinomial_with_garbage, then uninomial_with_garbage. Returns the matching
# node extended with Root0 or Root1 respectively, or nil when neither
# alternative matches (the index is then reset to where it started).
# Results, including failures, are memoized in node_cache[:root] by start index.
#
# NOTE: this file is Treetop compiler output; regenerating it from the
# grammar may reintroduce the unguarded extend calls fixed here.
def _nt_root
  start_index = index
  if node_cache[:root].has_key?(index)
    cached = node_cache[:root][index]
    # A cached success also moves the index to the end of the cached match.
    @index = cached.interval.end if cached
    return cached
  end

  i0 = index
  r1 = _nt_multinomial_with_garbage
  if r1
    # Extend only a real node: extending nil would mix the module into
    # NilClass and leak #hybrid and #details onto every nil in the process.
    r1.extend(Root0)
    r0 = r1
  else
    r2 = _nt_uninomial_with_garbage
    if r2
      r2.extend(Root1)
      r0 = r2
    else
      @index = i0
      r0 = nil
    end
  end

  node_cache[:root][start_index] = r0

  r0
end
|
|
61
|
+
|
|
62
|
+
module MultinomialWithGarbage0
|
|
63
|
+
def a
|
|
64
|
+
elements[0]
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def space
|
|
68
|
+
elements[1]
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def b
|
|
72
|
+
elements[2]
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def space
|
|
76
|
+
elements[3]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def c
|
|
80
|
+
elements[4]
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def garbage
|
|
84
|
+
elements[5]
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
module MultinomialWithGarbage1
|
|
89
|
+
def value
|
|
90
|
+
a.value + " " + b.value + " " + c.value
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def canonical
|
|
94
|
+
a.canonical + " " + b.canonical + " " + c.canonical
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def pos
|
|
98
|
+
a.pos.merge(b.pos).merge(c.pos)
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def details
|
|
102
|
+
a.details.merge(b.details).merge(c.details)
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
module MultinomialWithGarbage2
|
|
107
|
+
def a
|
|
108
|
+
elements[0]
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def space
|
|
112
|
+
elements[1]
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def b
|
|
116
|
+
elements[2]
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def garbage
|
|
120
|
+
elements[3]
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
module MultinomialWithGarbage3
|
|
125
|
+
def value
|
|
126
|
+
a.value + " " + b.value
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def canonical
|
|
130
|
+
a.canonical + " " + b.canonical
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def pos
|
|
134
|
+
a.pos.merge(b.pos)
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def details
|
|
138
|
+
a.details.merge(b.details)
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
module MultinomialWithGarbage4
|
|
143
|
+
def a
|
|
144
|
+
elements[0]
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def space
|
|
148
|
+
elements[1]
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def b
|
|
152
|
+
elements[2]
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def garbage
|
|
156
|
+
elements[3]
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
module MultinomialWithGarbage5
|
|
161
|
+
def value
|
|
162
|
+
a.value + " " + b.value
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def canonical
|
|
166
|
+
a.canonical + " " + b.canonical
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def pos
|
|
170
|
+
a.pos.merge(b.pos)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def details
|
|
174
|
+
a.details.merge(b.details)
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def _nt_multinomial_with_garbage
|
|
179
|
+
start_index = index
|
|
180
|
+
if node_cache[:multinomial_with_garbage].has_key?(index)
|
|
181
|
+
cached = node_cache[:multinomial_with_garbage][index]
|
|
182
|
+
@index = cached.interval.end if cached
|
|
183
|
+
return cached
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
i0 = index
|
|
187
|
+
i1, s1 = index, []
|
|
188
|
+
r2 = _nt_genus
|
|
189
|
+
s1 << r2
|
|
190
|
+
if r2
|
|
191
|
+
r3 = _nt_space
|
|
192
|
+
s1 << r3
|
|
193
|
+
if r3
|
|
194
|
+
r4 = _nt_subgenus
|
|
195
|
+
s1 << r4
|
|
196
|
+
if r4
|
|
197
|
+
r5 = _nt_space
|
|
198
|
+
s1 << r5
|
|
199
|
+
if r5
|
|
200
|
+
r6 = _nt_species
|
|
201
|
+
s1 << r6
|
|
202
|
+
if r6
|
|
203
|
+
r7 = _nt_garbage
|
|
204
|
+
s1 << r7
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
if s1.last
|
|
211
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
212
|
+
r1.extend(MultinomialWithGarbage0)
|
|
213
|
+
r1.extend(MultinomialWithGarbage1)
|
|
214
|
+
else
|
|
215
|
+
@index = i1
|
|
216
|
+
r1 = nil
|
|
217
|
+
end
|
|
218
|
+
if r1
|
|
219
|
+
r0 = r1
|
|
220
|
+
else
|
|
221
|
+
i8, s8 = index, []
|
|
222
|
+
r9 = _nt_genus
|
|
223
|
+
s8 << r9
|
|
224
|
+
if r9
|
|
225
|
+
r10 = _nt_space
|
|
226
|
+
s8 << r10
|
|
227
|
+
if r10
|
|
228
|
+
r11 = _nt_subgenus
|
|
229
|
+
s8 << r11
|
|
230
|
+
if r11
|
|
231
|
+
r12 = _nt_garbage
|
|
232
|
+
s8 << r12
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
if s8.last
|
|
237
|
+
r8 = instantiate_node(SyntaxNode,input, i8...index, s8)
|
|
238
|
+
r8.extend(MultinomialWithGarbage2)
|
|
239
|
+
r8.extend(MultinomialWithGarbage3)
|
|
240
|
+
else
|
|
241
|
+
@index = i8
|
|
242
|
+
r8 = nil
|
|
243
|
+
end
|
|
244
|
+
if r8
|
|
245
|
+
r0 = r8
|
|
246
|
+
else
|
|
247
|
+
i13, s13 = index, []
|
|
248
|
+
r14 = _nt_genus
|
|
249
|
+
s13 << r14
|
|
250
|
+
if r14
|
|
251
|
+
r15 = _nt_space
|
|
252
|
+
s13 << r15
|
|
253
|
+
if r15
|
|
254
|
+
r16 = _nt_species
|
|
255
|
+
s13 << r16
|
|
256
|
+
if r16
|
|
257
|
+
r17 = _nt_garbage
|
|
258
|
+
s13 << r17
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
if s13.last
|
|
263
|
+
r13 = instantiate_node(SyntaxNode,input, i13...index, s13)
|
|
264
|
+
r13.extend(MultinomialWithGarbage4)
|
|
265
|
+
r13.extend(MultinomialWithGarbage5)
|
|
266
|
+
else
|
|
267
|
+
@index = i13
|
|
268
|
+
r13 = nil
|
|
269
|
+
end
|
|
270
|
+
if r13
|
|
271
|
+
r0 = r13
|
|
272
|
+
else
|
|
273
|
+
@index = i0
|
|
274
|
+
r0 = nil
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
node_cache[:multinomial_with_garbage][start_index] = r0
|
|
280
|
+
|
|
281
|
+
r0
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
module UninomialWithGarbage0
|
|
285
|
+
def a
|
|
286
|
+
elements[0]
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
def b
|
|
290
|
+
elements[1]
|
|
291
|
+
end
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
module UninomialWithGarbage1
|
|
295
|
+
def value
|
|
296
|
+
a.value
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
def canonical
|
|
300
|
+
a.canonical
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def pos
|
|
304
|
+
a.pos
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def details
|
|
308
|
+
{:uninomial => a.details[:uninomial]}
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def _nt_uninomial_with_garbage
|
|
313
|
+
start_index = index
|
|
314
|
+
if node_cache[:uninomial_with_garbage].has_key?(index)
|
|
315
|
+
cached = node_cache[:uninomial_with_garbage][index]
|
|
316
|
+
@index = cached.interval.end if cached
|
|
317
|
+
return cached
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
i0, s0 = index, []
|
|
321
|
+
r1 = _nt_uninomial_epitheton
|
|
322
|
+
s0 << r1
|
|
323
|
+
if r1
|
|
324
|
+
r2 = _nt_garbage
|
|
325
|
+
s0 << r2
|
|
326
|
+
end
|
|
327
|
+
if s0.last
|
|
328
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
329
|
+
r0.extend(UninomialWithGarbage0)
|
|
330
|
+
r0.extend(UninomialWithGarbage1)
|
|
331
|
+
else
|
|
332
|
+
@index = i0
|
|
333
|
+
r0 = nil
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
node_cache[:uninomial_with_garbage][start_index] = r0
|
|
337
|
+
|
|
338
|
+
r0
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
module Garbage0
|
|
342
|
+
def space
|
|
343
|
+
elements[0]
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
def space
|
|
347
|
+
elements[2]
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
module Garbage1
|
|
353
|
+
def space_hard
|
|
354
|
+
elements[0]
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
def _nt_garbage
|
|
360
|
+
start_index = index
|
|
361
|
+
if node_cache[:garbage].has_key?(index)
|
|
362
|
+
cached = node_cache[:garbage][index]
|
|
363
|
+
@index = cached.interval.end if cached
|
|
364
|
+
return cached
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
i0 = index
|
|
368
|
+
i1, s1 = index, []
|
|
369
|
+
r2 = _nt_space
|
|
370
|
+
s1 << r2
|
|
371
|
+
if r2
|
|
372
|
+
if has_terminal?('\G["\',.]', true, index)
|
|
373
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
374
|
+
@index += 1
|
|
375
|
+
else
|
|
376
|
+
r3 = nil
|
|
377
|
+
end
|
|
378
|
+
s1 << r3
|
|
379
|
+
if r3
|
|
380
|
+
r4 = _nt_space
|
|
381
|
+
s1 << r4
|
|
382
|
+
if r4
|
|
383
|
+
s5, i5 = [], index
|
|
384
|
+
loop do
|
|
385
|
+
if has_terminal?('\G[^щ]', true, index)
|
|
386
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
387
|
+
@index += 1
|
|
388
|
+
else
|
|
389
|
+
r6 = nil
|
|
390
|
+
end
|
|
391
|
+
if r6
|
|
392
|
+
s5 << r6
|
|
393
|
+
else
|
|
394
|
+
break
|
|
395
|
+
end
|
|
396
|
+
end
|
|
397
|
+
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
|
398
|
+
s1 << r5
|
|
399
|
+
end
|
|
400
|
+
end
|
|
401
|
+
end
|
|
402
|
+
if s1.last
|
|
403
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
404
|
+
r1.extend(Garbage0)
|
|
405
|
+
else
|
|
406
|
+
@index = i1
|
|
407
|
+
r1 = nil
|
|
408
|
+
end
|
|
409
|
+
if r1
|
|
410
|
+
r0 = r1
|
|
411
|
+
else
|
|
412
|
+
i7, s7 = index, []
|
|
413
|
+
r8 = _nt_space_hard
|
|
414
|
+
s7 << r8
|
|
415
|
+
if r8
|
|
416
|
+
s9, i9 = [], index
|
|
417
|
+
loop do
|
|
418
|
+
if has_terminal?('\G[^ш]', true, index)
|
|
419
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
420
|
+
@index += 1
|
|
421
|
+
else
|
|
422
|
+
r10 = nil
|
|
423
|
+
end
|
|
424
|
+
if r10
|
|
425
|
+
s9 << r10
|
|
426
|
+
else
|
|
427
|
+
break
|
|
428
|
+
end
|
|
429
|
+
end
|
|
430
|
+
if s9.empty?
|
|
431
|
+
@index = i9
|
|
432
|
+
r9 = nil
|
|
433
|
+
else
|
|
434
|
+
r9 = instantiate_node(SyntaxNode,input, i9...index, s9)
|
|
435
|
+
end
|
|
436
|
+
s7 << r9
|
|
437
|
+
end
|
|
438
|
+
if s7.last
|
|
439
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
|
440
|
+
r7.extend(Garbage1)
|
|
441
|
+
else
|
|
442
|
+
@index = i7
|
|
443
|
+
r7 = nil
|
|
444
|
+
end
|
|
445
|
+
if r7
|
|
446
|
+
r0 = r7
|
|
447
|
+
else
|
|
448
|
+
@index = i0
|
|
449
|
+
r0 = nil
|
|
450
|
+
end
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
node_cache[:garbage][start_index] = r0
|
|
454
|
+
|
|
455
|
+
r0
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
end
|
|
459
|
+
|
|
460
|
+
class ScientificNameCanonicalParser < Treetop::Runtime::CompiledParser
|
|
461
|
+
include ScientificNameCanonical
|
|
462
|
+
end
|