biodiversity19 0.5.15 → 0.5.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Rakefile +20 -4
- data/VERSION +1 -1
- data/bin/nnparse +2 -2
- data/{biodiversity.gemspec → biodiversity19.gemspec} +11 -8
- data/lib/biodiversity/parser/scientific_name_canonical.rb +9 -3
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +8 -1
- data/lib/biodiversity/parser/scientific_name_clean.rb +362 -386
- data/lib/biodiversity/parser/scientific_name_clean.treetop +39 -45
- data/lib/biodiversity/parser/scientific_name_dirty.rb +215 -2
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +62 -1
- data/lib/biodiversity/parser.rb +1 -0
- data/spec/parser/scientific_name_canonical.spec.rb +1 -2
- data/spec/parser/scientific_name_clean.spec.rb +45 -23
- data/spec/parser/scientific_name_dirty.spec.rb +17 -1
- data/spec/parser/test_data.txt +148 -148
- metadata +23 -11
data/.gitignore
CHANGED
data/Rakefile
CHANGED
|
@@ -19,7 +19,7 @@ begin
|
|
|
19
19
|
Jeweler::Tasks.new do |gem|
|
|
20
20
|
gem.name = "biodiversity19"
|
|
21
21
|
gem.summary = 'Parser of scientific names'
|
|
22
|
-
gem.description = 'Tools for biodiversity informatics
|
|
22
|
+
gem.description = 'Tools for biodiversity informatics'
|
|
23
23
|
gem.email = "dmozzherin@gmail.com"
|
|
24
24
|
gem.homepage = "http://github.com/dimus/biodiversity"
|
|
25
25
|
gem.authors = ["Dmitry Mozzherin"]
|
|
@@ -36,8 +36,24 @@ rescue LoadError
|
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
task :tt do
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
['scientific_name_clean', 'scientific_name_dirty', 'scientific_name_canonical'].each do |f|
|
|
40
|
+
system("tt #{dir}/lib/biodiversity/parser/#{f}.treetop")
|
|
41
|
+
rf = "#{dir}/lib/biodiversity/parser/#{f}.rb"
|
|
42
|
+
rfn = open(rf + ".tmp", 'w')
|
|
43
|
+
skip_head = false
|
|
44
|
+
f = open(rf)
|
|
45
|
+
f.each_with_index do |l, i|
|
|
46
|
+
skip_head = l.match(/^# Autogenerated/) if i == 0
|
|
47
|
+
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
|
48
|
+
next
|
|
49
|
+
else
|
|
50
|
+
skip_head = false
|
|
51
|
+
rfn.write(l)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
rfn.close
|
|
55
|
+
f.close
|
|
56
|
+
`mv #{rf}.tmp #{rf}`
|
|
57
|
+
end
|
|
42
58
|
end
|
|
43
59
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.5.15
|
|
1
|
+
0.5.16
|
data/bin/nnparse
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
require 'rubygems'
|
|
3
|
-
gem '
|
|
3
|
+
gem 'biodiversity' rescue nil
|
|
4
4
|
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
6
6
|
require 'biodiversity'
|
|
@@ -31,7 +31,7 @@ IO.foreach(input) do |line|
|
|
|
31
31
|
$KCODE = 'NONE'
|
|
32
32
|
end
|
|
33
33
|
p.parse(name)
|
|
34
|
-
parsed_data = p.parsed.all_json rescue {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
|
34
|
+
parsed_data = p.parsed.all_json rescue {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
|
|
35
35
|
if ruby_min_version < 19
|
|
36
36
|
$KCODE = old_kcode
|
|
37
37
|
end
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
|
-
s.name = %q{
|
|
8
|
-
s.version = "0.5.15"
|
|
7
|
+
s.name = %q{biodiversity19}
|
|
8
|
+
s.version = "0.5.16"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
|
12
|
-
s.date = %q{2010-
|
|
12
|
+
s.date = %q{2010-04-08}
|
|
13
13
|
s.default_executable = %q{nnparse}
|
|
14
14
|
s.description = %q{Tools for biodiversity informatics}
|
|
15
15
|
s.email = %q{dmozzherin@gmail.com}
|
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |s|
|
|
|
27
27
|
"VERSION",
|
|
28
28
|
"bin/nnparse",
|
|
29
29
|
"bin/parserver",
|
|
30
|
-
"
|
|
30
|
+
"biodiversity19.gemspec",
|
|
31
31
|
"conf/environment.rb",
|
|
32
32
|
"lib/biodiversity.rb",
|
|
33
33
|
"lib/biodiversity/guid.rb",
|
|
@@ -56,13 +56,13 @@ Gem::Specification.new do |s|
|
|
|
56
56
|
s.rubygems_version = %q{1.3.6}
|
|
57
57
|
s.summary = %q{Parser of scientific names}
|
|
58
58
|
s.test_files = [
|
|
59
|
-
"spec/
|
|
59
|
+
"spec/biodiversity_spec.rb",
|
|
60
|
+
"spec/guid/lsid.spec.rb",
|
|
61
|
+
"spec/parser/scientific_name.spec.rb",
|
|
60
62
|
"spec/parser/scientific_name_canonical.spec.rb",
|
|
61
63
|
"spec/parser/scientific_name_clean.spec.rb",
|
|
64
|
+
"spec/parser/scientific_name_dirty.spec.rb",
|
|
62
65
|
"spec/parser/spec_helper.rb",
|
|
63
|
-
"spec/parser/scientific_name.spec.rb",
|
|
64
|
-
"spec/biodiversity_spec.rb",
|
|
65
|
-
"spec/guid/lsid.spec.rb",
|
|
66
66
|
"spec/spec_helper.rb"
|
|
67
67
|
]
|
|
68
68
|
|
|
@@ -72,13 +72,16 @@ Gem::Specification.new do |s|
|
|
|
72
72
|
|
|
73
73
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
74
74
|
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
|
75
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
|
75
76
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
|
76
77
|
else
|
|
77
78
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
79
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
78
80
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
79
81
|
end
|
|
80
82
|
else
|
|
81
83
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
84
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
82
85
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
83
86
|
end
|
|
84
87
|
end
|
|
@@ -3,11 +3,9 @@ module ScientificNameCanonical
|
|
|
3
3
|
include Treetop::Runtime
|
|
4
4
|
|
|
5
5
|
def root
|
|
6
|
-
@root
|
|
6
|
+
@root ||= :root
|
|
7
7
|
end
|
|
8
8
|
|
|
9
|
-
include ScientificNameClean
|
|
10
|
-
|
|
11
9
|
include ScientificNameDirty
|
|
12
10
|
|
|
13
11
|
module Root0
|
|
@@ -18,6 +16,10 @@ module ScientificNameCanonical
|
|
|
18
16
|
def details
|
|
19
17
|
[super]
|
|
20
18
|
end
|
|
19
|
+
|
|
20
|
+
def parser_run
|
|
21
|
+
3
|
|
22
|
+
end
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
module Root1
|
|
@@ -28,6 +30,10 @@ module ScientificNameCanonical
|
|
|
28
30
|
def details
|
|
29
31
|
[super]
|
|
30
32
|
end
|
|
33
|
+
|
|
34
|
+
def parser_run
|
|
35
|
+
3
|
|
36
|
+
end
|
|
31
37
|
end
|
|
32
38
|
|
|
33
39
|
def _nt_root
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
2
|
grammar ScientificNameCanonical
|
|
3
|
-
include ScientificNameClean
|
|
4
3
|
include ScientificNameDirty
|
|
5
4
|
|
|
6
5
|
rule root
|
|
@@ -12,6 +11,10 @@ grammar ScientificNameCanonical
|
|
|
12
11
|
def details
|
|
13
12
|
[super]
|
|
14
13
|
end
|
|
14
|
+
|
|
15
|
+
def parser_run
|
|
16
|
+
3
|
|
17
|
+
end
|
|
15
18
|
}
|
|
16
19
|
/
|
|
17
20
|
uninomial_with_garbage {
|
|
@@ -22,6 +25,10 @@ grammar ScientificNameCanonical
|
|
|
22
25
|
def details
|
|
23
26
|
[super]
|
|
24
27
|
end
|
|
28
|
+
|
|
29
|
+
def parser_run
|
|
30
|
+
3
|
|
31
|
+
end
|
|
25
32
|
}
|
|
26
33
|
end
|
|
27
34
|
|