biodiversity19 1.0.12 → 1.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -0
- data/Gemfile +3 -1
- data/Gemfile.lock +0 -15
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +7 -2
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1 -1
- data/spec/parser/test_data.txt +13 -0
- metadata +3 -2
data/CHANGELOG
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
1.0.13 -- canonical forms for cf. aff. qualifiers are modified: canonical for
|
2
|
+
'Aus cf. bus' is now 'Aus bus'; canonical for 'Aus aff. bus' is now 'Aus'.
|
3
|
+
Ranks at the end of the name like 'var', 'ssp', 'spp' are considered junk and
|
4
|
+
are ignored
|
5
|
+
|
6
|
+
1.0.12 -- bug is fixed which prevented 'Cucurbita pepo' be parsed correctly,
|
7
|
+
f., forma, fr. are now treated as any other ranks.
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,16 +1,12 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
archive-tar-minitar (0.5.2)
|
5
|
-
columnize (0.3.4)
|
6
4
|
diff-lcs (1.1.3)
|
7
5
|
git (1.2.5)
|
8
6
|
jeweler (1.6.4)
|
9
7
|
bundler (~> 1.0)
|
10
8
|
git (>= 1.2.5)
|
11
9
|
rake
|
12
|
-
linecache19 (0.5.12)
|
13
|
-
ruby_core_source (>= 0.1.4)
|
14
10
|
parallel (0.5.9)
|
15
11
|
polyglot (0.3.3)
|
16
12
|
rake (0.9.2.2)
|
@@ -22,16 +18,6 @@ GEM
|
|
22
18
|
rspec-expectations (2.7.0)
|
23
19
|
diff-lcs (~> 1.1.2)
|
24
20
|
rspec-mocks (2.7.0)
|
25
|
-
ruby-debug-base19 (0.11.25)
|
26
|
-
columnize (>= 0.3.1)
|
27
|
-
linecache19 (>= 0.5.11)
|
28
|
-
ruby_core_source (>= 0.1.4)
|
29
|
-
ruby-debug19 (0.11.6)
|
30
|
-
columnize (>= 0.3.1)
|
31
|
-
linecache19 (>= 0.5.11)
|
32
|
-
ruby-debug-base19 (>= 0.11.19)
|
33
|
-
ruby_core_source (0.1.5)
|
34
|
-
archive-tar-minitar (>= 0.5.2)
|
35
21
|
treetop (1.4.10)
|
36
22
|
polyglot
|
37
23
|
polyglot (>= 0.3.1)
|
@@ -43,5 +29,4 @@ DEPENDENCIES
|
|
43
29
|
jeweler
|
44
30
|
parallel
|
45
31
|
rspec
|
46
|
-
ruby-debug19
|
47
32
|
treetop
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.12
|
1
|
+
1.0.13
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -12,12 +12,17 @@ module PreProcessor
|
|
12
12
|
TAXON_CONCEPTS2 = /\s+(\(?s\.\s?s\.|\(?s\.\s?l\.|\(?s\.\s?str\.|\(?s\.\s?lat\.|sec\.|sec|near)\b.*$/
|
13
13
|
TAXON_CONCEPTS3 = /(,\s*|\s+)(pro parte|p\.\s?p\.)\s*$/i
|
14
14
|
NOMEN_CONCEPTS = /(,\s*|\s+)(\(?nomen|\(?nom\.|\(?comb\.).*$/i
|
15
|
-
|
15
|
+
COMPARATORS = /\s+(aff\.|aff)\b.*$/i
|
16
|
+
CF_COMPARATOR = /\s+(cf\.|cf)\s+/i
|
17
|
+
LAST_WORD_JUNK = /(,\s*|\s+)(spp\.|spp|var\.|var|von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort\.|hort)\s*$/i
|
16
18
|
|
17
19
|
def self.clean(a_string)
|
18
|
-
[NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK].each do |i|
|
20
|
+
[NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, COMPARATORS, LAST_WORD_JUNK].each do |i|
|
19
21
|
a_string = a_string.gsub(i, '')
|
20
22
|
end
|
23
|
+
[CF_COMPARATOR].each do |i|
|
24
|
+
a_string = a_string.gsub(i, ' ')
|
25
|
+
end
|
21
26
|
a_string = a_string.tr('ſ','s') #old 's'
|
22
27
|
a_string
|
23
28
|
end
|
data/lib/biodiversity/parser/scientific_name_clean.treetop
CHANGED
@@ -401,7 +401,7 @@ grammar ScientificNameClean
|
|
401
401
|
end
|
402
402
|
}
|
403
403
|
end
|
404
|
-
|
404
|
+
|
405
405
|
rule rank
|
406
406
|
("morph."/"f.sp."/"B"/"ssp."/"ssp"/"mut."/"nat"/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"forma"/"form."/"form"/"fo."/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
407
407
|
{
|
data/spec/parser/test_data.txt
CHANGED
@@ -125,6 +125,19 @@ Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 19
|
|
125
125
|
Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. et Heldr.) Hayek", "canonical":"Senecio fuchsii fuchsii expansus", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"expansus", "rank":"var.", "authorship":"(Boiss. & Heldr.) Hayek", "combinationAuthorTeam":{"authorTeam":"Hayek", "author":["Hayek"]}, "basionymAuthorTeam":{"authorTeam":"Boiss. & Heldr.", "author":["Boiss.", "Heldr."]}}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 54], "56":["author_word", 62], "65":["author_word", 71], "73":["author_word", 78]}}}
|
126
126
|
Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "canonical":"Senecio fuchsii fuchsii fuchsii", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"fuchsii", "rank":"var."}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 53]}}}
|
127
127
|
|
128
|
+
|
129
|
+
#species and infraspecies without epithets, comparisons
|
130
|
+
Alviniconcha aff alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
|
131
|
+
Alviniconcha aff. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff. alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
|
132
|
+
Alviniconcha cf. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf. alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
|
133
|
+
Alviniconcha cf alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
|
134
|
+
Alyxia reinwardti var|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
135
|
+
Alyxia reinwardti var.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
136
|
+
Alyxia reinwardti ssp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
137
|
+
Alyxia reinwardti ssp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp.", "normalized":"Alyxia reinwardti ssp.", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}, "status":"ssp."}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
138
|
+
Alaria spp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
|
139
|
+
Alaria spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp.", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
|
140
|
+
|
128
141
|
#unknown authorship
|
129
142
|
Tragacantha leporina (?) Kuntze|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Tragacantha leporina (?) Kuntze","normalized":"Tragacantha leporina (?) Kuntze","canonical":"Tragacantha leporina","hybrid":false,"details":[{"genus":{"string":"Tragacantha"},"species":{"string":"leporina","authorship":"(?) Kuntze","combinationAuthorTeam":{"authorTeam":"Kuntze","author":["Kuntze"]},"basionymAuthorTeam":{"authorTeam":"(?)","author":["?"]}}}],"positions":{"0":["genus",11],"12":["species",20],"22":["unknown_author",23],"25":["author_word",31]}}}
|
130
143
|
Lachenalia tricolor var. nelsonii (auct.) Baker|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lachenalia tricolor var. nelsonii (auct.) Baker", "normalized":"Lachenalia tricolor var. nelsonii (auct.) Baker", "canonical":"Lachenalia tricolor nelsonii", "hybrid":false, "details":[{"genus":{"string":"Lachenalia"}, "species":{"string":"tricolor"}, "infraspecies":[{"string":"nelsonii", "rank":"var.", "authorship":"(auct.) Baker", "combinationAuthorTeam":{"authorTeam":"Baker", "author":["Baker"]}, "basionymAuthorTeam":{"authorTeam":"auct.", "author":["unknown"]}}]}], "parser_run":1, "positions":{"0":["genus", 10], "11":["species", 19], "20":["infraspecific_type", 24], "25":["infraspecies", 33], "35":["unknown_author", 40], "42":["author_word", 47]}}}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity19
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.12
|
4
|
+
version: 1.0.13
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: treetop
|
@@ -119,6 +119,7 @@ extra_rdoc_files:
|
|
119
119
|
files:
|
120
120
|
- .document
|
121
121
|
- .rvmrc
|
122
|
+
- CHANGELOG
|
122
123
|
- Gemfile
|
123
124
|
- Gemfile.lock
|
124
125
|
- LICENSE
|