biodiversity19 1.0.12 → 1.0.13

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,7 @@
1
+ 1.0.13 -- canonical forms for the cf. and aff. qualifiers are modified: canonical for
2
+ 'Aus cf. bus' is now 'Aus bus'; canonical for 'Aus aff. bus' is now 'Aus'.
3
+ Ranks at the end of the name like 'var', 'ssp', 'spp' are considered junk and
4
+ are ignored.
5
+
6
+ 1.0.12 -- a bug is fixed which prevented 'Cucurbita pepo' from being parsed correctly,
7
+ f., forma, fr. are now treated like any other rank.
data/Gemfile CHANGED
@@ -8,6 +8,8 @@ group :development do
8
8
  end
9
9
 
10
10
  group :test do
11
- gem "ruby-debug19", :require => "ruby-debug"
11
+ if RUBY_VERSION =~ /^1\.9/
12
+ gem "ruby-debug19", :require => "ruby-debug"
13
+ end
12
14
  gem "rspec"
13
15
  end
data/Gemfile.lock CHANGED
@@ -1,16 +1,12 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- archive-tar-minitar (0.5.2)
5
- columnize (0.3.4)
6
4
  diff-lcs (1.1.3)
7
5
  git (1.2.5)
8
6
  jeweler (1.6.4)
9
7
  bundler (~> 1.0)
10
8
  git (>= 1.2.5)
11
9
  rake
12
- linecache19 (0.5.12)
13
- ruby_core_source (>= 0.1.4)
14
10
  parallel (0.5.9)
15
11
  polyglot (0.3.3)
16
12
  rake (0.9.2.2)
@@ -22,16 +18,6 @@ GEM
22
18
  rspec-expectations (2.7.0)
23
19
  diff-lcs (~> 1.1.2)
24
20
  rspec-mocks (2.7.0)
25
- ruby-debug-base19 (0.11.25)
26
- columnize (>= 0.3.1)
27
- linecache19 (>= 0.5.11)
28
- ruby_core_source (>= 0.1.4)
29
- ruby-debug19 (0.11.6)
30
- columnize (>= 0.3.1)
31
- linecache19 (>= 0.5.11)
32
- ruby-debug-base19 (>= 0.11.19)
33
- ruby_core_source (0.1.5)
34
- archive-tar-minitar (>= 0.5.2)
35
21
  treetop (1.4.10)
36
22
  polyglot
37
23
  polyglot (>= 0.3.1)
@@ -43,5 +29,4 @@ DEPENDENCIES
43
29
  jeweler
44
30
  parallel
45
31
  rspec
46
- ruby-debug19
47
32
  treetop
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.12
1
+ 1.0.13
@@ -12,12 +12,17 @@ module PreProcessor
12
12
  TAXON_CONCEPTS2 = /\s+(\(?s\.\s?s\.|\(?s\.\s?l\.|\(?s\.\s?str\.|\(?s\.\s?lat\.|sec\.|sec|near)\b.*$/
13
13
  TAXON_CONCEPTS3 = /(,\s*|\s+)(pro parte|p\.\s?p\.)\s*$/i
14
14
  NOMEN_CONCEPTS = /(,\s*|\s+)(\(?nomen|\(?nom\.|\(?comb\.).*$/i
15
- LAST_WORD_JUNK = /(,\s*|\s+)(von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort.|hort)\s*$/i
15
+ COMPARATORS = /\s+(aff\.|aff)\b.*$/i
16
+ CF_COMPARATOR = /\s+(cf\.|cf)\s+/i
17
+ LAST_WORD_JUNK = /(,\s*|\s+)(spp\.|spp|var\.|var|von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort\.|hort)\s*$/i
16
18
 
17
19
  def self.clean(a_string)
18
- [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK].each do |i|
20
+ [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, COMPARATORS, LAST_WORD_JUNK].each do |i|
19
21
  a_string = a_string.gsub(i, '')
20
22
  end
23
+ [CF_COMPARATOR].each do |i|
24
+ a_string = a_string.gsub(i, ' ')
25
+ end
21
26
  a_string = a_string.tr('ſ','s') #old 's'
22
27
  a_string
23
28
  end
@@ -401,7 +401,7 @@ grammar ScientificNameClean
401
401
  end
402
402
  }
403
403
  end
404
-
404
+
405
405
  rule rank
406
406
  ("morph."/"f.sp."/"B"/"ssp."/"ssp"/"mut."/"nat"/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"forma"/"form."/"form"/"fo."/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
407
407
  {
@@ -125,6 +125,19 @@ Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 19
125
125
  Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. et Heldr.) Hayek", "canonical":"Senecio fuchsii fuchsii expansus", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"expansus", "rank":"var.", "authorship":"(Boiss. & Heldr.) Hayek", "combinationAuthorTeam":{"authorTeam":"Hayek", "author":["Hayek"]}, "basionymAuthorTeam":{"authorTeam":"Boiss. & Heldr.", "author":["Boiss.", "Heldr."]}}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 54], "56":["author_word", 62], "65":["author_word", 71], "73":["author_word", 78]}}}
126
126
  Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "canonical":"Senecio fuchsii fuchsii fuchsii", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"fuchsii", "rank":"var."}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 53]}}}
127
127
 
128
+
129
+ #species and infraspecies without epithets, comparisons
130
+ Alviniconcha aff alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
131
+ Alviniconcha aff. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff. alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
132
+ Alviniconcha cf. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf. alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
133
+ Alviniconcha cf alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
134
+ Alyxia reinwardti var|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
135
+ Alyxia reinwardti var.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
136
+ Alyxia reinwardti ssp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
137
+ Alyxia reinwardti ssp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp.", "normalized":"Alyxia reinwardti ssp.", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}, "status":"ssp."}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
138
+ Alaria spp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
139
+ Alaria spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp.", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
140
+
128
141
  #unknown authorship
129
142
  Tragacantha leporina (?) Kuntze|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Tragacantha leporina (?) Kuntze","normalized":"Tragacantha leporina (?) Kuntze","canonical":"Tragacantha leporina","hybrid":false,"details":[{"genus":{"string":"Tragacantha"},"species":{"string":"leporina","authorship":"(?) Kuntze","combinationAuthorTeam":{"authorTeam":"Kuntze","author":["Kuntze"]},"basionymAuthorTeam":{"authorTeam":"(?)","author":["?"]}}}],"positions":{"0":["genus",11],"12":["species",20],"22":["unknown_author",23],"25":["author_word",31]}}}
130
143
  Lachenalia tricolor var. nelsonii (auct.) Baker|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lachenalia tricolor var. nelsonii (auct.) Baker", "normalized":"Lachenalia tricolor var. nelsonii (auct.) Baker", "canonical":"Lachenalia tricolor nelsonii", "hybrid":false, "details":[{"genus":{"string":"Lachenalia"}, "species":{"string":"tricolor"}, "infraspecies":[{"string":"nelsonii", "rank":"var.", "authorship":"(auct.) Baker", "combinationAuthorTeam":{"authorTeam":"Baker", "author":["Baker"]}, "basionymAuthorTeam":{"authorTeam":"auct.", "author":["unknown"]}}]}], "parser_run":1, "positions":{"0":["genus", 10], "11":["species", 19], "20":["infraspecific_type", 24], "25":["infraspecies", 33], "35":["unknown_author", 40], "42":["author_word", 47]}}}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity19
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.12
4
+ version: 1.0.13
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-15 00:00:00.000000000 Z
12
+ date: 2012-06-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: treetop
@@ -119,6 +119,7 @@ extra_rdoc_files:
119
119
  files:
120
120
  - .document
121
121
  - .rvmrc
122
+ - CHANGELOG
122
123
  - Gemfile
123
124
  - Gemfile.lock
124
125
  - LICENSE