biodiversity19 1.0.12 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,7 @@
1
+ 1.0.13 -- canonical forms for the cf. and aff. qualifiers are modified: canonical for
2
+ 'Aus cf. bus' is now 'Aus bus'; canonical for 'Aus aff. bus' is now 'Aus'.
3
+ Ranks at the end of the name like 'var', 'ssp', 'spp' are considered junk and
4
+ are ignored
5
+
6
+ 1.0.12 -- fixed a bug which prevented 'Cucurbita pepo' from being parsed correctly,
7
+ f., forma, fr. are now treated as any other ranks.
data/Gemfile CHANGED
@@ -8,6 +8,8 @@ group :development do
8
8
  end
9
9
 
10
10
  group :test do
11
- gem "ruby-debug19", :require => "ruby-debug"
11
+ if RUBY_VERSION =~ /^1\.9/
12
+ gem "ruby-debug19", :require => "ruby-debug"
13
+ end
12
14
  gem "rspec"
13
15
  end
data/Gemfile.lock CHANGED
@@ -1,16 +1,12 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- archive-tar-minitar (0.5.2)
5
- columnize (0.3.4)
6
4
  diff-lcs (1.1.3)
7
5
  git (1.2.5)
8
6
  jeweler (1.6.4)
9
7
  bundler (~> 1.0)
10
8
  git (>= 1.2.5)
11
9
  rake
12
- linecache19 (0.5.12)
13
- ruby_core_source (>= 0.1.4)
14
10
  parallel (0.5.9)
15
11
  polyglot (0.3.3)
16
12
  rake (0.9.2.2)
@@ -22,16 +18,6 @@ GEM
22
18
  rspec-expectations (2.7.0)
23
19
  diff-lcs (~> 1.1.2)
24
20
  rspec-mocks (2.7.0)
25
- ruby-debug-base19 (0.11.25)
26
- columnize (>= 0.3.1)
27
- linecache19 (>= 0.5.11)
28
- ruby_core_source (>= 0.1.4)
29
- ruby-debug19 (0.11.6)
30
- columnize (>= 0.3.1)
31
- linecache19 (>= 0.5.11)
32
- ruby-debug-base19 (>= 0.11.19)
33
- ruby_core_source (0.1.5)
34
- archive-tar-minitar (>= 0.5.2)
35
21
  treetop (1.4.10)
36
22
  polyglot
37
23
  polyglot (>= 0.3.1)
@@ -43,5 +29,4 @@ DEPENDENCIES
43
29
  jeweler
44
30
  parallel
45
31
  rspec
46
- ruby-debug19
47
32
  treetop
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.12
1
+ 1.0.13
@@ -12,12 +12,17 @@ module PreProcessor
12
12
  TAXON_CONCEPTS2 = /\s+(\(?s\.\s?s\.|\(?s\.\s?l\.|\(?s\.\s?str\.|\(?s\.\s?lat\.|sec\.|sec|near)\b.*$/
13
13
  TAXON_CONCEPTS3 = /(,\s*|\s+)(pro parte|p\.\s?p\.)\s*$/i
14
14
  NOMEN_CONCEPTS = /(,\s*|\s+)(\(?nomen|\(?nom\.|\(?comb\.).*$/i
15
- LAST_WORD_JUNK = /(,\s*|\s+)(von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort.|hort)\s*$/i
15
+ COMPARATORS = /\s+(aff\.|aff)\b.*$/i
16
+ CF_COMPARATOR = /\s+(cf\.|cf)\s+/i
17
+ LAST_WORD_JUNK = /(,\s*|\s+)(spp\.|spp|var\.|var|von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort\.|hort)\s*$/i
16
18
 
17
19
  def self.clean(a_string)
18
- [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK].each do |i|
20
+ [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, COMPARATORS, LAST_WORD_JUNK].each do |i|
19
21
  a_string = a_string.gsub(i, '')
20
22
  end
23
+ [CF_COMPARATOR].each do |i|
24
+ a_string = a_string.gsub(i, ' ')
25
+ end
21
26
  a_string = a_string.tr('ſ','s') #old 's'
22
27
  a_string
23
28
  end
@@ -401,7 +401,7 @@ grammar ScientificNameClean
401
401
  end
402
402
  }
403
403
  end
404
-
404
+
405
405
  rule rank
406
406
  ("morph."/"f.sp."/"B"/"ssp."/"ssp"/"mut."/"nat"/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"forma"/"form."/"form"/"fo."/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
407
407
  {
@@ -125,6 +125,19 @@ Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 19
125
125
  Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. et Heldr.) Hayek", "canonical":"Senecio fuchsii fuchsii expansus", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"expansus", "rank":"var.", "authorship":"(Boiss. & Heldr.) Hayek", "combinationAuthorTeam":{"authorTeam":"Hayek", "author":["Hayek"]}, "basionymAuthorTeam":{"authorTeam":"Boiss. & Heldr.", "author":["Boiss.", "Heldr."]}}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 54], "56":["author_word", 62], "65":["author_word", 71], "73":["author_word", 78]}}}
126
126
  Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "normalized":"Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii", "canonical":"Senecio fuchsii fuchsii fuchsii", "hybrid":false, "details":[{"genus":{"string":"Senecio"}, "species":{"string":"fuchsii", "authorship":"C.C.Gmel.", "basionymAuthorTeam":{"authorTeam":"C.C.Gmel.", "author":["C.C.Gmel."]}}, "infraspecies":[{"string":"fuchsii", "rank":"subsp."}, {"string":"fuchsii", "rank":"var."}]}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 15], "16":["author_word", 25], "26":["infraspecific_type", 32], "33":["infraspecies", 40], "41":["infraspecific_type", 45], "46":["infraspecies", 53]}}}
127
127
 
128
+
129
+ #species and infraspecies without epithets, comparisons
130
+ Alviniconcha aff alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
131
+ Alviniconcha aff. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha aff. alba", "normalized":"Alviniconcha", "canonical":"Alviniconcha", "hybrid":false, "details":[{"uninomial":{"string":"Alviniconcha"}}], "parser_run":1, "positions":{"0":["uninomial", 12]}}}
132
+ Alviniconcha cf. alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf. alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
133
+ Alviniconcha cf alba|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alviniconcha cf alba", "normalized":"Alviniconcha alba", "canonical":"Alviniconcha alba", "hybrid":false, "details":[{"genus":{"string":"Alviniconcha"}, "species":{"string":"alba"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["species", 17]}}}
134
+ Alyxia reinwardti var|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
135
+ Alyxia reinwardti var.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
136
+ Alyxia reinwardti ssp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
137
+ Alyxia reinwardti ssp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp.", "normalized":"Alyxia reinwardti ssp.", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}, "status":"ssp."}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
138
+ Alaria spp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
139
+ Alaria spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp.", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
140
+
128
141
  #unknown authorship
129
142
  Tragacantha leporina (?) Kuntze|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Tragacantha leporina (?) Kuntze","normalized":"Tragacantha leporina (?) Kuntze","canonical":"Tragacantha leporina","hybrid":false,"details":[{"genus":{"string":"Tragacantha"},"species":{"string":"leporina","authorship":"(?) Kuntze","combinationAuthorTeam":{"authorTeam":"Kuntze","author":["Kuntze"]},"basionymAuthorTeam":{"authorTeam":"(?)","author":["?"]}}}],"positions":{"0":["genus",11],"12":["species",20],"22":["unknown_author",23],"25":["author_word",31]}}}
130
143
  Lachenalia tricolor var. nelsonii (auct.) Baker|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lachenalia tricolor var. nelsonii (auct.) Baker", "normalized":"Lachenalia tricolor var. nelsonii (auct.) Baker", "canonical":"Lachenalia tricolor nelsonii", "hybrid":false, "details":[{"genus":{"string":"Lachenalia"}, "species":{"string":"tricolor"}, "infraspecies":[{"string":"nelsonii", "rank":"var.", "authorship":"(auct.) Baker", "combinationAuthorTeam":{"authorTeam":"Baker", "author":["Baker"]}, "basionymAuthorTeam":{"authorTeam":"auct.", "author":["unknown"]}}]}], "parser_run":1, "positions":{"0":["genus", 10], "11":["species", 19], "20":["infraspecific_type", 24], "25":["infraspecies", 33], "35":["unknown_author", 40], "42":["author_word", 47]}}}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity19
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.12
4
+ version: 1.0.13
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-15 00:00:00.000000000 Z
12
+ date: 2012-06-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: treetop
@@ -119,6 +119,7 @@ extra_rdoc_files:
119
119
  files:
120
120
  - .document
121
121
  - .rvmrc
122
+ - CHANGELOG
122
123
  - Gemfile
123
124
  - Gemfile.lock
124
125
  - LICENSE