name-spotter 0.1.10 → 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/name-spotter/taxon_finder_client.rb +1 -1
- data/name-spotter.gemspec +2 -2
- data/spec/name-spotter_spec.rb +7 -1
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.10
|
1
|
+
0.1.11
|
@@ -83,7 +83,7 @@ class NameSpotter
|
|
83
83
|
if @current_index
|
84
84
|
start_position = @current_index
|
85
85
|
words, indices = @cursor.transpose
|
86
|
-
verbatim_string = words[indices.index(start_position)
|
86
|
+
verbatim_string = str.include?("[") ? words[indices.index(start_position)..-1].join(" ") : words[indices.index(start_position)...-1].join(" ")
|
87
87
|
else
|
88
88
|
verbatim_string, start_position = @cursor[-1]
|
89
89
|
end
|
data/name-spotter.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "name-spotter"
|
8
|
-
s.version = "0.1.10"
|
8
|
+
s.version = "0.1.11"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
12
|
-
s.date = "2012-06-
|
12
|
+
s.date = "2012-06-13"
|
13
13
|
s.description = "The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)"
|
14
14
|
s.email = "dmozzherin@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/name-spotter_spec.rb
CHANGED
@@ -71,13 +71,19 @@ describe "NameSpotter" do
|
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
74
|
-
it "should be able to return offsets for all names
|
74
|
+
it "should be able to return offsets for all names" do
|
75
75
|
text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and ×Inkea which is not Passeriformes. We also have another hybrid Passiflora ×rosea and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
|
76
76
|
res = @neti.find(text)
|
77
77
|
res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora ×rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"Pardosa\n moesta", :scientificName=>"Pardosa moesta", :offsetStart=>198, :offsetEnd=>212}]}
|
78
78
|
tf_res = @tf.find(text)
|
79
79
|
tf_res.should == {:names=>[{:verbatim=>"Betula alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"PSEUDOSCORPIONIDA", :scientificName=>"Pseudoscorpionida", :offsetStart=>41, :offsetEnd=>57}, {:verbatim=>"Passeriformes.", :scientificName=>"Passeriformes", :offsetStart=>83, :offsetEnd=>96}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"ARANEA", :scientificName=>"Aranea", :offsetStart=>179, :offsetEnd=>184}, {:verbatim=>"Pardosa moesta f. moesta", :scientificName=>"Pardosa moesta f. moesta", :offsetStart=>198, :offsetEnd=>222}]}
|
80
80
|
end
|
81
|
+
|
82
|
+
it "should properly handle abbreviated names found by taxonfinder" do
|
83
|
+
text = "Pardosa moesta Banks, 1892 is one spider, Schizocosa ocreata Keyserling, 1887 is a second and a third is Schizocosa saltatrix borealis. The abbreviations are P. moesta, S. ocreata, and S. saltatrix borealis is the third."
|
84
|
+
tf_res = @tf.find(text)
|
85
|
+
tf_res.should == {:names=>[{:verbatim=>"Pardosa moesta", :scientificName=>"Pardosa moesta", :offsetStart=>0, :offsetEnd=>13}, {:verbatim=>"Schizocosa ocreata", :scientificName=>"Schizocosa ocreata", :offsetStart=>42, :offsetEnd=>59}, {:verbatim=>"Schizocosa saltatrix borealis.", :scientificName=>"Schizocosa saltatrix borealis", :offsetStart=>105, :offsetEnd=>134}, {:verbatim=>"P. moesta,", :scientificName=>"P[ardosa] moesta", :offsetStart=>158, :offsetEnd=>167}, {:verbatim=>"S. ocreata,", :scientificName=>"S[chizocosa] ocreata", :offsetStart=>169, :offsetEnd=>179}, {:verbatim=>"S. saltatrix borealis is", :scientificName=>"S[chizocosa] saltatrix borealis", :offsetStart=>185, :offsetEnd=>208}]}
|
86
|
+
end
|
81
87
|
|
82
88
|
it "should not make unsequential offsets on a page when using NetiNeti" do
|
83
89
|
text = open(File.join(File.dirname(__FILE__), 'files', 'journalofentomol13pomo_0063.txt'), 'r:utf-8').read
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-spotter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-06-
|
14
|
+
date: 2012-06-13 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
@@ -269,7 +269,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
269
269
|
version: '0'
|
270
270
|
segments:
|
271
271
|
- 0
|
272
|
-
hash:
|
272
|
+
hash: -2251568119704049389
|
273
273
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
274
274
|
none: false
|
275
275
|
requirements:
|