name-spotter 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/name-spotter/taxon_finder_client.rb +22 -15
- data/name-spotter.gemspec +2 -2
- data/spec/name-spotter_spec.rb +4 -4
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.8
|
1
|
+
0.1.9
|
@@ -1,11 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
class NameSpotter
|
2
3
|
class TaxonFinderClient < NameSpotter::Client
|
3
4
|
def initialize(opts = { host: "0.0.0.0", port: "1234" })
|
4
5
|
super
|
5
|
-
# We keep track of the document to get accurate offsets.
|
6
|
-
# Other methods such as keeping track of the character number
|
7
|
-
# didn't work so well due to the nature of TaxonFinder.
|
8
|
-
@document = ""
|
9
6
|
end
|
10
7
|
|
11
8
|
def find(str, from_web_form=false)
|
@@ -17,20 +14,21 @@ class NameSpotter
|
|
17
14
|
@current_string = ''
|
18
15
|
@current_string_state = ''
|
19
16
|
@word_list_matches = 0
|
20
|
-
@
|
17
|
+
@cursor = 5.times.inject([]) { |res| res << ['',-1] }
|
18
|
+
@current_index = nil
|
21
19
|
words = str.split(/\s/)
|
22
20
|
words.each do |word|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
if word.empty?
|
22
|
+
@cursor[-1][0] << " "
|
23
|
+
else
|
24
|
+
cursor_entry = [word, 1 + @cursor[-1][0].size + @cursor[-1][-1]]
|
25
|
+
@cursor.shift
|
26
|
+
@cursor << cursor_entry
|
28
27
|
taxon_find(word)
|
29
28
|
end
|
30
29
|
end
|
31
30
|
socket.close
|
32
31
|
@socket = nil
|
33
|
-
@document = ""
|
34
32
|
@names
|
35
33
|
end
|
36
34
|
|
@@ -64,6 +62,9 @@ class NameSpotter
|
|
64
62
|
@current_string_state = current_string_state
|
65
63
|
@word_list_matches = word_list_matches
|
66
64
|
@return_score = return_score
|
65
|
+
if @current_string.size > 0 && !@current_index
|
66
|
+
@current_index = @cursor[-1][-1]
|
67
|
+
end
|
67
68
|
if not return_string.blank? or not return_string_2.blank?
|
68
69
|
OpenStruct.new( { :current_string => current_string,
|
69
70
|
:current_string_state => current_string_state,
|
@@ -73,16 +74,22 @@ class NameSpotter
|
|
73
74
|
:return_string_2 => return_string_2,
|
74
75
|
:return_score_2 => return_score_2 })
|
75
76
|
else
|
77
|
+
@current_index = nil if @current_string.empty? && @current_index
|
76
78
|
false
|
77
79
|
end
|
78
80
|
end
|
79
81
|
|
80
82
|
def process_response(str)
|
81
83
|
str.force_encoding('utf-8')
|
82
|
-
verbatim_string =
|
83
|
-
|
84
|
-
|
85
|
-
|
84
|
+
start_position = verbatim_string = nil
|
85
|
+
if @current_index
|
86
|
+
start_position = @current_index
|
87
|
+
words, indices = @cursor.transpose
|
88
|
+
verbatim_string = words[indices.index(start_position)...-1].join(" ")
|
89
|
+
@current_index = nil
|
90
|
+
else
|
91
|
+
verbatim_string, start_position = @cursor[-1]
|
92
|
+
end
|
86
93
|
scientific_string = str
|
87
94
|
[verbatim_string, scientific_string, start_position]
|
88
95
|
end
|
data/name-spotter.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "name-spotter"
|
8
|
-
s.version = "0.1.8"
|
8
|
+
s.version = "0.1.9"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
12
|
-
s.date = "2012-06-
|
12
|
+
s.date = "2012-06-12"
|
13
13
|
s.description = "The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)"
|
14
14
|
s.email = "dmozzherin@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/name-spotter_spec.rb
CHANGED
@@ -39,7 +39,7 @@ describe "NameSpotter" do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
it "should be able to find scientific names in text" do
|
42
|
-
text = "Some text
|
42
|
+
text = "Some text that has Betula\n alba and Mus musculus and \neven B. alba and even M. mus-\nculus and unicoded name Aranea röselii. Also it has name unknown before: Varanus bitatawa species"
|
43
43
|
res = @neti.find(text)[:names].map { |n| n[:scientificName] }
|
44
44
|
res.should == ["Betula alba", "Mus musculus", "B. alba", "Aranea röselii", "Varanus bitatawa"]
|
45
45
|
tf_res = @tf.find(text)
|
@@ -72,11 +72,11 @@ describe "NameSpotter" do
|
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should be able to return offsets for all names found by taxonfinder" do
|
75
|
-
text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
|
75
|
+
text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and ×Inkea which is not Passeriformes. We also have another hybrid Passiflora ×rosea and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
|
76
76
|
res = @neti.find(text)
|
77
|
-
res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>
|
77
|
+
res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora ×rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"Pardosa\n moesta", :scientificName=>"Pardosa moesta", :offsetStart=>198, :offsetEnd=>212}]}
|
78
78
|
tf_res = @tf.find(text)
|
79
|
-
tf_res.should == {:names=>[{:verbatim=>"Betula
|
79
|
+
tf_res.should == {:names=>[{:verbatim=>"Betula alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"PSEUDOSCORPIONIDA", :scientificName=>"Pseudoscorpionida", :offsetStart=>41, :offsetEnd=>57}, {:verbatim=>"Passeriformes.", :scientificName=>"Passeriformes", :offsetStart=>83, :offsetEnd=>96}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"ARANEA", :scientificName=>"Aranea", :offsetStart=>179, :offsetEnd=>184}, {:verbatim=>"Pardosa moesta f. moesta", :scientificName=>"Pardosa moesta f. moesta", :offsetStart=>198, :offsetEnd=>222}]}
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should not make unsequential offsets on a page when using NetiNeti" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-spotter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-06-
|
14
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
@@ -269,7 +269,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
269
269
|
version: '0'
|
270
270
|
segments:
|
271
271
|
- 0
|
272
|
-
hash:
|
272
|
+
hash: 3944184144538713044
|
273
273
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
274
274
|
none: false
|
275
275
|
requirements:
|