name-spotter 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/name-spotter/taxon_finder_client.rb +22 -15
- data/name-spotter.gemspec +2 -2
- data/spec/name-spotter_spec.rb +4 -4
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.8
|
1
|
+
0.1.9
|
@@ -1,11 +1,8 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
class NameSpotter
|
2
3
|
class TaxonFinderClient < NameSpotter::Client
|
3
4
|
def initialize(opts = { host: "0.0.0.0", port: "1234" })
|
4
5
|
super
|
5
|
-
# We keep track of the document to get accurate offsets.
|
6
|
-
# Other methods such as keeping track of the character number
|
7
|
-
# didn't work so well due to the nature of TaxonFinder.
|
8
|
-
@document = ""
|
9
6
|
end
|
10
7
|
|
11
8
|
def find(str, from_web_form=false)
|
@@ -17,20 +14,21 @@ class NameSpotter
|
|
17
14
|
@current_string = ''
|
18
15
|
@current_string_state = ''
|
19
16
|
@word_list_matches = 0
|
20
|
-
@
|
17
|
+
@cursor = 5.times.inject([]) { |res| res << ['',-1] }
|
18
|
+
@current_index = nil
|
21
19
|
words = str.split(/\s/)
|
22
20
|
words.each do |word|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
if word.empty?
|
22
|
+
@cursor[-1][0] << " "
|
23
|
+
else
|
24
|
+
cursor_entry = [word, 1 + @cursor[-1][0].size + @cursor[-1][-1]]
|
25
|
+
@cursor.shift
|
26
|
+
@cursor << cursor_entry
|
28
27
|
taxon_find(word)
|
29
28
|
end
|
30
29
|
end
|
31
30
|
socket.close
|
32
31
|
@socket = nil
|
33
|
-
@document = ""
|
34
32
|
@names
|
35
33
|
end
|
36
34
|
|
@@ -64,6 +62,9 @@ class NameSpotter
|
|
64
62
|
@current_string_state = current_string_state
|
65
63
|
@word_list_matches = word_list_matches
|
66
64
|
@return_score = return_score
|
65
|
+
if @current_string.size > 0 && !@current_index
|
66
|
+
@current_index = @cursor[-1][-1]
|
67
|
+
end
|
67
68
|
if not return_string.blank? or not return_string_2.blank?
|
68
69
|
OpenStruct.new( { :current_string => current_string,
|
69
70
|
:current_string_state => current_string_state,
|
@@ -73,16 +74,22 @@ class NameSpotter
|
|
73
74
|
:return_string_2 => return_string_2,
|
74
75
|
:return_score_2 => return_score_2 })
|
75
76
|
else
|
77
|
+
@current_index = nil if @current_string.empty? && @current_index
|
76
78
|
false
|
77
79
|
end
|
78
80
|
end
|
79
81
|
|
80
82
|
def process_response(str)
|
81
83
|
str.force_encoding('utf-8')
|
82
|
-
verbatim_string =
|
83
|
-
|
84
|
-
|
85
|
-
|
84
|
+
start_position = verbatim_string = nil
|
85
|
+
if @current_index
|
86
|
+
start_position = @current_index
|
87
|
+
words, indices = @cursor.transpose
|
88
|
+
verbatim_string = words[indices.index(start_position)...-1].join(" ")
|
89
|
+
@current_index = nil
|
90
|
+
else
|
91
|
+
verbatim_string, start_position = @cursor[-1]
|
92
|
+
end
|
86
93
|
scientific_string = str
|
87
94
|
[verbatim_string, scientific_string, start_position]
|
88
95
|
end
|
data/name-spotter.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "name-spotter"
|
8
|
-
s.version = "0.1.8"
|
8
|
+
s.version = "0.1.9"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
12
|
-
s.date = "2012-06-
|
12
|
+
s.date = "2012-06-12"
|
13
13
|
s.description = "The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)"
|
14
14
|
s.email = "dmozzherin@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/name-spotter_spec.rb
CHANGED
@@ -39,7 +39,7 @@ describe "NameSpotter" do
|
|
39
39
|
end
|
40
40
|
|
41
41
|
it "should be able to find scientific names in text" do
|
42
|
-
text = "Some text
|
42
|
+
text = "Some text that has Betula\n alba and Mus musculus and \neven B. alba and even M. mus-\nculus and unicoded name Aranea röselii. Also it has name unknown before: Varanus bitatawa species"
|
43
43
|
res = @neti.find(text)[:names].map { |n| n[:scientificName] }
|
44
44
|
res.should == ["Betula alba", "Mus musculus", "B. alba", "Aranea röselii", "Varanus bitatawa"]
|
45
45
|
tf_res = @tf.find(text)
|
@@ -72,11 +72,11 @@ describe "NameSpotter" do
|
|
72
72
|
end
|
73
73
|
|
74
74
|
it "should be able to return offsets for all names found by taxonfinder" do
|
75
|
-
text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
|
75
|
+
text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and ×Inkea which is not Passeriformes. We also have another hybrid Passiflora ×rosea and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
|
76
76
|
res = @neti.find(text)
|
77
|
-
res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>
|
77
|
+
res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora ×rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"Pardosa\n moesta", :scientificName=>"Pardosa moesta", :offsetStart=>198, :offsetEnd=>212}]}
|
78
78
|
tf_res = @tf.find(text)
|
79
|
-
tf_res.should == {:names=>[{:verbatim=>"Betula
|
79
|
+
tf_res.should == {:names=>[{:verbatim=>"Betula alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"PSEUDOSCORPIONIDA", :scientificName=>"Pseudoscorpionida", :offsetStart=>41, :offsetEnd=>57}, {:verbatim=>"Passeriformes.", :scientificName=>"Passeriformes", :offsetStart=>83, :offsetEnd=>96}, {:verbatim=>"Passiflora ×rosea", :scientificName=>"Passiflora rosea", :offsetStart=>126, :offsetEnd=>142}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>148, :offsetEnd=>161}, {:verbatim=>"ARANEA", :scientificName=>"Aranea", :offsetStart=>179, :offsetEnd=>184}, {:verbatim=>"Pardosa moesta f. moesta", :scientificName=>"Pardosa moesta f. moesta", :offsetStart=>198, :offsetEnd=>222}]}
|
80
80
|
end
|
81
81
|
|
82
82
|
it "should not make unsequential offsets on a page when using NetiNeti" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-spotter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-06-
|
14
|
+
date: 2012-06-12 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
@@ -269,7 +269,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
269
269
|
version: '0'
|
270
270
|
segments:
|
271
271
|
- 0
|
272
|
-
hash:
|
272
|
+
hash: 3944184144538713044
|
273
273
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
274
274
|
none: false
|
275
275
|
requirements:
|