name-spotter 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.byebug_history +44 -0
- data/.gitignore +51 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CHANGELOG +2 -0
- data/Gemfile +2 -22
- data/README.md +116 -0
- data/Rakefile +2 -19
- data/lib/name-spotter.rb +3 -1
- data/lib/name-spotter/monkey_patches.rb +4 -2
- data/lib/name-spotter/neti_neti_client.rb +13 -6
- data/lib/name-spotter/scientific_name.rb +3 -3
- data/lib/name-spotter/taxon_finder_client.rb +35 -24
- data/lib/name-spotter/version.rb +8 -0
- data/name-spotter.gemspec +26 -98
- data/spec/name-spotter_spec.rb +334 -131
- data/spec/scientific_name_spec.rb +14 -19
- data/spec/spec_helper.rb +2 -12
- data/tf_logic.txt +3 -3
- metadata +69 -142
- data/.rvmrc +0 -1
- data/Gemfile.lock +0 -84
- data/README.rdoc +0 -95
- data/VERSION +0 -1
- data/features/name-spotter.feature +0 -9
- data/features/step_definitions/name-spotter_steps.rb +0 -0
- data/features/support/env.rb +0 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7d5a8968a40ba3511bda226ce4e7cbbae11189e9
|
4
|
+
data.tar.gz: 02dd6325c7a5786737a3ddbf73f2282c9e3b9f3d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7c3b6c2c51b869b45b1dd7a057a5439595978efa967005871689392d9fdb5f6c5e5fb92beeebb0769aa3d1318417bb0ea1f819b5ecd28014ebd3b24a7694242a
|
7
|
+
data.tar.gz: a7acd16270b09005f0732319e15264b3373519852d940af4ed47db2d94d70b296e021abc64f2a9b6cb6397aa5874bd967ef73cf7e8ada4874a0cbf5ec4061c4c
|
data/.byebug_history
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
q
|
2
|
+
c
|
3
|
+
q
|
4
|
+
p response
|
5
|
+
q
|
6
|
+
@names
|
7
|
+
c
|
8
|
+
words
|
9
|
+
q
|
10
|
+
fg
|
11
|
+
words
|
12
|
+
q
|
13
|
+
fg
|
14
|
+
words
|
15
|
+
q
|
16
|
+
fg
|
17
|
+
c
|
18
|
+
words
|
19
|
+
fg
|
20
|
+
q
|
21
|
+
fg
|
22
|
+
@names
|
23
|
+
q
|
24
|
+
res
|
25
|
+
q
|
26
|
+
fg
|
27
|
+
name
|
28
|
+
q
|
29
|
+
fg
|
30
|
+
response
|
31
|
+
q
|
32
|
+
p response
|
33
|
+
q
|
34
|
+
fg
|
35
|
+
text
|
36
|
+
q
|
37
|
+
p response
|
38
|
+
expect(NameSpotter.english?(eng3)).to be true
|
39
|
+
expect(NameSpotter.english?(eng2)).to be true
|
40
|
+
expect(NameSpotter.english?(eng)).to be true
|
41
|
+
NameSpotter.english?(not_eng)
|
42
|
+
NameSpotter.english?(eng3)
|
43
|
+
NameSpotter.english?(eng2)
|
44
|
+
NameSpotter.english?(eng)
|
data/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#Gemfile.lock
|
2
|
+
Gemfile.lock
|
3
|
+
|
4
|
+
# rcov generated
|
5
|
+
coverage
|
6
|
+
|
7
|
+
# rdoc generated
|
8
|
+
rdoc
|
9
|
+
|
10
|
+
# yard generated
|
11
|
+
doc
|
12
|
+
.yardoc
|
13
|
+
|
14
|
+
# bundler
|
15
|
+
.bundle
|
16
|
+
|
17
|
+
# jeweler generated
|
18
|
+
pkg
|
19
|
+
|
20
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
21
|
+
#
|
22
|
+
# * Create a file at ~/.gitignore
|
23
|
+
# * Include files you want ignored
|
24
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
25
|
+
#
|
26
|
+
# After doing this, these files will be ignored in all your git projects,
|
27
|
+
# saving you from having to 'pollute' every project you touch with them
|
28
|
+
#
|
29
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember, remove the leading # of the line)
|
30
|
+
#
|
31
|
+
# For MacOS:
|
32
|
+
#
|
33
|
+
#.DS_Store
|
34
|
+
|
35
|
+
# For TextMate
|
36
|
+
#*.tmproj
|
37
|
+
#tmtags
|
38
|
+
|
39
|
+
# For emacs:
|
40
|
+
#*~
|
41
|
+
#\#*
|
42
|
+
#.\#*
|
43
|
+
|
44
|
+
# For vim:
|
45
|
+
#*.swp
|
46
|
+
|
47
|
+
# For redcar:
|
48
|
+
#.redcar
|
49
|
+
|
50
|
+
# For rubinius:
|
51
|
+
#*.rbc
|
data/.rspec
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.6
|
data/.travis.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
sudo: required
|
2
|
+
language: ruby
|
3
|
+
services:
|
4
|
+
- docker
|
5
|
+
|
6
|
+
rvm:
|
7
|
+
- 2.0
|
8
|
+
- 2.1
|
9
|
+
- 2.2
|
10
|
+
|
11
|
+
# bundler_args: --without development
|
12
|
+
|
13
|
+
before_install:
|
14
|
+
- docker pull gnames/netineti
|
15
|
+
- docker pull gnames/taxonfinder
|
16
|
+
- docker run -d -p 0.0.0.0:1234:1234 --name tf gnames/taxonfinder
|
17
|
+
- docker run -d -p 0.0.0.0:6384:6384 --name nn gnames/netineti
|
18
|
+
# before_script:
|
19
|
+
# - sleep 100
|
20
|
+
branches:
|
21
|
+
only:
|
22
|
+
- master
|
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
@@ -1,23 +1,3 @@
|
|
1
|
-
source
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
1
|
+
source 'https://rubygems.org'
|
5
2
|
|
6
|
-
|
7
|
-
gem "rest-client"
|
8
|
-
gem "builder"
|
9
|
-
gem "json"
|
10
|
-
gem "unicode_utils"
|
11
|
-
gem "unsupervised-language-detection"
|
12
|
-
|
13
|
-
# Add dependencies to develop your gem here.
|
14
|
-
# Include everything needed to run rake, tests, features, etc.
|
15
|
-
group :development do
|
16
|
-
gem "rspec"
|
17
|
-
gem "rspec-expectations"
|
18
|
-
gem "cucumber", ">= 0"
|
19
|
-
gem "capybara"
|
20
|
-
gem "bundler"
|
21
|
-
gem "jeweler", "~> 1.6.4"
|
22
|
-
gem "debugger"
|
23
|
-
end
|
3
|
+
gemspec
|
data/README.md
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
name-spotter
|
2
|
+
============
|
3
|
+
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
7
|
+
|
8
|
+
|
9
|
+
Finds biodiversity scientific names in texts using TaxonFinder
|
10
|
+
(by Patrick Leary) or NetiNeti (by Lakshmi Manohar Akella) libraries.
|
11
|
+
This gem works with Ruby >= 2.0
|
12
|
+
|
13
|
+
Requirements
|
14
|
+
------------
|
15
|
+
|
16
|
+
* Docker
|
17
|
+
|
18
|
+
Installation
|
19
|
+
------------
|
20
|
+
|
21
|
+
Install the gem
|
22
|
+
|
23
|
+
gem install name-spotter
|
24
|
+
|
25
|
+
Install and run TaxonFinder and NetiNeti docker containers
|
26
|
+
|
27
|
+
```bash
|
28
|
+
docker pull gnames/netineti
|
29
|
+
docker pull gnames/taxonfinder
|
30
|
+
docker run -d -p 0.0.0.0:1234:1234 --name tf gnames/taxonfinder
|
31
|
+
docker run -d -p 0.0.0.0:6384:6384 --name nn gnames/netineti
|
32
|
+
```
|
33
|
+
|
34
|
+
Usage
|
35
|
+
-----
|
36
|
+
|
37
|
+
If you are using localhost and default ports for NetiNeti and TaxonFinder:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
require "name-spotter"
|
41
|
+
|
42
|
+
neti_client = NameSpotter::NetiNetiClient.new()
|
43
|
+
tf_client = NameSpotter::TaxonFinderClient.new()
|
44
|
+
neti_name_spotter = NameSpotter.new(neti_client)
|
45
|
+
tf_name_spotter = NameSpotter.new(tf_client)
|
46
|
+
|
47
|
+
neti_name_spotter.find(your_text)
|
48
|
+
tf_name_spotter.find(your_text)
|
49
|
+
```
|
50
|
+
|
51
|
+
If you have installed NetiNeti and TaxonFinder on a machine
|
52
|
+
with non-default port:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
neti_client = NameSpotter::NetiNetiClient.new(host: "example.com",
|
56
|
+
port: 5555)
|
57
|
+
#or
|
58
|
+
neti_client = NameSpotter::NetiNetiClient.new(host: "123.123.123.111",
|
59
|
+
port: 5555)
|
60
|
+
```
|
61
|
+
|
62
|
+
If you want to get results in JSON or XML formats
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
neti_name_spotter.find(your_text, "json")
|
66
|
+
neti_name_spotter.find(your_text, "xml")
|
67
|
+
```
|
68
|
+
|
69
|
+
Development
|
70
|
+
-----------
|
71
|
+
|
72
|
+
To run tests start TaxonFinder and NetiNeti on your local machine with
|
73
|
+
default configurations and run
|
74
|
+
|
75
|
+
```
|
76
|
+
bundle exec rake
|
77
|
+
```
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
Contributing to name-spotter
|
82
|
+
----------------------------
|
83
|
+
|
84
|
+
* Check out the latest master to make sure the feature hasn't been implemented
|
85
|
+
or the bug hasn't been fixed yet
|
86
|
+
* Check out the issue tracker to make sure someone hasn't already requested
|
87
|
+
it and/or contributed it
|
88
|
+
* Fork the project
|
89
|
+
* Start a feature/bugfix branch
|
90
|
+
* Commit and push until you are happy with your contribution
|
91
|
+
* Make sure to add tests for it. This is important so I don't break it in a
|
92
|
+
future version unintentionally.
|
93
|
+
* Please try not to mess with the Rakefile, version, or history. If you want
|
94
|
+
to have your own version, or it is otherwise necessary, that is fine, but please
|
95
|
+
isolate to its own commit so I can cherry-pick around it.
|
96
|
+
|
97
|
+
Copyright
|
98
|
+
---------
|
99
|
+
|
100
|
+
Authors: [Chuck Ha][7], [Anthony Goddard][8], [Dmitry Mozzherin][9],
|
101
|
+
[David Shorthouse][10]
|
102
|
+
|
103
|
+
Copyright (c) 2012-2016 Marine Biological Laboratory. See [LICENSE.txt][11] for
|
104
|
+
further details.
|
105
|
+
|
106
|
+
[1]: https://badge.fury.io/rb/name-spotter.svg
|
107
|
+
[2]: http://badge.fury.io/rb/name-spotter
|
108
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/name-spotter.svg
|
109
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/name-spotter
|
110
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/name-spotter.svg
|
111
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/name-spotter
|
112
|
+
[7]: https://github.com/ChuckHa
|
113
|
+
[8]: https://github.com/agoddard
|
114
|
+
[9]: https://github.com/dimus
|
115
|
+
[10]: https://github.com/dshorthouse
|
116
|
+
[11]: https://raw.githubusercontent.com/GlobalNamesArchitecture/name-spotter/master/LICENSE.txt
|
data/Rakefile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
2
|
require 'bundler'
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
|
5
5
|
begin
|
6
6
|
Bundler.setup(:default, :development)
|
7
7
|
rescue Bundler::BundlerError => e
|
@@ -11,20 +11,6 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
-
require 'jeweler'
|
15
|
-
Jeweler::Tasks.new do |gem|
|
16
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
-
gem.name = "name-spotter"
|
18
|
-
gem.homepage = "http://github.com/GlobalNamesArchitecture/name-spotter"
|
19
|
-
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{Scientific names finder}
|
21
|
-
gem.description = %Q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
|
22
|
-
gem.email = "dmozzherin@gmail.com"
|
23
|
-
gem.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
24
|
-
# dependencies defined in Gemfile
|
25
|
-
end
|
26
|
-
Jeweler::RubygemsDotOrgTasks.new
|
27
|
-
|
28
14
|
require 'rspec/core'
|
29
15
|
require 'rspec/core/rake_task'
|
30
16
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
@@ -36,8 +22,5 @@ RSpec::Core::RakeTask.new(:rcov) do |spec|
|
|
36
22
|
spec.rcov = true
|
37
23
|
end
|
38
24
|
|
39
|
-
require 'cucumber/rake/task'
|
40
|
-
Cucumber::Rake::Task.new(:features)
|
41
|
-
|
42
25
|
task :default => :spec
|
43
26
|
|
data/lib/name-spotter.rb
CHANGED
@@ -23,7 +23,9 @@ class NameSpotter
|
|
23
23
|
end
|
24
24
|
res
|
25
25
|
end
|
26
|
-
eng, not_eng = tweets.shuffle[0...50].partition
|
26
|
+
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
27
|
+
UnsupervisedLanguageDetection.is_english_tweet?(a.join(" "))
|
28
|
+
end
|
27
29
|
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
28
30
|
percentage > 0.5
|
29
31
|
end
|
@@ -8,11 +8,13 @@ end
|
|
8
8
|
class String
|
9
9
|
def constantize()
|
10
10
|
camel_cased_word = self
|
11
|
-
names = camel_cased_word.split(
|
11
|
+
names = camel_cased_word.split("::")
|
12
12
|
names.shift if names.empty? || names.first.empty?
|
13
13
|
constant = Object
|
14
14
|
names.each do |name|
|
15
|
-
constant = constant.const_defined?(name) ?
|
15
|
+
constant = constant.const_defined?(name) ?
|
16
|
+
constant.const_get(name) :
|
17
|
+
constant.const_missing(name)
|
16
18
|
end
|
17
19
|
constant
|
18
20
|
end
|
@@ -1,22 +1,29 @@
|
|
1
1
|
class NameSpotter
|
2
2
|
class NetiNetiClient < Client
|
3
|
-
def initialize(opts = { host:
|
4
|
-
super
|
3
|
+
def initialize(opts = { host: "0.0.0.0", port: 6384 })
|
4
|
+
super
|
5
5
|
end
|
6
6
|
|
7
7
|
def find(text)
|
8
8
|
# the form does not get sent if text is nil or empty
|
9
9
|
return [] if text.nil? || text.empty?
|
10
|
-
|
11
|
-
|
10
|
+
text << " " # hack to find the last name
|
11
|
+
resource = RestClient::Resource.new("http://#{@host}:#{@port}",
|
12
|
+
timeout: 9_000_000,
|
13
|
+
open_timeout: 9_000_000,
|
14
|
+
connection: "Keep-Alive")
|
15
|
+
#TODO: we should figure out a better delimiter in NetiNeti (or use json)
|
16
|
+
# so we don't need to substitute pipe with a letter here
|
12
17
|
response = resource.post(data: text.gsub("|", "l")) #hhhhhhack
|
13
18
|
response.body.split("|").collect do |info|
|
14
19
|
res = info.split(",")
|
15
20
|
name = res[0...-2].join(",")
|
16
21
|
offset_start = res[-2]
|
17
|
-
name.force_encoding(
|
22
|
+
name.force_encoding("utf-8")
|
18
23
|
normalized_name = NameSpotter::ScientificName.normalize(name)
|
19
|
-
NameSpotter::ScientificName.new(name,
|
24
|
+
NameSpotter::ScientificName.new(name,
|
25
|
+
scientific_name: normalized_name,
|
26
|
+
start_position: offset_start.to_i)
|
20
27
|
end
|
21
28
|
end
|
22
29
|
end
|
@@ -3,7 +3,7 @@ class NameSpotter
|
|
3
3
|
attr_reader :verbatim, :scientific, :start_pos, :end_pos, :score
|
4
4
|
|
5
5
|
def self.normalize(name)
|
6
|
-
name = name.gsub(",", " ")
|
6
|
+
name = name.gsub(",", " ")
|
7
7
|
name = name.gsub(/\s+/, " ")
|
8
8
|
end
|
9
9
|
|
@@ -22,8 +22,8 @@ class NameSpotter
|
|
22
22
|
other_name.is_a?(Name) &&
|
23
23
|
other_name.verbatim.eql?(verbatim) &&
|
24
24
|
other_name.scientific.eql?(scientific) &&
|
25
|
-
other_name.start_pos.eql?(start_pos) &&
|
26
|
-
other_name.end_pos.eql?(end_pos) &&
|
25
|
+
other_name.start_pos.eql?(start_pos) &&
|
26
|
+
other_name.end_pos.eql?(end_pos) &&
|
27
27
|
other_name.score.eql?(score)
|
28
28
|
end
|
29
29
|
|
@@ -7,14 +7,14 @@ class NameSpotter
|
|
7
7
|
|
8
8
|
def find(str, from_web_form=false)
|
9
9
|
@names = []
|
10
|
-
@document_verbatim = str
|
11
10
|
return [] if str.nil? || str.empty?
|
12
|
-
|
11
|
+
str << " ." # hack to find last name
|
12
|
+
@document_verbatim = str
|
13
13
|
# These are for the data-send-back that happens in TaxonFinder
|
14
|
-
@current_string =
|
15
|
-
@current_string_state =
|
14
|
+
@current_string = ""
|
15
|
+
@current_string_state = ""
|
16
16
|
@word_list_matches = 0
|
17
|
-
@cursor = 8.times.inject([]) { |res| res << [
|
17
|
+
@cursor = 8.times.inject([]) { |res| res << ["",0, 0] }
|
18
18
|
@current_index = nil
|
19
19
|
words = str.split(/\s/)
|
20
20
|
words.each do |word|
|
@@ -34,11 +34,13 @@ class NameSpotter
|
|
34
34
|
@socket = nil
|
35
35
|
@names
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
private
|
39
39
|
|
40
40
|
def process_word(word, word_separator_size)
|
41
|
-
cursor_entry = [word,
|
41
|
+
cursor_entry = [word,
|
42
|
+
@cursor[-1][0].size + @cursor[-1][1] + @cursor[-1][2],
|
43
|
+
word_separator_size]
|
42
44
|
@cursor.shift
|
43
45
|
@cursor << cursor_entry
|
44
46
|
taxon_find(word)
|
@@ -47,30 +49,37 @@ class NameSpotter
|
|
47
49
|
def socket
|
48
50
|
unless @socket
|
49
51
|
@socket = TCPSocket.open(@host, @port)
|
50
|
-
@socket.set_encoding(
|
52
|
+
@socket.set_encoding("utf-8")
|
51
53
|
end
|
52
54
|
@socket
|
53
55
|
end
|
54
56
|
|
55
57
|
def taxon_find(word)
|
56
|
-
input =
|
58
|
+
input =
|
59
|
+
"#{word}|#{@current_string}|#{@current_string_state}|#{@word_list_matches}|0"
|
57
60
|
socket.write(input + "\n")
|
58
61
|
if output = socket.gets
|
59
62
|
response = parse_socket_response(output)
|
60
63
|
return if not response
|
61
|
-
|
62
|
-
[response.return_string,
|
64
|
+
|
65
|
+
[response.return_string,
|
66
|
+
response.return_string_2].each_with_index do |str, i|
|
63
67
|
next if !str || str.split(" ").size > 6
|
64
|
-
verbatim_string, scientific_string, start_position =
|
68
|
+
verbatim_string, scientific_string, start_position =
|
69
|
+
process_response(str, i)
|
65
70
|
next if scientific_string.empty?
|
66
|
-
add_name NameSpotter::ScientificName.new(verbatim_string,
|
71
|
+
add_name NameSpotter::ScientificName.new(verbatim_string,
|
72
|
+
start_position: start_position,
|
73
|
+
scientific_name: scientific_string)
|
67
74
|
end
|
68
75
|
@current_index = @current_string.empty? ? nil : @cursor[-1][1]
|
69
76
|
end
|
70
77
|
end
|
71
78
|
|
72
79
|
def parse_socket_response(response)
|
73
|
-
current_string, current_string_state, word_list_matches,
|
80
|
+
current_string, current_string_state, word_list_matches,
|
81
|
+
return_string, return_score, return_string_2,
|
82
|
+
return_score_2 = response.strip.split "|"
|
74
83
|
@current_string = current_string
|
75
84
|
@current_string_state = current_string_state
|
76
85
|
@word_list_matches = word_list_matches
|
@@ -78,14 +87,14 @@ class NameSpotter
|
|
78
87
|
if !@current_index && @current_string.size > 0
|
79
88
|
@current_index = @cursor[-1][1]
|
80
89
|
end
|
81
|
-
if not return_string.blank? or not return_string_2.blank?
|
82
|
-
OpenStruct.new( { :current_string
|
83
|
-
:
|
84
|
-
:
|
85
|
-
:
|
86
|
-
:
|
87
|
-
:
|
88
|
-
:
|
90
|
+
if not return_string.blank? or not return_string_2.blank?
|
91
|
+
OpenStruct.new( { current_string: current_string,
|
92
|
+
current_string_state: current_string_state,
|
93
|
+
word_list_matches: word_list_matches,
|
94
|
+
return_string: return_string,
|
95
|
+
return_score: return_score,
|
96
|
+
return_string_2: return_string_2,
|
97
|
+
return_score_2: return_score_2 })
|
89
98
|
else
|
90
99
|
@current_index = nil if @current_string.empty? && @current_index
|
91
100
|
false
|
@@ -94,7 +103,7 @@ class NameSpotter
|
|
94
103
|
|
95
104
|
def process_response(str, index)
|
96
105
|
is_return_string2 = (index == 1)
|
97
|
-
str.force_encoding(
|
106
|
+
str.force_encoding("utf-8")
|
98
107
|
start_position = verbatim_string = nil
|
99
108
|
if @current_index
|
100
109
|
start_position = is_return_string2 ? @cursor[-1][1] : @current_index
|
@@ -102,7 +111,9 @@ class NameSpotter
|
|
102
111
|
verbatim_components = @cursor[indices.rindex(start_position)..-1]
|
103
112
|
sci_name_items_num = str.split(" ").size
|
104
113
|
verbatim_components = verbatim_components[0...sci_name_items_num]
|
105
|
-
verbatim_string = verbatim_components.map
|
114
|
+
verbatim_string = verbatim_components.map do |w|
|
115
|
+
w[0] + (" " * w[2])
|
116
|
+
end.join("").gsub(/[\.\,\!\;]*\s*$/, "")
|
106
117
|
else
|
107
118
|
verbatim_string, start_position, space_size = @cursor[-1]
|
108
119
|
end
|