name-spotter 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.byebug_history +44 -0
- data/.gitignore +51 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/CHANGELOG +2 -0
- data/Gemfile +2 -22
- data/README.md +116 -0
- data/Rakefile +2 -19
- data/lib/name-spotter.rb +3 -1
- data/lib/name-spotter/monkey_patches.rb +4 -2
- data/lib/name-spotter/neti_neti_client.rb +13 -6
- data/lib/name-spotter/scientific_name.rb +3 -3
- data/lib/name-spotter/taxon_finder_client.rb +35 -24
- data/lib/name-spotter/version.rb +8 -0
- data/name-spotter.gemspec +26 -98
- data/spec/name-spotter_spec.rb +334 -131
- data/spec/scientific_name_spec.rb +14 -19
- data/spec/spec_helper.rb +2 -12
- data/tf_logic.txt +3 -3
- metadata +69 -142
- data/.rvmrc +0 -1
- data/Gemfile.lock +0 -84
- data/README.rdoc +0 -95
- data/VERSION +0 -1
- data/features/name-spotter.feature +0 -9
- data/features/step_definitions/name-spotter_steps.rb +0 -0
- data/features/support/env.rb +0 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7d5a8968a40ba3511bda226ce4e7cbbae11189e9
|
4
|
+
data.tar.gz: 02dd6325c7a5786737a3ddbf73f2282c9e3b9f3d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7c3b6c2c51b869b45b1dd7a057a5439595978efa967005871689392d9fdb5f6c5e5fb92beeebb0769aa3d1318417bb0ea1f819b5ecd28014ebd3b24a7694242a
|
7
|
+
data.tar.gz: a7acd16270b09005f0732319e15264b3373519852d940af4ed47db2d94d70b296e021abc64f2a9b6cb6397aa5874bd967ef73cf7e8ada4874a0cbf5ec4061c4c
|
data/.byebug_history
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
q
|
2
|
+
c
|
3
|
+
q
|
4
|
+
p response
|
5
|
+
q
|
6
|
+
@names
|
7
|
+
c
|
8
|
+
words
|
9
|
+
q
|
10
|
+
fg
|
11
|
+
words
|
12
|
+
q
|
13
|
+
fg
|
14
|
+
words
|
15
|
+
q
|
16
|
+
fg
|
17
|
+
c
|
18
|
+
words
|
19
|
+
fg
|
20
|
+
q
|
21
|
+
fg
|
22
|
+
@names
|
23
|
+
q
|
24
|
+
res
|
25
|
+
q
|
26
|
+
fg
|
27
|
+
name
|
28
|
+
q
|
29
|
+
fg
|
30
|
+
response
|
31
|
+
q
|
32
|
+
p response
|
33
|
+
q
|
34
|
+
fg
|
35
|
+
text
|
36
|
+
q
|
37
|
+
p response
|
38
|
+
expect(NameSpotter.english?(eng3)).to be true
|
39
|
+
expect(NameSpotter.english?(eng2)).to be true
|
40
|
+
expect(NameSpotter.english?(eng)).to be true
|
41
|
+
NameSpotter.english?(not_eng)
|
42
|
+
NameSpotter.english?(eng3)
|
43
|
+
NameSpotter.english?(eng2)
|
44
|
+
NameSpotter.english?(eng)
|
data/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#Gemfile.lock
|
2
|
+
Gemfile.lock
|
3
|
+
|
4
|
+
# rcov generated
|
5
|
+
coverage
|
6
|
+
|
7
|
+
# rdoc generated
|
8
|
+
rdoc
|
9
|
+
|
10
|
+
# yard generated
|
11
|
+
doc
|
12
|
+
.yardoc
|
13
|
+
|
14
|
+
# bundler
|
15
|
+
.bundle
|
16
|
+
|
17
|
+
# jeweler generated
|
18
|
+
pkg
|
19
|
+
|
20
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
21
|
+
#
|
22
|
+
# * Create a file at ~/.gitignore
|
23
|
+
# * Include files you want ignored
|
24
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
25
|
+
#
|
26
|
+
# After doing this, these files will be ignored in all your git projects,
|
27
|
+
# saving you from having to 'pollute' every project you touch with them
|
28
|
+
#
|
29
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # from the line.)
|
30
|
+
#
|
31
|
+
# For MacOS:
|
32
|
+
#
|
33
|
+
#.DS_Store
|
34
|
+
|
35
|
+
# For TextMate
|
36
|
+
#*.tmproj
|
37
|
+
#tmtags
|
38
|
+
|
39
|
+
# For emacs:
|
40
|
+
#*~
|
41
|
+
#\#*
|
42
|
+
#.\#*
|
43
|
+
|
44
|
+
# For vim:
|
45
|
+
#*.swp
|
46
|
+
|
47
|
+
# For redcar:
|
48
|
+
#.redcar
|
49
|
+
|
50
|
+
# For rubinius:
|
51
|
+
#*.rbc
|
data/.rspec
CHANGED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.6
|
data/.travis.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
sudo: required
|
2
|
+
language: ruby
|
3
|
+
services:
|
4
|
+
- docker
|
5
|
+
|
6
|
+
rvm:
|
7
|
+
- 2.0
|
8
|
+
- 2.1
|
9
|
+
- 2.2
|
10
|
+
|
11
|
+
# bundler_args: --without development
|
12
|
+
|
13
|
+
before_install:
|
14
|
+
- docker pull gnames/netineti
|
15
|
+
- docker pull gnames/taxonfinder
|
16
|
+
- docker run -d -p 0.0.0.0:1234:1234 --name tf gnames/taxonfinder
|
17
|
+
- docker run -d -p 0.0.0.0:6384:6384 --name nn gnames/netineti
|
18
|
+
# before_script:
|
19
|
+
# - sleep 100
|
20
|
+
branches:
|
21
|
+
only:
|
22
|
+
- master
|
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
@@ -1,23 +1,3 @@
|
|
1
|
-
source
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
1
|
+
source 'https://rubygems.org'
|
5
2
|
|
6
|
-
|
7
|
-
gem "rest-client"
|
8
|
-
gem "builder"
|
9
|
-
gem "json"
|
10
|
-
gem "unicode_utils"
|
11
|
-
gem "unsupervised-language-detection"
|
12
|
-
|
13
|
-
# Add dependencies to develop your gem here.
|
14
|
-
# Include everything needed to run rake, tests, features, etc.
|
15
|
-
group :development do
|
16
|
-
gem "rspec"
|
17
|
-
gem "rspec-expectations"
|
18
|
-
gem "cucumber", ">= 0"
|
19
|
-
gem "capybara"
|
20
|
-
gem "bundler"
|
21
|
-
gem "jeweler", "~> 1.6.4"
|
22
|
-
gem "debugger"
|
23
|
-
end
|
3
|
+
gemspec
|
data/README.md
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
name-spotter
|
2
|
+
============
|
3
|
+
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
7
|
+
|
8
|
+
|
9
|
+
Finds biodiversity scientific names in texts using TaxonFinder
|
10
|
+
(by Patrick Leary) or NetiNeti (by Lakshmi Manohar Akella) libraries.
|
11
|
+
This gem works with Ruby >= 2.0
|
12
|
+
|
13
|
+
Requirements
|
14
|
+
------------
|
15
|
+
|
16
|
+
* Docker
|
17
|
+
|
18
|
+
Installation
|
19
|
+
------------
|
20
|
+
|
21
|
+
Install the gem
|
22
|
+
|
23
|
+
gem install name-spotter
|
24
|
+
|
25
|
+
Install and run TaxonFinder and NetiNeti docker containers
|
26
|
+
|
27
|
+
```bash
|
28
|
+
docker pull gnames/netineti
|
29
|
+
docker pull gnames/taxonfinder
|
30
|
+
docker run -d -p 0.0.0.0:1234:1234 --name tf gnames/taxonfinder
|
31
|
+
docker run -d -p 0.0.0.0:6384:6384 --name nn gnames/netineti
|
32
|
+
```
|
33
|
+
|
34
|
+
Usage
|
35
|
+
-----
|
36
|
+
|
37
|
+
If you are using localhost and default ports for NetiNeti and TaxonFinder:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
require "name-spotter"
|
41
|
+
|
42
|
+
neti_client = NameSpotter::NetiNetiClient.new()
|
43
|
+
tf_client = NameSpotter::TaxonFinderClient.new()
|
44
|
+
neti_name_spotter = NameSpotter.new(neti_client)
|
45
|
+
tf_name_spotter = NameSpotter.new(tf_client)
|
46
|
+
|
47
|
+
neti_name_spotter.find(your_text)
|
48
|
+
tf_name_spotter.find(your_text)
|
49
|
+
```
|
50
|
+
|
51
|
+
If you have installed NetiNeti and TaxonFinder on a machine
|
52
|
+
with a non-default port:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
neti_client = NameSpotter::NetiNetiClient.new(host: "example.com",
|
56
|
+
port: 5555)
|
57
|
+
#or
|
58
|
+
neti_client = NameSpotter::NetiNetiClient.new(host: "123.123.123.111",
|
59
|
+
port: 5555)
|
60
|
+
```
|
61
|
+
|
62
|
+
If you want to get results in JSON or XML formats
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
neti_name_spotter.find(your_text, "json")
|
66
|
+
neti_name_spotter.find(your_text, "xml")
|
67
|
+
```
|
68
|
+
|
69
|
+
Development
|
70
|
+
-----------
|
71
|
+
|
72
|
+
To run tests, start TaxonFinder and NetiNeti on your local machine with
|
73
|
+
default configurations and run
|
74
|
+
|
75
|
+
```
|
76
|
+
bundle exec rake
|
77
|
+
```
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
Contributing to name-spotter
|
82
|
+
----------------------------
|
83
|
+
|
84
|
+
* Check out the latest master to make sure the feature hasn't been implemented
|
85
|
+
or the bug hasn't been fixed yet
|
86
|
+
* Check out the issue tracker to make sure someone hasn't already requested
|
87
|
+
it and/or contributed it
|
88
|
+
* Fork the project
|
89
|
+
* Start a feature/bugfix branch
|
90
|
+
* Commit and push until you are happy with your contribution
|
91
|
+
* Make sure to add tests for it. This is important so I don't break it in a
|
92
|
+
future version unintentionally.
|
93
|
+
* Please try not to mess with the Rakefile, version, or history. If you want
|
94
|
+
to have your own version, or it is otherwise necessary, that is fine, but please
|
95
|
+
isolate it to its own commit so I can cherry-pick around it.
|
96
|
+
|
97
|
+
Copyright
|
98
|
+
---------
|
99
|
+
|
100
|
+
Authors: [Chuck Ha][7], [Anthony Goddard][8], [Dmitry Mozzherin][9],
|
101
|
+
[David Shorthouse][10]
|
102
|
+
|
103
|
+
Copyright (c) 2012-2016 Marine Biological Laboratory. See [LICENSE.txt][11] for
|
104
|
+
further details.
|
105
|
+
|
106
|
+
[1]: https://badge.fury.io/rb/name-spotter.svg
|
107
|
+
[2]: http://badge.fury.io/rb/name-spotter
|
108
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/name-spotter.svg
|
109
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/name-spotter
|
110
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/name-spotter.svg
|
111
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/name-spotter
|
112
|
+
[7]: https://github.com/ChuckHa
|
113
|
+
[8]: https://github.com/agoddard
|
114
|
+
[9]: https://github.com/dimus
|
115
|
+
[10]: https://github.com/dshorthouse
|
116
|
+
[11]: https://raw.githubusercontent.com/GlobalNamesArchitecture/name-spotter/master/LICENSE.txt
|
data/Rakefile
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
2
|
require 'bundler'
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
|
5
5
|
begin
|
6
6
|
Bundler.setup(:default, :development)
|
7
7
|
rescue Bundler::BundlerError => e
|
@@ -11,20 +11,6 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
-
require 'jeweler'
|
15
|
-
Jeweler::Tasks.new do |gem|
|
16
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
-
gem.name = "name-spotter"
|
18
|
-
gem.homepage = "http://github.com/GlobalNamesArchitecture/name-spotter"
|
19
|
-
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{Scientific names finder}
|
21
|
-
gem.description = %Q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
|
22
|
-
gem.email = "dmozzherin@gmail.com"
|
23
|
-
gem.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
24
|
-
# dependencies defined in Gemfile
|
25
|
-
end
|
26
|
-
Jeweler::RubygemsDotOrgTasks.new
|
27
|
-
|
28
14
|
require 'rspec/core'
|
29
15
|
require 'rspec/core/rake_task'
|
30
16
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
@@ -36,8 +22,5 @@ RSpec::Core::RakeTask.new(:rcov) do |spec|
|
|
36
22
|
spec.rcov = true
|
37
23
|
end
|
38
24
|
|
39
|
-
require 'cucumber/rake/task'
|
40
|
-
Cucumber::Rake::Task.new(:features)
|
41
|
-
|
42
25
|
task :default => :spec
|
43
26
|
|
data/lib/name-spotter.rb
CHANGED
@@ -23,7 +23,9 @@ class NameSpotter
|
|
23
23
|
end
|
24
24
|
res
|
25
25
|
end
|
26
|
-
eng, not_eng = tweets.shuffle[0...50].partition
|
26
|
+
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
27
|
+
UnsupervisedLanguageDetection.is_english_tweet?(a.join(" "))
|
28
|
+
end
|
27
29
|
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
28
30
|
percentage > 0.5
|
29
31
|
end
|
@@ -8,11 +8,13 @@ end
|
|
8
8
|
class String
|
9
9
|
def constantize()
|
10
10
|
camel_cased_word = self
|
11
|
-
names = camel_cased_word.split(
|
11
|
+
names = camel_cased_word.split("::")
|
12
12
|
names.shift if names.empty? || names.first.empty?
|
13
13
|
constant = Object
|
14
14
|
names.each do |name|
|
15
|
-
constant = constant.const_defined?(name) ?
|
15
|
+
constant = constant.const_defined?(name) ?
|
16
|
+
constant.const_get(name) :
|
17
|
+
constant.const_missing(name)
|
16
18
|
end
|
17
19
|
constant
|
18
20
|
end
|
@@ -1,22 +1,29 @@
|
|
1
1
|
class NameSpotter
|
2
2
|
class NetiNetiClient < Client
|
3
|
-
def initialize(opts = { host:
|
4
|
-
super
|
3
|
+
def initialize(opts = { host: "0.0.0.0", port: 6384 })
|
4
|
+
super
|
5
5
|
end
|
6
6
|
|
7
7
|
def find(text)
|
8
8
|
# the form does not get sent if text is nil or empty
|
9
9
|
return [] if text.nil? || text.empty?
|
10
|
-
|
11
|
-
|
10
|
+
text << " " # hack to find the last name
|
11
|
+
resource = RestClient::Resource.new("http://#{@host}:#{@port}",
|
12
|
+
timeout: 9_000_000,
|
13
|
+
open_timeout: 9_000_000,
|
14
|
+
connection: "Keep-Alive")
|
15
|
+
#TODO: we should figure out a better delimiter in NetiNeti (or use json)
|
16
|
+
# so we don't need to substitute pipe with a letter here
|
12
17
|
response = resource.post(data: text.gsub("|", "l")) #hhhhhhack
|
13
18
|
response.body.split("|").collect do |info|
|
14
19
|
res = info.split(",")
|
15
20
|
name = res[0...-2].join(",")
|
16
21
|
offset_start = res[-2]
|
17
|
-
name.force_encoding(
|
22
|
+
name.force_encoding("utf-8")
|
18
23
|
normalized_name = NameSpotter::ScientificName.normalize(name)
|
19
|
-
NameSpotter::ScientificName.new(name,
|
24
|
+
NameSpotter::ScientificName.new(name,
|
25
|
+
scientific_name: normalized_name,
|
26
|
+
start_position: offset_start.to_i)
|
20
27
|
end
|
21
28
|
end
|
22
29
|
end
|
@@ -3,7 +3,7 @@ class NameSpotter
|
|
3
3
|
attr_reader :verbatim, :scientific, :start_pos, :end_pos, :score
|
4
4
|
|
5
5
|
def self.normalize(name)
|
6
|
-
name = name.gsub(",", " ")
|
6
|
+
name = name.gsub(",", " ")
|
7
7
|
name = name.gsub(/\s+/, " ")
|
8
8
|
end
|
9
9
|
|
@@ -22,8 +22,8 @@ class NameSpotter
|
|
22
22
|
other_name.is_a?(Name) &&
|
23
23
|
other_name.verbatim.eql?(verbatim) &&
|
24
24
|
other_name.scientific.eql?(scientific) &&
|
25
|
-
other_name.start_pos.eql?(start_pos) &&
|
26
|
-
other_name.end_pos.eql?(end_pos) &&
|
25
|
+
other_name.start_pos.eql?(start_pos) &&
|
26
|
+
other_name.end_pos.eql?(end_pos) &&
|
27
27
|
other_name.score.eql?(score)
|
28
28
|
end
|
29
29
|
|
@@ -7,14 +7,14 @@ class NameSpotter
|
|
7
7
|
|
8
8
|
def find(str, from_web_form=false)
|
9
9
|
@names = []
|
10
|
-
@document_verbatim = str
|
11
10
|
return [] if str.nil? || str.empty?
|
12
|
-
|
11
|
+
str << " ." # hack to find last name
|
12
|
+
@document_verbatim = str
|
13
13
|
# These are for the data-send-back that happens in TaxonFinder
|
14
|
-
@current_string =
|
15
|
-
@current_string_state =
|
14
|
+
@current_string = ""
|
15
|
+
@current_string_state = ""
|
16
16
|
@word_list_matches = 0
|
17
|
-
@cursor = 8.times.inject([]) { |res| res << [
|
17
|
+
@cursor = 8.times.inject([]) { |res| res << ["",0, 0] }
|
18
18
|
@current_index = nil
|
19
19
|
words = str.split(/\s/)
|
20
20
|
words.each do |word|
|
@@ -34,11 +34,13 @@ class NameSpotter
|
|
34
34
|
@socket = nil
|
35
35
|
@names
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
private
|
39
39
|
|
40
40
|
def process_word(word, word_separator_size)
|
41
|
-
cursor_entry = [word,
|
41
|
+
cursor_entry = [word,
|
42
|
+
@cursor[-1][0].size + @cursor[-1][1] + @cursor[-1][2],
|
43
|
+
word_separator_size]
|
42
44
|
@cursor.shift
|
43
45
|
@cursor << cursor_entry
|
44
46
|
taxon_find(word)
|
@@ -47,30 +49,37 @@ class NameSpotter
|
|
47
49
|
def socket
|
48
50
|
unless @socket
|
49
51
|
@socket = TCPSocket.open(@host, @port)
|
50
|
-
@socket.set_encoding(
|
52
|
+
@socket.set_encoding("utf-8")
|
51
53
|
end
|
52
54
|
@socket
|
53
55
|
end
|
54
56
|
|
55
57
|
def taxon_find(word)
|
56
|
-
input =
|
58
|
+
input =
|
59
|
+
"#{word}|#{@current_string}|#{@current_string_state}|#{@word_list_matches}|0"
|
57
60
|
socket.write(input + "\n")
|
58
61
|
if output = socket.gets
|
59
62
|
response = parse_socket_response(output)
|
60
63
|
return if not response
|
61
|
-
|
62
|
-
[response.return_string,
|
64
|
+
|
65
|
+
[response.return_string,
|
66
|
+
response.return_string_2].each_with_index do |str, i|
|
63
67
|
next if !str || str.split(" ").size > 6
|
64
|
-
verbatim_string, scientific_string, start_position =
|
68
|
+
verbatim_string, scientific_string, start_position =
|
69
|
+
process_response(str, i)
|
65
70
|
next if scientific_string.empty?
|
66
|
-
add_name NameSpotter::ScientificName.new(verbatim_string,
|
71
|
+
add_name NameSpotter::ScientificName.new(verbatim_string,
|
72
|
+
start_position: start_position,
|
73
|
+
scientific_name: scientific_string)
|
67
74
|
end
|
68
75
|
@current_index = @current_string.empty? ? nil : @cursor[-1][1]
|
69
76
|
end
|
70
77
|
end
|
71
78
|
|
72
79
|
def parse_socket_response(response)
|
73
|
-
current_string, current_string_state, word_list_matches,
|
80
|
+
current_string, current_string_state, word_list_matches,
|
81
|
+
return_string, return_score, return_string_2,
|
82
|
+
return_score_2 = response.strip.split "|"
|
74
83
|
@current_string = current_string
|
75
84
|
@current_string_state = current_string_state
|
76
85
|
@word_list_matches = word_list_matches
|
@@ -78,14 +87,14 @@ class NameSpotter
|
|
78
87
|
if !@current_index && @current_string.size > 0
|
79
88
|
@current_index = @cursor[-1][1]
|
80
89
|
end
|
81
|
-
if not return_string.blank? or not return_string_2.blank?
|
82
|
-
OpenStruct.new( { :current_string
|
83
|
-
:
|
84
|
-
:
|
85
|
-
:
|
86
|
-
:
|
87
|
-
:
|
88
|
-
:
|
90
|
+
if not return_string.blank? or not return_string_2.blank?
|
91
|
+
OpenStruct.new( { current_string: current_string,
|
92
|
+
current_string_state: current_string_state,
|
93
|
+
word_list_matches: word_list_matches,
|
94
|
+
return_string: return_string,
|
95
|
+
return_score: return_score,
|
96
|
+
return_string_2: return_string_2,
|
97
|
+
return_score_2: return_score_2 })
|
89
98
|
else
|
90
99
|
@current_index = nil if @current_string.empty? && @current_index
|
91
100
|
false
|
@@ -94,7 +103,7 @@ class NameSpotter
|
|
94
103
|
|
95
104
|
def process_response(str, index)
|
96
105
|
is_return_string2 = (index == 1)
|
97
|
-
str.force_encoding(
|
106
|
+
str.force_encoding("utf-8")
|
98
107
|
start_position = verbatim_string = nil
|
99
108
|
if @current_index
|
100
109
|
start_position = is_return_string2 ? @cursor[-1][1] : @current_index
|
@@ -102,7 +111,9 @@ class NameSpotter
|
|
102
111
|
verbatim_components = @cursor[indices.rindex(start_position)..-1]
|
103
112
|
sci_name_items_num = str.split(" ").size
|
104
113
|
verbatim_components = verbatim_components[0...sci_name_items_num]
|
105
|
-
verbatim_string = verbatim_components.map
|
114
|
+
verbatim_string = verbatim_components.map do |w|
|
115
|
+
w[0] + (" " * w[2])
|
116
|
+
end.join("").gsub(/[\.\,\!\;]*\s*$/, "")
|
106
117
|
else
|
107
118
|
verbatim_string, start_position, space_size = @cursor[-1]
|
108
119
|
end
|