name-spotter 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.rvmrc +1 -0
- data/Gemfile +21 -0
- data/Gemfile.lock +86 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/features/name-spotter.feature +9 -0
- data/features/step_definitions/name-spotter_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/lib/name-spotter.rb +50 -0
- data/lib/name-spotter/client.rb +22 -0
- data/lib/name-spotter/monkey_patches.rb +20 -0
- data/lib/name-spotter/neti_neti_client.rb +19 -0
- data/lib/name-spotter/scientific_name.rb +37 -0
- data/lib/name-spotter/taxon_finder_client.rb +78 -0
- data/name-spotter.gemspec +91 -0
- data/spec/name-spotter_spec.rb +48 -0
- data/spec/scientific_name_spec.rb +25 -0
- data/spec/spec_helper.rb +12 -0
- metadata +197 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2-p290@namespotter --create
|
data/Gemfile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
source "http://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

gem "rake"
gem "rest-client"
gem "builder"
gem "json"
# lib/name-spotter.rb does `require "nokogiri"` to build its XML output, so
# nokogiri has to be declared as a runtime dependency; otherwise it is only
# installed indirectly through the development-only capybara gem.
gem "nokogiri"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.3.0"
  gem "cucumber", ">= 0"
  gem "capybara"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
  gem "ruby-debug19"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
archive-tar-minitar (0.5.2)
|
5
|
+
builder (3.0.0)
|
6
|
+
capybara (1.1.2)
|
7
|
+
mime-types (>= 1.16)
|
8
|
+
nokogiri (>= 1.3.3)
|
9
|
+
rack (>= 1.0.0)
|
10
|
+
rack-test (>= 0.5.4)
|
11
|
+
selenium-webdriver (~> 2.0)
|
12
|
+
xpath (~> 0.1.4)
|
13
|
+
childprocess (0.2.8)
|
14
|
+
ffi (~> 1.0.6)
|
15
|
+
columnize (0.3.6)
|
16
|
+
cucumber (1.1.4)
|
17
|
+
builder (>= 2.1.2)
|
18
|
+
diff-lcs (>= 1.1.2)
|
19
|
+
gherkin (~> 2.7.1)
|
20
|
+
json (>= 1.4.6)
|
21
|
+
term-ansicolor (>= 1.0.6)
|
22
|
+
diff-lcs (1.1.3)
|
23
|
+
ffi (1.0.11)
|
24
|
+
gherkin (2.7.2)
|
25
|
+
json (>= 1.4.6)
|
26
|
+
git (1.2.5)
|
27
|
+
jeweler (1.6.4)
|
28
|
+
bundler (~> 1.0)
|
29
|
+
git (>= 1.2.5)
|
30
|
+
rake
|
31
|
+
json (1.6.4)
|
32
|
+
linecache19 (0.5.12)
|
33
|
+
ruby_core_source (>= 0.1.4)
|
34
|
+
mime-types (1.17.2)
|
35
|
+
multi_json (1.0.4)
|
36
|
+
nokogiri (1.5.0)
|
37
|
+
rack (1.4.0)
|
38
|
+
rack-test (0.6.1)
|
39
|
+
rack (>= 1.0)
|
40
|
+
rake (0.9.2.2)
|
41
|
+
rcov (0.9.11)
|
42
|
+
rest-client (1.6.7)
|
43
|
+
mime-types (>= 1.16)
|
44
|
+
rspec (2.3.0)
|
45
|
+
rspec-core (~> 2.3.0)
|
46
|
+
rspec-expectations (~> 2.3.0)
|
47
|
+
rspec-mocks (~> 2.3.0)
|
48
|
+
rspec-core (2.3.1)
|
49
|
+
rspec-expectations (2.3.0)
|
50
|
+
diff-lcs (~> 1.1.2)
|
51
|
+
rspec-mocks (2.3.0)
|
52
|
+
ruby-debug-base19 (0.11.25)
|
53
|
+
columnize (>= 0.3.1)
|
54
|
+
linecache19 (>= 0.5.11)
|
55
|
+
ruby_core_source (>= 0.1.4)
|
56
|
+
ruby-debug19 (0.11.6)
|
57
|
+
columnize (>= 0.3.1)
|
58
|
+
linecache19 (>= 0.5.11)
|
59
|
+
ruby-debug-base19 (>= 0.11.19)
|
60
|
+
ruby_core_source (0.1.5)
|
61
|
+
archive-tar-minitar (>= 0.5.2)
|
62
|
+
rubyzip (0.9.5)
|
63
|
+
selenium-webdriver (2.16.0)
|
64
|
+
childprocess (>= 0.2.5)
|
65
|
+
ffi (~> 1.0.9)
|
66
|
+
multi_json (~> 1.0.4)
|
67
|
+
rubyzip
|
68
|
+
term-ansicolor (1.0.7)
|
69
|
+
xpath (0.1.4)
|
70
|
+
nokogiri (~> 1.3)
|
71
|
+
|
72
|
+
PLATFORMS
|
73
|
+
ruby
|
74
|
+
|
75
|
+
DEPENDENCIES
|
76
|
+
builder
|
77
|
+
bundler (~> 1.0.0)
|
78
|
+
capybara
|
79
|
+
cucumber
|
80
|
+
jeweler (~> 1.6.4)
|
81
|
+
json
|
82
|
+
rake
|
83
|
+
rcov
|
84
|
+
rest-client
|
85
|
+
rspec (~> 2.3.0)
|
86
|
+
ruby-debug19
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Dmitry Mozzherin
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= name-spotter
|
2
|
+
|
3
|
+
Finds biodiversity scientific names in texts using TaxonFinder or NetiNeti libraries.
|
4
|
+
|
5
|
+
== Contributing to name-spotter
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
9
|
+
* Fork the project
|
10
|
+
* Start a feature/bugfix branch
|
11
|
+
* Commit and push until you are happy with your contribution
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2012 Marine Biological Laboratory. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "name-spotter"
|
18
|
+
gem.homepage = "http://github.com/GlobalNamesArchitecture/name-spotter"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Scientific names finder}
|
21
|
+
gem.description = %Q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
|
22
|
+
gem.email = "dmozzherin@gmail.com"
|
23
|
+
gem.authors = ["Ryan Schenk", "Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
require 'cucumber/rake/task'
|
40
|
+
Cucumber::Rake::Task.new(:features)
|
41
|
+
|
42
|
+
task :default => :spec
|
43
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.2
|
File without changes
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'name-spotter'
|
12
|
+
|
13
|
+
require 'rspec/expectations'
|
data/lib/name-spotter.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
require "rest_client"
|
3
|
+
require "uri"
|
4
|
+
require "json"
|
5
|
+
require "nokogiri"
|
6
|
+
require "socket"
|
7
|
+
require File.join(File.dirname(__FILE__), 'name-spotter', 'client')
|
8
|
+
|
9
|
+
Dir["#{File.dirname(__FILE__)}/name-spotter/**/*.rb"].each {|f| require f}
|
10
|
+
|
11
|
+
# Front end that passes a text to a name-finding client and renders the
# names it returns as a Ruby hash, a JSON string or an XML string.
class NameSpotter

  # client - object responding to find(text) and returning a collection of
  #          items that respond to to_hash.
  def initialize(client)
    @client = client
  end

  # Finds names in +input+ using the configured client.
  #
  # input  - the text handed to the client (passed through unchanged).
  # format - nil/false for a Ruby hash; "json" (String or Symbol, any case)
  #          for a JSON string; any other value for an XML string.
  #
  # Returns { names: [...] } when no format is given, otherwise a String.
  def find(input, format = nil)
    names = @client.find(to_text(input)).map { |n| n.to_hash }
    return { names: names } unless format
    # Normalise the format so :json and "JSON" do not silently fall through
    # to the XML branch.
    format.to_s.downcase == "json" ? to_json(names) : to_xml(names)
  end

  private

  # Hook for converting the input into plain text; currently the identity.
  def to_text(input)
    input
  end

  # Serialises the list of name hashes as {"names": [...]}.
  def to_json(names)
    JSON.generate({ names: names })
  end

  # Serialises the list of name hashes as an XML document rooted at <names>.
  # NOTE(review): the four elements of every name are emitted directly under
  # <names> without a per-name wrapper element -- confirm whether consumers
  # rely on this flat layout before changing it.
  def to_xml(names)
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.names do
        names.each do |name|
          xml.verbatim name[:verbatim]
          xml.scientificName name[:scientificName]
          xml.offsetStart name[:offsetStart]
          xml.offsetEnd name[:offsetEnd]
        end
      end
    end
    builder.to_xml
  end

end
|
50
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class NameSpotter
  # Base class for name-finding clients. Stores the host and port taken from
  # the options hash and an array of collected names; subclasses are expected
  # to override #find.
  class Client
    # Error class for client failures. It derives from StandardError (not
    # Exception) so that a plain `rescue` clause catches it.
    class ClientError < StandardError; end

    attr_reader :host
    attr_reader :port

    # opts - Hash read with the keys :host and :port.
    def initialize(opts)
      @host = opts[:host]
      @port = opts[:port]
      @names = []
    end

    # Placeholder that always raises a RuntimeError; subclasses override it.
    def find(text)
      raise "Subclass must implement find"
    end

    # Appends +name+ to the collected names and returns the collection.
    def add_name(name)
      @names << name
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class Object
  # True when the receiver reports itself empty via #empty?; objects without
  # an #empty? method (Object itself does not define one) are blank only
  # when they are falsy, i.e. nil or false.
  def blank?
    return empty? if respond_to?(:empty?)
    !self
  end
end
|
7
|
+
|
8
|
+
class String
  # Looks up the constant named by this string, walking "A::B::C" segment by
  # segment starting at Object. A leading "::" is ignored. Segments that are
  # not defined are handed to the enclosing scope's const_missing.
  def constantize
    segments = split('::')
    segments.shift if segments.empty? || segments.first.empty?
    segments.inject(Object) do |scope, segment|
      if scope.const_defined?(segment)
        scope.const_get(segment)
      else
        scope.const_missing(segment)
      end
    end
  end
end
|
20
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class NameSpotter
  # Client that posts a text to a NetiNeti HTTP endpoint and converts the
  # response into ScientificName objects.
  class NetiNetiClient < Client
    # opts - Hash with :host and/or :port. Missing keys fall back to the
    #        defaults, so passing only one of them no longer drops the other.
    def initialize(opts = { host: '0.0.0.0', port: 6384 })
      super({ host: '0.0.0.0', port: 6384 }.merge(opts))
    end

    # Posts +text+ as the "data" form field and parses the response body.
    #
    # The body is split on "|" into entries, and each entry on "," into a
    # name followed by its start offset.
    #
    # Returns an Array of NameSpotter::ScientificName ([] for nil/empty text).
    def find(text)
      # the form does not get sent if text is nil or empty
      return [] if text.nil? || text.empty?
      response = RestClient.post("http://#{@host}:#{@port}", data: text)

      response.body.split("|").map do |info|
        name, offset_start = info.split(',')
        normalized_name = NameSpotter::ScientificName.normalize(name)
        NameSpotter::ScientificName.new(name, :scientific_name => normalized_name, :start_position => offset_start.to_i)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class NameSpotter
  # One name occurrence: the verbatim string found in the text, an optional
  # scientific (normalized) form, optional start/end offsets and a score.
  class ScientificName
    attr_reader :verbatim, :scientific, :start_pos, :end_pos, :score

    # Replaces every run of whitespace in +name+ with a single space.
    def self.normalize(name)
      name.gsub(/\s+/, ' ')
    end

    # verbatim_name - the name string as found.
    # options       - Hash read with the keys:
    #                 :start_position  - start offset; when given, the end
    #                                    offset is start + verbatim length.
    #                 :score           - stored as-is when truthy.
    #                 :scientific_name - stored as-is when truthy.
    def initialize(verbatim_name, options={})
      @verbatim = verbatim_name
      if options[:start_position]
        @start_pos = options[:start_position]
        @end_pos = @start_pos + @verbatim.length
      end
      @score = options[:score] if options[:score]
      @scientific = options[:scientific_name] if options[:scientific_name]
    end

    # Use this in specs.
    #
    # Attribute-wise equality with another ScientificName. The type check
    # names this class; the previous reference to an undefined `Name`
    # constant raised NameError on every call.
    # NOTE: #hash is not overridden, so Hash keys and Array#uniq still
    # compare instances by identity.
    def eql?(other_name)
      other_name.is_a?(ScientificName) &&
        other_name.verbatim.eql?(verbatim) &&
        other_name.scientific.eql?(scientific) &&
        other_name.start_pos.eql?(start_pos) &&
        other_name.end_pos.eql?(end_pos) &&
        other_name.score.eql?(score)
    end

    # Returns a Hash with :verbatim plus :scientificName, :offsetStart and
    # :offsetEnd for whichever of those values are set.
    def to_hash
      name_hash = {:verbatim => verbatim}
      name_hash[:scientificName] = scientific if scientific
      name_hash[:offsetStart] = start_pos if start_pos
      name_hash[:offsetEnd] = end_pos if end_pos
      name_hash
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
class NameSpotter
  # Client that feeds a text word by word to a TaxonFinder socket server and
  # collects the names the server reports back.
  class TaxonFinderClient < NameSpotter::Client
    # opts - Hash with :host and/or :port. Missing keys fall back to the
    #        defaults, so passing only one of them no longer drops the other.
    def initialize(opts = { host: "0.0.0.0", port: "1234" })
      super({ host: "0.0.0.0", port: "1234" }.merge(opts))
      # We keep track of the document to get accurate offsets.
      # Other methods such as keeping track of the character number
      # didn't work so well due to the nature of TaxonFinder.
      @document = ""
    end

    # Lazily opened TCP connection to the configured host and port.
    def socket
      @socket ||= TCPSocket.open @host, @port
    end

    # Scans +str+ for names.
    #
    # str           - the text to scan; nil or "" yields [].
    # from_web_form - accepted but not used by this method.
    #
    # Returns an Array of NameSpotter::ScientificName found in this call only.
    def find(str, from_web_form=false)
      # Start from a clean list so results of earlier calls do not leak in.
      @names = []
      return [] if str.nil? || str.empty?

      # These are for the data-send-back that happens in TaxonFinder
      @current_string = ''
      @current_string_state = ''
      @word_list_matches = 0
      @document = ""

      begin
        str.split(/\s/).each do |word|
          # Since we split on whitespace, this addition of a " " char
          # allows us to keep the document accurate and is basically
          # replacing all \s matches with " "
          @document << word + " "
          taxon_find(word) unless word.empty?
        end
      ensure
        # Release the connection and the tracked document even when the
        # exchange with the server raises.
        close_socket
        @document = ""
      end
      @names
    end

    # Sends one word together with the state returned by the previous
    # exchange and records any names contained in the server's reply.
    def taxon_find(word)
      input = "#{word}|#{@current_string}|#{@current_string_state}|#{@word_list_matches}|0"
      socket.write(input + "\n")
      output = socket.gets
      return unless output

      response = parse_socket_response(output)
      return unless response

      [response.return_string, response.return_string_2].each do |scientific_string|
        next if scientific_string.blank?
        # The verbatim form replaces a bracketed expansion with "." so it can
        # be located in the tracked document (e.g. "B[etula] alba" -> "B. alba").
        verbatim_string = scientific_string.sub(/\[.*\]/, '.')
        add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => @document.rindex(verbatim_string), :scientific_name => scientific_string)
      end
    end

    # Splits a "|"-separated reply line into its seven fields, stores the
    # state fields for the next request, and returns an OpenStruct of all
    # fields when at least one return string is present, otherwise false.
    def parse_socket_response(response)
      current_string, current_string_state, word_list_matches, return_string, return_score, return_string_2, return_score_2 = response.strip.split '|'
      @current_string = current_string
      @current_string_state = current_string_state
      @word_list_matches = word_list_matches
      @return_score = return_score
      return false if return_string.blank? && return_string_2.blank?

      OpenStruct.new( { :current_string => current_string,
                        :current_string_state => current_string_state,
                        :word_list_matches => word_list_matches,
                        :return_string => return_string,
                        :return_score => return_score,
                        :return_string_2 => return_string_2,
                        :return_score_2 => return_score_2 })
    end

    private

    # Closes the connection if one was opened and forgets it, so the next
    # call to #socket reconnects.
    def close_socket
      @socket.close if @socket
      @socket = nil
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{name-spotter}
|
8
|
+
s.version = "0.0.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = [%q{Ryan Schenk}, %q{Anthony Goddard}, %q{Chuck Ha}, %q{Dmitry Mozzherin}]
|
12
|
+
s.date = %q{2012-01-12}
|
13
|
+
s.description = %q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
|
14
|
+
s.email = %q{dmozzherin@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
".rvmrc",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"features/name-spotter.feature",
|
30
|
+
"features/step_definitions/name-spotter_steps.rb",
|
31
|
+
"features/support/env.rb",
|
32
|
+
"lib/name-spotter.rb",
|
33
|
+
"lib/name-spotter/client.rb",
|
34
|
+
"lib/name-spotter/monkey_patches.rb",
|
35
|
+
"lib/name-spotter/neti_neti_client.rb",
|
36
|
+
"lib/name-spotter/scientific_name.rb",
|
37
|
+
"lib/name-spotter/taxon_finder_client.rb",
|
38
|
+
"name-spotter.gemspec",
|
39
|
+
"spec/name-spotter_spec.rb",
|
40
|
+
"spec/scientific_name_spec.rb",
|
41
|
+
"spec/spec_helper.rb"
|
42
|
+
]
|
43
|
+
s.homepage = %q{http://github.com/GlobalNamesArchitecture/name-spotter}
|
44
|
+
s.licenses = [%q{MIT}]
|
45
|
+
s.require_paths = [%q{lib}]
|
46
|
+
s.rubygems_version = %q{1.8.6}
|
47
|
+
s.summary = %q{Scientific names finder}
|
48
|
+
|
49
|
+
if s.respond_to? :specification_version then
|
50
|
+
s.specification_version = 3
|
51
|
+
|
52
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
|
+
s.add_runtime_dependency(%q<rake>, [">= 0"])
|
54
|
+
s.add_runtime_dependency(%q<rest-client>, [">= 0"])
|
55
|
+
s.add_runtime_dependency(%q<builder>, [">= 0"])
|
56
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
57
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
58
|
+
s.add_development_dependency(%q<cucumber>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<capybara>, [">= 0"])
|
60
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
61
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
62
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
63
|
+
s.add_development_dependency(%q<ruby-debug19>, [">= 0"])
|
64
|
+
else
|
65
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
66
|
+
s.add_dependency(%q<rest-client>, [">= 0"])
|
67
|
+
s.add_dependency(%q<builder>, [">= 0"])
|
68
|
+
s.add_dependency(%q<json>, [">= 0"])
|
69
|
+
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
70
|
+
s.add_dependency(%q<cucumber>, [">= 0"])
|
71
|
+
s.add_dependency(%q<capybara>, [">= 0"])
|
72
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
73
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
74
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
75
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
76
|
+
end
|
77
|
+
else
|
78
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
79
|
+
s.add_dependency(%q<rest-client>, [">= 0"])
|
80
|
+
s.add_dependency(%q<builder>, [">= 0"])
|
81
|
+
s.add_dependency(%q<json>, [">= 0"])
|
82
|
+
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
83
|
+
s.add_dependency(%q<cucumber>, [">= 0"])
|
84
|
+
s.add_dependency(%q<capybara>, [">= 0"])
|
85
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
86
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
87
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
88
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "NameSpotter" do
|
4
|
+
before(:all) do
|
5
|
+
neti_neti = NameSpotter::NetiNetiClient.new()
|
6
|
+
taxon_finder = NameSpotter::TaxonFinderClient.new()
|
7
|
+
@neti = NameSpotter.new(neti_neti)
|
8
|
+
@tf = NameSpotter.new(taxon_finder)
|
9
|
+
@clients = [@neti, @tf]
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should exist" do
|
13
|
+
@neti.is_a?(NameSpotter).should be_true
|
14
|
+
@tf.is_a?(NameSpotter).should be_true
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should use ruby as default format" do
|
18
|
+
@clients.each do |c|
|
19
|
+
c.find(nil).should == {names: []}
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should return empty result if input is empty" do
|
24
|
+
@clients.each do |c|
|
25
|
+
c.find(nil, 'json').should == "{\"names\":[]}"
|
26
|
+
c.find(nil, "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
|
27
|
+
c.find('', 'json').should == "{\"names\":[]}"
|
28
|
+
c.find('', "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should return empty result if no names are found" do
|
33
|
+
text = "one two three, no scientific names"
|
34
|
+
@clients.each do |c|
|
35
|
+
c.find(text, "json").should == "{\"names\":[]}"
|
36
|
+
c.find(text, "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should be able to find scientific names in text" do
|
41
|
+
text = "Some text that has Betula\n alba and Mus musculus and \neven B. alba and even M. mus-\nculus. Also it has name unknown before: Varanus bitatawa species"
|
42
|
+
res = @neti.find(text)[:names].map { |n| n[:scientificName] }
|
43
|
+
res.should == ["Betula alba", "Mus musculus", "B. alba", "Varanus bitatawa"]
|
44
|
+
res = @tf.find(text)[:names].map { |n| n[:scientificName] }
|
45
|
+
res.should == ["Betula alba", "Mus musculus", "B[etula] alba", "Varanus"]
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe NameSpotter::ScientificName do
|
5
|
+
|
6
|
+
describe "without doing anything" do
|
7
|
+
before :each do
|
8
|
+
@find_me = "M. musculus"
|
9
|
+
@name = NameSpotter::ScientificName.new(@find_me, {:start_position => 30, :scientific_name => "Mus musculus"})
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should have figured out the end position" do
|
13
|
+
@name.end_pos.should eq @name.start_pos + @find_me.length
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe "unicode" do
|
18
|
+
it "should handle unicode characters" do
|
19
|
+
verbatim = "Slovenščina"
|
20
|
+
name = NameSpotter::ScientificName.new(verbatim, {:start_position => 48193})
|
21
|
+
name.verbatim.should eq verbatim
|
22
|
+
name.end_pos.should eq name.start_pos + verbatim.length
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'name-spotter'
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
|
7
|
+
# in ./support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: name-spotter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ryan Schenk
|
9
|
+
- Anthony Goddard
|
10
|
+
- Chuck Ha
|
11
|
+
- Dmitry Mozzherin
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
date: 2012-01-12 00:00:00.000000000Z
|
16
|
+
dependencies:
|
17
|
+
- !ruby/object:Gem::Dependency
|
18
|
+
name: rake
|
19
|
+
requirement: &70256498363600 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ! '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '0'
|
25
|
+
type: :runtime
|
26
|
+
prerelease: false
|
27
|
+
version_requirements: *70256498363600
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rest-client
|
30
|
+
requirement: &70256498363000 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
type: :runtime
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: *70256498363000
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: builder
|
41
|
+
requirement: &70256498362380 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
type: :runtime
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: *70256498362380
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: json
|
52
|
+
requirement: &70256498361800 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
type: :runtime
|
59
|
+
prerelease: false
|
60
|
+
version_requirements: *70256498361800
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: rspec
|
63
|
+
requirement: &70256498361220 !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.3.0
|
69
|
+
type: :development
|
70
|
+
prerelease: false
|
71
|
+
version_requirements: *70256498361220
|
72
|
+
- !ruby/object:Gem::Dependency
|
73
|
+
name: cucumber
|
74
|
+
requirement: &70256498360620 !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
type: :development
|
81
|
+
prerelease: false
|
82
|
+
version_requirements: *70256498360620
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: capybara
|
85
|
+
requirement: &70256498360020 !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ! '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: *70256498360020
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: bundler
|
96
|
+
requirement: &70256498359340 !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ~>
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.0.0
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: *70256498359340
|
105
|
+
- !ruby/object:Gem::Dependency
|
106
|
+
name: jeweler
|
107
|
+
requirement: &70256498358740 !ruby/object:Gem::Requirement
|
108
|
+
none: false
|
109
|
+
requirements:
|
110
|
+
- - ~>
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 1.6.4
|
113
|
+
type: :development
|
114
|
+
prerelease: false
|
115
|
+
version_requirements: *70256498358740
|
116
|
+
- !ruby/object:Gem::Dependency
|
117
|
+
name: rcov
|
118
|
+
requirement: &70256498358140 !ruby/object:Gem::Requirement
|
119
|
+
none: false
|
120
|
+
requirements:
|
121
|
+
- - ! '>='
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: *70256498358140
|
127
|
+
- !ruby/object:Gem::Dependency
|
128
|
+
name: ruby-debug19
|
129
|
+
requirement: &70256498357540 !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
131
|
+
requirements:
|
132
|
+
- - ! '>='
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
type: :development
|
136
|
+
prerelease: false
|
137
|
+
version_requirements: *70256498357540
|
138
|
+
description: The gem searches for scientific names in texts using socket servers running
|
139
|
+
TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)
|
140
|
+
email: dmozzherin@gmail.com
|
141
|
+
executables: []
|
142
|
+
extensions: []
|
143
|
+
extra_rdoc_files:
|
144
|
+
- LICENSE.txt
|
145
|
+
- README.rdoc
|
146
|
+
files:
|
147
|
+
- .document
|
148
|
+
- .rspec
|
149
|
+
- .rvmrc
|
150
|
+
- Gemfile
|
151
|
+
- Gemfile.lock
|
152
|
+
- LICENSE.txt
|
153
|
+
- README.rdoc
|
154
|
+
- Rakefile
|
155
|
+
- VERSION
|
156
|
+
- features/name-spotter.feature
|
157
|
+
- features/step_definitions/name-spotter_steps.rb
|
158
|
+
- features/support/env.rb
|
159
|
+
- lib/name-spotter.rb
|
160
|
+
- lib/name-spotter/client.rb
|
161
|
+
- lib/name-spotter/monkey_patches.rb
|
162
|
+
- lib/name-spotter/neti_neti_client.rb
|
163
|
+
- lib/name-spotter/scientific_name.rb
|
164
|
+
- lib/name-spotter/taxon_finder_client.rb
|
165
|
+
- name-spotter.gemspec
|
166
|
+
- spec/name-spotter_spec.rb
|
167
|
+
- spec/scientific_name_spec.rb
|
168
|
+
- spec/spec_helper.rb
|
169
|
+
homepage: http://github.com/GlobalNamesArchitecture/name-spotter
|
170
|
+
licenses:
|
171
|
+
- MIT
|
172
|
+
post_install_message:
|
173
|
+
rdoc_options: []
|
174
|
+
require_paths:
|
175
|
+
- lib
|
176
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
177
|
+
none: false
|
178
|
+
requirements:
|
179
|
+
- - ! '>='
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: '0'
|
182
|
+
segments:
|
183
|
+
- 0
|
184
|
+
hash: -2758893711713117720
|
185
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
+
none: false
|
187
|
+
requirements:
|
188
|
+
- - ! '>='
|
189
|
+
- !ruby/object:Gem::Version
|
190
|
+
version: '0'
|
191
|
+
requirements: []
|
192
|
+
rubyforge_project:
|
193
|
+
rubygems_version: 1.8.6
|
194
|
+
signing_key:
|
195
|
+
specification_version: 3
|
196
|
+
summary: Scientific names finder
|
197
|
+
test_files: []
|