name-spotter 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use ruby-1.9.2-p290@namespotter --create
data/Gemfile ADDED
@@ -0,0 +1,21 @@
1
source "http://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

gem "rake"
gem "rest-client"
gem "builder"
gem "json"
# lib/name-spotter.rb does `require "nokogiri"` and builds its XML output with
# Nokogiri::XML::Builder, so nokogiri has to be a runtime dependency rather
# than something that only arrives transitively through a development gem.
gem "nokogiri"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.3.0"
  gem "cucumber", ">= 0"
  gem "capybara"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "rcov", ">= 0"
  gem "ruby-debug19"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,86 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ archive-tar-minitar (0.5.2)
5
+ builder (3.0.0)
6
+ capybara (1.1.2)
7
+ mime-types (>= 1.16)
8
+ nokogiri (>= 1.3.3)
9
+ rack (>= 1.0.0)
10
+ rack-test (>= 0.5.4)
11
+ selenium-webdriver (~> 2.0)
12
+ xpath (~> 0.1.4)
13
+ childprocess (0.2.8)
14
+ ffi (~> 1.0.6)
15
+ columnize (0.3.6)
16
+ cucumber (1.1.4)
17
+ builder (>= 2.1.2)
18
+ diff-lcs (>= 1.1.2)
19
+ gherkin (~> 2.7.1)
20
+ json (>= 1.4.6)
21
+ term-ansicolor (>= 1.0.6)
22
+ diff-lcs (1.1.3)
23
+ ffi (1.0.11)
24
+ gherkin (2.7.2)
25
+ json (>= 1.4.6)
26
+ git (1.2.5)
27
+ jeweler (1.6.4)
28
+ bundler (~> 1.0)
29
+ git (>= 1.2.5)
30
+ rake
31
+ json (1.6.4)
32
+ linecache19 (0.5.12)
33
+ ruby_core_source (>= 0.1.4)
34
+ mime-types (1.17.2)
35
+ multi_json (1.0.4)
36
+ nokogiri (1.5.0)
37
+ rack (1.4.0)
38
+ rack-test (0.6.1)
39
+ rack (>= 1.0)
40
+ rake (0.9.2.2)
41
+ rcov (0.9.11)
42
+ rest-client (1.6.7)
43
+ mime-types (>= 1.16)
44
+ rspec (2.3.0)
45
+ rspec-core (~> 2.3.0)
46
+ rspec-expectations (~> 2.3.0)
47
+ rspec-mocks (~> 2.3.0)
48
+ rspec-core (2.3.1)
49
+ rspec-expectations (2.3.0)
50
+ diff-lcs (~> 1.1.2)
51
+ rspec-mocks (2.3.0)
52
+ ruby-debug-base19 (0.11.25)
53
+ columnize (>= 0.3.1)
54
+ linecache19 (>= 0.5.11)
55
+ ruby_core_source (>= 0.1.4)
56
+ ruby-debug19 (0.11.6)
57
+ columnize (>= 0.3.1)
58
+ linecache19 (>= 0.5.11)
59
+ ruby-debug-base19 (>= 0.11.19)
60
+ ruby_core_source (0.1.5)
61
+ archive-tar-minitar (>= 0.5.2)
62
+ rubyzip (0.9.5)
63
+ selenium-webdriver (2.16.0)
64
+ childprocess (>= 0.2.5)
65
+ ffi (~> 1.0.9)
66
+ multi_json (~> 1.0.4)
67
+ rubyzip
68
+ term-ansicolor (1.0.7)
69
+ xpath (0.1.4)
70
+ nokogiri (~> 1.3)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ builder
77
+ bundler (~> 1.0.0)
78
+ capybara
79
+ cucumber
80
+ jeweler (~> 1.6.4)
81
+ json
82
+ rake
83
+ rcov
84
+ rest-client
85
+ rspec (~> 2.3.0)
86
+ ruby-debug19
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Dmitry Mozzherin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = name-spotter
2
+
3
+ Finds biodiversity scientific names in texts using TaxonFinder or NetiNeti libraries.
4
+
5
+ == Contributing to name-spotter
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 Marine Biological Laboratory. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "name-spotter"
18
+ gem.homepage = "http://github.com/GlobalNamesArchitecture/name-spotter"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Scientific names finder}
21
+ gem.description = %Q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
22
+ gem.email = "dmozzherin@gmail.com"
23
+ gem.authors = ["Ryan Schenk", "Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ require 'cucumber/rake/task'
40
+ Cucumber::Rake::Task.new(:features)
41
+
42
+ task :default => :spec
43
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.2
@@ -0,0 +1,9 @@
1
+ Feature: something something
2
+ In order to something something
3
+ A user something something
4
+ something something something
5
+
6
+ Scenario: something something
7
+ Given inspiration
8
+ When I create a sweet new gem
9
+ Then everyone should see how awesome I am
File without changes
@@ -0,0 +1,13 @@
1
+ require 'bundler'
2
+ begin
3
+ Bundler.setup(:default, :development)
4
+ rescue Bundler::BundlerError => e
5
+ $stderr.puts e.message
6
+ $stderr.puts "Run `bundle install` to install missing gems"
7
+ exit e.status_code
8
+ end
9
+
10
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
+ require 'name-spotter'
12
+
13
+ require 'rspec/expectations'
@@ -0,0 +1,50 @@
1
+ require "ostruct"
2
+ require "rest_client"
3
+ require "uri"
4
+ require "json"
5
+ require "nokogiri"
6
+ require "socket"
7
+ require File.join(File.dirname(__FILE__), 'name-spotter', 'client')
8
+
9
+ Dir["#{File.dirname(__FILE__)}/name-spotter/**/*.rb"].each {|f| require f}
10
+
11
# Facade that runs a name-finding client over a text and renders the
# names it reports as a Ruby hash, a JSON string or an XML document.
class NameSpotter

  # client - any object whose #find(text) returns a collection of items
  #          that respond to #to_hash.
  def initialize(client)
    @client = client
  end

  # Finds names in +input+ using the configured client.
  #
  # input  - the text handed to the client (passed through unchanged).
  # format - nil returns { names: [...] } as a Ruby hash;
  #          "json" or :json returns a JSON string;
  #          any other value returns an XML string.
  def find(input, format = nil)
    text = to_text(input)
    names = @client.find(text).map { |n| n.to_hash }
    return { names: names } unless format
    # to_s so that a Symbol such as :json selects JSON instead of silently
    # falling through to the XML branch.
    format.to_s == "json" ? names_to_json(names) : names_to_xml(names)
  end

  private

  # Hook for converting the input into plain text; currently a pass-through.
  def to_text(input)
    input
  end

  # Serializes the name hashes as {"names": [...]}.
  # Deliberately not called to_json: that name would shadow the to_json
  # method the json library adds to every object.
  def names_to_json(names)
    JSON.generate({ names: names })
  end

  # Serializes the name hashes as children of a single <names> element.
  # Only the :verbatim, :scientificName, :offsetStart and :offsetEnd keys
  # of each hash are written.
  def names_to_xml(names)
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.names do
        names.each do |name|
          xml.verbatim name[:verbatim]
          xml.scientificName name[:scientificName]
          xml.offsetStart name[:offsetStart]
          xml.offsetEnd name[:offsetEnd]
        end
      end
    end
    builder.to_xml
  end

end
50
+
@@ -0,0 +1,22 @@
1
class NameSpotter
  # Base class for name-finding clients. Stores the host and port of the
  # service and the list of names collected so far; subclasses provide #find.
  class Client
    # Error type for client failures. It derives from StandardError so that
    # a plain `rescue` catches it; deriving from Exception would let it
    # escape ordinary error handling.
    class ClientError < StandardError; end

    attr_reader :host
    attr_reader :port

    # opts - a hash read with the keys :host and :port.
    def initialize(opts)
      @host = opts[:host]
      @port = opts[:port]
      @names = []
    end

    # Placeholder that subclasses override; always raises RuntimeError here.
    def find(text)
      raise "Subclass must implement find"
    end

    # Appends +name+ to the collected names and returns the collection.
    def add_name(name)
      @names << name
    end
  end
end
@@ -0,0 +1,20 @@
1
class Object
  # Defined only when no blank? exists yet, so an implementation that was
  # loaded earlier (for example ActiveSupport's) is not overwritten.
  unless method_defined?(:blank?)
    # True when the receiver is empty (if it responds to empty?) or is
    # nil/false. note: Object does not define Object#empty?
    def blank?
      respond_to?(:empty?) ? empty? : !self
    end
  end
end
7
+
8
class String
  # Defined only when String has no constantize yet, so an implementation
  # that was loaded earlier (for example ActiveSupport's) is not overwritten.
  unless method_defined?(:constantize)
    # Resolves the string as a constant path ("Foo::Bar" or "::Foo::Bar")
    # starting from Object and returns the constant found. A segment that is
    # not defined is handed to const_missing on the constant reached so far.
    def constantize
      names = split('::')
      # A leading "::" yields an empty first segment; drop it.
      names.shift if names.first && names.first.empty?
      names.inject(Object) do |constant, name|
        constant.const_defined?(name) ? constant.const_get(name) : constant.const_missing(name)
      end
    end
  end
end
20
+
@@ -0,0 +1,19 @@
1
class NameSpotter
  # Client that sends text to a NetiNeti service over HTTP and turns the
  # response into ScientificName objects.
  class NetiNetiClient < Client
    # Connection settings used for any key the caller does not supply.
    DEFAULT_OPTIONS = { host: '0.0.0.0', port: 6384 }.freeze

    # opts - a hash that may contain :host and/or :port. Missing keys fall
    #        back to DEFAULT_OPTIONS, so passing only :host no longer
    #        leaves the port nil.
    def initialize(opts = {})
      super(DEFAULT_OPTIONS.merge(opts))
    end

    # Posts +text+ to the service as the form field "data" and returns an
    # array of NameSpotter::ScientificName. The response body is split on
    # "|" into entries, and each entry is split on "," into a name and a
    # start offset.
    def find(text)
      # the form does not get sent if text is nil or empty
      return [] if text.nil? || text.empty?
      response = RestClient.post("http://#{@host}:#{@port}", data: text)

      response.body.split("|").map do |info|
        name, offset_start = info.split(',')
        normalized_name = NameSpotter::ScientificName.normalize(name)
        NameSpotter::ScientificName.new(name, :scientific_name => normalized_name, :start_position => offset_start.to_i)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
class NameSpotter
  # A name found in a text: the verbatim string, optionally its normalized
  # scientific form, its start/end offsets and a score.
  class ScientificName
    attr_reader :verbatim, :scientific, :start_pos, :end_pos, :score

    # Collapses every run of whitespace in +name+ into a single space.
    def self.normalize(name)
      name.gsub(/\s+/, ' ')
    end

    # verbatim_name - the name exactly as it appeared in the text.
    # options       - optional keys:
    #                 :start_position  - offset of the name; when given,
    #                                    end_pos is start + verbatim length.
    #                 :score           - stored as-is.
    #                 :scientific_name - stored as-is.
    def initialize(verbatim_name, options={})
      @verbatim = verbatim_name
      if options[:start_position]
        @start_pos = options[:start_position]
        @end_pos = @start_pos + @verbatim.length
      end
      @score = options[:score] if options[:score]
      @scientific = options[:scientific_name] if options[:scientific_name]
    end

    # Use this in specs
    # True when +other_name+ is a ScientificName with eql? verbatim,
    # scientific, start_pos, end_pos and score.
    def eql?(other_name)
      other_name.is_a?(ScientificName) &&
        other_name.verbatim.eql?(verbatim) &&
        other_name.scientific.eql?(scientific) &&
        other_name.start_pos.eql?(start_pos) &&
        other_name.end_pos.eql?(end_pos) &&
        other_name.score.eql?(score)
    end

    # Hash code built from the same fields eql? compares, so that objects
    # that are eql? also land in the same Hash bucket.
    def hash
      [verbatim, scientific, start_pos, end_pos, score].hash
    end

    # Returns a hash with :verbatim and, when present, :scientificName,
    # :offsetStart and :offsetEnd. The score is not included.
    def to_hash
      name_hash = {:verbatim => verbatim}
      name_hash[:scientificName] = scientific if scientific
      name_hash[:offsetStart] = start_pos if start_pos
      name_hash[:offsetEnd] = end_pos if end_pos
      name_hash
    end
  end
end
@@ -0,0 +1,78 @@
1
class NameSpotter
  # Client that feeds a text word by word to a TaxonFinder socket server
  # and collects the names the server reports.
  class TaxonFinderClient < NameSpotter::Client
    # Connection settings used for any key the caller does not supply.
    DEFAULT_OPTIONS = { host: "0.0.0.0", port: "1234" }.freeze

    # opts - a hash that may contain :host and/or :port; missing keys fall
    #        back to DEFAULT_OPTIONS.
    def initialize(opts = {})
      super(DEFAULT_OPTIONS.merge(opts))
      # We keep track of the document to get accurate offsets.
      # Other methods such as keeping track of the character number
      # didn't work so well due to the nature of TaxonFinder.
      @document = ""
    end

    # Lazily opened TCP connection to the configured host and port.
    def socket
      @socket ||= TCPSocket.open @host, @port
    end

    # Scans +str+ and returns an array of NameSpotter::ScientificName found
    # in it. Returns [] for nil or empty input. +from_web_form+ is accepted
    # but not used by this method.
    def find(str, from_web_form=false)
      return [] if str.nil? || str.empty?

      # Start every scan from a clean slate so that names from a previous
      # call on the same client do not leak into this result.
      @names = []
      @document = ""

      # These are for the data-send-back that happens in TaxonFinder
      @current_string = ''
      @current_string_state = ''
      @word_list_matches = 0

      str.split(/\s/).each do |word|
        # Since we split on whitespace, this addition of a " " char
        # allows us to keep the document accurate and is basically
        # replacing all \s matches with " "
        @document << word + " "
        taxon_find(word) unless word.empty?
      end
      @names
    ensure
      # Release the connection even when the scan raised part-way through.
      close_socket
      @document = ""
    end

    # Sends one word plus the current parser state to the server and
    # records any names contained in the reply line.
    def taxon_find(word)
      input = "#{word}|#{@current_string}|#{@current_string_state}|#{@word_list_matches}|0"
      socket.write(input + "\n")
      output = socket.gets
      return unless output

      response = parse_socket_response(output)
      return unless response

      record_name(response.return_string)
      record_name(response.return_string_2)
    end

    # Splits a "|"-separated reply line into its seven fields and stores the
    # parser state for the next request. Returns an OpenStruct with all
    # fields when at least one return string is present, otherwise false.
    def parse_socket_response(response)
      current_string, current_string_state, word_list_matches, return_string, return_score, return_string_2, return_score_2 = response.strip.split '|'
      @current_string = current_string
      @current_string_state = current_string_state
      @word_list_matches = word_list_matches
      @return_score = return_score
      return false if return_string.blank? && return_string_2.blank?

      OpenStruct.new( { :current_string       => current_string,
                        :current_string_state => current_string_state,
                        :word_list_matches    => word_list_matches,
                        :return_string        => return_string,
                        :return_score         => return_score,
                        :return_string_2      => return_string_2,
                        :return_score_2       => return_score_2 })
    end

    private

    # Adds a name for one return string of a reply; blank strings are
    # ignored. The first bracketed section ("B[etula] alba") is replaced by
    # "." to form the verbatim string ("B. alba"), and the start offset is
    # the last occurrence of that verbatim string in the document so far.
    def record_name(returned)
      return if returned.blank?
      verbatim_string = returned.sub(/\[.*\]/, '.')
      add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => @document.rindex(verbatim_string), :scientific_name => returned)
    end

    # Closes the connection if one was opened and forgets it, so that the
    # next scan opens a fresh one.
    def close_socket
      @socket.close if @socket && !@socket.closed?
      @socket = nil
    end
  end
end
@@ -0,0 +1,91 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{name-spotter}
8
+ s.version = "0.0.2"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = [%q{Ryan Schenk}, %q{Anthony Goddard}, %q{Chuck Ha}, %q{Dmitry Mozzherin}]
12
+ s.date = %q{2012-01-12}
13
+ s.description = %q{The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)}
14
+ s.email = %q{dmozzherin@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ ".rvmrc",
23
+ "Gemfile",
24
+ "Gemfile.lock",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "features/name-spotter.feature",
30
+ "features/step_definitions/name-spotter_steps.rb",
31
+ "features/support/env.rb",
32
+ "lib/name-spotter.rb",
33
+ "lib/name-spotter/client.rb",
34
+ "lib/name-spotter/monkey_patches.rb",
35
+ "lib/name-spotter/neti_neti_client.rb",
36
+ "lib/name-spotter/scientific_name.rb",
37
+ "lib/name-spotter/taxon_finder_client.rb",
38
+ "name-spotter.gemspec",
39
+ "spec/name-spotter_spec.rb",
40
+ "spec/scientific_name_spec.rb",
41
+ "spec/spec_helper.rb"
42
+ ]
43
+ s.homepage = %q{http://github.com/GlobalNamesArchitecture/name-spotter}
44
+ s.licenses = [%q{MIT}]
45
+ s.require_paths = [%q{lib}]
46
+ s.rubygems_version = %q{1.8.6}
47
+ s.summary = %q{Scientific names finder}
48
+
49
+ if s.respond_to? :specification_version then
50
+ s.specification_version = 3
51
+
52
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
53
+ s.add_runtime_dependency(%q<rake>, [">= 0"])
54
+ s.add_runtime_dependency(%q<rest-client>, [">= 0"])
55
+ s.add_runtime_dependency(%q<builder>, [">= 0"])
56
+ s.add_runtime_dependency(%q<json>, [">= 0"])
57
+ s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
58
+ s.add_development_dependency(%q<cucumber>, [">= 0"])
59
+ s.add_development_dependency(%q<capybara>, [">= 0"])
60
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
62
+ s.add_development_dependency(%q<rcov>, [">= 0"])
63
+ s.add_development_dependency(%q<ruby-debug19>, [">= 0"])
64
+ else
65
+ s.add_dependency(%q<rake>, [">= 0"])
66
+ s.add_dependency(%q<rest-client>, [">= 0"])
67
+ s.add_dependency(%q<builder>, [">= 0"])
68
+ s.add_dependency(%q<json>, [">= 0"])
69
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
70
+ s.add_dependency(%q<cucumber>, [">= 0"])
71
+ s.add_dependency(%q<capybara>, [">= 0"])
72
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
73
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
74
+ s.add_dependency(%q<rcov>, [">= 0"])
75
+ s.add_dependency(%q<ruby-debug19>, [">= 0"])
76
+ end
77
+ else
78
+ s.add_dependency(%q<rake>, [">= 0"])
79
+ s.add_dependency(%q<rest-client>, [">= 0"])
80
+ s.add_dependency(%q<builder>, [">= 0"])
81
+ s.add_dependency(%q<json>, [">= 0"])
82
+ s.add_dependency(%q<rspec>, ["~> 2.3.0"])
83
+ s.add_dependency(%q<cucumber>, [">= 0"])
84
+ s.add_dependency(%q<capybara>, [">= 0"])
85
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
86
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
87
+ s.add_dependency(%q<rcov>, [">= 0"])
88
+ s.add_dependency(%q<ruby-debug19>, [">= 0"])
89
+ end
90
+ end
91
+
@@ -0,0 +1,48 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "NameSpotter" do
4
+ before(:all) do
5
+ neti_neti = NameSpotter::NetiNetiClient.new()
6
+ taxon_finder = NameSpotter::TaxonFinderClient.new()
7
+ @neti = NameSpotter.new(neti_neti)
8
+ @tf = NameSpotter.new(taxon_finder)
9
+ @clients = [@neti, @tf]
10
+ end
11
+
12
+ it "should exist" do
13
+ @neti.is_a?(NameSpotter).should be_true
14
+ @tf.is_a?(NameSpotter).should be_true
15
+ end
16
+
17
+ it "should use ruby as default format" do
18
+ @clients.each do |c|
19
+ c.find(nil).should == {names: []}
20
+ end
21
+ end
22
+
23
+ it "should return empty result if input is empty" do
24
+ @clients.each do |c|
25
+ c.find(nil, 'json').should == "{\"names\":[]}"
26
+ c.find(nil, "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
27
+ c.find('', 'json').should == "{\"names\":[]}"
28
+ c.find('', "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
29
+ end
30
+ end
31
+
32
+ it "should return empty result if no names are found" do
33
+ text = "one two three, no scientific names"
34
+ @clients.each do |c|
35
+ c.find(text, "json").should == "{\"names\":[]}"
36
+ c.find(text, "xml").should == "<?xml version=\"1.0\"?>\n<names/>\n"
37
+ end
38
+ end
39
+
40
+ it "should be able to find scientific names in text" do
41
+ text = "Some text that has Betula\n alba and Mus musculus and \neven B. alba and even M. mus-\nculus. Also it has name unknown before: Varanus bitatawa species"
42
+ res = @neti.find(text)[:names].map { |n| n[:scientificName] }
43
+ res.should == ["Betula alba", "Mus musculus", "B. alba", "Varanus bitatawa"]
44
+ res = @tf.find(text)[:names].map { |n| n[:scientificName] }
45
+ res.should == ["Betula alba", "Mus musculus", "B[etula] alba", "Varanus"]
46
+ end
47
+
48
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
+
4
+ describe NameSpotter::ScientificName do
5
+
6
+ describe "without doing anything" do
7
+ before :each do
8
+ @find_me = "M. musculus"
9
+ @name = NameSpotter::ScientificName.new(@find_me, {:start_position => 30, :scientific_name => "Mus musculus"})
10
+ end
11
+
12
+ it "should have figured out the end position" do
13
+ @name.end_pos.should eq @name.start_pos + @find_me.length
14
+ end
15
+ end
16
+
17
+ describe "unicode" do
18
+ it "should handle unicode characters" do
19
+ verbatim = "Slovenščina"
20
+ name = NameSpotter::ScientificName.new(verbatim, {:start_position => 48193})
21
+ name.verbatim.should eq verbatim
22
+ name.end_pos.should eq name.start_pos + verbatim.length
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'name-spotter'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,197 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: name-spotter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Schenk
9
+ - Anthony Goddard
10
+ - Chuck Ha
11
+ - Dmitry Mozzherin
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+ date: 2012-01-12 00:00:00.000000000Z
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: rake
19
+ requirement: &70256498363600 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ! '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: *70256498363600
28
+ - !ruby/object:Gem::Dependency
29
+ name: rest-client
30
+ requirement: &70256498363000 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: *70256498363000
39
+ - !ruby/object:Gem::Dependency
40
+ name: builder
41
+ requirement: &70256498362380 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: *70256498362380
50
+ - !ruby/object:Gem::Dependency
51
+ name: json
52
+ requirement: &70256498361800 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ type: :runtime
59
+ prerelease: false
60
+ version_requirements: *70256498361800
61
+ - !ruby/object:Gem::Dependency
62
+ name: rspec
63
+ requirement: &70256498361220 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.3.0
69
+ type: :development
70
+ prerelease: false
71
+ version_requirements: *70256498361220
72
+ - !ruby/object:Gem::Dependency
73
+ name: cucumber
74
+ requirement: &70256498360620 !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ type: :development
81
+ prerelease: false
82
+ version_requirements: *70256498360620
83
+ - !ruby/object:Gem::Dependency
84
+ name: capybara
85
+ requirement: &70256498360020 !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: *70256498360020
94
+ - !ruby/object:Gem::Dependency
95
+ name: bundler
96
+ requirement: &70256498359340 !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.0.0
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: *70256498359340
105
+ - !ruby/object:Gem::Dependency
106
+ name: jeweler
107
+ requirement: &70256498358740 !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ~>
111
+ - !ruby/object:Gem::Version
112
+ version: 1.6.4
113
+ type: :development
114
+ prerelease: false
115
+ version_requirements: *70256498358740
116
+ - !ruby/object:Gem::Dependency
117
+ name: rcov
118
+ requirement: &70256498358140 !ruby/object:Gem::Requirement
119
+ none: false
120
+ requirements:
121
+ - - ! '>='
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: *70256498358140
127
+ - !ruby/object:Gem::Dependency
128
+ name: ruby-debug19
129
+ requirement: &70256498357540 !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ! '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ type: :development
136
+ prerelease: false
137
+ version_requirements: *70256498357540
138
+ description: The gem searches for scientific names in texts using socket servers running
139
+ TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)
140
+ email: dmozzherin@gmail.com
141
+ executables: []
142
+ extensions: []
143
+ extra_rdoc_files:
144
+ - LICENSE.txt
145
+ - README.rdoc
146
+ files:
147
+ - .document
148
+ - .rspec
149
+ - .rvmrc
150
+ - Gemfile
151
+ - Gemfile.lock
152
+ - LICENSE.txt
153
+ - README.rdoc
154
+ - Rakefile
155
+ - VERSION
156
+ - features/name-spotter.feature
157
+ - features/step_definitions/name-spotter_steps.rb
158
+ - features/support/env.rb
159
+ - lib/name-spotter.rb
160
+ - lib/name-spotter/client.rb
161
+ - lib/name-spotter/monkey_patches.rb
162
+ - lib/name-spotter/neti_neti_client.rb
163
+ - lib/name-spotter/scientific_name.rb
164
+ - lib/name-spotter/taxon_finder_client.rb
165
+ - name-spotter.gemspec
166
+ - spec/name-spotter_spec.rb
167
+ - spec/scientific_name_spec.rb
168
+ - spec/spec_helper.rb
169
+ homepage: http://github.com/GlobalNamesArchitecture/name-spotter
170
+ licenses:
171
+ - MIT
172
+ post_install_message:
173
+ rdoc_options: []
174
+ require_paths:
175
+ - lib
176
+ required_ruby_version: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ segments:
183
+ - 0
184
+ hash: -2758893711713117720
185
+ required_rubygems_version: !ruby/object:Gem::Requirement
186
+ none: false
187
+ requirements:
188
+ - - ! '>='
189
+ - !ruby/object:Gem::Version
190
+ version: '0'
191
+ requirements: []
192
+ rubyforge_project:
193
+ rubygems_version: 1.8.6
194
+ signing_key:
195
+ specification_version: 3
196
+ summary: Scientific names finder
197
+ test_files: []