name-spotter 0.0.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -12,9 +12,10 @@ gem "json"
12
12
  # Include everything needed to run rake, tests, features, etc.
13
13
  group :development do
14
14
  gem "rspec", "~> 2.3.0"
15
+ gem "rspec-expectations"
15
16
  gem "cucumber", ">= 0"
16
17
  gem "capybara"
17
- gem "bundler", "~> 1.0.0"
18
+ gem "bundler"
18
19
  gem "jeweler", "~> 1.6.4"
19
20
  gem "rcov", ">= 0"
20
21
  gem "ruby-debug19"
data/Gemfile.lock CHANGED
@@ -74,7 +74,7 @@ PLATFORMS
74
74
 
75
75
  DEPENDENCIES
76
76
  builder
77
- bundler (~> 1.0.0)
77
+ bundler
78
78
  capybara
79
79
  cucumber
80
80
  jeweler (~> 1.6.4)
@@ -83,4 +83,5 @@ DEPENDENCIES
83
83
  rcov
84
84
  rest-client
85
85
  rspec (~> 2.3.0)
86
+ rspec-expectations
86
87
  ruby-debug19
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.7
1
+ 0.1.0
@@ -8,19 +8,16 @@ class NameSpotter
8
8
  @document = ""
9
9
  end
10
10
 
11
- def socket
12
- @socket ||= TCPSocket.open @host, @port
13
- end
14
-
15
11
  def find(str, from_web_form=false)
16
12
  @names = []
13
+ @document_verbatim = str
17
14
  return [] if str.nil? || str.empty?
18
15
 
19
16
  # These are for the data-send-back that happens in TaxonFinder
20
17
  @current_string = ''
21
18
  @current_string_state = ''
22
19
  @word_list_matches = 0
23
-
20
+ @empty_count = 0
24
21
  words = str.split(/\s/)
25
22
  words.each do |word|
26
23
  # Since we split on whitespace, this addition of a " " char
@@ -36,6 +33,12 @@ class NameSpotter
36
33
  @document = ""
37
34
  @names
38
35
  end
36
+
37
+ private
38
+
39
+ def socket
40
+ @socket ||= TCPSocket.open @host, @port
41
+ end
39
42
 
40
43
  def taxon_find(word)
41
44
  input = "#{word}|#{@current_string}|#{@current_string_state}|#{@word_list_matches}|0"
@@ -45,16 +48,12 @@ class NameSpotter
45
48
  return if not response
46
49
 
47
50
  unless response.return_string.blank?
48
- response.return_string.force_encoding('utf-8')
49
- verbatim_string = response.return_string.sub(/\[.*\]/, '.')
50
- scientific_string = response.return_string
51
- add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => @document.rindex(verbatim_string), :scientific_name => scientific_string)
51
+ verbatim_string, scientific_string, start_position = process_response(response.return_string)
52
+ add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => start_position, :scientific_name => scientific_string)
52
53
  end
53
54
  unless response.return_string_2.blank?
54
- response.return_string_2.force_encoding('utf-8')
55
- verbatim_string = response.return_string_2.sub(/\[.*\]/, '.')
56
- scientific_string = response.return_string_2
57
- add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => @document.rindex(verbatim_string), :scientific_name => scientific_string)
55
+ verbatim_string, scientific_string, start_position = process_response(response.return_string_2)
56
+ add_name NameSpotter::ScientificName.new(verbatim_string, :start_position => start_position, :scientific_name => scientific_string)
58
57
  end
59
58
  end
60
59
  end
@@ -77,5 +76,16 @@ class NameSpotter
77
76
  false
78
77
  end
79
78
  end
79
+
80
+ def process_response(str)
81
+ str.force_encoding('utf-8')
82
+ verbatim_string = str.sub(/\[.*\]/, '.')
83
+ verbatim_regex = Regexp.new(verbatim_string.split(/\s/).join('\s+'), true)
84
+ start_position = @document.rindex(verbatim_regex)
85
+ verbatim_string = @document_verbatim[start_position..-1].match(verbatim_regex)[0] if start_position
86
+ scientific_string = str
87
+ [verbatim_string, scientific_string, start_position]
88
+ end
89
+
80
90
  end
81
91
  end
data/name-spotter.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "name-spotter"
8
- s.version = "0.0.7"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Anthony Goddard", "Chuck Ha", "Dmitry Mozzherin"]
12
- s.date = "2012-05-04"
12
+ s.date = "2012-06-04"
13
13
  s.description = "The gem searches for scientific names in texts using socket servers running TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)"
14
14
  s.email = "dmozzherin@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -36,6 +36,7 @@ Gem::Specification.new do |s|
36
36
  "lib/name-spotter/scientific_name.rb",
37
37
  "lib/name-spotter/taxon_finder_client.rb",
38
38
  "name-spotter.gemspec",
39
+ "spec/files/journalofentomol13pomo_0018.txt",
39
40
  "spec/name-spotter_spec.rb",
40
41
  "spec/scientific_name_spec.rb",
41
42
  "spec/spec_helper.rb"
@@ -43,7 +44,7 @@ Gem::Specification.new do |s|
43
44
  s.homepage = "http://github.com/GlobalNamesArchitecture/name-spotter"
44
45
  s.licenses = ["MIT"]
45
46
  s.require_paths = ["lib"]
46
- s.rubygems_version = "1.8.10"
47
+ s.rubygems_version = "1.8.24"
47
48
  s.summary = "Scientific names finder"
48
49
 
49
50
  if s.respond_to? :specification_version then
@@ -55,9 +56,10 @@ Gem::Specification.new do |s|
55
56
  s.add_runtime_dependency(%q<builder>, [">= 0"])
56
57
  s.add_runtime_dependency(%q<json>, [">= 0"])
57
58
  s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
59
+ s.add_development_dependency(%q<rspec-expectations>, [">= 0"])
58
60
  s.add_development_dependency(%q<cucumber>, [">= 0"])
59
61
  s.add_development_dependency(%q<capybara>, [">= 0"])
60
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
62
+ s.add_development_dependency(%q<bundler>, [">= 0"])
61
63
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
62
64
  s.add_development_dependency(%q<rcov>, [">= 0"])
63
65
  s.add_development_dependency(%q<ruby-debug19>, [">= 0"])
@@ -67,9 +69,10 @@ Gem::Specification.new do |s|
67
69
  s.add_dependency(%q<builder>, [">= 0"])
68
70
  s.add_dependency(%q<json>, [">= 0"])
69
71
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
72
+ s.add_dependency(%q<rspec-expectations>, [">= 0"])
70
73
  s.add_dependency(%q<cucumber>, [">= 0"])
71
74
  s.add_dependency(%q<capybara>, [">= 0"])
72
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
75
+ s.add_dependency(%q<bundler>, [">= 0"])
73
76
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
74
77
  s.add_dependency(%q<rcov>, [">= 0"])
75
78
  s.add_dependency(%q<ruby-debug19>, [">= 0"])
@@ -80,9 +83,10 @@ Gem::Specification.new do |s|
80
83
  s.add_dependency(%q<builder>, [">= 0"])
81
84
  s.add_dependency(%q<json>, [">= 0"])
82
85
  s.add_dependency(%q<rspec>, ["~> 2.3.0"])
86
+ s.add_dependency(%q<rspec-expectations>, [">= 0"])
83
87
  s.add_dependency(%q<cucumber>, [">= 0"])
84
88
  s.add_dependency(%q<capybara>, [">= 0"])
85
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
89
+ s.add_dependency(%q<bundler>, [">= 0"])
86
90
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
87
91
  s.add_dependency(%q<rcov>, [">= 0"])
88
92
  s.add_dependency(%q<ruby-debug19>, [">= 0"])
@@ -0,0 +1,39 @@
1
+ A List of California Arachnida
2
+
3
+ This list is compiled from already published but scattered papers. Many of these
4
+ are local records of specimens and new species collected by many students through a
5
+ number of years and determined for us for the most part by Banks and Chamberlin.
6
+ As numerous earlier papers in this Journal have taken up the distribution of local
7
+ forms only a hint of this will be given. There are included in this list records other
8
+ than local. If the distribution is general some indication is given. A few hints as to
9
+ characteristic features are given when possible. The family characteristics are com-
10
+ piled by the aid of the works of Banks, Ewing, Comstock and several others. In
11
+ order to save space the literature references are given in abbreviated form at the end
12
+ of each section, especially as there are a number of papers and lists already published
13
+ which give this material in great detail.
14
+
15
+ I. PSEUDOSCORPIONIDA
16
+ M. Moles and W. Moore
17
+
18
+ Cheliferid..\e. Evidences of segmentation of thorax in some species. Serrula
19
+ attached all its length to finger of chelicera. Spinneret long and slender. Flagellum
20
+ absent. Tarsi of legs one-jointed. Tarsal claws short and thick, split on some of
21
+ the feet.
22
+
23
+ Chelifer cancroides Linn, about buildings, oak, sycamore trees, Claeremont, mts.
24
+
25
+ C. fuscipes Bks. Calif.
26
+
27
+ C. scabrisciilus Simon. N. Calif, to Claremont.
28
+
29
+ Chelanops ohlongus Say. Palm springs. Brown's flats.
30
+
31
+ C. validus Bks. From Lake Tahoe.
32
+
33
+ f^i;^
34
+
35
+ C. pallipes Bks., luuler stones Claremont, l,(is Angeles.
36
+ C. dorsalis Bks., Lake Tahoe and San Francisco.
37
+ C. acuminatus Sim. Maraposa, Claremont, Laguna Beach.
38
+ C. lagunae Moles, Two eye spots. Claremont.
39
+
@@ -42,8 +42,8 @@ describe "NameSpotter" do
42
42
  text = "Some text that has Betula\n alba and Mus musculus and \neven B. alba and even M. mus-\nculus and unicoded name Aranea röselii. Also it has name unknown before: Varanus bitatawa species"
43
43
  res = @neti.find(text)[:names].map { |n| n[:scientificName] }
44
44
  res.should == ["Betula alba", "Mus musculus", "B. alba", "Aranea röselii", "Varanus bitatawa"]
45
- res = @tf.find(text)[:names].map { |n| n[:scientificName] }
46
-
45
+ tf_res = @tf.find(text)
46
+ res = tf_res[:names].map { |n| n[:scientificName] }
47
47
  res.should == ["Betula alba", "Mus musculus", "B[etula] alba", "Aranea röselii", "Varanus"]
48
48
  end
49
49
 
@@ -71,4 +71,12 @@ describe "NameSpotter" do
71
71
  end
72
72
  end
73
73
 
74
+ it "should be able to return offsets for all names found by taxonfinder" do
75
+ text = "We have to be sure that Betula\n alba and PSEUDOSCORPIONIDA and Aranea röselii and capitalized ARANEA RÖSELII and Pardosa\n moesta f. moesta Banks, 1892 all get their offsets"
76
+ res = @neti.find(text)
77
+ res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>63, :offsetEnd=>76}, {:verbatim=>"Pardosa\n moesta", :scientificName=>"Pardosa moesta", :offsetStart=>113, :offsetEnd=>127}]}
78
+ tf_res = @tf.find(text)
79
+ tf_res.should == {:names=>[{:verbatim=>"Betula\n alba", :scientificName=>"Betula alba", :offsetStart=>24, :offsetEnd=>35}, {:verbatim=>"PSEUDOSCORPIONIDA", :scientificName=>"Pseudoscorpionida", :offsetStart=>41, :offsetEnd=>57}, {:verbatim=>"Aranea röselii", :scientificName=>"Aranea röselii", :offsetStart=>63, :offsetEnd=>76}, {:verbatim=>"ARANEA", :scientificName=>"Aranea", :offsetStart=>94, :offsetEnd=>99}, {:verbatim=>"Pardosa\n moesta f. moesta", :scientificName=>"Pardosa moesta f. moesta", :offsetStart=>113, :offsetEnd=>137}]}
80
+ end
81
+
74
82
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-spotter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-05-04 00:00:00.000000000Z
14
+ date: 2012-06-04 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rake
18
- requirement: &70202321843640 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,15 @@ dependencies:
23
23
  version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *70202321843640
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
27
32
  - !ruby/object:Gem::Dependency
28
33
  name: rest-client
29
- requirement: &70202321842880 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
30
35
  none: false
31
36
  requirements:
32
37
  - - ! '>='
@@ -34,10 +39,15 @@ dependencies:
34
39
  version: '0'
35
40
  type: :runtime
36
41
  prerelease: false
37
- version_requirements: *70202321842880
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
38
48
  - !ruby/object:Gem::Dependency
39
49
  name: builder
40
- requirement: &70202321841860 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
41
51
  none: false
42
52
  requirements:
43
53
  - - ! '>='
@@ -45,10 +55,15 @@ dependencies:
45
55
  version: '0'
46
56
  type: :runtime
47
57
  prerelease: false
48
- version_requirements: *70202321841860
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: json
51
- requirement: &70202321840740 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ! '>='
@@ -56,10 +71,15 @@ dependencies:
56
71
  version: '0'
57
72
  type: :runtime
58
73
  prerelease: false
59
- version_requirements: *70202321840740
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
60
80
  - !ruby/object:Gem::Dependency
61
81
  name: rspec
62
- requirement: &70202321839600 !ruby/object:Gem::Requirement
82
+ requirement: !ruby/object:Gem::Requirement
63
83
  none: false
64
84
  requirements:
65
85
  - - ~>
@@ -67,10 +87,31 @@ dependencies:
67
87
  version: 2.3.0
68
88
  type: :development
69
89
  prerelease: false
70
- version_requirements: *70202321839600
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ~>
94
+ - !ruby/object:Gem::Version
95
+ version: 2.3.0
96
+ - !ruby/object:Gem::Dependency
97
+ name: rspec-expectations
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
71
112
  - !ruby/object:Gem::Dependency
72
113
  name: cucumber
73
- requirement: &70202321838920 !ruby/object:Gem::Requirement
114
+ requirement: !ruby/object:Gem::Requirement
74
115
  none: false
75
116
  requirements:
76
117
  - - ! '>='
@@ -78,10 +119,15 @@ dependencies:
78
119
  version: '0'
79
120
  type: :development
80
121
  prerelease: false
81
- version_requirements: *70202321838920
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
82
128
  - !ruby/object:Gem::Dependency
83
129
  name: capybara
84
- requirement: &70202321838220 !ruby/object:Gem::Requirement
130
+ requirement: !ruby/object:Gem::Requirement
85
131
  none: false
86
132
  requirements:
87
133
  - - ! '>='
@@ -89,21 +135,31 @@ dependencies:
89
135
  version: '0'
90
136
  type: :development
91
137
  prerelease: false
92
- version_requirements: *70202321838220
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
93
144
  - !ruby/object:Gem::Dependency
94
145
  name: bundler
95
- requirement: &70202321837480 !ruby/object:Gem::Requirement
146
+ requirement: !ruby/object:Gem::Requirement
96
147
  none: false
97
148
  requirements:
98
- - - ~>
149
+ - - ! '>='
99
150
  - !ruby/object:Gem::Version
100
- version: 1.0.0
151
+ version: '0'
101
152
  type: :development
102
153
  prerelease: false
103
- version_requirements: *70202321837480
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
104
160
  - !ruby/object:Gem::Dependency
105
161
  name: jeweler
106
- requirement: &70202321836820 !ruby/object:Gem::Requirement
162
+ requirement: !ruby/object:Gem::Requirement
107
163
  none: false
108
164
  requirements:
109
165
  - - ~>
@@ -111,10 +167,15 @@ dependencies:
111
167
  version: 1.6.4
112
168
  type: :development
113
169
  prerelease: false
114
- version_requirements: *70202321836820
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ~>
174
+ - !ruby/object:Gem::Version
175
+ version: 1.6.4
115
176
  - !ruby/object:Gem::Dependency
116
177
  name: rcov
117
- requirement: &70202321836060 !ruby/object:Gem::Requirement
178
+ requirement: !ruby/object:Gem::Requirement
118
179
  none: false
119
180
  requirements:
120
181
  - - ! '>='
@@ -122,10 +183,15 @@ dependencies:
122
183
  version: '0'
123
184
  type: :development
124
185
  prerelease: false
125
- version_requirements: *70202321836060
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
126
192
  - !ruby/object:Gem::Dependency
127
193
  name: ruby-debug19
128
- requirement: &70202321835280 !ruby/object:Gem::Requirement
194
+ requirement: !ruby/object:Gem::Requirement
129
195
  none: false
130
196
  requirements:
131
197
  - - ! '>='
@@ -133,7 +199,12 @@ dependencies:
133
199
  version: '0'
134
200
  type: :development
135
201
  prerelease: false
136
- version_requirements: *70202321835280
202
+ version_requirements: !ruby/object:Gem::Requirement
203
+ none: false
204
+ requirements:
205
+ - - ! '>='
206
+ - !ruby/object:Gem::Version
207
+ version: '0'
137
208
  description: The gem searches for scientific names in texts using socket servers running
138
209
  TaxonFinder (by Patrick Leary) and NetiNeti (by Lakshmi Manohar Akella)
139
210
  email: dmozzherin@gmail.com
@@ -162,6 +233,7 @@ files:
162
233
  - lib/name-spotter/scientific_name.rb
163
234
  - lib/name-spotter/taxon_finder_client.rb
164
235
  - name-spotter.gemspec
236
+ - spec/files/journalofentomol13pomo_0018.txt
165
237
  - spec/name-spotter_spec.rb
166
238
  - spec/scientific_name_spec.rb
167
239
  - spec/spec_helper.rb
@@ -180,7 +252,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
180
252
  version: '0'
181
253
  segments:
182
254
  - 0
183
- hash: 2095636295522729544
255
+ hash: 4382720522777082124
184
256
  required_rubygems_version: !ruby/object:Gem::Requirement
185
257
  none: false
186
258
  requirements:
@@ -189,7 +261,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
261
  version: '0'
190
262
  requirements: []
191
263
  rubyforge_project:
192
- rubygems_version: 1.8.10
264
+ rubygems_version: 1.8.24
193
265
  signing_key:
194
266
  specification_version: 3
195
267
  summary: Scientific names finder