taxamatch_rb 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Makefile DELETED
@@ -1,157 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- #### Start of system configuration section. ####
5
-
6
- srcdir = /Users/dimus/.rvm/gems/ruby-1.9.2-p0/bin
7
- topdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1
8
- hdrdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1
9
- arch_hdrdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1/$(arch)
10
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
11
- prefix = $(DESTDIR)/Users/dimus/.rvm/rubies/ruby-1.9.2-p0
12
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
13
- exec_prefix = $(prefix)
14
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
15
- sitehdrdir = $(rubyhdrdir)/site_ruby
16
- rubyhdrdir = $(includedir)/$(RUBY_BASE_NAME)-$(ruby_version)
17
- vendordir = $(rubylibprefix)/vendor_ruby
18
- sitedir = $(rubylibprefix)/site_ruby
19
- ridir = $(datarootdir)/$(RI_BASE_NAME)
20
- mandir = $(datarootdir)/man
21
- localedir = $(datarootdir)/locale
22
- libdir = $(exec_prefix)/lib
23
- psdir = $(docdir)
24
- pdfdir = $(docdir)
25
- dvidir = $(docdir)
26
- htmldir = $(docdir)
27
- infodir = $(datarootdir)/info
28
- docdir = $(datarootdir)/doc/$(PACKAGE)
29
- oldincludedir = $(DESTDIR)/usr/include
30
- includedir = $(prefix)/include
31
- localstatedir = $(prefix)/var
32
- sharedstatedir = $(prefix)/com
33
- sysconfdir = $(prefix)/etc
34
- datadir = $(datarootdir)
35
- datarootdir = $(prefix)/share
36
- libexecdir = $(exec_prefix)/libexec
37
- sbindir = $(exec_prefix)/sbin
38
- bindir = $(exec_prefix)/bin
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- archdir = $(rubylibdir)/$(arch)
41
- sitelibdir = $(sitedir)/$(ruby_version)
42
- sitearchdir = $(sitelibdir)/$(sitearch)
43
- vendorlibdir = $(vendordir)/$(ruby_version)
44
- vendorarchdir = $(vendorlibdir)/$(sitearch)
45
-
46
- CC = gcc
47
- CXX = g++
48
- LIBRUBY = $(LIBRUBY_SO)
49
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
50
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
51
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
52
- OUTFLAG = -o
53
- COUTFLAG = -o
54
-
55
- RUBY_EXTCONF_H =
56
- cflags = $(optflags) $(debugflags) $(warnflags)
57
- optflags = -O3
58
- debugflags = -ggdb
59
- warnflags = -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long
60
- CFLAGS = -fno-common $(cflags) -fno-common -pipe
61
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
62
- DEFS =
63
- CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
64
- CXXFLAGS = $(CFLAGS) $(cxxflags)
65
- ldflags = -L.
66
- dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress -Wl,-flat_namespace
67
- ARCH_FLAG =
68
- DLDFLAGS = $(ldflags) $(dldflags)
69
- LDSHARED = $(CC) -dynamic -bundle
70
- LDSHAREDXX = $(CXX) -dynamic -bundle
71
- AR = ar
72
- EXEEXT =
73
-
74
- RUBY_BASE_NAME = ruby
75
- RUBY_INSTALL_NAME = ruby
76
- RUBY_SO_NAME = ruby.1.9.1
77
- arch = x86_64-darwin10.3.1
78
- sitearch = $(arch)
79
- ruby_version = 1.9.1
80
- ruby = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/bin/ruby
81
- RUBY = $(ruby)
82
- RM = rm -f
83
- RM_RF = $(RUBY) -run -e rm -- -rf
84
- RMDIRS = $(RUBY) -run -e rmdir -- -p
85
- MAKEDIRS = mkdir -p
86
- INSTALL = /usr/bin/install -c
87
- INSTALL_PROG = $(INSTALL) -m 0755
88
- INSTALL_DATA = $(INSTALL) -m 644
89
- COPY = cp
90
-
91
- #### End of system configuration section. ####
92
-
93
- preload =
94
-
95
- libpath = . $(libdir)
96
- LIBPATH = -L. -L$(libdir)
97
- DEFFILE =
98
-
99
- CLEANFILES = mkmf.log
100
- DISTCLEANFILES =
101
- DISTCLEANDIRS =
102
-
103
- extout =
104
- extout_prefix =
105
- target_prefix =
106
- LOCAL_LIBS =
107
- LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
108
- SRCS =
109
- OBJS =
110
- TARGET =
111
- DLLIB =
112
- EXTSTATIC =
113
- STATIC_LIB =
114
-
115
- BINDIR = $(bindir)
116
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
117
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
118
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
119
- HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
120
- ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
121
-
122
- TARGET_SO = $(DLLIB)
123
- CLEANLIBS = $(TARGET).bundle
124
- CLEANOBJS = *.o *.bak
125
-
126
- all: Makefile
127
- static: $(STATIC_LIB)
128
- .PHONY: all install static install-so install-rb
129
- .PHONY: clean clean-so clean-rb
130
-
131
- clean-rb-default::
132
- clean-rb::
133
- clean-so::
134
- clean: clean-so clean-rb-default clean-rb
135
- @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
136
-
137
- distclean-rb-default::
138
- distclean-rb::
139
- distclean-so::
140
- distclean: clean distclean-so distclean-rb-default distclean-rb
141
- @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
142
- @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
143
- @-$(RMDIRS) $(DISTCLEANDIRS)
144
-
145
- realclean: distclean
146
- install: install-so install-rb
147
-
148
- install-so: Makefile
149
- install-rb: pre-install-rb install-rb-default
150
- install-rb-default: pre-install-rb-default
151
- pre-install-rb: Makefile
152
- pre-install-rb-default: Makefile
153
-
154
- site-install: site-install-so site-install-rb
155
- site-install-so: install-so
156
- site-install-rb: install-rb
157
-
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 1.0.1
data/spec/spec.opts DELETED
@@ -1 +0,0 @@
1
- --colour
data/spec/spec_helper.rb DELETED
@@ -1,22 +0,0 @@
1
- require 'rspec'
2
-
3
- $:.unshift(File.dirname(__FILE__) + '/../lib')
4
- require 'taxamatch_rb'
5
-
6
- def read_test_file(file, fields_num)
7
- f = open(file)
8
- f.each do |line|
9
- fields = line.split("|")
10
- if line.match(/^\s*#/) == nil && fields.size == fields_num
11
- fields[-1] = fields[-1].split('#')[0].strip
12
- yield(fields)
13
- else
14
- yield(nil)
15
- end
16
- end
17
- end
18
-
19
- def make_taxamatch_hash(string)
20
- normalized = Taxamatch::Normalizer.normalize(string)
21
- {:string => string, :normalized => normalized, :phonetized => Taxamatch::Phonetizer.near_match(normalized)}
22
- end
@@ -1,406 +0,0 @@
1
- # encoding: UTF-8
2
- require 'spec_helper'
3
-
4
- describe 'Atomizer' do
5
- before(:all) do
6
- @parser = Taxamatch::Atomizer.new
7
- end
8
-
9
- it 'should parse uninomials' do
10
- @parser.parse('Betula').should == { :all_authors => [], :all_years => [],
11
- :canonical_form => "Betula", :uninomial => { :string => "Betula",
12
- :normalized => 'BETULA', :phonetized => "BITILA", :authors => [],
13
- :years => [], :normalized_authors => [] } }
14
- @parser.parse('Ærenea Lacordaire, 1872').should == {
15
- :all_authors => ["LACORDAIRE"], :all_years => [1872],
16
- :canonical_form => "Aerenea", :uninomial => { :string => "Aerenea",
17
- :normalized => "AERENEA", :phonetized => "ERINIA",
18
- :authors => ["Lacordaire"], :years => [1872],
19
- :normalized_authors => ["LACORDAIRE"] } }
20
- end
21
-
22
- it 'should parse binomials' do
23
- @parser.parse('Leœptura laetifica Dow, 1913').should == {
24
- :all_authors => ["DOW"], :all_years => [1913],
25
- :canonical_form => "Leoeptura laetifica", :genus => {
26
- :string => "Leoeptura", :normalized => "LEOEPTURA",
27
- :phonetized => "LIPTIRA", :authors => [], :years => [],
28
- :normalized_authors => []}, :species => {
29
- :string => "laetifica", :normalized => "LAETIFICA",
30
- :phonetized => "LITIFICA", :authors => ["Dow"],
31
- :years => [1913], :normalized_authors => ["DOW"] } }
32
- end
33
-
34
- it 'should parse trinomials' do
35
- @parser.parse('Hydnellum scrobiculatum zonatum ' +
36
- '(Banker) D. Hall et D.E. Stuntz 1972').should == {
37
- :all_authors => ["BANKER", "D HALL", "D E STUNTZ"], :all_years => [1972],
38
- :canonical_form => "Hydnellum scrobiculatum zonatum", :genus=>{
39
- :string => "Hydnellum", :normalized => "HYDNELLUM",
40
- :phonetized => "HIDNILIM", :authors => [], :years => [],
41
- :normalized_authors => [] }, :species => { :string => "scrobiculatum",
42
- :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
43
- :authors => [], :years => [], :normalized_authors => [] },
44
- :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
45
- :phonetized => "ZANATA", :authors => ["Banker", "D. Hall", "D.E. Stuntz"],
46
- :years => [1972], :normalized_authors => ["BANKER", "D HALL",
47
- "D E STUNTZ"] }] }
48
- end
49
-
50
- it 'should normalize years to integers' do
51
- future_year = Time.now.year + 10
52
- @parser.parse("Hydnellum scrobiculatum Kern #{future_year} " +
53
- "zonatum (Banker) D. Hall et D.E. Stuntz 1972?").should == {
54
- :all_authors => ["KERN", "BANKER", "D HALL", "D E STUNTZ"],
55
- :all_years => [1972],
56
- :canonical_form => "Hydnellum scrobiculatum zonatum", :genus => {
57
- :string => "Hydnellum", :normalized => "HYDNELLUM",
58
- :phonetized => "HIDNILIM", :authors => [], :years => [],
59
- :normalized_authors => [] }, :species => { :string => "scrobiculatum",
60
- :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
61
- :authors => ["Kern"], :years => [], :normalized_authors => ["KERN"] },
62
- :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
63
- :phonetized => "ZANATA", :authors =>
64
- ["Banker", "D. Hall", "D.E. Stuntz"], :years => [1972],
65
- :normalized_authors => ["BANKER", "D HALL", "D E STUNTZ"] }] }
66
- end
67
-
68
- it 'should normalize names with abbreviated genus after cf.' do
69
- @parser.parse('Unio cf. U. alba').should == { :all_authors => [],
70
- :all_years => [], :canonical_form => "Unio",
71
- :genus => { :string => "Unio", :normalized => "UNIO",
72
- :phonetized => "UNIA", :authors => [], :years => [],
73
- :normalized_authors => [] } }
74
- end
75
-
76
- it 'should parse names which broke it before' do
77
- ['Parus caeruleus species complex',
78
- 'Euxoa nr. idahoensis sp. 1clay',
79
- 'Cetraria islandica ? islandica',
80
- 'Buteo borealis ? ventralis'].each do |n|
81
- res = @parser.parse(n)
82
- res.class.should == Hash
83
- res.empty?.should be_false
84
- end
85
- end
86
- end
87
-
88
-
89
- describe 'Taxamatch::Normalizer' do
90
- it 'should normalize strings' do
91
- Taxamatch::Normalizer.normalize('abcd').should == 'ABCD'
92
- Taxamatch::Normalizer.normalize('Leœptura').should == 'LEOEPTURA'
93
- Taxamatch::Normalizer.normalize('Ærenea').should == 'AERENEA'
94
- Taxamatch::Normalizer.normalize('Fallén').should == 'FALLEN'
95
- Taxamatch::Normalizer.normalize('Fallé€n').should == 'FALLE?N'
96
- Taxamatch::Normalizer.normalize('Fallén привет').should == 'FALLEN ??????'
97
- Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should ==
98
- 'CHORIOZOPELLA TRAGARDHI'
99
- Taxamatch::Normalizer.normalize('×Zygomena').should == 'xZYGOMENA'
100
- end
101
-
102
- it 'should normalize words' do
103
- Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should ==
104
- 'L-3EOEPTURA'
105
- end
106
- end
107
-
108
- describe 'Taxamatch::Base' do
109
- before(:all) do
110
- @tm = Taxamatch::Base.new
111
- end
112
-
113
- it 'should get txt tests' do
114
- test_file = File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt'
115
- read_test_file(test_file, 4) do |y|
116
- if y
117
- y[2] = y[2] == 'true' ? true : false
118
- res = @tm.taxamatch(y[0], y[1], false)
119
- # puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
120
- res['match'].should == y[2]
121
- res['edit_distance'].should == y[3].to_i
122
- end
123
- end
124
- end
125
-
126
- it 'should work with names that cannot be parsed' do
127
- res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921',
128
- 'Quadraspidiotus ostreaeformis Curtis)')
129
- res = false
130
- end
131
-
132
- it 'should compare genera' do
133
- # edit distance 1 always match
134
- g1 = make_taxamatch_hash 'Plantago'
135
- g2 = make_taxamatch_hash 'Plantagon'
136
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
137
- 'edit_distance' => 1, 'match' => true }
138
- # edit_distance above threshold does not match
139
- g1 = make_taxamatch_hash 'Plantago'
140
- g2 = make_taxamatch_hash 'This shouldnt match'
141
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
142
- 'match' => false, 'edit_distance' => 4 }
143
- # phonetic_match matches
144
- g1 = make_taxamatch_hash 'Plantagi'
145
- g2 = make_taxamatch_hash 'Plantagy'
146
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
147
- 'edit_distance' => 1, 'match' => true }
148
- @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
149
- 'phonetic_match' => false, 'edit_distance' => 1, 'match' => true }
150
- # distance 1 in first letter also matches
151
- g1 = make_taxamatch_hash 'Xantheri'
152
- g2 = make_taxamatch_hash 'Pantheri'
153
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
154
- 'edit_distance' => 1, 'match' => true }
155
- # phonetic match trumps everything
156
- g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
157
- g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
158
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
159
- 'edit_distance' => 4, 'match' => true }
160
- @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
161
- 'phonetic_match' => false, 'edit_distance' => 4, 'match' => false }
162
- # same first letter and distance 2 should match
163
- g1 = make_taxamatch_hash 'Xaaaantherii'
164
- g2 = make_taxamatch_hash 'Xaaaantherrr'
165
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
166
- 'match' => true, 'edit_distance' => 2 }
167
- # First letter is the same and distance is 3 should match, no phonetic match
168
- g1 = make_taxamatch_hash 'Xaaaaaaaaaaantheriii'
169
- g2 = make_taxamatch_hash 'Xaaaaaaaaaaantherrrr'
170
- @tm.match_genera(g1, g2).should ==
171
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 3 }
172
- # Should not match if one of words is shorter than 2x edit
173
- # distance and distance is 2 or 3
174
- g1 = make_taxamatch_hash 'Xant'
175
- g2 = make_taxamatch_hash 'Xanthe'
176
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
177
- 'match' => false, 'edit_distance' => 2 }
178
- # Should not match if edit distance > 3 and no phonetic match
179
- g1 = make_taxamatch_hash 'Xantheriiii'
180
- g2 = make_taxamatch_hash 'Xantherrrrr'
181
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
182
- 'match' => false, 'edit_distance' => 4 }
183
- end
184
-
185
- it 'should compare species' do
186
- # Exact match
187
- s1 = make_taxamatch_hash 'major'
188
- s2 = make_taxamatch_hash 'major'
189
- @tm.match_species(s1, s2).should == { 'phonetic_match' => true,
190
- 'match' => true, 'edit_distance' => 0 }
191
- @tm.match_species(s1, s2, :with_phonetic_match => false).should == {
192
- 'phonetic_match' => false, 'match' => true, 'edit_distance' => 0 }
193
- # Phonetic match always works
194
- s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
195
- s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
196
- @tm.match_species(s1, s2).should == { 'phonetic_match' => true,
197
- 'match' => true, 'edit_distance' => 4 }
198
- @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
199
- { 'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
200
- # Phonetic match works with different endings
201
- s1 = make_taxamatch_hash 'majorum'
202
- s2 = make_taxamatch_hash 'majoris'
203
- @tm.match_species(s1, s2).should == {
204
- 'phonetic_match' => true, 'match' => true, 'edit_distance' => 2 }
205
- @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
206
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 2 }
207
- # Distance 4 matches if first 3 chars are the same
208
- s1 = make_taxamatch_hash 'majjjjorrrrr'
209
- s2 = make_taxamatch_hash 'majjjjoraaaa'
210
- @tm.match_species(s1, s2).should ==
211
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 4 }
212
- # Should not match if Distance 4 matches and first 3 chars are not the same
213
- s1 = make_taxamatch_hash 'majorrrrr'
214
- s2 = make_taxamatch_hash 'marorraaa'
215
- @tm.match_species(s1, s2).should == {
216
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
217
- # Distance 2 or 3 matches if first 1 char is the same
218
- s1 = make_taxamatch_hash 'moooorrrr'
219
- s2 = make_taxamatch_hash 'mooooraaa'
220
- @tm.match_species(s1, s2).should == { 'phonetic_match' => false,
221
- 'match' => true, 'edit_distance' => 3 }
222
- # Should not match if Distance 2 or 3 and first 1 char is not the same
223
- s1 = make_taxamatch_hash 'morrrr'
224
- s2 = make_taxamatch_hash 'torraa'
225
- @tm.match_species(s1, s2).should == {
226
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
227
- # Distance 1 will match anywhere
228
- s1 = make_taxamatch_hash 'major'
229
- s2 = make_taxamatch_hash 'rajor'
230
- @tm.match_species(s1, s2).should == {
231
- 'phonetic_match' => false, 'match' => true, 'edit_distance' => 1 }
232
- # Will not match if distance 3 and length is less than twice
233
- # of the edit distance
234
- s1 = make_taxamatch_hash 'marrr'
235
- s2 = make_taxamatch_hash 'maaaa'
236
- @tm.match_species(s1, s2).should == {
237
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
238
- end
239
-
240
- it 'should match matches' do
241
- # No trouble case
242
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
243
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
244
- @tm.match_matches(gmatch, smatch).should ==
245
- { 'phonetic_match' => true, 'edit_distance' => 2, 'match' => true }
246
- # Will not match if either genus or sp. epithet does not match
247
- gmatch = { 'match' => false,
248
- 'phonetic_match' => false, 'edit_distance' => 1 }
249
- smatch = { 'match' => true,
250
- 'phonetic_match' => true, 'edit_distance' => 1 }
251
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
252
- 'edit_distance' => 2, 'match' => false }
253
- gmatch = { 'match' => true, 'phonetic_match' => true,
254
- 'edit_distance' => 1 }
255
- smatch = { 'match' => false, 'phonetic_match' => false,
256
- 'edit_distance' => 1 }
257
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
258
- 'edit_distance' => 2, 'match' => false }
259
- # Should not match if binomial edit distance > 4
260
- # NOTE: EVEN with full phonetic match
261
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 3 }
262
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
263
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => true,
264
- 'edit_distance' => 5, 'match' => false }
265
- # Should not have phonetic match if one of the components
266
- # does not match phonetically
267
- gmatch = { 'match' => true,
268
- 'phonetic_match' => false, 'edit_distance' => 1 }
269
- smatch = { 'match' => true,
270
- 'phonetic_match' => true, 'edit_distance' => 1 }
271
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
272
- 'edit_distance' => 2, 'match' => true }
273
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
274
- smatch = { 'match' => true,
275
- 'phonetic_match' => false, 'edit_distance' => 1 }
276
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
277
- 'edit_distance' => 2, 'match' => true }
278
- # edit distance should be equal to the sum of the edit distances
279
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
280
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
281
- @tm.match_matches(gmatch, smatch).should == {
282
- 'phonetic_match'=>true, 'edit_distance'=>4, 'match'=>true }
283
- end
284
-
285
- it 'should return only boolean values' do
286
- @tm.taxamatch("AJLJljljlj", "sls").should_not be_nil
287
- @tm.taxamatch('Olsl','a')
288
- end
289
-
290
- it "should not match authors from different parts of name" do
291
- parser = Taxamatch::Atomizer.new
292
- t = Taxamatch::Base.new
293
- n1 = parser.parse "Betula Linnaeus"
294
- n2 = parser.parse "Betula alba Linnaeus"
295
- n3 = parser.parse "Betula alba alba Linnaeus"
296
- n4 = parser.parse "Betula alba L."
297
- n5 = parser.parse "Betula alba"
298
- n6 = parser.parse "Betula olba"
299
- n7 = parser.parse "Betula alba Linnaeus alba"
300
- n8 = parser.parse "Betula alba Linnaeus alba Smith"
301
- n9 = parser.parse "Betula alba Smith alba L."
302
- n10 = parser.parse "Betula Linn."
303
- # if one authorship is empty, return 0
304
- t.match_authors(n1, n5).should == 0
305
- t.match_authors(n5, n1).should == 0
306
- t.match_authors(n5, n6).should == 0
307
- # if authorship matches on different levels ignore
308
- t.match_authors(n7, n3).should == 0
309
- t.match_authors(n8, n3).should == -1
310
- t.match_authors(n2, n8).should == 0
311
- t.match_authors(n1, n2).should == 0
312
- # match on infraspecies level
313
- t.match_authors(n9, n3).should == 1
314
- # match on species level
315
- t.match_authors(n2, n4).should == 1
316
- # match on uninomial level
317
- t.match_authors(n1, n10).should == 1
318
- end
319
-
320
-
321
- describe 'Taxamatch::Authmatch' do
322
- before(:all) do
323
- @am = Taxamatch::Authmatch
324
- end
325
-
326
- it 'should calculate score' do
327
- res = @am.authmatch(['Linnaeus', 'Muller'], ['L'], [], [1788])
328
- res.should == 90
329
- res = @am.authmatch(['Linnaeus'],['Kurtz'], [], [])
330
- res.should == 0
331
- # found all authors, same year
332
- res = @am.authmatch(['Linnaeus', 'Muller'],
333
- ['Muller', 'Linnaeus'], [1766], [1766])
334
- res.should == 100
335
- # all authors, 1 year diff
336
- res = @am.authmatch(['Linnaeus', 'Muller'],
337
- ['Muller', 'Linnaeus'], [1767], [1766])
338
- res.should == 54
339
- # year is not counted in
340
- res = @am.authmatch(['Linnaeus', 'Muller'],
341
- ['Muller', 'Linnaeus'], [1767], [])
342
- res.should == 94
343
- # found all authors on one side, same year
344
- res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
345
- ['Muller', 'Linnaeus'], [1767], [1767])
346
- res.should == 91
347
- # found all authors on one side, 1 year diff
348
- res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
349
- ['Muller', 'Linnaeus'], [1766], [1767])
350
- res.should == 51
351
- # found all authors on one side, year does not count
352
- res = @am.authmatch(['Linnaeus', 'Muller'],
353
- ['Muller', 'Linnaeus', 'Kurtz'], [1766], [])
354
- res.should == 90
355
- # found some authors
356
- res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
357
- ['Muller', 'Kurtz', 'Stepanov'], [1766], [])
358
- res.should == 67
359
- # if year does not match or not present no match for previous case
360
- res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
361
- ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
362
- res.should == 0
363
- end
364
-
365
- it 'should compare years' do
366
- @am.compare_years([1882],[1880]).should == 2
367
- @am.compare_years([1882],[]).should == nil
368
- @am.compare_years([],[]).should == 0
369
- @am.compare_years([1788,1798], [1788,1798]).should be_nil
370
- end
371
-
372
- it 'should remove duplicate authors' do
373
- # Lin submatches Linnaeus and its size 3 is big enough to remove
374
- # Linnaeus; Muller is identical
375
- res = @am.remove_duplicate_authors(['Lin', 'Muller'],
376
- ['Linnaeus', 'Muller'])
377
- res.should == [[], []]
378
- # same in different order
379
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
380
- ['Linn', 'Muller'])
381
- res.should == [[], []]
382
- # auth Li submatches Linnaeus, but Li's size is less than the 3
383
- # required to remove Linnaeus
384
- res = @am.remove_duplicate_authors(['Dem', 'Li'],
385
- ['Linnaeus', 'Stepanov'])
386
- res.should == [["Dem"], ["Linnaeus", "Stepanov"]]
387
- # fuzzy match
388
- res = @am.remove_duplicate_authors(['Dem', 'Lennaeus'],
389
- ['Linnaeus', 'Stepanov'])
390
- res.should == [["Dem"], ["Stepanov"]]
391
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
392
- ['L', 'Kenn'])
393
- res.should == [['Linnaeus', 'Muller'], ['Kenn']]
394
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
395
- ['Muller', 'Linnaeus', 'Kurtz'])
396
- res.should == [[],['Kurtz']]
397
- end
398
-
399
- it 'should fuzzy match authors' do
400
- res = @am.fuzzy_match_authors('L', 'Muller')
401
- res.should be_false
402
- end
403
-
404
- end
405
-
406
- end
@@ -1,46 +0,0 @@
1
- ###
2
- #
3
- # Tests for string comparison by taxamatch algorithm
4
- # name1|name2|match|edit_distance
5
- #
6
- ##
7
- # Comparing uninomials
8
- Pomatomus|Pomatomas|true|1
9
- Pomatomus L.|Pomatomas Linn.|true|1
10
- Pomatomus Ber|Pomatomas Linn|false|1
11
- Pomatomus L. 1758|Pomatomus Linn. 1800|false|0
12
- Patella|Abbella|false|3
13
-
14
- ## additional authorship should match
15
- Puma concolor|Puma concolor L.|true|0
16
- #
17
- ## one-letter misspelling in species epithet should match
18
- Puma concolor|Puma cancolor|true|1
19
- #
20
- Pomatomus saltatrix|Pomatomus saltratix|true|2
21
- Pomatomus saltator|Pomatomus saltatrix|false|3 #!!!
22
- #
23
- Loligo pealeii|Loligo plei|false|3
24
- #
25
- ## different authors should not match
26
- Puma concolor Linnaeus|Puma concolor Kurtz|false|0
27
- #
28
- ##real life examples
29
- Biatora borealis|Bactra borealis Diakonoff 1964|false|3
30
- #
31
- Homo sapien|Homo sapiens|true|1
32
- Homo sapiens Linnaeus|Homo sapens (Linn. 1758) |true|1
33
- Homo sapiens Mozzherin|Homo sapiens Linneaus|false|0
34
- #
35
- Quinqueloculina punctata|Quinqueloculina punctata d'Orbigny 1905|true|0
36
- Pomatomus saltator (Linnaeus, 1766)|Pomatomus saltatrix (Linnaeus, 1766)|true|0|3
37
- #
38
- #Trinomial names
39
- Homo sapiens stupidus|Homo spiens stupidus|true|1
40
- Pomatomus saltator saltator L. 1758|Pomatomus saltator var. saltatror L. 1758|true|1
41
- Pomatomus saltator L. 1758|Pomatomus saltator var. saltatror L. 1758|false|5
42
- Pomatomus saltator saltator saltatorische|Pomatomus saltator soltator|true|1
43
-
44
-
45
-
46
-