taxamatch_rb 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Makefile DELETED
@@ -1,157 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- #### Start of system configuration section. ####
5
-
6
- srcdir = /Users/dimus/.rvm/gems/ruby-1.9.2-p0/bin
7
- topdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1
8
- hdrdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1
9
- arch_hdrdir = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/include/ruby-1.9.1/$(arch)
10
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
11
- prefix = $(DESTDIR)/Users/dimus/.rvm/rubies/ruby-1.9.2-p0
12
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
13
- exec_prefix = $(prefix)
14
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
15
- sitehdrdir = $(rubyhdrdir)/site_ruby
16
- rubyhdrdir = $(includedir)/$(RUBY_BASE_NAME)-$(ruby_version)
17
- vendordir = $(rubylibprefix)/vendor_ruby
18
- sitedir = $(rubylibprefix)/site_ruby
19
- ridir = $(datarootdir)/$(RI_BASE_NAME)
20
- mandir = $(datarootdir)/man
21
- localedir = $(datarootdir)/locale
22
- libdir = $(exec_prefix)/lib
23
- psdir = $(docdir)
24
- pdfdir = $(docdir)
25
- dvidir = $(docdir)
26
- htmldir = $(docdir)
27
- infodir = $(datarootdir)/info
28
- docdir = $(datarootdir)/doc/$(PACKAGE)
29
- oldincludedir = $(DESTDIR)/usr/include
30
- includedir = $(prefix)/include
31
- localstatedir = $(prefix)/var
32
- sharedstatedir = $(prefix)/com
33
- sysconfdir = $(prefix)/etc
34
- datadir = $(datarootdir)
35
- datarootdir = $(prefix)/share
36
- libexecdir = $(exec_prefix)/libexec
37
- sbindir = $(exec_prefix)/sbin
38
- bindir = $(exec_prefix)/bin
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- archdir = $(rubylibdir)/$(arch)
41
- sitelibdir = $(sitedir)/$(ruby_version)
42
- sitearchdir = $(sitelibdir)/$(sitearch)
43
- vendorlibdir = $(vendordir)/$(ruby_version)
44
- vendorarchdir = $(vendorlibdir)/$(sitearch)
45
-
46
- CC = gcc
47
- CXX = g++
48
- LIBRUBY = $(LIBRUBY_SO)
49
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
50
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
51
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
52
- OUTFLAG = -o
53
- COUTFLAG = -o
54
-
55
- RUBY_EXTCONF_H =
56
- cflags = $(optflags) $(debugflags) $(warnflags)
57
- optflags = -O3
58
- debugflags = -ggdb
59
- warnflags = -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long
60
- CFLAGS = -fno-common $(cflags) -fno-common -pipe
61
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
62
- DEFS =
63
- CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
64
- CXXFLAGS = $(CFLAGS) $(cxxflags)
65
- ldflags = -L.
66
- dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress -Wl,-flat_namespace
67
- ARCH_FLAG =
68
- DLDFLAGS = $(ldflags) $(dldflags)
69
- LDSHARED = $(CC) -dynamic -bundle
70
- LDSHAREDXX = $(CXX) -dynamic -bundle
71
- AR = ar
72
- EXEEXT =
73
-
74
- RUBY_BASE_NAME = ruby
75
- RUBY_INSTALL_NAME = ruby
76
- RUBY_SO_NAME = ruby.1.9.1
77
- arch = x86_64-darwin10.3.1
78
- sitearch = $(arch)
79
- ruby_version = 1.9.1
80
- ruby = /Users/dimus/.rvm/rubies/ruby-1.9.2-p0/bin/ruby
81
- RUBY = $(ruby)
82
- RM = rm -f
83
- RM_RF = $(RUBY) -run -e rm -- -rf
84
- RMDIRS = $(RUBY) -run -e rmdir -- -p
85
- MAKEDIRS = mkdir -p
86
- INSTALL = /usr/bin/install -c
87
- INSTALL_PROG = $(INSTALL) -m 0755
88
- INSTALL_DATA = $(INSTALL) -m 644
89
- COPY = cp
90
-
91
- #### End of system configuration section. ####
92
-
93
- preload =
94
-
95
- libpath = . $(libdir)
96
- LIBPATH = -L. -L$(libdir)
97
- DEFFILE =
98
-
99
- CLEANFILES = mkmf.log
100
- DISTCLEANFILES =
101
- DISTCLEANDIRS =
102
-
103
- extout =
104
- extout_prefix =
105
- target_prefix =
106
- LOCAL_LIBS =
107
- LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
108
- SRCS =
109
- OBJS =
110
- TARGET =
111
- DLLIB =
112
- EXTSTATIC =
113
- STATIC_LIB =
114
-
115
- BINDIR = $(bindir)
116
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
117
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
118
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
119
- HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
120
- ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
121
-
122
- TARGET_SO = $(DLLIB)
123
- CLEANLIBS = $(TARGET).bundle
124
- CLEANOBJS = *.o *.bak
125
-
126
- all: Makefile
127
- static: $(STATIC_LIB)
128
- .PHONY: all install static install-so install-rb
129
- .PHONY: clean clean-so clean-rb
130
-
131
- clean-rb-default::
132
- clean-rb::
133
- clean-so::
134
- clean: clean-so clean-rb-default clean-rb
135
- @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
136
-
137
- distclean-rb-default::
138
- distclean-rb::
139
- distclean-so::
140
- distclean: clean distclean-so distclean-rb-default distclean-rb
141
- @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
142
- @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
143
- @-$(RMDIRS) $(DISTCLEANDIRS)
144
-
145
- realclean: distclean
146
- install: install-so install-rb
147
-
148
- install-so: Makefile
149
- install-rb: pre-install-rb install-rb-default
150
- install-rb-default: pre-install-rb-default
151
- pre-install-rb: Makefile
152
- pre-install-rb-default: Makefile
153
-
154
- site-install: site-install-so site-install-rb
155
- site-install-so: install-so
156
- site-install-rb: install-rb
157
-
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 1.0.1
data/spec/spec.opts DELETED
@@ -1 +0,0 @@
1
- --colour
data/spec/spec_helper.rb DELETED
@@ -1,22 +0,0 @@
1
- require 'rspec'
2
-
3
- $:.unshift(File.dirname(__FILE__) + '/../lib')
4
- require 'taxamatch_rb'
5
-
6
- def read_test_file(file, fields_num)
7
- f = open(file)
8
- f.each do |line|
9
- fields = line.split("|")
10
- if line.match(/^\s*#/) == nil && fields.size == fields_num
11
- fields[-1] = fields[-1].split('#')[0].strip
12
- yield(fields)
13
- else
14
- yield(nil)
15
- end
16
- end
17
- end
18
-
19
- def make_taxamatch_hash(string)
20
- normalized = Taxamatch::Normalizer.normalize(string)
21
- {:string => string, :normalized => normalized, :phonetized => Taxamatch::Phonetizer.near_match(normalized)}
22
- end
@@ -1,406 +0,0 @@
1
- # encoding: UTF-8
2
- require 'spec_helper'
3
-
4
- describe 'Atomizer' do
5
- before(:all) do
6
- @parser = Taxamatch::Atomizer.new
7
- end
8
-
9
- it 'should parse uninomials' do
10
- @parser.parse('Betula').should == { :all_authors => [], :all_years => [],
11
- :canonical_form => "Betula", :uninomial => { :string => "Betula",
12
- :normalized => 'BETULA', :phonetized => "BITILA", :authors => [],
13
- :years => [], :normalized_authors => [] } }
14
- @parser.parse('Ærenea Lacordaire, 1872').should == {
15
- :all_authors => ["LACORDAIRE"], :all_years => [1872],
16
- :canonical_form => "Aerenea", :uninomial => { :string => "Aerenea",
17
- :normalized => "AERENEA", :phonetized => "ERINIA",
18
- :authors => ["Lacordaire"], :years => [1872],
19
- :normalized_authors => ["LACORDAIRE"] } }
20
- end
21
-
22
- it 'should parse binomials' do
23
- @parser.parse('Leœptura laetifica Dow, 1913').should == {
24
- :all_authors => ["DOW"], :all_years => [1913],
25
- :canonical_form => "Leoeptura laetifica", :genus => {
26
- :string => "Leoeptura", :normalized => "LEOEPTURA",
27
- :phonetized => "LIPTIRA", :authors => [], :years => [],
28
- :normalized_authors => []}, :species => {
29
- :string => "laetifica", :normalized => "LAETIFICA",
30
- :phonetized => "LITIFICA", :authors => ["Dow"],
31
- :years => [1913], :normalized_authors => ["DOW"] } }
32
- end
33
-
34
- it 'should parse trinomials' do
35
- @parser.parse('Hydnellum scrobiculatum zonatum ' +
36
- '(Banker) D. Hall et D.E. Stuntz 1972').should == {
37
- :all_authors => ["BANKER", "D HALL", "D E STUNTZ"], :all_years => [1972],
38
- :canonical_form => "Hydnellum scrobiculatum zonatum", :genus=>{
39
- :string => "Hydnellum", :normalized => "HYDNELLUM",
40
- :phonetized => "HIDNILIM", :authors => [], :years => [],
41
- :normalized_authors => [] }, :species => { :string => "scrobiculatum",
42
- :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
43
- :authors => [], :years => [], :normalized_authors => [] },
44
- :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
45
- :phonetized => "ZANATA", :authors => ["Banker", "D. Hall", "D.E. Stuntz"],
46
- :years => [1972], :normalized_authors => ["BANKER", "D HALL",
47
- "D E STUNTZ"] }] }
48
- end
49
-
50
- it 'should normalize years to integers' do
51
- future_year = Time.now.year + 10
52
- @parser.parse("Hydnellum scrobiculatum Kern #{future_year} " +
53
- "zonatum (Banker) D. Hall et D.E. Stuntz 1972?").should == {
54
- :all_authors => ["KERN", "BANKER", "D HALL", "D E STUNTZ"],
55
- :all_years => [1972],
56
- :canonical_form => "Hydnellum scrobiculatum zonatum", :genus => {
57
- :string => "Hydnellum", :normalized => "HYDNELLUM",
58
- :phonetized => "HIDNILIM", :authors => [], :years => [],
59
- :normalized_authors => [] }, :species => { :string => "scrobiculatum",
60
- :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
61
- :authors => ["Kern"], :years => [], :normalized_authors => ["KERN"] },
62
- :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
63
- :phonetized => "ZANATA", :authors =>
64
- ["Banker", "D. Hall", "D.E. Stuntz"], :years => [1972],
65
- :normalized_authors => ["BANKER", "D HALL", "D E STUNTZ"] }] }
66
- end
67
-
68
- it 'should normalize names with abbreviated genus after cf.' do
69
- @parser.parse('Unio cf. U. alba').should == { :all_authors => [],
70
- :all_years => [], :canonical_form => "Unio",
71
- :genus => { :string => "Unio", :normalized => "UNIO",
72
- :phonetized => "UNIA", :authors => [], :years => [],
73
- :normalized_authors => [] } }
74
- end
75
-
76
- it 'should parse names which broke it before' do
77
- ['Parus caeruleus species complex',
78
- 'Euxoa nr. idahoensis sp. 1clay',
79
- 'Cetraria islandica ? islandica',
80
- 'Buteo borealis ? ventralis'].each do |n|
81
- res = @parser.parse(n)
82
- res.class.should == Hash
83
- res.empty?.should be_false
84
- end
85
- end
86
- end
87
-
88
-
89
- describe 'Taxamatch::Normalizer' do
90
- it 'should normalize strings' do
91
- Taxamatch::Normalizer.normalize('abcd').should == 'ABCD'
92
- Taxamatch::Normalizer.normalize('Leœptura').should == 'LEOEPTURA'
93
- Taxamatch::Normalizer.normalize('Ærenea').should == 'AERENEA'
94
- Taxamatch::Normalizer.normalize('Fallén').should == 'FALLEN'
95
- Taxamatch::Normalizer.normalize('Fallé€n').should == 'FALLE?N'
96
- Taxamatch::Normalizer.normalize('Fallén привет').should == 'FALLEN ??????'
97
- Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should ==
98
- 'CHORIOZOPELLA TRAGARDHI'
99
- Taxamatch::Normalizer.normalize('×Zygomena').should == 'xZYGOMENA'
100
- end
101
-
102
- it 'should normalize words' do
103
- Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should ==
104
- 'L-3EOEPTURA'
105
- end
106
- end
107
-
108
- describe 'Taxamatch::Base' do
109
- before(:all) do
110
- @tm = Taxamatch::Base.new
111
- end
112
-
113
- it 'should get txt tests' do
114
- test_file = File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt'
115
- read_test_file(test_file, 4) do |y|
116
- if y
117
- y[2] = y[2] == 'true' ? true : false
118
- res = @tm.taxamatch(y[0], y[1], false)
119
- # puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
120
- res['match'].should == y[2]
121
- res['edit_distance'].should == y[3].to_i
122
- end
123
- end
124
- end
125
-
126
- it 'should work with names that cannot be parsed' do
127
- res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921',
128
- 'Quadraspidiotus ostreaeformis Curtis)')
129
- res = false
130
- end
131
-
132
- it 'should compare genera' do
133
- # edit distance 1 always match
134
- g1 = make_taxamatch_hash 'Plantago'
135
- g2 = make_taxamatch_hash 'Plantagon'
136
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
137
- 'edit_distance' => 1, 'match' => true }
138
- # edit_distance above threshold does not match
139
- g1 = make_taxamatch_hash 'Plantago'
140
- g2 = make_taxamatch_hash 'This shouldnt match'
141
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
142
- 'match' => false, 'edit_distance' => 4 }
143
- # phonetic_match matches
144
- g1 = make_taxamatch_hash 'Plantagi'
145
- g2 = make_taxamatch_hash 'Plantagy'
146
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
147
- 'edit_distance' => 1, 'match' => true }
148
- @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
149
- 'phonetic_match' => false, 'edit_distance' => 1, 'match' => true }
150
- # distance 1 in first letter also matches
151
- g1 = make_taxamatch_hash 'Xantheri'
152
- g2 = make_taxamatch_hash 'Pantheri'
153
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
154
- 'edit_distance' => 1, 'match' => true }
155
- # phonetic match trumps everything
156
- g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
157
- g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
158
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
159
- 'edit_distance' => 4, 'match' => true }
160
- @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
161
- 'phonetic_match' => false, 'edit_distance' => 4, 'match' => false }
162
- # same first letter and distance 2 should match
163
- g1 = make_taxamatch_hash 'Xaaaantherii'
164
- g2 = make_taxamatch_hash 'Xaaaantherrr'
165
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
166
- 'match' => true, 'edit_distance' => 2 }
167
- # First letter is the same and distance is 3 should match, no phonetic match
168
- g1 = make_taxamatch_hash 'Xaaaaaaaaaaantheriii'
169
- g2 = make_taxamatch_hash 'Xaaaaaaaaaaantherrrr'
170
- @tm.match_genera(g1, g2).should ==
171
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 3 }
172
- # Should not match if one of words is shorter than 2x edit
173
- # distance and distance is 2 or 3
174
- g1 = make_taxamatch_hash 'Xant'
175
- g2 = make_taxamatch_hash 'Xanthe'
176
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
177
- 'match' => false, 'edit_distance' => 2 }
178
- # Should not match if edit distance > 3 and no phonetic match
179
- g1 = make_taxamatch_hash 'Xantheriiii'
180
- g2 = make_taxamatch_hash 'Xantherrrrr'
181
- @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
182
- 'match' => false, 'edit_distance' => 4 }
183
- end
184
-
185
- it 'should compare species' do
186
- # Exact match
187
- s1 = make_taxamatch_hash 'major'
188
- s2 = make_taxamatch_hash 'major'
189
- @tm.match_species(s1, s2).should == { 'phonetic_match' => true,
190
- 'match' => true, 'edit_distance' => 0 }
191
- @tm.match_species(s1, s2, :with_phonetic_match => false).should == {
192
- 'phonetic_match' => false, 'match' => true, 'edit_distance' => 0 }
193
- # Phonetic match always works
194
- s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
195
- s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
196
- @tm.match_species(s1, s2).should == { 'phonetic_match' => true,
197
- 'match' => true, 'edit_distance' => 4 }
198
- @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
199
- { 'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
200
- # Phonetic match works with different endings
201
- s1 = make_taxamatch_hash 'majorum'
202
- s2 = make_taxamatch_hash 'majoris'
203
- @tm.match_species(s1, s2).should == {
204
- 'phonetic_match' => true, 'match' => true, 'edit_distance' => 2 }
205
- @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
206
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 2 }
207
- # Distance 4 matches if first 3 chars are the same
208
- s1 = make_taxamatch_hash 'majjjjorrrrr'
209
- s2 = make_taxamatch_hash 'majjjjoraaaa'
210
- @tm.match_species(s1, s2).should ==
211
- { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 4 }
212
- # Should not match if distance is 4 and first 3 chars are not the same
213
- s1 = make_taxamatch_hash 'majorrrrr'
214
- s2 = make_taxamatch_hash 'marorraaa'
215
- @tm.match_species(s1, s2).should == {
216
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
217
- # Distance 2 or 3 matches if first 1 char is the same
218
- s1 = make_taxamatch_hash 'moooorrrr'
219
- s2 = make_taxamatch_hash 'mooooraaa'
220
- @tm.match_species(s1, s2).should == { 'phonetic_match' => false,
221
- 'match' => true, 'edit_distance' => 3 }
222
- # Should not match if Distance 2 or 3 and first 1 char is not the same
223
- s1 = make_taxamatch_hash 'morrrr'
224
- s2 = make_taxamatch_hash 'torraa'
225
- @tm.match_species(s1, s2).should == {
226
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
227
- # Distance 1 will match anywhere
228
- s1 = make_taxamatch_hash 'major'
229
- s2 = make_taxamatch_hash 'rajor'
230
- @tm.match_species(s1, s2).should == {
231
- 'phonetic_match' => false, 'match' => true, 'edit_distance' => 1 }
232
- # Will not match if distance 3 and length is less than twice
233
- # of the edit distance
234
- s1 = make_taxamatch_hash 'marrr'
235
- s2 = make_taxamatch_hash 'maaaa'
236
- @tm.match_species(s1, s2).should == {
237
- 'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
238
- end
239
-
240
- it 'should match matches' do
241
- # No trouble case
242
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
243
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
244
- @tm.match_matches(gmatch, smatch).should ==
245
- { 'phonetic_match' => true, 'edit_distance' => 2, 'match' => true }
246
- # Will not match if either genus or sp. epithet don't match
247
- gmatch = { 'match' => false,
248
- 'phonetic_match' => false, 'edit_distance' => 1 }
249
- smatch = { 'match' => true,
250
- 'phonetic_match' => true, 'edit_distance' => 1 }
251
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
252
- 'edit_distance' => 2, 'match' => false }
253
- gmatch = { 'match' => true, 'phonetic_match' => true,
254
- 'edit_distance' => 1 }
255
- smatch = { 'match' => false, 'phonetic_match' => false,
256
- 'edit_distance' => 1 }
257
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
258
- 'edit_distance' => 2, 'match' => false }
259
- # Should not match if binomial edit distance > 4
260
- # NOTE: EVEN with full phonetic match
261
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 3 }
262
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
263
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => true,
264
- 'edit_distance' => 5, 'match' => false }
265
- # Should not have phonetic match if one of the components
266
- # does not match phonetically
267
- gmatch = { 'match' => true,
268
- 'phonetic_match' => false, 'edit_distance' => 1 }
269
- smatch = { 'match' => true,
270
- 'phonetic_match' => true, 'edit_distance' => 1 }
271
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
272
- 'edit_distance' => 2, 'match' => true }
273
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
274
- smatch = { 'match' => true,
275
- 'phonetic_match' => false, 'edit_distance' => 1 }
276
- @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
277
- 'edit_distance' => 2, 'match' => true }
278
- # edit distance should equal the sum of the edit distances
279
- gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
280
- smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
281
- @tm.match_matches(gmatch, smatch).should == {
282
- 'phonetic_match'=>true, 'edit_distance'=>4, 'match'=>true }
283
- end
284
-
285
- it 'should return only boolean values' do
286
- @tm.taxamatch("AJLJljljlj", "sls").should_not be_nil
287
- @tm.taxamatch('Olsl','a')
288
- end
289
-
290
- it "should not match authors from different parts of name" do
291
- parser = Taxamatch::Atomizer.new
292
- t = Taxamatch::Base.new
293
- n1 = parser.parse "Betula Linnaeus"
294
- n2 = parser.parse "Betula alba Linnaeus"
295
- n3 = parser.parse "Betula alba alba Linnaeus"
296
- n4 = parser.parse "Betula alba L."
297
- n5 = parser.parse "Betula alba"
298
- n6 = parser.parse "Betula olba"
299
- n7 = parser.parse "Betula alba Linnaeus alba"
300
- n8 = parser.parse "Betula alba Linnaeus alba Smith"
301
- n9 = parser.parse "Betula alba Smith alba L."
302
- n10 = parser.parse "Betula Linn."
303
- # if one authorship is empty, return 0
304
- t.match_authors(n1, n5).should == 0
305
- t.match_authors(n5, n1).should == 0
306
- t.match_authors(n5, n6).should == 0
307
- # if authorship matches on different levels ignore
308
- t.match_authors(n7, n3).should == 0
309
- t.match_authors(n8, n3).should == -1
310
- t.match_authors(n2, n8).should == 0
311
- t.match_authors(n1, n2).should == 0
312
- # match on infraspecies level
313
- t.match_authors(n9, n3).should == 1
314
- # match on species level
315
- t.match_authors(n2, n4).should == 1
316
- # match on uninomial level
317
- t.match_authors(n1, n10).should == 1
318
- end
319
-
320
-
321
- describe 'Taxamatch::Authmatch' do
322
- before(:all) do
323
- @am = Taxamatch::Authmatch
324
- end
325
-
326
- it 'should calculate score' do
327
- res = @am.authmatch(['Linnaeus', 'Muller'], ['L'], [], [1788])
328
- res.should == 90
329
- res = @am.authmatch(['Linnaeus'],['Kurtz'], [], [])
330
- res.should == 0
331
- # found all authors, same year
332
- res = @am.authmatch(['Linnaeus', 'Muller'],
333
- ['Muller', 'Linnaeus'], [1766], [1766])
334
- res.should == 100
335
- # all authors, 1 year diff
336
- res = @am.authmatch(['Linnaeus', 'Muller'],
337
- ['Muller', 'Linnaeus'], [1767], [1766])
338
- res.should == 54
339
- # year is not counted in
340
- res = @am.authmatch(['Linnaeus', 'Muller'],
341
- ['Muller', 'Linnaeus'], [1767], [])
342
- res.should == 94
343
- # found all authors on one side, same year
344
- res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
345
- ['Muller', 'Linnaeus'], [1767], [1767])
346
- res.should == 91
347
- # found all authors on one side, 1 year diff
348
- res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
349
- ['Muller', 'Linnaeus'], [1766], [1767])
350
- res.should == 51
351
- # found all authors on one side, year does not count
352
- res = @am.authmatch(['Linnaeus', 'Muller'],
353
- ['Muller', 'Linnaeus', 'Kurtz'], [1766], [])
354
- res.should == 90
355
- # found some authors
356
- res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
357
- ['Muller', 'Kurtz', 'Stepanov'], [1766], [])
358
- res.should == 67
359
- # if year does not match or not present no match for previous case
360
- res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
361
- ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
362
- res.should == 0
363
- end
364
-
365
- it 'should compare years' do
366
- @am.compare_years([1882],[1880]).should == 2
367
- @am.compare_years([1882],[]).should == nil
368
- @am.compare_years([],[]).should == 0
369
- @am.compare_years([1788,1798], [1788,1798]).should be_nil
370
- end
371
-
372
- it 'should remove duplicate authors' do
373
- # Lin submatches Linnaeus and its size 3 is big enough to remove
374
- # Linnaeus Muller is identical
375
- res = @am.remove_duplicate_authors(['Lin', 'Muller'],
376
- ['Linnaeus', 'Muller'])
377
- res.should == [[], []]
378
- # same in different order
379
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
380
- ['Linn', 'Muller'])
381
- res.should == [[], []]
382
- # auth Li submatches Linnaeus, but Li size less than 3
383
- # required to remove Linnaeus
384
- res = @am.remove_duplicate_authors(['Dem', 'Li'],
385
- ['Linnaeus', 'Stepanov'])
386
- res.should == [["Dem"], ["Linnaeus", "Stepanov"]]
387
- # fuzzy match
388
- res = @am.remove_duplicate_authors(['Dem', 'Lennaeus'],
389
- ['Linnaeus', 'Stepanov'])
390
- res.should == [["Dem"], ["Stepanov"]]
391
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
392
- ['L', 'Kenn'])
393
- res.should == [['Linnaeus', 'Muller'], ['Kenn']]
394
- res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
395
- ['Muller', 'Linnaeus', 'Kurtz'])
396
- res.should == [[],['Kurtz']]
397
- end
398
-
399
- it 'should fuzzy match authors' do
400
- res = @am.fuzzy_match_authors('L', 'Muller')
401
- res.should be_false
402
- end
403
-
404
- end
405
-
406
- end
@@ -1,46 +0,0 @@
1
- ###
2
- #
3
- # Tests for string comparison by taxamatch algorithm
4
- # name1|name2|match|edit_distance
5
- #
6
- ##
7
- # Comparing uninomials
8
- Pomatomus|Pomatomas|true|1
9
- Pomatomus L.|Pomatomas Linn.|true|1
10
- Pomatomus Ber|Pomatomas Linn|false|1
11
- Pomatomus L. 1758|Pomatomus Linn. 1800|false|0
12
- Patella|Abbella|false|3
13
-
14
- ## additional authorship should match
15
- Puma concolor|Puma concolor L.|true|0
16
- #
17
- ## one-letter misspelling in species epithet should match
18
- Puma concolor|Puma cancolor|true|1
19
- #
20
- Pomatomus saltatrix|Pomatomus saltratix|true|2
21
- Pomatomus saltator|Pomatomus saltatrix|false|3 #!!!
22
- #
23
- Loligo pealeii|Loligo plei|false|3
24
- #
25
- ## different authors should not match
26
- Puma concolor Linnaeus|Puma concolor Kurtz|false|0
27
- #
28
- ##real life examples
29
- Biatora borealis|Bactra borealis Diakonoff 1964|false|3
30
- #
31
- Homo sapien|Homo sapiens|true|1
32
- Homo sapiens Linnaeus|Homo sapens (Linn. 1758) |true|1
33
- Homo sapiens Mozzherin|Homo sapiens Linneaus|false|0
34
- #
35
- Quinqueloculina punctata|Quinqueloculina punctata d'Orbigny 1905|true|0
36
- Pomatomus saltator (Linnaeus, 1766)|Pomatomus saltatrix (Linnaeus, 1766)|true|0|3
37
- #
38
- #Trinomial names
39
- Homo sapiens stupidus|Homo spiens stupidus|true|1
40
- Pomatomus saltator saltator L. 1758|Pomatomus saltator var. saltatror L. 1758|true|1
41
- Pomatomus saltator L. 1758|Pomatomus saltator var. saltatror L. 1758|false|5
42
- Pomatomus saltator saltator saltatorische|Pomatomus saltator soltator|true|1
43
-
44
-
45
-
46
-