taxamatch_rb 0.9.2 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -3
- data/Gemfile.lock +21 -26
- data/VERSION +1 -1
- data/lib/taxamatch_rb/normalizer.rb +17 -30
- data/spec/taxamatch_rb_spec.rb +1 -0
- data/taxamatch_rb.gemspec +14 -11
- metadata +26 -10
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
0.9.4 - updated parser (to 1.0.16), updated code to ruby 1.9.3
|
2
|
+
|
3
|
+
0.9.3 - Taxamatch::Normalizer replaces the multiplication sign with 'x'
|
4
|
+
(lowercase) instead of '?'
|
5
|
+
|
1
6
|
0.9.2 - Taxamatch::Normalizer.normalize always returns only ASCII
|
2
7
|
characters; all UTF-8 characters unknown to the normalizer become '?'
|
3
8
|
|
data/Gemfile
CHANGED
@@ -2,17 +2,18 @@ source "http://rubygems.org"
|
|
2
2
|
require 'yaml'
|
3
3
|
# YAML::ENGINE.yamler= 'syck'
|
4
4
|
|
5
|
-
gem "biodiversity19",">= 1.0.
|
5
|
+
gem "biodiversity19",">= 1.0.16"
|
6
6
|
gem "damerau-levenshtein", ">= 0.5.4"
|
7
7
|
|
8
8
|
|
9
9
|
group :development do
|
10
|
+
gem "rake"
|
10
11
|
gem "rake-compiler"
|
11
|
-
gem "rspec"
|
12
|
+
gem "rspec"
|
12
13
|
gem "cucumber", ">= 0"
|
13
14
|
gem "bundler", "~> 1.1.3"
|
14
15
|
gem "jeweler", "~> 1.6.0"
|
15
|
-
gem "
|
16
|
+
gem "debugger"
|
16
17
|
gem "ruby-prof"
|
17
18
|
gem "shoulda"
|
18
19
|
gem "mocha"
|
data/Gemfile.lock
CHANGED
@@ -4,8 +4,7 @@ GEM
|
|
4
4
|
activesupport (3.2.6)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
|
-
|
8
|
-
biodiversity19 (1.0.14)
|
7
|
+
biodiversity19 (1.0.16)
|
9
8
|
parallel
|
10
9
|
parallel
|
11
10
|
treetop
|
@@ -18,6 +17,13 @@ GEM
|
|
18
17
|
gherkin (~> 2.11.0)
|
19
18
|
json (>= 1.4.6)
|
20
19
|
damerau-levenshtein (0.5.4)
|
20
|
+
debugger (1.2.0)
|
21
|
+
columnize (>= 0.3.1)
|
22
|
+
debugger-linecache (~> 1.1.1)
|
23
|
+
debugger-ruby_core_source (~> 1.1.3)
|
24
|
+
debugger-linecache (1.1.2)
|
25
|
+
debugger-ruby_core_source (>= 1.1.1)
|
26
|
+
debugger-ruby_core_source (1.1.3)
|
21
27
|
diff-lcs (1.1.3)
|
22
28
|
gherkin (2.11.1)
|
23
29
|
json (>= 1.4.6)
|
@@ -28,36 +34,24 @@ GEM
|
|
28
34
|
git (>= 1.2.5)
|
29
35
|
rake
|
30
36
|
json (1.7.3)
|
31
|
-
linecache19 (0.5.12)
|
32
|
-
ruby_core_source (>= 0.1.4)
|
33
37
|
metaclass (0.0.1)
|
34
38
|
mocha (0.12.0)
|
35
39
|
metaclass (~> 0.0.1)
|
36
40
|
multi_json (1.3.6)
|
37
|
-
parallel (0.5.
|
41
|
+
parallel (0.5.18)
|
38
42
|
polyglot (0.3.3)
|
39
43
|
rake (0.9.2.2)
|
40
44
|
rake-compiler (0.8.1)
|
41
45
|
rake
|
42
|
-
rspec (2.
|
43
|
-
rspec-core (~> 2.
|
44
|
-
rspec-expectations (~> 2.
|
45
|
-
rspec-mocks (~> 2.
|
46
|
-
rspec-core (2.
|
47
|
-
rspec-expectations (2.
|
48
|
-
diff-lcs (~> 1.1.
|
49
|
-
rspec-mocks (2.
|
50
|
-
ruby-debug-base19 (0.11.25)
|
51
|
-
columnize (>= 0.3.1)
|
52
|
-
linecache19 (>= 0.5.11)
|
53
|
-
ruby_core_source (>= 0.1.4)
|
54
|
-
ruby-debug19 (0.11.6)
|
55
|
-
columnize (>= 0.3.1)
|
56
|
-
linecache19 (>= 0.5.11)
|
57
|
-
ruby-debug-base19 (>= 0.11.19)
|
46
|
+
rspec (2.11.0)
|
47
|
+
rspec-core (~> 2.11.0)
|
48
|
+
rspec-expectations (~> 2.11.0)
|
49
|
+
rspec-mocks (~> 2.11.0)
|
50
|
+
rspec-core (2.11.1)
|
51
|
+
rspec-expectations (2.11.2)
|
52
|
+
diff-lcs (~> 1.1.3)
|
53
|
+
rspec-mocks (2.11.2)
|
58
54
|
ruby-prof (0.11.2)
|
59
|
-
ruby_core_source (0.1.5)
|
60
|
-
archive-tar-minitar (>= 0.5.2)
|
61
55
|
shoulda (3.1.1)
|
62
56
|
shoulda-context (~> 1.0)
|
63
57
|
shoulda-matchers (~> 1.2)
|
@@ -72,14 +66,15 @@ PLATFORMS
|
|
72
66
|
ruby
|
73
67
|
|
74
68
|
DEPENDENCIES
|
75
|
-
biodiversity19 (>= 1.0.
|
69
|
+
biodiversity19 (>= 1.0.16)
|
76
70
|
bundler (~> 1.1.3)
|
77
71
|
cucumber
|
78
72
|
damerau-levenshtein (>= 0.5.4)
|
73
|
+
debugger
|
79
74
|
jeweler (~> 1.6.0)
|
80
75
|
mocha
|
76
|
+
rake
|
81
77
|
rake-compiler
|
82
|
-
rspec
|
83
|
-
ruby-debug19
|
78
|
+
rspec
|
84
79
|
ruby-prof
|
85
80
|
shoulda
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.4
|
@@ -4,7 +4,7 @@ module Taxamatch
|
|
4
4
|
|
5
5
|
module Normalizer
|
6
6
|
def self.normalize(string)
|
7
|
-
utf8_to_ascii(string.strip
|
7
|
+
utf8_to_ascii(string.strip.upcase).gsub(/[^\x00-\x7F]/,'?')
|
8
8
|
end
|
9
9
|
|
10
10
|
def self.normalize_word(word)
|
@@ -25,37 +25,24 @@ module Taxamatch
|
|
25
25
|
private
|
26
26
|
def self.utf8_to_ascii(string)
|
27
27
|
string = string.gsub(/\s{2,}/, ' ')
|
28
|
-
string = string.gsub(
|
29
|
-
string = string.gsub(/[
|
30
|
-
string = string.gsub(/[
|
31
|
-
string = string.gsub(/[
|
32
|
-
string = string.gsub(/[
|
33
|
-
string = string.gsub(/[
|
34
|
-
string = string.gsub(
|
35
|
-
string = string.gsub(/[
|
36
|
-
string = string.gsub(/[
|
37
|
-
string = string.gsub(/[
|
38
|
-
string = string.gsub(
|
39
|
-
string = string.gsub(
|
40
|
-
string = string.gsub(
|
28
|
+
string = string.gsub("×", "x")
|
29
|
+
string = string.gsub(/[ÀÂÅÃÄÁẤẠÁáàâåãäăãắảạậầằá]/, "A")
|
30
|
+
string = string.gsub(/[ÉÈÊËéèêëĕěếệểễềẻ]/, "E")
|
31
|
+
string = string.gsub(/[ÍÌÎÏíìîïǐĭīĩỉï]/, "I")
|
32
|
+
string = string.gsub(/[ÓÒÔØÕÖỚỔóòôøõöŏỏỗộơọỡốơồờớổő]/, "O")
|
33
|
+
string = string.gsub(/[ÚÙÛÜúùûüůưừựủứụű]/, "U")
|
34
|
+
string = string.gsub(/[Ýýÿỹ]/, "Y")
|
35
|
+
string = string.gsub(/[Ææ]/, "AE")
|
36
|
+
string = string.gsub(/[ČÇčćç]/, "C")
|
37
|
+
string = string.gsub(/[ŠŞśšşſ]/, "S")
|
38
|
+
string = string.gsub(/[Đđð]/, "D")
|
39
|
+
# Bracketed character class so that each of Ž, ž, ź is replaced on its own;
# without the brackets only the literal sequence "Žžź" would ever match.
string = string.gsub(/[Žžź]/, "Z")
|
40
|
+
string = string.gsub(/[Ññńň]/, "N")
|
41
|
+
string = string.gsub(/[Œœ]/, "OE")
|
41
42
|
string = string.gsub(/ß/, "B")
|
42
43
|
string = string.gsub(/Ķ/, "K")
|
43
|
-
string = string.gsub(
|
44
|
-
string = string.gsub(/[
|
45
|
-
string = string.gsub(/[íìîïǐĭīĩỉï]/, "i")
|
46
|
-
string = string.gsub(/[óòôøõöŏỏỗộơọỡốơồờớổő]/, "o")
|
47
|
-
string = string.gsub(/[úùûüůưừựủứụű]/, "u")
|
48
|
-
string = string.gsub(/[žź]/, "z")
|
49
|
-
string = string.gsub(/[ýÿỹ]/, "y")
|
50
|
-
string = string.gsub(/[đð]/, "d")
|
51
|
-
string = string.gsub(/æ/, "ae")
|
52
|
-
string = string.gsub(/[čćç]/, "c")
|
53
|
-
string = string.gsub(/[ñńň]/, "n")
|
54
|
-
string = string.gsub(/œ/, "oe")
|
55
|
-
string = string.gsub(/[śšş]/, "s")
|
56
|
-
string = string.gsub(/ř/, "r")
|
57
|
-
string = string.gsub(/ğ/, "g")
|
58
|
-
string = string.gsub(/Ř/, "R")
|
44
|
+
string = string.gsub(/ğ/, "G")
|
45
|
+
string = string.gsub(/[Řř]/, "R")
|
59
46
|
end
|
60
47
|
|
61
48
|
end
|
data/spec/taxamatch_rb_spec.rb
CHANGED
@@ -35,6 +35,7 @@ describe 'Taxamatch::Normalizer' do
|
|
35
35
|
Taxamatch::Normalizer.normalize('Fallé€n').should == 'FALLE?N'
|
36
36
|
Taxamatch::Normalizer.normalize('Fallén привет').should == 'FALLEN ??????'
|
37
37
|
Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should == 'CHORIOZOPELLA TRAGARDHI'
|
38
|
+
Taxamatch::Normalizer.normalize('×Zygomena').should == 'xZYGOMENA'
|
38
39
|
end
|
39
40
|
|
40
41
|
it 'should normalize words' do
|
data/taxamatch_rb.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "taxamatch_rb"
|
8
|
-
s.version = "0.9.
|
8
|
+
s.version = "0.9.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-08-27"
|
13
13
|
s.description = "This gem implements algorithm for fuzzy matching scientific names developed by Tony Rees"
|
14
14
|
s.email = "dmozzherin@eol.org"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -45,39 +45,42 @@ Gem::Specification.new do |s|
|
|
45
45
|
s.specification_version = 3
|
46
46
|
|
47
47
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
48
|
-
s.add_runtime_dependency(%q<biodiversity19>, [">= 1.0.
|
48
|
+
s.add_runtime_dependency(%q<biodiversity19>, [">= 1.0.16"])
|
49
49
|
s.add_runtime_dependency(%q<damerau-levenshtein>, [">= 0.5.4"])
|
50
|
+
s.add_development_dependency(%q<rake>, [">= 0"])
|
50
51
|
s.add_development_dependency(%q<rake-compiler>, [">= 0"])
|
51
|
-
s.add_development_dependency(%q<rspec>, ["
|
52
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
52
53
|
s.add_development_dependency(%q<cucumber>, [">= 0"])
|
53
54
|
s.add_development_dependency(%q<bundler>, ["~> 1.1.3"])
|
54
55
|
s.add_development_dependency(%q<jeweler>, ["~> 1.6.0"])
|
55
|
-
s.add_development_dependency(%q<
|
56
|
+
s.add_development_dependency(%q<debugger>, [">= 0"])
|
56
57
|
s.add_development_dependency(%q<ruby-prof>, [">= 0"])
|
57
58
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
58
59
|
s.add_development_dependency(%q<mocha>, [">= 0"])
|
59
60
|
else
|
60
|
-
s.add_dependency(%q<biodiversity19>, [">= 1.0.
|
61
|
+
s.add_dependency(%q<biodiversity19>, [">= 1.0.16"])
|
61
62
|
s.add_dependency(%q<damerau-levenshtein>, [">= 0.5.4"])
|
63
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
62
64
|
s.add_dependency(%q<rake-compiler>, [">= 0"])
|
63
|
-
s.add_dependency(%q<rspec>, ["
|
65
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
64
66
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
65
67
|
s.add_dependency(%q<bundler>, ["~> 1.1.3"])
|
66
68
|
s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
|
67
|
-
s.add_dependency(%q<
|
69
|
+
s.add_dependency(%q<debugger>, [">= 0"])
|
68
70
|
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
69
71
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
70
72
|
s.add_dependency(%q<mocha>, [">= 0"])
|
71
73
|
end
|
72
74
|
else
|
73
|
-
s.add_dependency(%q<biodiversity19>, [">= 1.0.
|
75
|
+
s.add_dependency(%q<biodiversity19>, [">= 1.0.16"])
|
74
76
|
s.add_dependency(%q<damerau-levenshtein>, [">= 0.5.4"])
|
77
|
+
s.add_dependency(%q<rake>, [">= 0"])
|
75
78
|
s.add_dependency(%q<rake-compiler>, [">= 0"])
|
76
|
-
s.add_dependency(%q<rspec>, ["
|
79
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
77
80
|
s.add_dependency(%q<cucumber>, [">= 0"])
|
78
81
|
s.add_dependency(%q<bundler>, ["~> 1.1.3"])
|
79
82
|
s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
|
80
|
-
s.add_dependency(%q<
|
83
|
+
s.add_dependency(%q<debugger>, [">= 0"])
|
81
84
|
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
82
85
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
83
86
|
s.add_dependency(%q<mocha>, [">= 0"])
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taxamatch_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-08-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: biodiversity19
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.0.
|
21
|
+
version: 1.0.16
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 1.0.
|
29
|
+
version: 1.0.16
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: damerau-levenshtein
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: 0.5.4
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
- !ruby/object:Gem::Dependency
|
47
63
|
name: rake-compiler
|
48
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -64,17 +80,17 @@ dependencies:
|
|
64
80
|
requirement: !ruby/object:Gem::Requirement
|
65
81
|
none: false
|
66
82
|
requirements:
|
67
|
-
- -
|
83
|
+
- - ! '>='
|
68
84
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
85
|
+
version: '0'
|
70
86
|
type: :development
|
71
87
|
prerelease: false
|
72
88
|
version_requirements: !ruby/object:Gem::Requirement
|
73
89
|
none: false
|
74
90
|
requirements:
|
75
|
-
- -
|
91
|
+
- - ! '>='
|
76
92
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
93
|
+
version: '0'
|
78
94
|
- !ruby/object:Gem::Dependency
|
79
95
|
name: cucumber
|
80
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -124,7 +140,7 @@ dependencies:
|
|
124
140
|
- !ruby/object:Gem::Version
|
125
141
|
version: 1.6.0
|
126
142
|
- !ruby/object:Gem::Dependency
|
127
|
-
name:
|
143
|
+
name: debugger
|
128
144
|
requirement: !ruby/object:Gem::Requirement
|
129
145
|
none: false
|
130
146
|
requirements:
|
@@ -228,7 +244,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
228
244
|
version: '0'
|
229
245
|
segments:
|
230
246
|
- 0
|
231
|
-
hash:
|
247
|
+
hash: 2280518613036556094
|
232
248
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
233
249
|
none: false
|
234
250
|
requirements:
|