dimus-taxamatch_rb 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ tmp
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Dmitry Mozzherin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,7 @@
1
+ = taxamatch_rb
2
+
3
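+ Ruby implementation of the Tony Rees and Barbara Boehmer taxamatch algorithms, used to determine whether two scientific names are lexical variants of each other.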
4
+
5
+ == Copyright
6
+
7
+ Copyright (c) 2009 Dmitry Mozzherin. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,50 @@
1
# Rake tasks for the taxamatch_rb gem: gem packaging through Jeweler,
# spec/rcov runs through the RSpec rake tasks, and RDoc generation.
require 'rubygems'
require 'rake'
# YAML is needed by the RDoc task below; required explicitly so the task does
# not depend on Jeweler (which may fail to load) pulling it in.
require 'yaml'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "taxamatch_rb"
    gem.summary = %Q{TODO}
    gem.email = "dmozzherin@eol.org"
    gem.homepage = "http://github.com/dimus/taxamatch_rb"
    gem.authors = ["Dmitry Mozzherin"]
    gem.add_dependency('RubyInline')
    gem.add_dependency('dimus-biodiversity')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  # Packaging tasks are optional; the remaining tasks still get defined.
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end


task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # The version is kept in a plain-text VERSION file; the older VERSION.yml
  # layout (major/minor/patch keys) is still honoured when that is what exists.
  version =
    if File.exist?('VERSION')
      File.read('VERSION').strip
    elsif File.exist?('VERSION.yml')
      config = YAML.load(File.read('VERSION.yml'))
      "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "taxamatch_rb #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
50
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
@@ -0,0 +1,163 @@
1
# Generic Cucumber step definitions for driving generators, executables and
# rake tasks. Steps that produce output write it to a file under @tmp_root
# and leave that file's path in @stdout, which the "Then" steps read back.
# Shell commands use "> file 2>&1" so stdout and stderr share one file
# descriptor instead of truncating each other's output.

Given /^this project is active project folder/ do
  @active_project_folder = File.expand_path(File.dirname(__FILE__) + "/../..")
end

Given /^env variable \$([\w_]+) set to "(.*)"/ do |env_var, value|
  ENV[env_var] = value
end

Given /"(.*)" folder is deleted/ do |folder|
  in_project_folder { FileUtils.rm_rf folder }
end

When /^I invoke "(.*)" generator with arguments "(.*)"$/ do |generator, arguments|
  @stdout = StringIO.new
  in_project_folder do
    if Object.const_defined?("APP_ROOT")
      APP_ROOT.replace(FileUtils.pwd)
    else
      APP_ROOT = FileUtils.pwd
    end
    run_generator(generator, arguments.split(' '), SOURCES, :stdout => @stdout)
  end
  output_file = File.expand_path(File.join(@tmp_root, "generator.out"))
  File.open(output_file, "w") do |f|
    @stdout.rewind
    f << @stdout.read
  end
  # Later steps call File.read(@stdout), so hand them the file, not the StringIO.
  @stdout = output_file
end

When /^I run executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  in_project_folder do
    system "#{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I run project executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  in_project_folder do
    system "ruby #{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I run local executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  executable = File.expand_path(File.join(File.dirname(__FILE__), "/../../bin", executable))
  in_project_folder do
    system "ruby #{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I invoke task "rake (.*)"/ do |task|
  @stdout = File.expand_path(File.join(@tmp_root, "tests.out"))
  in_project_folder do
    system "rake #{task} --trace > #{@stdout} 2>&1"
  end
end

Then /^folder "(.*)" (is|is not) created/ do |folder, is|
  in_project_folder do
    File.exist?(folder).should(is == 'is' ? be_true : be_false)
  end
end

Then /^file "(.*)" (is|is not) created/ do |file, is|
  in_project_folder do
    File.exist?(file).should(is == 'is' ? be_true : be_false)
  end
end

Then /^file with name matching "(.*)" is created/ do |pattern|
  in_project_folder do
    Dir[pattern].should_not be_empty
  end
end

Then /^file "(.*)" contents (does|does not) match \/(.*)\// do |file, does, regex|
  in_project_folder do
    actual_output = File.read(file)
    (does == 'does') ?
      actual_output.should(match(/#{regex}/)) :
      actual_output.should_not(match(/#{regex}/))
  end
end

Then /gem file "(.*)" and generated file "(.*)" should be the same/ do |gem_file, project_file|
  # Check existence at the very paths that are read below.
  gem_path = File.dirname(__FILE__) + "/../../#{gem_file}"
  project_path = File.join(@active_project_folder, project_file)
  File.exist?(gem_path).should be_true
  File.exist?(project_path).should be_true
  gem_file_contents = File.read(gem_path)
  project_file_contents = File.read(project_path)
  project_file_contents.should == gem_file_contents
end

Then /^(does|does not) invoke generator "(.*)"$/ do |does_invoke, generator|
  actual_output = File.read(@stdout)
  does_invoke == "does" ?
    actual_output.should(match(/dependency\s+#{generator}/)) :
    actual_output.should_not(match(/dependency\s+#{generator}/))
end

Then /help options "(.*)" and "(.*)" are displayed/ do |opt1, opt2|
  actual_output = File.read(@stdout)
  actual_output.should match(/#{opt1}/)
  actual_output.should match(/#{opt2}/)
end

Then /^I should see$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should contain(text)
end

Then /^I should not see$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should_not contain(text)
end

Then /^I should see exactly$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should == text
end

Then /^I should see all (\d+) tests pass/ do |expected_test_count|
  expected = %r{^#{expected_test_count} tests, \d+ assertions, 0 failures, 0 errors}
  actual_output = File.read(@stdout)
  actual_output.should match(expected)
end

Then /^I should see all (\d+) examples pass/ do |expected_test_count|
  expected = %r{^#{expected_test_count} examples?, 0 failures}
  actual_output = File.read(@stdout)
  actual_output.should match(expected)
end

Then /^yaml file "(.*)" contains (\{.*\})/ do |file, yaml|
  in_project_folder do
    # NOTE(review): eval runs Ruby taken from the feature text; acceptable only
    # while feature files are trusted, project-owned input.
    yaml = eval yaml
    YAML.load(File.read(file)).should == yaml
  end
end

Then /^Rakefile can display tasks successfully/ do
  @stdout = File.expand_path(File.join(@tmp_root, "rakefile.out"))
  in_project_folder do
    system "rake -T > #{@stdout} 2>&1"
  end
  actual_output = File.read(@stdout)
  actual_output.should match(/^rake\s+\w+\s+#\s.*/)
end

Then /^task "rake (.*)" is executed successfully/ do |task|
  @stdout.should_not be_nil
  actual_output = File.read(@stdout)
  actual_output.should_not match(/^Don't know how to build task '#{task}'/)
  actual_output.should_not match(/Error/i)
end

Then /^gem spec key "(.*)" contains \/(.*)\// do |key, regex|
  in_project_folder do
    gem_file = Dir["pkg/*.gem"].first
    gem_spec = Gem::Specification.from_yaml(`gem spec #{gem_file}`)
    spec_value = gem_spec.send(key.to_sym)
    spec_value.to_s.should match(/#{regex}/)
  end
end
@@ -0,0 +1,92 @@
1
# Step definitions for the taxamatch_rb feature. Values handed from one step
# to the next are stored in instance variables on the object the steps run
# against, rather than in file-level locals shared by every scenario.

###############
#DAMERAU LEVENSHTEIN MOD
###############

Given /^strings "([^\"]*)" and "([^\"]*)", transposition block size "([^\"]*)", and a maximum allowed distance "([^\"]*)"$/ do |a, b, c, d|
  @str1 = a
  @str2 = b
  @block_size = c.to_i
  @max_distance = d.to_i
end

When /^I run "([^\"]*)" instance function "([^\"]*)"$/ do |class_name, method_name|
  # Resolve the class by constant lookup instead of eval-ing feature text, and
  # call the method the step actually names.
  klass = class_name.split('::').inject(Object) { |scope, const| scope.const_get(const) }
  @distance = klass.new.send(method_name, @str1, @str2, @block_size, @max_distance)
end

Then /^I should receive edit distance "([^\"]*)"$/ do |arg1|
  @distance.should == arg1.to_i
end

#############
#PARSER
#############

# One parser instance, built when this file is loaded and reused by the steps.
parser = Parser.new

Given /^a name "([^\"]*)"$/ do |arg1|
  @sci_name = arg1
end

When /^I run a Parser function parse$/ do
  @result = parser.parse(@sci_name)
end

Then /^I should receive "([^\"]*)" as genus epithet, "([^\"]*)" as species epithet, "([^\"]*)" and "([^\"]*)" as species authors, "([^\"]*)" as a species year$/ do |gen_val, sp_val, au_val1, au_val2, yr_val|
  @result[:genus][:epitheton].should == gen_val
  @result[:species][:epitheton].should == sp_val
  @result[:species][:authors].include?(au_val1).should be_true
  @result[:species][:authors].include?(au_val2).should be_true
  @result[:species][:years].include?(yr_val).should be_true
end

#############
# NORMALIZER
#############

Given /^a string "([^\"]*)"$/ do |arg1|
  @string = arg1
end

When /^I run a Normalizer function normalize$/ do
  @normalized_string = Normalizer.normalize(@string)
end

Then /^I should receive "([^\"]*)" as a normalized form of the string$/ do |arg1|
  @normalized_string.should == arg1
end

######
# PHONETIZER
#####

Given /^a word "([^\"]*)"$/ do |arg1|
  @word = arg1
end

When /^I run a Phonetizer function near_match$/ do
  @phonetized_word = Phonetizer.near_match(@word)
end

Then /^I should receive "([^\"]*)" as a phonetic form of the word$/ do |arg1|
  @phonetized_word.should == arg1
end


When /^I run a Phonetizer function near_match with an option normalize_ending$/ do
  @phonetized_word = Phonetizer.near_match(@word, true)
end

Then /^I should receive "([^\"]*)" as a normalized phonetic form of the word$/ do |arg1|
  @phonetized_word.should == arg1
end
92
+
@@ -0,0 +1,29 @@
1
# Directory and project helpers shared by the step definitions. Every method
# works off instance variables (@tmp_root, @home_path, @active_project_folder,
# @project_name, @lib_path) that the including object is expected to hold.
module CommonHelpers
  # Runs the block with @tmp_root as the working directory.
  def in_tmp_folder(&block)
    FileUtils.cd(@tmp_root, &block)
  end

  # Runs the block inside the active project folder, or inside @tmp_root when
  # no active project folder has been set.
  def in_project_folder(&block)
    FileUtils.cd(@active_project_folder || @tmp_root, &block)
  end

  # Runs the block with @home_path as the working directory.
  def in_home_folder(&block)
    FileUtils.cd(@home_path, &block)
  end

  # Rewrites <project_name>/Rakefile so that its first line pushes @lib_path
  # onto the front of the load path, followed by the previous contents.
  def force_local_lib_override(project_name = @project_name)
    rakefile_path = File.join(project_name, 'Rakefile')
    previous_contents = File.read(rakefile_path)
    File.open(rakefile_path, "w+") do |out|
      out << "$:.unshift('#{@lib_path}')\n" << previous_contents
    end
  end

  # Records project_name and points the active project folder at
  # @tmp_root/<project_name>. Returns project_name.
  def setup_active_project_folder(project_name)
    @active_project_folder = File.join(@tmp_root, project_name)
    @project_name = project_name
  end
end
28
+
29
+ World(CommonHelpers)
@@ -0,0 +1,14 @@
1
+ require File.dirname(__FILE__) + "/../../lib/taxamatch_rb"
2
+
3
+ gem 'cucumber'
4
+ require 'cucumber'
5
+ gem 'rspec'
6
+ require 'spec'
7
+
8
# Before every scenario: wipe and recreate the scratch directory and point
# HOME at a folder inside it. @tmp_root is expanded to an absolute path so it
# keeps meaning the same directory after a helper changes the working
# directory.
Before do
  @tmp_root = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "tmp"))
  @home_path = File.join(@tmp_root, "home")
  FileUtils.rm_rf @tmp_root
  FileUtils.mkdir_p @home_path
  ENV['HOME'] = @home_path
end
@@ -0,0 +1,11 @@
1
# Custom matcher used by the "I should see" steps.
module Matchers
  # Builds a matcher (via simple_matcher) that passes when the value under
  # test responds to #index with a non-nil result for +expected+, i.e. when
  # it contains +expected+. Both failure messages quote the two values with
  # #inspect.
  def contain(expected)
    wanted = expected.inspect
    simple_matcher("contain #{wanted}") do |given, matcher|
      shown = given.inspect
      matcher.failure_message = "expected #{shown} to contain #{wanted}"
      matcher.negative_failure_message = "expected #{shown} not to contain #{wanted}"
      given.index(expected)
    end
  end
end
10
+
11
+ World(Matchers)
@@ -0,0 +1,33 @@
1
+ Feature: Find if two scientific names are lexical variants of each other
2
+
3
+ As a Biodiversity Informatician
4
+ I want to be able to compare scientific names to determine if they are variants of the same name.
5
+ And I want to be able to combine names that are the same into lexical groups, so they appear together in names list
6
+ So I want to implement Tony Rees and Barbara Boehmer taxamatch algorithms http://bit.ly/boWyG
7
+
8
+
9
+ Scenario: find edit distance between two unicode (utf8) strings
10
+ Given strings "Sjostedt" and "Sojstedt", transposition block size "1", and a maximum allowed distance "4"
11
+ When I run "DamerauLevenshteinMod" instance function "distance"
12
+ Then I should receive edit distance "1"
13
+
14
+ Scenario: find parts of a name in unicode
15
+ Given a name "Arthopyrenia hyalospora (Banker) D. Hall 1988 hyalosporis Kutz 1999"
16
+ When I run a Parser function parse
17
+ Then I should receive "Arthopyrenia" as genus epithet, "hyalospora" as species epithet, "Banker" and "D. Hall" as species authors, "1988" as a species year
18
+
19
+ Scenario: normalize a string into ASCII upcase
20
+ Given a string "Choriozopella trägårdhi"
21
+ When I run a Normalizer function normalize
22
+ Then I should receive "CHORIOZOPELLA TRAGARDHI" as a normalized form of the string
23
+
24
+ Scenario: create phonetic version of a word
25
+ Given a word "bifasciata"
26
+ When I run a Phonetizer function near_match
27
+ Then I should receive "BIFASATA" as a phonetic form of the word
28
+
29
+ Scenario: create phonetic version of a species epithet normalizing ending
30
+ Given a word "bifasciatum"
31
+ When I run a Phonetizer function near_match with an option normalize_ending
32
+ Then I should receive "BIFASATA" as a normalized phonetic form of the word
33
+
@@ -0,0 +1,136 @@
1
+ # encoding: UTF-8
2
+ require 'rubygems'
3
+ require 'inline'
4
+ require 'time'
5
+
6
# Edit distance between two strings that, besides insertions, deletions and
# substitutions, also recognises transpositions of adjacent blocks of
# characters. The scoring loop is C code compiled through RubyInline.
class DamerauLevenshteinMod
  # Distance between two UTF-8 strings, compared code point by code point.
  #
  # str1, str2   - the strings to compare.
  # block_size   - largest block length tried when looking for a transposition.
  # max_distance - threshold above which the result is reported as nil.
  #
  # Returns the distance, or nil when it is greater than max_distance.
  def distance(str1, str2, block_size=2, max_distance=10)
    res = distance_utf(str1.unpack("U*"), str2.unpack("U*"), block_size, max_distance)
    (res > max_distance) ? nil : res
  end

  inline do |builder|
    builder.c "
    static VALUE distance_utf(VALUE _s, VALUE _t, long block_size, long max_distance){
      long min, i, i1, j, j1, k, sl, half_sl, tl, half_tl, cost, *d, distance, del, ins, subs, transp, block, current_distance;
      long stop_execution = 0;

      VALUE *sv = RARRAY_PTR(_s);
      VALUE *tv = RARRAY_PTR(_t);

      sl = RARRAY_LEN(_s);
      tl = RARRAY_LEN(_t);

      if (sl == 0) return LONG2NUM(tl);
      if (tl == 0) return LONG2NUM(sl);
      //shortcut for two different single characters
      if (sl == 1 && tl == 1 && sv[0] != tv[0]) return LONG2NUM(1);

      long s[sl];
      long t[tl];

      for (i=0; i < sl; i++) s[i] = NUM2LONG(sv[i]);
      for (i=0; i < tl; i++) t[i] = NUM2LONG(tv[i]);

      //from here on sl and tl are the matrix dimensions: string length + 1
      sl++;
      tl++;

      //one-dimensional representation of a 2-dimensional array len(s)+1 * len(t)+1
      d = malloc((sizeof(long))*(sl)*(tl));
      if (d == NULL) rb_memerror();
      //populate the 'vertical' row (first column of every matrix row)
      for(i = 0; i < tl; i++){
        d[i*sl] = i;
      }

      //fill up array with scores
      for(i = 1; i<sl; i++){
        d[i] = i;
        if (stop_execution == 1) break;
        current_distance = 10000;
        for(j = 1; j<tl; j++){

          cost = 1;
          if(s[i-1] == t[j-1]) cost = 0;

          half_sl = (sl - 1)/2;
          half_tl = (tl - 1)/2;

          block = block_size < half_sl ? block_size : half_sl;
          block = block < half_tl ? block : half_tl;
          //the single-character pass must always run, otherwise min is never
          //assigned (string of length 1, or block_size below 1)
          if (block < 1) block = 1;

          while (block >= 1){
            long swap1 = 1;
            long swap2 = 1;
            i1 = i - (block * 2);
            j1 = j - (block * 2);
            if (i1 < 0 || j1 < 0 || i > tl - 1 || j > sl - 1) {
              //not enough characters behind the cursors, or the probe would
              //run past the end of the other string: no transposition here
              swap1 = 0;
              swap2 = 0;
            } else {
              for (k = i1; k < i1 + block; k++) {
                if (s[k] != t[k + block]){
                  swap1 = 0;
                  break;
                }
              }
              for (k = j1; k < j1 + block; k++) {
                if (t[k] != s[k + block]){
                  swap2 = 0;
                  break;
                }
              }
            }

            del = d[j*sl + i - 1] + 1;
            ins = d[(j-1)*sl + i] + 1;
            min = del;
            if (ins < min) min = ins;
            if (swap1 == 1 && swap2 == 1){
              //i1 >= 0 and j1 >= 0 here, so the matrix index is in range
              transp = d[j1 * sl + i1] + cost + block -1;
              if (transp < min) min = transp;
              block = 0;
            } else if (block == 1) {
              subs = d[(j-1)*sl + i - 1] + cost;
              if (subs < min) min = subs;
            }
            block--;
          }
          d[j*sl+i]=min;
          if (current_distance > d[j*sl+i]) current_distance = d[j*sl+i];
        }
        //every cell of this column is already above the limit: give up early
        if (current_distance > max_distance) {
          stop_execution = 1;
        }
      }
      distance=d[sl * tl - 1];
      if (stop_execution == 1) distance = current_distance;

      free(d);
      return LONG2NUM(distance);
    }
   "
  end
end
111
+
112
# Ad-hoc benchmark, run only when this file is executed directly: times
# 100000 calls through #distance (which unpacks the strings on every call)
# against 100000 calls to #distance_utf on code points unpacked once, then
# prints one sample distance.
if $0 == __FILE__
  matcher = DamerauLevenshteinMod.new
  first_codes = 'Cedarinia scabra Sjöstedt 1921'.unpack('U*')
  second_codes = 'Cedarinia scabra Söjstedt 1921'.unpack('U*')

  started_at = Time.now
  100000.times do
    matcher.distance('Cedarinia scabra Sjöstedt 1921', 'Cedarinia scabra Söjstedt 1921', 1, 10)
  end
  puts "with unpack time: " + (Time.now - started_at).to_s + ' sec'

  started_at = Time.now
  100000.times do
    matcher.distance_utf(first_codes, second_codes, 1, 10)
  end
  puts 'utf time: ' + (Time.now - started_at).to_s + ' sec'

  puts matcher.distance('sub', 'usb', 1, 10)
end
@@ -0,0 +1,47 @@
1
+ # encoding: UTF-8
2
+
3
# Reduces strings containing accented Latin characters to plain ASCII and
# upper case.
module Normalizer
  # [pattern, replacement] pairs applied in order by utf8_to_ascii. Every
  # replacement is plain ASCII, so no pair can re-match the output of another.
  TRANSLITERATIONS = [
    [/[ÀÂÅÃÄÁẤẠ]/, "A"],
    [/[ÉÈÊË]/, "E"],
    [/[ÍÌÎÏ]/, "I"],
    [/[ÓÒÔØÕÖỚỔ]/, "O"],
    [/[ÚÙÛÜ]/, "U"],
    [/[Ý]/, "Y"],
    [/Æ/, "AE"],
    [/[ČÇ]/, "C"],
    [/[ŠŞ]/, "S"],
    [/[Đ]/, "D"],
    [/Ž/, "Z"],
    [/Ñ/, "N"],
    [/Œ/, "OE"],
    # Eszett is a double s, not a look-alike of the letter B.
    [/ß/, "ss"],
    [/Ķ/, "K"],
    [/[áàâåãäăãắảạậầằ]/, "a"],
    [/[éèêëĕěếệểễềẻ]/, "e"],
    [/[íìîïǐĭīĩỉï]/, "i"],
    [/[óòôøõöŏỏỗộơọỡốơồờớổ]/, "o"],
    [/[úùûüůưừựủứụ]/, "u"],
    [/[žź]/, "z"],
    [/[ýÿỹ]/, "y"],
    [/[đ]/, "d"],
    [/æ/, "ae"],
    [/[čćç]/, "c"],
    [/[ñńň]/, "n"],
    [/œ/, "oe"],
    [/[śšş]/, "s"],
    [/ř/, "r"],
    [/ğ/, "g"],
    [/Ř/, "R"]
  ].freeze

  # Returns +string+ transliterated to ASCII and upper-cased.
  def self.normalize(string)
    utf8_to_ascii(string).upcase
  end

  # Like normalize, then drops every character that is not A-Z, '.' or '-'.
  def self.normalize_word(word)
    normalize(word).gsub(/[^A-Z\.\-]/, '')
  end

  # Returns a copy of +string+ with the characters listed in TRANSLITERATIONS
  # replaced by their ASCII spelling; all other characters are left untouched.
  # (The former bare `protected` above this method never applied to it --
  # visibility keywords do not affect `def self.` methods -- so it stays
  # publicly callable, as before.)
  def self.utf8_to_ascii(string)
    TRANSLITERATIONS.inject(string) { |text, (pattern, ascii)| text.gsub(pattern, ascii) }
  end

end