RubyGems - dimus-taxamatch_rb - Versions diffs - 0.1.1 - Mend

dimus-taxamatch_rb 0.1.1

Files changed (23) hide show

data/.document +5 -0
data/.gitignore +6 -0
data/LICENSE +20 -0
data/README.rdoc +7 -0
data/Rakefile +50 -0
data/VERSION +1 -0
data/features/step_definitions/common_steps.rb +163 -0
data/features/step_definitions/taxamatch_rb.rb +92 -0
data/features/support/common.rb +29 -0
data/features/support/env.rb +14 -0
data/features/support/matchers.rb +11 -0
data/features/taxamatch_rb.feature +33 -0
data/lib/taxamatch_rb/damerau_levenshtein_mod.rb +136 -0
data/lib/taxamatch_rb/normalizer.rb +47 -0
data/lib/taxamatch_rb/parser.rb +83 -0
data/lib/taxamatch_rb/phonetizer.rb +74 -0
data/lib/taxamatch_rb.rb +444 -0
data/spec/damerau_levenshtein_mod_test.txt +58 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +24 -0
data/spec/taxamatch_rb_spec.rb +50 -0
data/taxamatch_rb.gemspec +65 -0
metadata +96 -0

data/.document ADDED Viewed

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED Viewed

@@ -0,0 +1,6 @@
+*.sw?
+.DS_Store
+coverage
+rdoc
+pkg
+tmp

data/LICENSE ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2009 Dmitry Mozzherin
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.rdoc ADDED Viewed

@@ -0,0 +1,7 @@
+= taxamatch_rb
+Description goes here.
+== Copyright
+Copyright (c) 2009 Dmitry Mozzherin. See LICENSE for details.

data/Rakefile ADDED Viewed

@@ -0,0 +1,50 @@
require 'rubygems'
require 'rake'

# Gem packaging tasks are defined through Jeweler when it can be loaded;
# otherwise only a hint is printed and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "taxamatch_rb"
    # NOTE(review): the summary is still the generator placeholder.
    gem.summary = %Q{TODO}
    gem.email = "dmozzherin@eol.org"
    gem.homepage = "http://github.com/dimus/taxamatch_rb"
    gem.authors = ["Dmitry Mozzherin"]
    gem.add_dependency('RubyInline')
    gem.add_dependency('dimus-biodiversity')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'

# rake spec: run every *_spec.rb file under spec/.
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# rake rcov: the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rake/rdoctask'

# rake rdoc: the title carries the project version. The project keeps its
# version in a plain-text VERSION file; VERSION.yml (major/minor/patch keys)
# is still honoured as a fallback.
Rake::RDocTask.new do |rdoc|
  version =
    if File.exist?('VERSION')
      File.read('VERSION').strip
    elsif File.exist?('VERSION.yml')
      require 'yaml' # loaded lazily, only this fallback needs it
      config = YAML.load(File.read('VERSION.yml'))
      "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "taxamatch_rb #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.1

data/features/step_definitions/common_steps.rb ADDED Viewed

@@ -0,0 +1,163 @@
# Generic Cucumber step definitions for driving generators, executables and
# rake tasks inside a scratch project folder and inspecting their output.
#
# Steps that run a shell command store the path of the capture file in
# @stdout. Both streams are sent to that file with "> file 2>&1": opening the
# same file twice ("> file 2> file") lets stdout and stderr overwrite each
# other.

Given /^this project is active project folder/ do
  @active_project_folder = File.expand_path(File.dirname(__FILE__) + "/../..")
end

Given /^env variable \$([\w_]+) set to "(.*)"/ do |env_var, value|
  ENV[env_var] = value
end

Given /"(.*)" folder is deleted/ do |folder|
  in_project_folder { FileUtils.rm_rf folder }
end

# Runs a generator in-process; here @stdout is a StringIO whose content is
# afterwards dumped to generator.out under @tmp_root.
When /^I invoke "(.*)" generator with arguments "(.*)"$/ do |generator, arguments|
  @stdout = StringIO.new
  in_project_folder do
    if Object.const_defined?("APP_ROOT")
      APP_ROOT.replace(FileUtils.pwd)
    else
      APP_ROOT = FileUtils.pwd
    end
    run_generator(generator, arguments.split(' '), SOURCES, :stdout => @stdout)
  end
  File.open(File.join(@tmp_root, "generator.out"), "w") do |f|
    @stdout.rewind
    f << @stdout.read
  end
end

When /^I run executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  in_project_folder do
    system "#{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I run project executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  in_project_folder do
    system "ruby #{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I run local executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
  executable = File.expand_path(File.join(File.dirname(__FILE__), "/../../bin", executable))
  in_project_folder do
    system "ruby #{executable} #{arguments} > #{@stdout} 2>&1"
  end
end

When /^I invoke task "rake (.*)"/ do |task|
  @stdout = File.expand_path(File.join(@tmp_root, "tests.out"))
  in_project_folder do
    system "rake #{task} --trace > #{@stdout} 2>&1"
  end
end

# File.exist? is used throughout: File.exists? is no longer available on
# current Ruby versions.
Then /^folder "(.*)" (is|is not) created/ do |folder, is|
  in_project_folder do
    File.exist?(folder).should(is == 'is' ? be_true : be_false)
  end
end

Then /^file "(.*)" (is|is not) created/ do |file, is|
  in_project_folder do
    File.exist?(file).should(is == 'is' ? be_true : be_false)
  end
end

Then /^file with name matching "(.*)" is created/ do |pattern|
  in_project_folder do
    Dir[pattern].should_not be_empty
  end
end

Then /^file "(.*)" contents (does|does not) match \/(.*)\// do |file, does, regex|
  in_project_folder do
    actual_output = File.read(file)
    if does == 'does'
      actual_output.should(match(/#{regex}/))
    else
      actual_output.should_not(match(/#{regex}/))
    end
  end
end

Then /gem file "(.*)" and generated file "(.*)" should be the same/ do |gem_file, project_file|
  File.exist?(gem_file).should be_true
  File.exist?(project_file).should be_true
  gem_file_contents = File.read(File.dirname(__FILE__) + "/../../#{gem_file}")
  project_file_contents = File.read(File.join(@active_project_folder, project_file))
  project_file_contents.should == gem_file_contents
end

Then /^(does|does not) invoke generator "(.*)"$/ do |does_invoke, generator|
  actual_output = File.read(@stdout)
  if does_invoke == "does"
    actual_output.should(match(/dependency\s+#{generator}/))
  else
    actual_output.should_not(match(/dependency\s+#{generator}/))
  end
end

Then /help options "(.*)" and "(.*)" are displayed/ do |opt1, opt2|
  actual_output = File.read(@stdout)
  actual_output.should match(/#{opt1}/)
  actual_output.should match(/#{opt2}/)
end

Then /^I should see$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should contain(text)
end

Then /^I should not see$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should_not contain(text)
end

Then /^I should see exactly$/ do |text|
  actual_output = File.read(@stdout)
  actual_output.should == text
end

Then /^I should see all (\d+) tests pass/ do |expected_test_count|
  expected = %r{^#{expected_test_count} tests, \d+ assertions, 0 failures, 0 errors}
  actual_output = File.read(@stdout)
  actual_output.should match(expected)
end

Then /^I should see all (\d+) examples pass/ do |expected_test_count|
  expected = %r{^#{expected_test_count} examples?, 0 failures}
  actual_output = File.read(@stdout)
  actual_output.should match(expected)
end

Then /^yaml file "(.*)" contains (\{.*\})/ do |file, yaml|
  in_project_folder do
    # NOTE(review): eval executes text taken from the feature file. That is
    # tolerable only while feature files are trusted project sources.
    yaml = eval yaml
    YAML.load(File.read(file)).should == yaml
  end
end

Then /^Rakefile can display tasks successfully/ do
  @stdout = File.expand_path(File.join(@tmp_root, "rakefile.out"))
  in_project_folder do
    system "rake -T > #{@stdout} 2>&1"
  end
  actual_output = File.read(@stdout)
  actual_output.should match(/^rake\s+\w+\s+#\s.*/)
end

Then /^task "rake (.*)" is executed successfully/ do |task|
  @stdout.should_not be_nil
  actual_output = File.read(@stdout)
  actual_output.should_not match(/^Don't know how to build task '#{task}'/)
  actual_output.should_not match(/Error/i)
end

Then /^gem spec key "(.*)" contains \/(.*)\// do |key, regex|
  in_project_folder do
    gem_file = Dir["pkg/*.gem"].first
    gem_spec = Gem::Specification.from_yaml(`gem spec #{gem_file}`)
    spec_value = gem_spec.send(key.to_sym)
    spec_value.to_s.should match(/#{regex}/)
  end
end

data/features/step_definitions/taxamatch_rb.rb ADDED Viewed

@@ -0,0 +1,92 @@
# Step definitions for features/taxamatch_rb.feature.
#
# Values passed between steps live in instance variables of the Cucumber
# World, so nothing set by one scenario is visible in the next one.

###############
#DAMERAU LEVENSHTEIN MOD
###############
Given /^strings "([^\"]*)" and "([^\"]*)", transposition block size "([^\"]*)", and a maximum allowed distance "([^\"]*)"$/ do |a,b,c,d|
  @str1 = a
  @str2 = b
  @block_size = c.to_i
  @max_distance = d.to_i
end

# The class is looked up by name and the named instance function is called,
# instead of evaluating step text as Ruby code.
When /^I run "([^\"]*)" instance function "([^\"]*)"$/ do |arg1, arg2|
  dl = Object.const_get(arg1).new
  @distance = dl.public_send(arg2, @str1, @str2, @block_size, @max_distance)
end

Then /^I should receive edit distance "([^\"]*)"$/ do |arg1|
  @distance.should == arg1.to_i
end

#############
#PARSER
#############
Given /^a name "([^\"]*)"$/ do |arg1|
  @sci_name = arg1
end

When /^I run a Parser function parse$/ do
  @parse_result = Parser.new.parse(@sci_name)
end

Then /^I should receive "([^\"]*)" as genus epithet, "([^\"]*)" as species epithet, "([^\"]*)" and "([^\"]*)" as species authors, "([^\"]*)" as a species year$/ do |gen_val, sp_val, au_val1, au_val2, yr_val|
  @parse_result[:genus][:epitheton].should == gen_val
  @parse_result[:species][:epitheton].should == sp_val
  @parse_result[:species][:authors].include?(au_val1).should be_true
  @parse_result[:species][:authors].include?(au_val2).should be_true
  @parse_result[:species][:years].include?(yr_val).should be_true
end

#############
# NORMALIZER
#############
Given /^a string "([^\"]*)"$/ do |arg1|
  @string = arg1
end

When /^I run a Normalizer function normalize$/ do
  @normalized_string = Normalizer.normalize(@string)
end

Then /^I should receive "([^\"]*)" as a normalized form of the string$/ do |arg1|
  @normalized_string.should == arg1
end

######
# PHONETIZER
#####
Given /^a word "([^\"]*)"$/ do |arg1|
  @word = arg1
end

When /^I run a Phonetizer function near_match$/ do
  @phonetized_word = Phonetizer.near_match(@word)
end

Then /^I should receive "([^\"]*)" as a phonetic form of the word$/ do |arg1|
  @phonetized_word.should == arg1
end

When /^I run a Phonetizer function near_match with an option normalize_ending$/ do
  @phonetized_word = Phonetizer.near_match(@word,true)
end

Then /^I should receive "([^\"]*)" as a normalized phonetic form of the word$/ do |arg1|
  @phonetized_word.should == arg1
end

data/features/support/common.rb ADDED Viewed

@@ -0,0 +1,29 @@
# FileUtils is used by every helper below, so it is required here rather than
# relying on another file having loaded it first.
require 'fileutils'

# Helpers mixed into the Cucumber World for running code inside the scratch
# folders of a scenario. They read @tmp_root, @home_path, @active_project_folder,
# @project_name and @lib_path, which are set elsewhere (hooks or other steps).
module CommonHelpers
  # Runs the block with @tmp_root as the working directory.
  def in_tmp_folder(&block)
    FileUtils.chdir(@tmp_root, &block)
  end

  # Runs the block inside the active project folder, or inside @tmp_root when
  # no project folder has been selected.
  def in_project_folder(&block)
    FileUtils.chdir(@active_project_folder || @tmp_root, &block)
  end

  # Runs the block with @home_path as the working directory.
  def in_home_folder(&block)
    FileUtils.chdir(@home_path, &block)
  end

  # Rewrites <project_name>/Rakefile so that its first line puts @lib_path at
  # the front of the load path; the previous content follows unchanged.
  def force_local_lib_override(project_name = @project_name)
    rakefile_path = File.join(project_name, 'Rakefile')
    previous_content = File.read(rakefile_path)
    File.open(rakefile_path, "w+") do |f|
      f << "$:.unshift('#{@lib_path}')\n"
      f << previous_content
    end
  end

  # Selects <@tmp_root>/<project_name> as the active project folder and
  # remembers the project name.
  def setup_active_project_folder(project_name)
    @active_project_folder = File.join(@tmp_root, project_name)
    @project_name = project_name
  end
end

World(CommonHelpers)

data/features/support/env.rb ADDED Viewed

@@ -0,0 +1,14 @@
# Cucumber environment: loads the library under test and gives every scenario
# a fresh scratch directory with its own HOME.
require 'fileutils' # used by the Before hook below
require File.expand_path("../../lib/taxamatch_rb", File.dirname(__FILE__))
gem 'cucumber'
require 'cucumber'
gem 'rspec'
require 'spec'

Before do
  # Absolute paths, so they stay valid after helpers change the working
  # directory.
  @tmp_root = File.expand_path("../../tmp", File.dirname(__FILE__))
  @home_path = File.join(@tmp_root, "home")
  FileUtils.rm_rf   @tmp_root
  FileUtils.mkdir_p @home_path
  # NOTE(review): HOME is redirected to the scratch folder and is not restored
  # after the scenario.
  ENV['HOME'] = @home_path
end

data/features/support/matchers.rb ADDED Viewed

@@ -0,0 +1,11 @@
# Custom matcher mixed into the Cucumber World.
module Matchers
  # Matcher for "actual.should contain(expected)": it is satisfied when
  # actual.index(expected) finds a position.
  def contain(expected)
    wanted = expected.inspect
    simple_matcher("contain #{wanted}") do |actual, current_matcher|
      shown = actual.inspect
      current_matcher.failure_message = "expected #{shown} to contain #{wanted}"
      current_matcher.negative_failure_message = "expected #{shown} not to contain #{wanted}"
      actual.index(expected)
    end
  end
end

World(Matchers)

data/features/taxamatch_rb.feature ADDED Viewed

@@ -0,0 +1,33 @@
+Feature: Find if two scientific names are lexical variants of each other
+  As a Biodiversity Informatician
+  I want to be able to compare scientific names to determine if they are variants of the same name.
+  And I want to be able to combine names that are the same into lexical groups, so they appear together in names list
+  So I want to implement Tony Rees and Barbara Boehmer taxamatch algorithms http://bit.ly/boWyG
+  Scenario: find edit distance between two unicode (utf8) strings
+    Given strings "Sjostedt" and "Sojstedt", transposition block size "1", and a maximum allowed distance "4"
+    When I run "DamerauLevenshteinMod" instance function "distance"
+    Then I should receive edit distance "1"
+  Scenario: find parts of a name in unicode
+    Given a name "Arthopyrenia hyalospora (Banker) D. Hall 1988 hyalosporis Kutz 1999"
+    When I run a Parser function parse
+    Then I should receive "Arthopyrenia" as genus epithet, "hyalospora" as species epithet, "Banker" and "D. Hall" as species authors, "1988" as a species year
+  Scenario: normalize a string into ASCII upcase
+    Given a string "Choriozopella trägårdhi"
+    When I run a Normalizer function normalize
+    Then I should receive "CHORIOZOPELLA TRAGARDHI" as a normalized form of the string
+  Scenario: create phonetic version of a word
+    Given a word "bifasciata"
+    When I run a Phonetizer function near_match
+    Then I should receive "BIFASATA" as a phonetic form of the word
+  Scenario: create phonetic version of a species epithet normalizing ending
+    Given a word "bifasciatum"
+    When I run a Phonetizer function near_match with an option normalize_ending
+    Then I should receive "BIFASATA" as a normalized phonetic form of the word

data/lib/taxamatch_rb/damerau_levenshtein_mod.rb ADDED Viewed

@@ -0,0 +1,136 @@
# encoding: UTF-8
require 'rubygems'
require 'inline'
require 'time'

# Edit distance with block transpositions, implemented in C through RubyInline.
# Besides insertion, deletion and substitution, two adjacent blocks of up to
# block_size characters that swapped places are scored as one transposition.
class DamerauLevenshteinMod
  # Edit distance between two UTF-8 strings.
  #
  # str1, str2   - strings to compare; they are unpacked into code points.
  # block_size   - largest block length tried for a transposition.
  # max_distance - largest distance of interest.
  #
  # Returns the distance, or nil when it is greater than max_distance.
  def distance(str1, str2, block_size=2, max_distance=10)
    res = distance_utf(str1.unpack("U*"), str2.unpack("U*"), block_size, max_distance)
    (res > max_distance) ? nil : res
  end

  # distance_utf(s, t, block_size, max_distance) takes two arrays of integer
  # code points. Once every cell of a column is above max_distance the
  # calculation stops and the smallest value of that column is returned, so a
  # result above max_distance only means "too far".
  #
  # The C string below must not contain double quotes or backslashes.
  inline do |builder|
    builder.c "
    static VALUE distance_utf(VALUE _s, VALUE _t, long block_size, long max_distance){
      long min, i, i1, j, j1, k, sl, tl, cost, *d, distance, del, ins, subs, transp, block, max_block, swapped, current_distance;
      long stop_execution = 0;
      VALUE *sv = RARRAY_PTR(_s);
      VALUE *tv = RARRAY_PTR(_t);

      sl = RARRAY_LEN(_s);
      tl = RARRAY_LEN(_t);

      if (sl == 0) return LONG2NUM(tl);
      if (tl == 0) return LONG2NUM(sl);

      long s[sl];
      long t[tl];

      for (i=0; i < sl; i++) s[i] = NUM2LONG(sv[i]);
      for (i=0; i < tl; i++) t[i] = NUM2LONG(tv[i]);

      sl++;
      tl++;

      //a block can not be longer than half of the shorter string
      max_block = block_size;
      if (max_block > (sl - 1)/2) max_block = (sl - 1)/2;
      if (max_block > (tl - 1)/2) max_block = (tl - 1)/2;

      //one-dimensional representation of a two-dimensional array of (len(s)+1) * (len(t)+1) cells
      d = malloc((sizeof(long))*(sl)*(tl));
      if (d == NULL) rb_memerror();

      //first cell of every row
      for(j = 0; j < tl; j++){
        d[j*sl] = j;
      }

      //fill up array with scores
      for(i = 1; i<sl; i++){
        d[i] = i;
        if (stop_execution == 1) break;
        current_distance = 10000;
        for(j = 1; j<tl; j++){

          cost = 1;
          if(s[i-1] == t[j-1]) cost = 0;

          //deletion and insertion are always candidates, so min is always defined
          del = d[j*sl + i - 1] + 1;
          ins = d[(j-1)*sl + i] + 1;
          min = del;
          if (ins < min) min = ins;

          //try transpositions from the largest block down to a single character
          swapped = 0;
          for (block = max_block; block >= 1 && swapped == 0; block--){
            i1 = i - (block * 2);
            j1 = j - (block * 2);

            //skip blocks whose comparison window would leave either string
            if (i1 < 0 || j1 < 0 || i >= tl || j >= sl) continue;

            //NOTE(review): both comparisons use one index base for s and t, as the
            //original did, so only transpositions at equal offsets are seen - confirm
            swapped = 1;
            for (k = 0; k < block; k++){
              if (s[i1 + k] != t[i1 + k + block] || t[j1 + k] != s[j1 + k + block]){
                swapped = 0;
                break;
              }
            }

            if (swapped == 1){
              transp = d[j1 * sl + i1] + cost + block - 1;
              if (transp < min) min = transp;
            }
          }

          //substitution is a candidate only when no transposition was found
          if (swapped == 0){
            subs = d[(j-1)*sl + i - 1] + cost;
            if (subs < min) min = subs;
          }

          d[j*sl+i]=min;
          if (current_distance > d[j*sl+i]) current_distance = d[j*sl+i];
        }
        if (current_distance > max_distance) {
          stop_execution = 1;
        }
      }
      distance=d[sl * tl - 1];
      if (stop_execution == 1) distance = current_distance;

      free(d);
      return LONG2NUM(distance);
    }
   "
  end
end

# Small benchmark, run only when this file is executed directly.
if __FILE__ == $0
  a=DamerauLevenshteinMod.new
  s = 'Cedarinia scabra Sjöstedt 1921'.unpack('U*')
  t = 'Cedarinia scabra Söjstedt 1921'.unpack('U*')

  start = Time.now
  (1..100000).each do
   a.distance('Cedarinia scabra Sjöstedt 1921', 'Cedarinia scabra Söjstedt 1921',1,10)
  end
  puts "with unpack time: " + (Time.now - start).to_s + ' sec'

  start = Time.now
  (1..100000).each do
   a.distance_utf(s, t, 1, 10)
  end
  puts 'utf time: ' + (Time.now - start).to_s + ' sec'

  puts a.distance('sub', 'usb', 1, 10);
end

data/lib/taxamatch_rb/normalizer.rb ADDED Viewed

@@ -0,0 +1,47 @@
# encoding: UTF-8

# Reduces strings to an upper-case ASCII form by replacing the accented
# characters listed below with ASCII letters.
module Normalizer
  # ASCII replacement => every character that is replaced by it.
  CHARACTER_GROUPS = {
    "A" => "ÀÂÅÃÄÁẤẠ",
    "E" => "ÉÈÊË",
    "I" => "ÍÌÎÏ",
    "O" => "ÓÒÔØÕÖỚỔ",
    "U" => "ÚÙÛÜ",
    "Y" => "Ý",
    "C" => "ČÇ",
    "S" => "ŠŞ",
    "D" => "Đ",
    "a" => "áàâåãäăãắảạậầằ",
    "e" => "éèêëĕěếệểễềẻ",
    "i" => "íìîïǐĭīĩỉï",
    "o" => "óòôøõöŏỏỗộơọỡốơồờớổ",
    "u" => "úùûüůưừựủứụ",
    "z" => "žź",
    "y" => "ýÿỹ",
    "d" => "đ",
    "c" => "čćç",
    "n" => "ñńň",
    "s" => "śšş"
  }.freeze

  # Text that is replaced as a whole => its ASCII replacement.
  # NOTE(review): "ß" becomes "B" rather than "ss". Kept as it was; confirm
  # that this is intended.
  WHOLE_REPLACEMENTS = {
    "Æ" => "AE",
    "Ž" => "Z",
    "Ñ" => "N",
    "Œ" => "OE",
    "ß" => "B",
    "Ķ" => "K",
    "æ" => "ae",
    "œ" => "oe",
    "ř" => "r",
    "ğ" => "g",
    "Ř" => "R"
  }.freeze

  # Lookup table used by utf8_to_ascii: matched text => ASCII replacement.
  ASCII_FOR = CHARACTER_GROUPS.each_with_object(WHOLE_REPLACEMENTS.dup) do |(ascii, characters), table|
    characters.each_char { |character| table[character] = ascii }
  end.freeze

  # Matches any text that has an entry in ASCII_FOR.
  NON_ASCII = Regexp.union(ASCII_FOR.keys)

  # Returns string with known accented characters replaced and everything
  # upcased.
  def self.normalize(string)
    utf8_to_ascii(string).upcase
  end

  # Like normalize, but also drops every character that is not A-Z, "." or "-".
  def self.normalize_word(word)
    self.normalize(word).gsub(/[^A-Z\.\-]/, '')
  end

  # Returns a copy of string in which every character known to ASCII_FOR is
  # replaced by its ASCII counterpart, in a single pass; other characters are
  # left alone.
  #
  # The method stays publicly callable, as before: the "protected" keyword
  # that used to precede it never applied to a method defined on the module
  # itself.
  def self.utf8_to_ascii(string)
    string.gsub(NON_ASCII, ASCII_FOR)
  end
end