koelner_phonetic_encoder 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gem source and dependencies needed to work on this gem.
# The explicit HTTPS URL replaces the deprecated `:rubygems` symbol source.
source 'https://rubygems.org'

# RSpec is only required for running the specs.
group :test do
  gem 'rspec'
end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Michael Kohl, Anton Bangratz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.mkd ADDED
@@ -0,0 +1,21 @@
1
+ # Koelner Phonetic Encoder
2
+
3
+ by Michael Kohl <michi@tupalo.com>, Anton Bangratz <anton@tupalo.com>
4
+
5
+ This is a Ruby implementation of the [Kölner Phonetik (Kölner Verfahren)](http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik) phonetic algorithm, which maps a string to a phonetic code. This helps when comparing similar-sounding German words.
6
+
7
+ Requiring the library adds the __phonetic_code__ method to Ruby's core String class.
8
+
9
+ ## Usage
10
+
11
+ require 'koelner_phonetic_encoder'
12
+
13
+ "Wikipedia".phonetic_code # => '3412'
14
+
15
+ ## LICENSE
16
+
17
+ see LICENSE
18
+
19
+ ## HOMEPAGE
20
+
21
+ [self](http://github.com/tupalo/koelner_phonetic_encoder)
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+ require 'koelner_phonetic_encoder/koelner_phonetic_encoder'
3
+ require 'koelner_phonetic_encoder/version'
4
+ require 'koelner_phonetic_encoder/string'
@@ -0,0 +1,69 @@
1
+ #encoding: utf-8
2
# Implementation of the Kölner Phonetik (Cologne phonetics) algorithm.
#
# The module serves two purposes:
# * mixed into a string-like class it provides #phonetic_code;
# * its module-level methods expose the three steps of the algorithm
#   (encode, collapse repeated digits, strip zeroes).
module KoelnerPhoneticEncoder
  # Characters that are spelled out before encoding. Upper-case umlauts are
  # listed explicitly so the result does not depend on String#downcase being
  # Unicode-aware.
  TRANSLITERATIONS = {
    'ß' => 'ss',
    'ä' => 'ae', 'Ä' => 'ae',
    'ö' => 'oe', 'Ö' => 'oe',
    'ü' => 'ue', 'Ü' => 'ue'
  }.freeze

  # Letters after which a C at the very start of the string is coded 4.
  HARD_C_AT_ONSET = %w[a h k l o q r u x].freeze

  # Letters after which a C inside the string is coded 4.
  HARD_C_INSIDE = %w[a h k o q u x].freeze

  # Computes the phonetic code of the receiver.
  #
  # @return [String] digits of the code; zeroes are kept only in first position
  def phonetic_code
    raw = KoelnerPhoneticEncoder.encode_string(to_str)
    collapsed = KoelnerPhoneticEncoder.reduce_multiples(raw)
    KoelnerPhoneticEncoder.remove_zeroes(collapsed)
  end

  # Translates every letter of +string+ into its raw code digit(s).
  #
  # Umlauts and sharp s are spelled out first (two letters each). Letters
  # without a code (H, a C directly after S/Z, and anything that is not a
  # letter of the alphabet) contribute nothing.
  #
  # @param string [String] text to encode
  # @return [String] raw code, not yet collapsed or stripped of zeroes
  def self.encode_string(string)
    letters = transliterate(string).split('')
    codes = []
    letters.each_with_index do |letter, index|
      # Index 0 has no predecessor; letters[-1] would wrap to the last letter.
      previous = index.zero? ? nil : letters[index - 1]
      codes << code_for(letter, previous, letters[index + 1])
    end
    codes.join
  end

  # Collapses every run of identical digits to a single digit.
  #
  # @param code [String] raw code
  # @return [String] code without consecutive duplicates
  def self.reduce_multiples(code)
    # Runs of any length are collapsed, not just pairs ("555" => "5").
    code.squeeze('0-9')
  end

  # Removes all zeroes except one in the first position.
  #
  # @param code [String] collapsed code
  # @return [String] final code; an empty input is returned unchanged
  def self.remove_zeroes(code)
    return code if code.empty?

    code[0, 1] + code[1..-1].delete('0')
  end

  # Spells out umlauts and sharp s, then lower-cases the result.
  def self.transliterate(string)
    string.gsub(/[ßäöüÄÖÜ]/) { |char| TRANSLITERATIONS[char] }.downcase
  end
  private_class_method :transliterate

  # Returns the code for one letter given its neighbours (nil at the string
  # boundaries).
  def self.code_for(letter, previous, following)
    case letter
    when 'a', 'e', 'i', 'o', 'u', 'j', 'y' then '0'
    when 'b'                               then '1'
    when 'p'                               then following == 'h' ? '3' : '1'
    when 'd', 't'                          then %w[c s z].include?(following) ? '8' : '2'
    when 'f', 'v', 'w'                     then '3'
    when 'g', 'k', 'q'                     then '4'
    when 'c'                               then code_for_c(previous, following)
    when 'x'                               then %w[c k q].include?(previous) ? '8' : '48'
    when 'l'                               then '5'
    when 'm', 'n'                          then '6'
    when 'r'                               then '7'
    when 's', 'z'                          then '8'
    else ''
    end
  end
  private_class_method :code_for

  # Returns the code for the letter C, which depends on both neighbours.
  def self.code_for_c(previous, following)
    # After S/Z the C belongs to the 8 already emitted for that letter.
    # Emitting nothing is equivalent once repeated digits are collapsed and
    # keeps the raw code stable for S-C-H clusters.
    return '' if %w[s z].include?(previous)

    # Only the first letter of the string counts as onset.
    hard_followers = previous.nil? ? HARD_C_AT_ONSET : HARD_C_INSIDE
    hard_followers.include?(following) ? '4' : '8'
  end
  private_class_method :code_for_c
end
@@ -0,0 +1,3 @@
1
# Mix the encoder into String so that every string responds to #phonetic_code.
String.send(:include, KoelnerPhoneticEncoder)
@@ -0,0 +1,3 @@
1
module KoelnerPhoneticEncoder
  # Version of the gem; kept in sync with the version in the gem specification.
  VERSION = "1.0.3".freeze
end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
describe KoelnerPhoneticEncoder do
  # Compound name shared by the examples that check the individual steps.
  let(:compound_name) { "Müller-Lüdenscheidt" }

  it "should enhance string with the 'phonetic code' method" do
    "MyString".should respond_to(:phonetic_code)
  end

  it "should return the correct values for the known strings" do
    known_codes = [
      ["Wikipedia", '3412'],
      ["Breschnew", '17863'],
      ["Müller-Lüdenscheidt", '65752682']
    ]
    known_codes.each do |word, expected|
      word.phonetic_code.should == expected
    end
  end

  it "should encode a string to a raw code string per letter (2 per umlaut)" do
    raw_code = KoelnerPhoneticEncoder.encode_string(compound_name)
    raw_code.should == '600550750020680022'
    raw_code.size.should == 18
  end

  it "should remove all duplicated codes from a code string" do
    raw_code = KoelnerPhoneticEncoder.encode_string(compound_name)
    KoelnerPhoneticEncoder.reduce_multiples(raw_code).should == '6050750206802'
  end

  it "should remove all 0 codes except at the beginning" do
    # Same digits with and without a leading zero: only the leading one stays.
    [
      ['6050750206802', '65752682'],
      ['06050750206802', '065752682']
    ].each do |collapsed, expected|
      KoelnerPhoneticEncoder.remove_zeroes(collapsed).should == expected
    end
  end
end
@@ -0,0 +1 @@
1
+ require './lib/koelner_phonetic_encoder'
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: koelner_phonetic_encoder
3
+ version: !ruby/object:Gem::Version
4
+ hash: 17
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 3
10
+ version: 1.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Michael Kohl
14
+ - Anton Bangratz
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-09-23 00:00:00 +02:00
20
+ default_executable:
21
+ dependencies: []
22
+
23
+ description:
24
+ email:
25
+ - michi@tupalo.com
26
+ - tony@tupalo.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - lib/koelner_phonetic_encoder.rb
35
+ - lib/koelner_phonetic_encoder/koelner_phonetic_encoder.rb
36
+ - lib/koelner_phonetic_encoder/string.rb
37
+ - lib/koelner_phonetic_encoder/version.rb
38
+ - spec/koelner_phonetic_encoder_spec.rb
39
+ - spec/spec_helper.rb
40
+ - README.mkd
41
+ - LICENSE
42
+ - Gemfile
43
+ has_rdoc: false
44
+ homepage: https://github.com/tupalo/koelner_phonetic_encoder
45
+ licenses: []
46
+
47
+ post_install_message:
48
+ rdoc_options: []
49
+
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ hash: 3
67
+ segments:
68
+ - 0
69
+ version: "0"
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.7
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: "An implementation of the K\xC3\xB6lner Phonetik phonetic algorithm"
77
+ test_files: []
78
+