Text 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text.rb +5 -0
- data/lib/text/double_metaphone.rb +356 -0
- data/lib/text/figlet.rb +17 -0
- data/lib/text/figlet/font.rb +117 -0
- data/lib/text/figlet/smusher.rb +64 -0
- data/lib/text/figlet/typesetter.rb +68 -0
- data/lib/text/levenshtein.rb +65 -0
- data/lib/text/metaphone.rb +97 -0
- data/lib/text/soundex.rb +61 -0
- data/rakefile.rb +46 -0
- data/test/data/big.flf +2204 -0
- data/test/data/big.txt +8 -0
- data/test/data/chunky.flf +512 -0
- data/test/data/chunky.txt +5 -0
- data/test/data/double_metaphone.csv +1218 -0
- data/test/data/metaphone.txt +51 -0
- data/test/data/metaphone_buggy.txt +52 -0
- data/test/double_metaphone_test.rb +16 -0
- data/test/figlet_test.rb +17 -0
- data/test/levenshtein_test.rb +80 -0
- data/test/metaphone_test.rb +39 -0
- data/test/preamble.rb +10 -0
- data/test/soundex_test.rb +27 -0
- metadata +68 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
|
3
|
+
# with surprising results changed to 'correct' ones (according to my interpretation
|
4
|
+
# of the algorithm description), and some more results from around the web:
|
5
|
+
#
|
6
|
+
ANASTHA: ANS0
|
7
|
+
DAVIS-CARTER: TFSKRTR
|
8
|
+
ESCARMANT: ESKRMNT
|
9
|
+
MCCALL: MKL
|
10
|
+
MCCROREY: MKRR
|
11
|
+
MERSEAL: MRSL
|
12
|
+
PIEURISSAINT: PRSNT
|
13
|
+
ROTMAN: RTMN
|
14
|
+
SCHEVEL: SXFL
|
15
|
+
SCHROM: SXRM
|
16
|
+
SEAL: SL
|
17
|
+
SPARR: SPR
|
18
|
+
STARLEPER: STRLPR
|
19
|
+
THRASH: 0RX
|
20
|
+
LOGGING: LKNK
|
21
|
+
LOGIC: LJK
|
22
|
+
JUDGES: JJS
|
23
|
+
SHOOS: XS
|
24
|
+
SHOES: XS
|
25
|
+
CHUTE: XT
|
26
|
+
SCHUSS: SXS
|
27
|
+
OTTO: OT
|
28
|
+
ERIC: ERK
|
29
|
+
DAVE: TF
|
30
|
+
CATHERINE: K0RN
|
31
|
+
KATHERINE: K0RN
|
32
|
+
AUBREY: ABR
|
33
|
+
BRYAN: BRYN
|
34
|
+
BRYCE: BRS
|
35
|
+
STEVEN: STFN
|
36
|
+
RICHARD: RXRT
|
37
|
+
HEIDI: HT
|
38
|
+
AUTO: AT
|
39
|
+
MAURICE: MRS
|
40
|
+
RANDY: RNT
|
41
|
+
CAMBRILLO: KMBRL
|
42
|
+
BRIAN: BRN
|
43
|
+
RAY: R
|
44
|
+
GEOFF: JF
|
45
|
+
BOB: BB
|
46
|
+
AHA: AH
|
47
|
+
AAH: A
|
48
|
+
PAUL: PL
|
49
|
+
BATTLEY: BTL
|
50
|
+
WROTE: RT
|
51
|
+
THIS: 0S
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#
|
2
|
+
# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
|
3
|
+
# this mimics the behaviour of Lawrence Philips's BASIC implementation,
|
4
|
+
# which appears to contain bugs when compared to his description of the
|
5
|
+
# algorithm.
|
6
|
+
#
|
7
|
+
ANASTHA: ANS0
|
8
|
+
DAVIS-CARTER: TFSKRTR
|
9
|
+
ESCARMANT: ESKRMNT
|
10
|
+
MCCALL: MKKL
|
11
|
+
MCCROREY: MKKRR
|
12
|
+
MERSEAL: MRSL
|
13
|
+
PIEURISSAINT: PRSNT
|
14
|
+
ROTMAN: RTMN
|
15
|
+
SCHEVEL: SXFL
|
16
|
+
SCHROM: SXRM
|
17
|
+
SEAL: SL
|
18
|
+
SPARR: SPR
|
19
|
+
STARLEPER: STRLPR
|
20
|
+
THRASH: 0RX
|
21
|
+
LOGGING: LKNK
|
22
|
+
LOGIC: LJK
|
23
|
+
JUDGES: JJS
|
24
|
+
SHOOS: XS
|
25
|
+
SHOES: XS
|
26
|
+
CHUTE: XT
|
27
|
+
SCHUSS: SXS
|
28
|
+
OTTO: OT
|
29
|
+
ERIC: ERK
|
30
|
+
DAVE: TF
|
31
|
+
CATHERINE: K0RN
|
32
|
+
KATHERINE: K0RN
|
33
|
+
AUBREY: ABR
|
34
|
+
BRYAN: BRYN
|
35
|
+
BRYCE: BRS
|
36
|
+
STEVEN: STFN
|
37
|
+
RICHARD: RXRT
|
38
|
+
HEIDI: HT
|
39
|
+
AUTO: AT
|
40
|
+
MAURICE: MRS
|
41
|
+
RANDY: RNT
|
42
|
+
CAMBRILLO: KMRL
|
43
|
+
BRIAN: BRN
|
44
|
+
RAY: R
|
45
|
+
GEOFF: JF
|
46
|
+
BOB: BB
|
47
|
+
AHA: AH
|
48
|
+
AAH: A
|
49
|
+
PAUL: PL
|
50
|
+
BATTLEY: BTL
|
51
|
+
WROTE: RT
|
52
|
+
THIS: 0S
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'rubygems'
|
3
|
+
require 'fastercsv'
|
4
|
+
|
5
|
+
# Data-driven check of Text::Metaphone.double_metaphone against the rows of
# test/data/double_metaphone.csv.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # Reads every CSV row as [input, expected primary, expected secondary] and
  # compares both codes with what double_metaphone returns for the input.
  #
  # When double_metaphone returns nil as its secondary code, the primary code
  # is compared against the expected secondary column instead.
  #
  # NOTE(review): File.rel is not core Ruby; presumably it is defined by the
  # required 'preamble' file and resolves paths relative to the test
  # directory -- confirm there.
  def test_cases
    FasterCSV.read(File.rel('data', 'double_metaphone.csv'), :col_sep => ', ').each do |row|
      input = row[0]
      primary, secondary = Text::Metaphone.double_metaphone(input)

      # The message names the offending word; without it a failure in this
      # long table cannot be traced back to a row.
      assert_equal row[1], primary, "primary code for #{input.inspect}"
      assert_equal row[2], (secondary.nil? ? primary : secondary), "secondary code for #{input.inspect}"
    end
  end

end
|
data/test/figlet_test.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Renders short strings with Text::Figlet and compares the output with
# reference renderings stored under test/data.
class FigletTest < Test::Unit::TestCase

  # 'Hello World' typeset with big.flf must equal the contents of big.txt.
  def test_hello_world
    big_font    = Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    typesetter  = Text::Figlet::Typesetter.new(big_font)
    reference   = File.read(File.rel('data', 'big.txt'))
    assert_equal reference, typesetter['Hello World']
  end

  # 'Chunky Bacon' typeset with chunky.flf and the :smush option set to false
  # must equal the contents of chunky.txt.
  def test_no_smushing
    chunky_font = Text::Figlet::Font.new(File.rel('data', 'chunky.flf'))
    typesetter  = Text::Figlet::Typesetter.new(chunky_font, :smush => false)
    reference   = File.read(File.rel('data', 'chunky.txt'))
    assert_equal reference, typesetter['Chunky Bacon']
  end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Table-driven tests for the distance method mixed in from Text::Levenshtein.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Fixture sets keyed by name. Every entry is
  # [first string, second string, expected distance].
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts the expected distance for every entry of the named fixture set,
  # in both argument orders.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Runs the given block with the global $KCODE set to k, then puts the
  # previous value back.
  #
  # The restore sits in an ensure clause because a failing assertion leaves
  # the block by raising; without it the changed $KCODE would leak into every
  # test that runs afterwards.
  def with_kcode(k)
    old_kcode = $KCODE
    $KCODE = k
    begin
      yield
    ensure
      $KCODE = old_kcode
    end
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  # The multi-byte fixtures are checked with $KCODE set to 'U'.
  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  # The single-byte fixtures are checked with $KCODE set to 'NONE'.
  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Tests for Text::Metaphone.metaphone, both in its default form and with the
# :buggy option switched on.
class MetaphoneTest < Test::Unit::TestCase

  # Every input/expected pair loaded from data/metaphone.txt must match the
  # default metaphone output.
  def test_cases
    expectations = YAML.load(File.read(File.rel('data', 'metaphone.txt')))
    expectations.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word)
    end
  end

  # Every input/expected pair loaded from data/metaphone_buggy.txt must match
  # the output produced with :buggy=>true.
  def test_cases_for_buggy_implementation
    expectations = YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt')))
    expectations.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word, :buggy=>true)
    end
  end

  # A word with punctuation scattered through it must encode the same as the
  # bare word, in both modes.
  def test_junk
    clean = Text::Metaphone.metaphone('foobar')
    noisy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&')
    assert_equal clean, noisy

    clean_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    noisy_buggy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true)
    assert_equal clean_buggy, noisy_buggy
  end

  # Lower-case and upper-case spellings must encode the same, in both modes.
  def test_caps
    lower = Text::Metaphone.metaphone('foobar')
    upper = Text::Metaphone.metaphone('FOOBAR')
    assert_equal lower, upper

    lower_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    upper_buggy = Text::Metaphone.metaphone('FOOBAR', :buggy=>true)
    assert_equal lower_buggy, upper_buggy
  end

  # Multi-word input yields one code per word, separated by spaces.
  def test_string
    foo_codes = Text::Metaphone.metaphone('foo bar baz')
    assert_equal 'F BR BS', foo_codes
    gnu_codes = Text::Metaphone.metaphone('gnu what')
    assert_equal 'N WT', gnu_codes

    foo_codes_buggy = Text::Metaphone.metaphone('foo bar baz', :buggy=>true)
    assert_equal 'F BR BS', foo_codes_buggy
    gnu_codes_buggy = Text::Metaphone.metaphone('gnu what', :buggy=>true)
    assert_equal 'N WT', gnu_codes_buggy
  end

end
|
data/test/soundex_test.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Soundex.soundex against a small inline table of surnames.
class SoundexTest < Test::Unit::TestCase

  # The table is an inline YAML mapping of name to expected Soundex code.
  # Consecutive names are paired so that both share one code.
  def test_cases
    expected_codes = YAML.load(%{

Euler: E460
Ellery: E460
Gauss: G200
Ghosh: G200
Hilbert: H416
Heilbronn: H416
Knuth: K530
Kant: K530
Lloyd: L300
Ladd: L300
Lukasiewicz: L222
Lissajous: L222

})
    expected_codes.each_pair do |name, code|
      assert_equal code, Text::Soundex.soundex(name)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: Text
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2006-09-23 00:00:00 +01:00
|
8
|
+
summary: A collection of text algorithms
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email:
|
12
|
+
homepage: http://text.rubyforge.org/
|
13
|
+
rubyforge_project: text
|
14
|
+
description:
|
15
|
+
autorequire: text
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Paul Battley, Michael Neumann, Tim Fletcher
|
31
|
+
files:
|
32
|
+
- lib/text.rb
|
33
|
+
- lib/text/double_metaphone.rb
|
34
|
+
- lib/text/figlet.rb
|
35
|
+
- lib/text/levenshtein.rb
|
36
|
+
- lib/text/metaphone.rb
|
37
|
+
- lib/text/soundex.rb
|
38
|
+
- lib/text/figlet/font.rb
|
39
|
+
- lib/text/figlet/smusher.rb
|
40
|
+
- lib/text/figlet/typesetter.rb
|
41
|
+
- test/double_metaphone_test.rb
|
42
|
+
- test/figlet_test.rb
|
43
|
+
- test/levenshtein_test.rb
|
44
|
+
- test/metaphone_test.rb
|
45
|
+
- test/preamble.rb
|
46
|
+
- test/soundex_test.rb
|
47
|
+
- test/data/big.flf
|
48
|
+
- test/data/big.txt
|
49
|
+
- test/data/chunky.flf
|
50
|
+
- test/data/chunky.txt
|
51
|
+
- test/data/double_metaphone.csv
|
52
|
+
- test/data/metaphone.txt
|
53
|
+
- test/data/metaphone_buggy.txt
|
54
|
+
- rakefile.rb
|
55
|
+
test_files: []
|
56
|
+
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
extra_rdoc_files: []
|
60
|
+
|
61
|
+
executables: []
|
62
|
+
|
63
|
+
extensions: []
|
64
|
+
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
dependencies: []
|
68
|
+
|