Text 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/text.rb +5 -0
- data/lib/text/double_metaphone.rb +356 -0
- data/lib/text/figlet.rb +17 -0
- data/lib/text/figlet/font.rb +117 -0
- data/lib/text/figlet/smusher.rb +64 -0
- data/lib/text/figlet/typesetter.rb +68 -0
- data/lib/text/levenshtein.rb +65 -0
- data/lib/text/metaphone.rb +97 -0
- data/lib/text/soundex.rb +61 -0
- data/rakefile.rb +46 -0
- data/test/data/big.flf +2204 -0
- data/test/data/big.txt +8 -0
- data/test/data/chunky.flf +512 -0
- data/test/data/chunky.txt +5 -0
- data/test/data/double_metaphone.csv +1218 -0
- data/test/data/metaphone.txt +51 -0
- data/test/data/metaphone_buggy.txt +52 -0
- data/test/double_metaphone_test.rb +16 -0
- data/test/figlet_test.rb +17 -0
- data/test/levenshtein_test.rb +80 -0
- data/test/metaphone_test.rb +39 -0
- data/test/preamble.rb +10 -0
- data/test/soundex_test.rb +27 -0
- metadata +68 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
#
|
2
|
+
# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
|
3
|
+
# with surprising results changed to 'correct' ones (according to my interpretation
|
4
|
+
# of the algorithm description), and some more results from around the web:
|
5
|
+
#
|
6
|
+
ANASTHA: ANS0
|
7
|
+
DAVIS-CARTER: TFSKRTR
|
8
|
+
ESCARMANT: ESKRMNT
|
9
|
+
MCCALL: MKL
|
10
|
+
MCCROREY: MKRR
|
11
|
+
MERSEAL: MRSL
|
12
|
+
PIEURISSAINT: PRSNT
|
13
|
+
ROTMAN: RTMN
|
14
|
+
SCHEVEL: SXFL
|
15
|
+
SCHROM: SXRM
|
16
|
+
SEAL: SL
|
17
|
+
SPARR: SPR
|
18
|
+
STARLEPER: STRLPR
|
19
|
+
THRASH: 0RX
|
20
|
+
LOGGING: LKNK
|
21
|
+
LOGIC: LJK
|
22
|
+
JUDGES: JJS
|
23
|
+
SHOOS: XS
|
24
|
+
SHOES: XS
|
25
|
+
CHUTE: XT
|
26
|
+
SCHUSS: SXS
|
27
|
+
OTTO: OT
|
28
|
+
ERIC: ERK
|
29
|
+
DAVE: TF
|
30
|
+
CATHERINE: K0RN
|
31
|
+
KATHERINE: K0RN
|
32
|
+
AUBREY: ABR
|
33
|
+
BRYAN: BRYN
|
34
|
+
BRYCE: BRS
|
35
|
+
STEVEN: STFN
|
36
|
+
RICHARD: RXRT
|
37
|
+
HEIDI: HT
|
38
|
+
AUTO: AT
|
39
|
+
MAURICE: MRS
|
40
|
+
RANDY: RNT
|
41
|
+
CAMBRILLO: KMBRL
|
42
|
+
BRIAN: BRN
|
43
|
+
RAY: R
|
44
|
+
GEOFF: JF
|
45
|
+
BOB: BB
|
46
|
+
AHA: AH
|
47
|
+
AAH: A
|
48
|
+
PAUL: PL
|
49
|
+
BATTLEY: BTL
|
50
|
+
WROTE: RT
|
51
|
+
THIS: 0S
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#
|
2
|
+
# Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
|
3
|
+
# this mimics the behaviour of Lawrence Philips's BASIC implementation,
|
4
|
+
# which appears to contain bugs when compared to his description of the
|
5
|
+
# algorithm.
|
6
|
+
#
|
7
|
+
ANASTHA: ANS0
|
8
|
+
DAVIS-CARTER: TFSKRTR
|
9
|
+
ESCARMANT: ESKRMNT
|
10
|
+
MCCALL: MKKL
|
11
|
+
MCCROREY: MKKRR
|
12
|
+
MERSEAL: MRSL
|
13
|
+
PIEURISSAINT: PRSNT
|
14
|
+
ROTMAN: RTMN
|
15
|
+
SCHEVEL: SXFL
|
16
|
+
SCHROM: SXRM
|
17
|
+
SEAL: SL
|
18
|
+
SPARR: SPR
|
19
|
+
STARLEPER: STRLPR
|
20
|
+
THRASH: 0RX
|
21
|
+
LOGGING: LKNK
|
22
|
+
LOGIC: LJK
|
23
|
+
JUDGES: JJS
|
24
|
+
SHOOS: XS
|
25
|
+
SHOES: XS
|
26
|
+
CHUTE: XT
|
27
|
+
SCHUSS: SXS
|
28
|
+
OTTO: OT
|
29
|
+
ERIC: ERK
|
30
|
+
DAVE: TF
|
31
|
+
CATHERINE: K0RN
|
32
|
+
KATHERINE: K0RN
|
33
|
+
AUBREY: ABR
|
34
|
+
BRYAN: BRYN
|
35
|
+
BRYCE: BRS
|
36
|
+
STEVEN: STFN
|
37
|
+
RICHARD: RXRT
|
38
|
+
HEIDI: HT
|
39
|
+
AUTO: AT
|
40
|
+
MAURICE: MRS
|
41
|
+
RANDY: RNT
|
42
|
+
CAMBRILLO: KMRL
|
43
|
+
BRIAN: BRN
|
44
|
+
RAY: R
|
45
|
+
GEOFF: JF
|
46
|
+
BOB: BB
|
47
|
+
AHA: AH
|
48
|
+
AAH: A
|
49
|
+
PAUL: PL
|
50
|
+
BATTLEY: BTL
|
51
|
+
WROTE: RT
|
52
|
+
THIS: 0S
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'rubygems'
|
3
|
+
require 'fastercsv'
|
4
|
+
|
5
|
+
# Checks Text::Metaphone.double_metaphone against the reference table stored
# in data/double_metaphone.csv.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # Each CSV row is used as: row[0] = input word, row[1] = expected primary
  # code, row[2] = expected secondary code. Columns are separated by ', '.
  #
  # Every assertion carries the row index and the input word, so a failure
  # in this large table points directly at the offending entry.
  def test_cases
    rows = FasterCSV.read(File.rel('data', 'double_metaphone.csv'), :col_sep => ', ')

    rows.each_with_index do |row, i|
      word = row[0]
      primary, secondary = Text::Metaphone.double_metaphone(word)
      context = "row #{i + 1} (#{word.inspect})"

      assert_equal row[1], primary, "primary code, #{context}"
      # When no secondary code is returned, the table's third column is
      # compared against the primary code instead.
      assert_equal row[2], secondary || primary, "secondary code, #{context}"
    end
  end

end
|
data/test/figlet_test.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Renders sample text with the FIGlet fonts under data/ and compares the
# result with the contents of the matching .txt file.
class FigletTest < Test::Unit::TestCase

  # 'big' font, typesetter constructed with the font only.
  def test_hello_world
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    )
    expected = File.read(File.rel('data', 'big.txt'))
    actual = typesetter['Hello World']

    assert_equal expected, actual
  end

  # 'chunky' font, typesetter constructed with :smush => false.
  def test_no_smushing
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'chunky.flf')),
      :smush => false
    )
    expected = File.read(File.rel('data', 'chunky.txt'))
    actual = typesetter['Chunky Bacon']

    assert_equal expected, actual
  end

end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Exercises the `distance` method (called unqualified here; presumably
# supplied by the included Text::Levenshtein module) against tables of
# [string, string, expected distance] triples.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Test tables keyed by category. Each entry is [s, t, expected_distance].
  # The :utf8 strings are written as octal byte escapes of multi-byte
  # sequences; the :iso_8859_1 strings use single-byte escapes.
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts every triple in TEST_CASES[name], in both argument orders, so
  # the distance is also checked for symmetry.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Runs the given block with $KCODE set to +k+ and puts the previous value
  # back afterwards.
  #
  # The restore lives in an ensure clause: a failing assertion raises out of
  # the block, and without ensure the temporary $KCODE would stay in effect
  # for every test that runs later in the same process.
  #
  # NOTE(review): $KCODE is a Ruby 1.8-era global; confirm how the Ruby
  # version in use treats assignments to it.
  def with_kcode(k)
    old_kcode = $KCODE
    $KCODE = k
    yield
  ensure
    $KCODE = old_kcode
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  # Multi-byte data is checked with $KCODE set to 'U'.
  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  # Single-byte data is checked with $KCODE set to 'NONE'.
  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Metaphone.metaphone, with and without the :buggy option,
# against the fixture tables in data/ and a few hand-written inputs.
class MetaphoneTest < Test::Unit::TestCase

  # Table-driven check without options.
  def test_cases
    each_fixture_pair('metaphone.txt') do |input, expected_output|
      assert_equal expected_output, Text::Metaphone.metaphone(input)
    end
  end

  # Table-driven check with :buggy => true, using its own fixture table.
  def test_cases_for_buggy_implementation
    each_fixture_pair('metaphone_buggy.txt') do |input, expected_output|
      assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true)
    end
  end

  # A word with punctuation scattered through it must encode like the
  # bare word.
  def test_junk
    clean = 'foobar'
    noisy = '%^@#$^f%^&o%^o@b#a@#r%^^&'

    assert_equal Text::Metaphone.metaphone(clean),
                 Text::Metaphone.metaphone(noisy)
    assert_equal Text::Metaphone.metaphone(clean, :buggy=>true),
                 Text::Metaphone.metaphone(noisy, :buggy=>true)
  end

  # Lower-case and upper-case spellings must encode identically.
  def test_caps
    lower = 'foobar'
    upper = 'FOOBAR'

    assert_equal Text::Metaphone.metaphone(lower),
                 Text::Metaphone.metaphone(upper)
    assert_equal Text::Metaphone.metaphone(lower, :buggy=>true),
                 Text::Metaphone.metaphone(upper, :buggy=>true)
  end

  # Multi-word input: expected output has one code per word, separated
  # by spaces.
  def test_string
    phrases = [
      ['foo bar baz', 'F BR BS'],
      ['gnu what', 'N WT']
    ]

    phrases.each do |phrase, code|
      assert_equal code, Text::Metaphone.metaphone(phrase)
    end
    phrases.each do |phrase, code|
      assert_equal code, Text::Metaphone.metaphone(phrase, :buggy=>true)
    end
  end

  private

  # Loads data/<filename> as YAML and passes each (input, expected) pair
  # to the block.
  def each_fixture_pair(filename, &block)
    YAML.load(File.read(File.rel('data', filename))).each(&block)
  end

end
|
data/test/preamble.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Soundex.soundex against a small table of surnames; the names
# come in pairs that are expected to share one code.
class SoundexTest < Test::Unit::TestCase

  # The YAML literal is deliberately flush-left so that no indentation is
  # added to the text handed to YAML.load.
  def test_cases
    expectations = YAML.load(%{

Euler: E460
Ellery: E460
Gauss: G200
Ghosh: G200
Hilbert: H416
Heilbronn: H416
Knuth: K530
Kant: K530
Lloyd: L300
Ladd: L300
Lukasiewicz: L222
Lissajous: L222

})

    expectations.each_pair do |input, expected_output|
      assert_equal expected_output, Text::Soundex.soundex(input)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: Text
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2006-09-23 00:00:00 +01:00
|
8
|
+
summary: A collection of text algorithms
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email:
|
12
|
+
homepage: http://text.rubyforge.org/
|
13
|
+
rubyforge_project: text
|
14
|
+
description:
|
15
|
+
autorequire: text
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Paul Battley, Michael Neumann, Tim Fletcher
|
31
|
+
files:
|
32
|
+
- lib/text.rb
|
33
|
+
- lib/text/double_metaphone.rb
|
34
|
+
- lib/text/figlet.rb
|
35
|
+
- lib/text/levenshtein.rb
|
36
|
+
- lib/text/metaphone.rb
|
37
|
+
- lib/text/soundex.rb
|
38
|
+
- lib/text/figlet/font.rb
|
39
|
+
- lib/text/figlet/smusher.rb
|
40
|
+
- lib/text/figlet/typesetter.rb
|
41
|
+
- test/double_metaphone_test.rb
|
42
|
+
- test/figlet_test.rb
|
43
|
+
- test/levenshtein_test.rb
|
44
|
+
- test/metaphone_test.rb
|
45
|
+
- test/preamble.rb
|
46
|
+
- test/soundex_test.rb
|
47
|
+
- test/data/big.flf
|
48
|
+
- test/data/big.txt
|
49
|
+
- test/data/chunky.flf
|
50
|
+
- test/data/chunky.txt
|
51
|
+
- test/data/double_metaphone.csv
|
52
|
+
- test/data/metaphone.txt
|
53
|
+
- test/data/metaphone_buggy.txt
|
54
|
+
- rakefile.rb
|
55
|
+
test_files: []
|
56
|
+
|
57
|
+
rdoc_options: []
|
58
|
+
|
59
|
+
extra_rdoc_files: []
|
60
|
+
|
61
|
+
executables: []
|
62
|
+
|
63
|
+
extensions: []
|
64
|
+
|
65
|
+
requirements: []
|
66
|
+
|
67
|
+
dependencies: []
|
68
|
+
|