Text 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ #
2
+ # Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
3
+ # with surprising results changed to 'correct' ones (according to my interpretation
4
+ # of the algorithm description), and some more results from around the web:
5
+ #
6
+ ANASTHA: ANS0
7
+ DAVIS-CARTER: TFSKRTR
8
+ ESCARMANT: ESKRMNT
9
+ MCCALL: MKL
10
+ MCCROREY: MKRR
11
+ MERSEAL: MRSL
12
+ PIEURISSAINT: PRSNT
13
+ ROTMAN: RTMN
14
+ SCHEVEL: SXFL
15
+ SCHROM: SXRM
16
+ SEAL: SL
17
+ SPARR: SPR
18
+ STARLEPER: STRLPR
19
+ THRASH: 0RX
20
+ LOGGING: LKNK
21
+ LOGIC: LJK
22
+ JUDGES: JJS
23
+ SHOOS: XS
24
+ SHOES: XS
25
+ CHUTE: XT
26
+ SCHUSS: SXS
27
+ OTTO: OT
28
+ ERIC: ERK
29
+ DAVE: TF
30
+ CATHERINE: K0RN
31
+ KATHERINE: K0RN
32
+ AUBREY: ABR
33
+ BRYAN: BRYN
34
+ BRYCE: BRS
35
+ STEVEN: STFN
36
+ RICHARD: RXRT
37
+ HEIDI: HT
38
+ AUTO: AT
39
+ MAURICE: MRS
40
+ RANDY: RNT
41
+ CAMBRILLO: KMBRL
42
+ BRIAN: BRN
43
+ RAY: R
44
+ GEOFF: JF
45
+ BOB: BB
46
+ AHA: AH
47
+ AAH: A
48
+ PAUL: PL
49
+ BATTLEY: BTL
50
+ WROTE: RT
51
+ THIS: 0S
@@ -0,0 +1,52 @@
1
+ #
2
+ # Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
3
+ # this mimics the behaviour of Lawrence Philips's BASIC implementation,
4
+ # which appears to contain bugs when compared to his description of the
5
+ # algorithm.
6
+ #
7
+ ANASTHA: ANS0
8
+ DAVIS-CARTER: TFSKRTR
9
+ ESCARMANT: ESKRMNT
10
+ MCCALL: MKKL
11
+ MCCROREY: MKKRR
12
+ MERSEAL: MRSL
13
+ PIEURISSAINT: PRSNT
14
+ ROTMAN: RTMN
15
+ SCHEVEL: SXFL
16
+ SCHROM: SXRM
17
+ SEAL: SL
18
+ SPARR: SPR
19
+ STARLEPER: STRLPR
20
+ THRASH: 0RX
21
+ LOGGING: LKNK
22
+ LOGIC: LJK
23
+ JUDGES: JJS
24
+ SHOOS: XS
25
+ SHOES: XS
26
+ CHUTE: XT
27
+ SCHUSS: SXS
28
+ OTTO: OT
29
+ ERIC: ERK
30
+ DAVE: TF
31
+ CATHERINE: K0RN
32
+ KATHERINE: K0RN
33
+ AUBREY: ABR
34
+ BRYAN: BRYN
35
+ BRYCE: BRS
36
+ STEVEN: STFN
37
+ RICHARD: RXRT
38
+ HEIDI: HT
39
+ AUTO: AT
40
+ MAURICE: MRS
41
+ RANDY: RNT
42
+ CAMBRILLO: KMRL
43
+ BRIAN: BRN
44
+ RAY: R
45
+ GEOFF: JF
46
+ BOB: BB
47
+ AHA: AH
48
+ AAH: A
49
+ PAUL: PL
50
+ BATTLEY: BTL
51
+ WROTE: RT
52
+ THIS: 0S
@@ -0,0 +1,16 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'rubygems'
3
+ require 'fastercsv'
4
+
5
# Data-driven test for Text::Metaphone.double_metaphone.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # Runs every row of data/double_metaphone.csv through double_metaphone.
  # Each row is read as: input word, expected primary code, expected
  # secondary code (fields separated by ', ').
  def test_cases
    FasterCSV.read(File.rel('data', 'double_metaphone.csv'), :col_sep => ', ').each do |row|
      primary, secondary = Text::Metaphone.double_metaphone(row[0])

      assert_equal row[1], primary
      # When no secondary code is returned, the expected secondary column is
      # compared against the primary code instead.
      assert_equal row[2], secondary || primary
    end
  end

end
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Compares Text::Figlet rendering against pre-rendered fixture files in data/.
class FigletTest < Test::Unit::TestCase

  # Renders 'Hello World' with the big.flf font using the default typesetter
  # options and expects the contents of big.txt.
  def test_hello_world
    big_font = Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    typesetter = Text::Figlet::Typesetter.new(big_font)
    expected = File.read(File.rel('data', 'big.txt'))
    assert_equal expected, typesetter['Hello World']
  end

  # Renders 'Chunky Bacon' with the chunky.flf font and :smush => false and
  # expects the contents of chunky.txt.
  def test_no_smushing
    chunky_font = Text::Figlet::Font.new(File.rel('data', 'chunky.flf'))
    typesetter = Text::Figlet::Typesetter.new(chunky_font, :smush => false)
    expected = File.read(File.rel('data', 'chunky.txt'))
    assert_equal expected, typesetter['Chunky Bacon']
  end

end
@@ -0,0 +1,80 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Table-driven tests for Text::Levenshtein#distance.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Named groups of [string, string, expected distance] triples.
  # Non-ASCII strings are written as octal byte escapes so the file itself
  # stays plain ASCII.
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts the expected distance for every triple in the named group,
  # checking both argument orders.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Runs the given block with $KCODE set to +k+, then puts the previous
  # value back. The restore sits in an ensure clause so that a failing
  # assertion (which raises) cannot leave the global changed for the tests
  # that run afterwards.
  def with_kcode(k)
    old_kcode = $KCODE
    begin
      $KCODE = k
      yield
    ensure
      $KCODE = old_kcode
    end
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  # Multi-byte characters: run with $KCODE set to 'U'.
  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  # Single-byte high characters: run with $KCODE set to 'NONE'.
  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
@@ -0,0 +1,39 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
# Tests Text::Metaphone.metaphone in its default mode and with :buggy=>true.
class MetaphoneTest < Test::Unit::TestCase

  # Every input => code pair in data/metaphone.txt must match the default mode.
  def test_cases
    fixture = YAML.load(File.read(File.rel('data', 'metaphone.txt')))
    fixture.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word)
    end
  end

  # Every input => code pair in data/metaphone_buggy.txt must match the
  # :buggy=>true mode.
  def test_cases_for_buggy_implementation
    fixture = YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt')))
    fixture.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word, :buggy=>true)
    end
  end

  # A word interleaved with punctuation must encode the same as the bare word.
  def test_junk
    plain = Text::Metaphone.metaphone('foobar')
    noisy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&')
    assert_equal plain, noisy

    plain_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    noisy_buggy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true)
    assert_equal plain_buggy, noisy_buggy
  end

  # Upper-case input must encode the same as lower-case input.
  def test_caps
    lower = Text::Metaphone.metaphone('foobar')
    upper = Text::Metaphone.metaphone('FOOBAR')
    assert_equal lower, upper

    lower_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    upper_buggy = Text::Metaphone.metaphone('FOOBAR', :buggy=>true)
    assert_equal lower_buggy, upper_buggy
  end

  # Space-separated words yield space-separated codes in both modes.
  def test_string
    phrase_result = Text::Metaphone.metaphone('foo bar baz')
    assert_equal 'F BR BS', phrase_result
    gnu_result = Text::Metaphone.metaphone('gnu what')
    assert_equal 'N WT', gnu_result

    phrase_result_buggy = Text::Metaphone.metaphone('foo bar baz', :buggy=>true)
    assert_equal 'F BR BS', phrase_result_buggy
    gnu_result_buggy = Text::Metaphone.metaphone('gnu what', :buggy=>true)
    assert_equal 'N WT', gnu_result_buggy
  end

end
@@ -0,0 +1,10 @@
1
+ require 'test/unit'
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+ require 'text'
5
+
6
# Test-support extension of File.
class File
  # Joins the given path components onto the directory that contains this
  # source file and returns the resulting path string.
  def self.rel(*path)
    base_dir = dirname(__FILE__)
    join(base_dir, *path)
  end
end
@@ -0,0 +1,27 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
# Checks Text::Soundex.soundex against a table of name => code pairs.
class SoundexTest < Test::Unit::TestCase

  # The table is an inline YAML mapping; consecutive names are listed in
  # pairs that are expected to produce the same code.
  def test_cases
    expectations = YAML.load(%{

Euler: E460
Ellery: E460
Gauss: G200
Ghosh: G200
Hilbert: H416
Heilbronn: H416
Knuth: K530
Kant: K530
Lloyd: L300
Ladd: L300
Lukasiewicz: L222
Lissajous: L222

})
    expectations.each do |name, code|
      assert_equal code, Text::Soundex.soundex(name)
    end
  end

end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: Text
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2006-09-23 00:00:00 +01:00
8
+ summary: A collection of text algorithms
9
+ require_paths:
10
+ - lib
11
+ email:
12
+ homepage: http://text.rubyforge.org/
13
+ rubyforge_project: text
14
+ description:
15
+ autorequire: text
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Paul Battley, Michael Neumann, Tim Fletcher
31
+ files:
32
+ - lib/text.rb
33
+ - lib/text/double_metaphone.rb
34
+ - lib/text/figlet.rb
35
+ - lib/text/levenshtein.rb
36
+ - lib/text/metaphone.rb
37
+ - lib/text/soundex.rb
38
+ - lib/text/figlet/font.rb
39
+ - lib/text/figlet/smusher.rb
40
+ - lib/text/figlet/typesetter.rb
41
+ - test/double_metaphone_test.rb
42
+ - test/figlet_test.rb
43
+ - test/levenshtein_test.rb
44
+ - test/metaphone_test.rb
45
+ - test/preamble.rb
46
+ - test/soundex_test.rb
47
+ - test/data/big.flf
48
+ - test/data/big.txt
49
+ - test/data/chunky.flf
50
+ - test/data/chunky.txt
51
+ - test/data/double_metaphone.csv
52
+ - test/data/metaphone.txt
53
+ - test/data/metaphone_buggy.txt
54
+ - rakefile.rb
55
+ test_files: []
56
+
57
+ rdoc_options: []
58
+
59
+ extra_rdoc_files: []
60
+
61
+ executables: []
62
+
63
+ extensions: []
64
+
65
+ requirements: []
66
+
67
+ dependencies: []
68
+