Text 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ #
2
+ # Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
3
+ # with surprising results changed to 'correct' ones (according to my interpretation
4
+ # of the algorithm description), and some more results from around the web:
5
+ #
6
+ ANASTHA: ANS0
7
+ DAVIS-CARTER: TFSKRTR
8
+ ESCARMANT: ESKRMNT
9
+ MCCALL: MKL
10
+ MCCROREY: MKRR
11
+ MERSEAL: MRSL
12
+ PIEURISSAINT: PRSNT
13
+ ROTMAN: RTMN
14
+ SCHEVEL: SXFL
15
+ SCHROM: SXRM
16
+ SEAL: SL
17
+ SPARR: SPR
18
+ STARLEPER: STRLPR
19
+ THRASH: 0RX
20
+ LOGGING: LKNK
21
+ LOGIC: LJK
22
+ JUDGES: JJS
23
+ SHOOS: XS
24
+ SHOES: XS
25
+ CHUTE: XT
26
+ SCHUSS: SXS
27
+ OTTO: OT
28
+ ERIC: ERK
29
+ DAVE: TF
30
+ CATHERINE: K0RN
31
+ KATHERINE: K0RN
32
+ AUBREY: ABR
33
+ BRYAN: BRYN
34
+ BRYCE: BRS
35
+ STEVEN: STFN
36
+ RICHARD: RXRT
37
+ HEIDI: HT
38
+ AUTO: AT
39
+ MAURICE: MRS
40
+ RANDY: RNT
41
+ CAMBRILLO: KMBRL
42
+ BRIAN: BRN
43
+ RAY: R
44
+ GEOFF: JF
45
+ BOB: BB
46
+ AHA: AH
47
+ AAH: A
48
+ PAUL: PL
49
+ BATTLEY: BTL
50
+ WROTE: RT
51
+ THIS: 0S
@@ -0,0 +1,52 @@
1
+ #
2
+ # Based on the table at http://aspell.net/metaphone/metaphone-kuhn.txt,
3
+ # this mimics the behaviour of Lawrence Philips's BASIC implementation,
4
+ # which appears to contain bugs when compared to his description of the
5
+ # algorithm.
6
+ #
7
+ ANASTHA: ANS0
8
+ DAVIS-CARTER: TFSKRTR
9
+ ESCARMANT: ESKRMNT
10
+ MCCALL: MKKL
11
+ MCCROREY: MKKRR
12
+ MERSEAL: MRSL
13
+ PIEURISSAINT: PRSNT
14
+ ROTMAN: RTMN
15
+ SCHEVEL: SXFL
16
+ SCHROM: SXRM
17
+ SEAL: SL
18
+ SPARR: SPR
19
+ STARLEPER: STRLPR
20
+ THRASH: 0RX
21
+ LOGGING: LKNK
22
+ LOGIC: LJK
23
+ JUDGES: JJS
24
+ SHOOS: XS
25
+ SHOES: XS
26
+ CHUTE: XT
27
+ SCHUSS: SXS
28
+ OTTO: OT
29
+ ERIC: ERK
30
+ DAVE: TF
31
+ CATHERINE: K0RN
32
+ KATHERINE: K0RN
33
+ AUBREY: ABR
34
+ BRYAN: BRYN
35
+ BRYCE: BRS
36
+ STEVEN: STFN
37
+ RICHARD: RXRT
38
+ HEIDI: HT
39
+ AUTO: AT
40
+ MAURICE: MRS
41
+ RANDY: RNT
42
+ CAMBRILLO: KMRL
43
+ BRIAN: BRN
44
+ RAY: R
45
+ GEOFF: JF
46
+ BOB: BB
47
+ AHA: AH
48
+ AAH: A
49
+ PAUL: PL
50
+ BATTLEY: BTL
51
+ WROTE: RT
52
+ THIS: 0S
@@ -0,0 +1,16 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'rubygems'
3
+ require 'fastercsv'
4
+
5
+ class DoubleMetaphoneTest < Test::Unit::TestCase
6
+
7
+ def test_cases
8
+ FasterCSV.read(File.rel('data', 'double_metaphone.csv'), :col_sep => ', ').each_with_index do |row, i|
9
+ primary, secondary = Text::Metaphone.double_metaphone(row[0])
10
+
11
+ assert_equal row[1], primary
12
+ assert_equal row[2], secondary.nil?? primary : secondary
13
+ end
14
+ end
15
+
16
+ end
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ class FigletTest < Test::Unit::TestCase
4
+
5
+ def test_hello_world
6
+ font = Text::Figlet::Font.new(File.rel('data', 'big.flf'))
7
+ figlet = Text::Figlet::Typesetter.new(font)
8
+ assert_equal File.read(File.rel('data', 'big.txt')), figlet['Hello World']
9
+ end
10
+
11
+ def test_no_smushing
12
+ font = Text::Figlet::Font.new(File.rel('data', 'chunky.flf'))
13
+ figlet = Text::Figlet::Typesetter.new(font, :smush => false)
14
+ assert_equal File.read(File.rel('data', 'chunky.txt')), figlet['Chunky Bacon']
15
+ end
16
+
17
+ end
@@ -0,0 +1,80 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ class LevenshteinTest < Test::Unit::TestCase
4
+
5
+ include Text::Levenshtein
6
+
7
+ TEST_CASES = {
8
+ :easy => [
9
+ ['test', 'test', 0],
10
+ ['test', 'tent', 1],
11
+ ['gumbo', 'gambol', 2],
12
+ ['kitten', 'sitting', 3]
13
+ ],
14
+ :empty => [
15
+ ['foo', '', 3],
16
+ ['', '', 0],
17
+ ['a', '', 1]
18
+ ],
19
+ :utf8 => [
20
+ ["f\303\266o", 'foo', 1],
21
+ ["fran\303\247ais", 'francais', 1],
22
+ ["fran\303\247ais", "fran\303\246ais", 1],
23
+ [
24
+ "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
25
+ "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
26
+ "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
27
+ "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
28
+ 2
29
+ ] # Japanese
30
+ ],
31
+ :iso_8859_1 => [
32
+ ["f\366o", 'foo', 1],
33
+ ["fran\347ais", 'francais', 1],
34
+ ["fran\347ais", "fran\346ais", 1]
35
+ ],
36
+ :edge => [
37
+ ['a', 'a', 0],
38
+ ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
39
+ ]
40
+ }
41
+
42
+ def assert_set(name)
43
+ TEST_CASES[name].each do |s, t, x|
44
+ assert_equal x, distance(s, t)
45
+ assert_equal x, distance(t, s)
46
+ end
47
+ end
48
+
49
+ def with_kcode(k)
50
+ old_kcode = $KCODE
51
+ $KCODE = k
52
+ yield
53
+ $KCODE = old_kcode
54
+ end
55
+
56
+ def test_easy_cases
57
+ assert_set(:easy)
58
+ end
59
+
60
+ def test_empty_cases
61
+ assert_set(:empty)
62
+ end
63
+
64
+ def test_edge_cases
65
+ assert_set(:edge)
66
+ end
67
+
68
+ def test_utf8_cases
69
+ with_kcode('U') do
70
+ assert_set(:utf8)
71
+ end
72
+ end
73
+
74
+ def test_iso_8859_1_cases
75
+ with_kcode('NONE') do
76
+ assert_set(:iso_8859_1)
77
+ end
78
+ end
79
+
80
+ end
@@ -0,0 +1,39 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
+ class MetaphoneTest < Test::Unit::TestCase
5
+
6
+ def test_cases
7
+ YAML.load(File.read(File.rel('data', 'metaphone.txt'))).each do |input, expected_output|
8
+ assert_equal expected_output, Text::Metaphone.metaphone(input)
9
+ end
10
+ end
11
+
12
+ def test_cases_for_buggy_implementation
13
+ YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt'))).each do |input, expected_output|
14
+ assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true)
15
+ end
16
+ end
17
+
18
+ def test_junk
19
+ assert_equal Text::Metaphone.metaphone('foobar'),
20
+ Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&')
21
+ assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true),
22
+ Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true)
23
+ end
24
+
25
+ def test_caps
26
+ assert_equal Text::Metaphone.metaphone('foobar'),
27
+ Text::Metaphone.metaphone('FOOBAR')
28
+ assert_equal Text::Metaphone.metaphone('foobar', :buggy=>true),
29
+ Text::Metaphone.metaphone('FOOBAR', :buggy=>true)
30
+ end
31
+
32
+ def test_string
33
+ assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz')
34
+ assert_equal 'N WT', Text::Metaphone.metaphone('gnu what')
35
+ assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz', :buggy=>true)
36
+ assert_equal 'N WT', Text::Metaphone.metaphone('gnu what', :buggy=>true)
37
+ end
38
+
39
+ end
@@ -0,0 +1,10 @@
1
+ require 'test/unit'
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+ require 'text'
5
+
6
+ class File
7
+ def self.rel(*path)
8
+ join(dirname(__FILE__), *path)
9
+ end
10
+ end
@@ -0,0 +1,27 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
+ class SoundexTest < Test::Unit::TestCase
5
+
6
+ def test_cases
7
+ YAML.load(%{
8
+
9
+ Euler: E460
10
+ Ellery: E460
11
+ Gauss: G200
12
+ Ghosh: G200
13
+ Hilbert: H416
14
+ Heilbronn: H416
15
+ Knuth: K530
16
+ Kant: K530
17
+ Lloyd: L300
18
+ Ladd: L300
19
+ Lukasiewicz: L222
20
+ Lissajous: L222
21
+
22
+ }).each do |input, expected_output|
23
+ assert_equal expected_output, Text::Soundex.soundex(input)
24
+ end
25
+ end
26
+
27
+ end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: Text
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2006-09-23 00:00:00 +01:00
8
+ summary: A collection of text algorithms
9
+ require_paths:
10
+ - lib
11
+ email:
12
+ homepage: http://text.rubyforge.org/
13
+ rubyforge_project: text
14
+ description:
15
+ autorequire: text
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Paul Battley, Michael Neumann, Tim Fletcher
31
+ files:
32
+ - lib/text.rb
33
+ - lib/text/double_metaphone.rb
34
+ - lib/text/figlet.rb
35
+ - lib/text/levenshtein.rb
36
+ - lib/text/metaphone.rb
37
+ - lib/text/soundex.rb
38
+ - lib/text/figlet/font.rb
39
+ - lib/text/figlet/smusher.rb
40
+ - lib/text/figlet/typesetter.rb
41
+ - test/double_metaphone_test.rb
42
+ - test/figlet_test.rb
43
+ - test/levenshtein_test.rb
44
+ - test/metaphone_test.rb
45
+ - test/preamble.rb
46
+ - test/soundex_test.rb
47
+ - test/data/big.flf
48
+ - test/data/big.txt
49
+ - test/data/chunky.flf
50
+ - test/data/chunky.txt
51
+ - test/data/double_metaphone.csv
52
+ - test/data/metaphone.txt
53
+ - test/data/metaphone_buggy.txt
54
+ - rakefile.rb
55
+ test_files: []
56
+
57
+ rdoc_options: []
58
+
59
+ extra_rdoc_files: []
60
+
61
+ executables: []
62
+
63
+ extensions: []
64
+
65
+ requirements: []
66
+
67
+ dependencies: []
68
+