text 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ require 'test/unit'
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+ require 'text'
5
+
6
# Test-suite extension of File: adds a helper that builds paths starting
# from the directory of the file in which this method is defined.
class File
  class << self
    # Joins the given +path+ segments onto the directory containing this
    # source file and returns the resulting path string.
    def rel(*path)
      here = dirname(__FILE__)
      join(here, *path)
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Choose the CSV reader used by the Double Metaphone test.
#
# METHOD is laid out as [receiver, method_name, *extra_args]; the consumer
# calls receiver.method_name(path, *extra_args) with a block.
begin
  # RubyGems is optional. LoadError is not a StandardError, so a trailing
  # `rescue nil` modifier would not catch it; rescue it explicitly so that a
  # missing RubyGems does not skip the FasterCSV attempt below.
  begin
    require 'rubygems'
  rescue LoadError
    nil
  end
  require 'fastercsv'
  METHOD = [ FasterCSV, :foreach, { :col_sep => ', ' } ]
rescue LoadError
  # FasterCSV could not be loaded: fall back to the csv library.
  require 'csv'
  METHOD = [ CSV, :open, 'r', ', ' ]
end
11
+
12
# Runs Text::Metaphone.double_metaphone against every row of the
# data/double_metaphone.csv fixture.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # For each row, column 0 is the input, column 1 is compared with the
  # primary code and column 2 with the secondary code. When no secondary
  # code is returned, the primary code is compared with column 2 instead.
  def test_cases
    # Destructure rather than calling METHOD.shift twice: shifting emptied
    # the shared constant, so the reader could only be resolved once per
    # process.
    reader, reader_method, *reader_args = METHOD
    fixture = File.rel('data', 'double_metaphone.csv')

    reader.send(reader_method, fixture, *reader_args) do |row|
      primary, secondary = Text::Metaphone.double_metaphone(row[0])

      assert_equal row[1], primary
      assert_equal row[2], secondary || primary
    end
  end

end
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Renders sample strings with Text::Figlet and compares the result with the
# expected output files stored under data/.
class FigletTest < Test::Unit::TestCase

  # 'Hello World' typeset with big.flf and default options must equal big.txt.
  def test_hello_world
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    )
    expected = File.read(File.rel('data', 'big.txt'))
    assert_equal(expected, typesetter['Hello World'])
  end

  # 'Chunky Bacon' typeset with chunky.flf and :smush => false must equal
  # chunky.txt.
  def test_no_smushing
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'chunky.flf')),
      :smush => false
    )
    expected = File.read(File.rel('data', 'chunky.txt'))
    assert_equal(expected, typesetter['Chunky Bacon'])
  end

end
@@ -0,0 +1,80 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Checks edit distances for several groups of string pairs. The +distance+
# method called below is expected to come from the Text::Levenshtein mix-in.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Each entry is [string, other_string, expected_distance], grouped by the
  # name passed to assert_set. The :utf8 and :iso_8859_1 groups spell their
  # non-ASCII characters as octal byte escapes and are run under a matching
  # $KCODE by the tests further down.
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts the expected distance for every pair in the named group, in both
  # argument orders.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Runs the given block with $KCODE set to +k+ and puts the previous value
  # back afterwards. The restore sits in +ensure+ so that a failing
  # assertion (which raises) cannot leak the temporary $KCODE into the
  # tests that run later.
  def with_kcode(k)
    old_kcode = $KCODE
    $KCODE = k
    yield
  ensure
    $KCODE = old_kcode
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
@@ -0,0 +1,39 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
# Exercises Text::Metaphone.metaphone, both in its default mode and with
# :buggy=>true, using fixture files and a few inline examples.
class MetaphoneTest < Test::Unit::TestCase

  # Every input/expected pair loaded from data/metaphone.txt must match.
  def test_cases
    fixtures = YAML.load(File.read(File.rel('data', 'metaphone.txt')))
    fixtures.each do |word, code|
      assert_equal(code, Text::Metaphone.metaphone(word))
    end
  end

  # Same check against data/metaphone_buggy.txt with :buggy=>true.
  def test_cases_for_buggy_implementation
    fixtures = YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt')))
    fixtures.each do |word, code|
      assert_equal(code, Text::Metaphone.metaphone(word, :buggy=>true))
    end
  end

  # Punctuation scattered through 'foobar' must not change the result.
  def test_junk
    plain = Text::Metaphone.metaphone('foobar')
    noisy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&')
    assert_equal(plain, noisy)

    plain_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    noisy_buggy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true)
    assert_equal(plain_buggy, noisy_buggy)
  end

  # Upper-case input must give the same result as lower-case input.
  def test_caps
    lower = Text::Metaphone.metaphone('foobar')
    upper = Text::Metaphone.metaphone('FOOBAR')
    assert_equal(lower, upper)

    lower_buggy = Text::Metaphone.metaphone('foobar', :buggy=>true)
    upper_buggy = Text::Metaphone.metaphone('FOOBAR', :buggy=>true)
    assert_equal(lower_buggy, upper_buggy)
  end

  # Inputs made of several space-separated words.
  def test_string
    assert_equal('F BR BS', Text::Metaphone.metaphone('foo bar baz'))
    assert_equal('N WT', Text::Metaphone.metaphone('gnu what'))
    assert_equal('F BR BS', Text::Metaphone.metaphone('foo bar baz', :buggy=>true))
    assert_equal('N WT', Text::Metaphone.metaphone('gnu what', :buggy=>true))
  end

end
@@ -0,0 +1,16 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
# Compares Text::PorterStemming.stem with the expected stems, pairing the
# lines of the input fixture with the lines of the output fixture by position.
class PorterStemmingTest < Test::Unit::TestCase

  # Reads the file located via File.rel(*path) and returns its contents
  # split on newlines.
  def slurp(*path)
    contents = File.read(File.rel(*path))
    contents.split(/\n/)
  end

  # Line i of the input file must stem to line i of the output file.
  def test_cases
    words = slurp('data', 'porter_stemming_input.txt')
    stems = slurp('data', 'porter_stemming_output.txt')
    words.each_with_index do |word, index|
      assert_equal(stems[index], Text::PorterStemming.stem(word))
    end
  end

end
@@ -0,0 +1,27 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+ require 'yaml'
3
+
4
# Checks Text::Soundex.soundex against a table of names and expected codes
# embedded below as a YAML mapping.
class SoundexTest < Test::Unit::TestCase

  # Every name in the table must produce the code listed next to it.
  def test_cases
    expectations = YAML.load(%{

      Euler: E460
      Ellery: E460
      Gauss: G200
      Ghosh: G200
      Hilbert: H416
      Heilbronn: H416
      Knuth: K530
      Kant: K530
      Lloyd: L300
      Ladd: L300
      Lukasiewicz: L222
      Lissajous: L222

    })

    expectations.each do |name, code|
      assert_equal(code, Text::Soundex.soundex(name))
    end
  end

end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: text
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.13
5
+ platform: ruby
6
+ authors:
7
+ - Paul Battley
8
+ - Michael Neumann
9
+ - Tim Fletcher
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+
14
+ date: 2009-10-11 00:00:00 +01:00
15
+ default_executable:
16
+ dependencies: []
17
+
18
+ description: "A collection of text algorithms: Levenshtein, Soundex, Metaphone, Double Metaphone, Figlet, Porter Stemming"
19
+ email: pbattley@gmail.com
20
+ executables: []
21
+
22
+ extensions: []
23
+
24
+ extra_rdoc_files:
25
+ - README.rdoc
26
+ files:
27
+ - lib/text.rb
28
+ - lib/text/figlet/typesetter.rb
29
+ - lib/text/figlet/smusher.rb
30
+ - lib/text/figlet/font.rb
31
+ - lib/text/porter_stemming.rb
32
+ - lib/text/double_metaphone.rb
33
+ - lib/text/soundex.rb
34
+ - lib/text/figlet.rb
35
+ - lib/text/metaphone.rb
36
+ - lib/text/version.rb
37
+ - lib/text/levenshtein.rb
38
+ - test/preamble.rb
39
+ - test/test_double_metaphone.rb
40
+ - test/data/chunky.flf
41
+ - test/data/porter_stemming_input.txt
42
+ - test/data/metaphone.txt
43
+ - test/data/double_metaphone.csv
44
+ - test/data/big.flf
45
+ - test/data/chunky.txt
46
+ - test/data/porter_stemming_output.txt
47
+ - test/data/big.txt
48
+ - test/data/metaphone_buggy.txt
49
+ - test/test_levenshtein.rb
50
+ - test/test_soundex.rb
51
+ - test/test_porter_stemming.rb
52
+ - test/test_metaphone.rb
53
+ - test/test_figlet.rb
54
+ - README.rdoc
55
+ - Rakefile
56
+ has_rdoc: true
57
+ homepage: http://github.com/threedaymonk/text
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options: []
62
+
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: "0"
76
+ version:
77
+ requirements: []
78
+
79
+ rubyforge_project: text
80
+ rubygems_version: 1.3.5
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: A collection of text algorithms
84
+ test_files: []
85
+