text 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +28 -0
- data/Rakefile +48 -0
- data/lib/text.rb +7 -0
- data/lib/text/double_metaphone.rb +356 -0
- data/lib/text/figlet.rb +17 -0
- data/lib/text/figlet/font.rb +117 -0
- data/lib/text/figlet/smusher.rb +64 -0
- data/lib/text/figlet/typesetter.rb +68 -0
- data/lib/text/levenshtein.rb +65 -0
- data/lib/text/metaphone.rb +97 -0
- data/lib/text/porter_stemming.rb +171 -0
- data/lib/text/soundex.rb +61 -0
- data/lib/text/version.rb +9 -0
- data/test/data/big.flf +2204 -0
- data/test/data/big.txt +8 -0
- data/test/data/chunky.flf +512 -0
- data/test/data/chunky.txt +5 -0
- data/test/data/double_metaphone.csv +1218 -0
- data/test/data/metaphone.txt +51 -0
- data/test/data/metaphone_buggy.txt +52 -0
- data/test/data/porter_stemming_input.txt +23531 -0
- data/test/data/porter_stemming_output.txt +23531 -0
- data/test/preamble.rb +10 -0
- data/test/test_double_metaphone.rb +23 -0
- data/test/test_figlet.rb +17 -0
- data/test/test_levenshtein.rb +80 -0
- data/test/test_metaphone.rb +39 -0
- data/test/test_porter_stemming.rb +16 -0
- data/test/test_soundex.rb +27 -0
- metadata +85 -0
data/test/test_double_metaphone.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Choose a CSV reader for the fixture file. METHOD holds, in order: the
# receiver, the method to send to it, and any arguments that follow the path.
begin
  begin
    require 'rubygems'
  rescue LoadError
    # RubyGems is optional. A `rescue nil` modifier only traps StandardError,
    # which LoadError is not, so the failure has to be rescued explicitly;
    # otherwise it would skip the FasterCSV attempt below.
  end
  require 'fastercsv'
  METHOD = [ FasterCSV, :foreach, { :col_sep => ', ' } ]
rescue LoadError
  # FasterCSV could not be loaded: fall back to the csv library.
  require 'csv'
  METHOD = [ CSV, :open, 'r', ', ' ]
end
|
11
|
+
|
12
|
+
# Runs Text::Metaphone.double_metaphone over every row of
# data/double_metaphone.csv and compares the result with the codes stored in
# that row.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # Each row supplies the input word (column 0), the expected primary code
  # (column 1) and the expected secondary code (column 2).
  def test_cases
    # Destructure METHOD instead of shift-ing it: shifting empties the shared
    # constant, so this test could only ever succeed on its first run.
    reader, reader_method, *reader_args = METHOD
    fixture = File.rel('data', 'double_metaphone.csv')

    reader.send(reader_method, fixture, *reader_args) do |row|
      primary, secondary = Text::Metaphone.double_metaphone(row[0])

      assert_equal row[1], primary
      # A nil secondary is compared as though it were the primary code.
      assert_equal row[2], secondary.nil? ? primary : secondary
    end
  end

end
|
data/test/test_figlet.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Compares Figlet typesetter output with reference renderings kept in the
# data directory.
class FigletTest < Test::Unit::TestCase

  # 'Hello World' in the big.flf font, typesetter built with no options.
  def test_hello_world
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    )
    expected = File.read(File.rel('data', 'big.txt'))
    assert_equal expected, typesetter['Hello World']
  end

  # 'Chunky Bacon' in the chunky.flf font, typesetter built with :smush => false.
  def test_no_smushing
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'chunky.flf')),
      :smush => false
    )
    expected = File.read(File.rel('data', 'chunky.txt'))
    assert_equal expected, typesetter['Chunky Bacon']
  end

end
|
data/test/test_levenshtein.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Checks the distance method mixed in from Text::Levenshtein against tables
# of known string pairs, including multi-byte input under different $KCODE
# settings.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Fixture sets keyed by name. Every entry is [string, string, expected
  # distance]. Non-ASCII strings are spelled as octal byte escapes.
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts the expected distance for every entry of the named fixture set,
  # with the two strings passed in both orders.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Sets $KCODE to k while the block runs, then puts the previous value back.
  # The restore sits in an ensure clause so that a block which raises does not
  # leave the changed $KCODE in place for whatever runs next.
  def with_kcode(k)
    old_kcode = $KCODE
    $KCODE = k
    begin
      yield
    ensure
      $KCODE = old_kcode
    end
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  # The :utf8 set is checked with $KCODE set to 'U'.
  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  # The :iso_8859_1 set is checked with $KCODE set to 'NONE'.
  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
|
data/test/test_metaphone.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Exercises Text::Metaphone.metaphone both with and without the
# :buggy => true option.
class MetaphoneTest < Test::Unit::TestCase

  # Every input/expected pair loaded from data/metaphone.txt.
  def test_cases
    pairs_from('metaphone.txt').each do |input, expected_output|
      assert_equal expected_output, encode(input, false)
    end
  end

  # Every input/expected pair loaded from data/metaphone_buggy.txt, encoded
  # with :buggy => true.
  def test_cases_for_buggy_implementation
    pairs_from('metaphone_buggy.txt').each do |input, expected_output|
      assert_equal expected_output, encode(input, true)
    end
  end

  # A word interleaved with punctuation must encode like the bare word.
  def test_junk
    [false, true].each do |buggy|
      assert_equal encode('foobar', buggy),
                   encode('%^@#$^f%^&o%^o@b#a@#r%^^&', buggy)
    end
  end

  # Upper-case input must encode like its lower-case form.
  def test_caps
    [false, true].each do |buggy|
      assert_equal encode('foobar', buggy), encode('FOOBAR', buggy)
    end
  end

  # Input made of several space-separated words.
  def test_string
    [false, true].each do |buggy|
      assert_equal 'F BR BS', encode('foo bar baz', buggy)
      assert_equal 'N WT', encode('gnu what', buggy)
    end
  end

  private

  # Parses the named file from the data directory as YAML.
  def pairs_from(file_name)
    YAML.load(File.read(File.rel('data', file_name)))
  end

  # Calls Text::Metaphone.metaphone, passing :buggy => true only when asked,
  # so the plain call keeps its single-argument form.
  def encode(text, buggy)
    if buggy
      Text::Metaphone.metaphone(text, :buggy=>true)
    else
      Text::Metaphone.metaphone(text)
    end
  end

end
|
data/test/test_porter_stemming.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Stems every word of the input fixture and compares it with the line at the
# same position in the output fixture.
class PorterStemmingTest < Test::Unit::TestCase

  # Reads the file addressed by the given path segments (resolved through
  # File.rel) and returns its contents split on newlines.
  def slurp(*path)
    contents = File.read(File.rel(*path))
    contents.split(/\n/)
  end

  def test_cases
    words = slurp('data', 'porter_stemming_input.txt')
    stems = slurp('data', 'porter_stemming_output.txt')

    words.zip(stems).each do |word, expected_output|
      assert_equal expected_output, Text::PorterStemming.stem(word)
    end
  end

end
|
data/test/test_soundex.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Soundex.soundex against a table of names and their expected
# codes, written inline as a YAML mapping.
class SoundexTest < Test::Unit::TestCase

  def test_cases
    expectations = YAML.load(%{

      Euler: E460
      Ellery: E460
      Gauss: G200
      Ghosh: G200
      Hilbert: H416
      Heilbronn: H416
      Knuth: K530
      Kant: K530
      Lloyd: L300
      Ladd: L300
      Lukasiewicz: L222
      Lissajous: L222

    })

    expectations.each do |name, code|
      assert_equal code, Text::Soundex.soundex(name)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.13
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Battley
|
8
|
+
- Michael Neumann
|
9
|
+
- Tim Fletcher
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
|
14
|
+
date: 2009-10-11 00:00:00 +01:00
|
15
|
+
default_executable:
|
16
|
+
dependencies: []
|
17
|
+
|
18
|
+
description: "A collection of text algorithms: Levenshtein, Soundex, Metaphone, Double Metaphone, Figlet, Porter Stemming"
|
19
|
+
email: pbattley@gmail.com
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- README.rdoc
|
26
|
+
files:
|
27
|
+
- lib/text.rb
|
28
|
+
- lib/text/figlet/typesetter.rb
|
29
|
+
- lib/text/figlet/smusher.rb
|
30
|
+
- lib/text/figlet/font.rb
|
31
|
+
- lib/text/porter_stemming.rb
|
32
|
+
- lib/text/double_metaphone.rb
|
33
|
+
- lib/text/soundex.rb
|
34
|
+
- lib/text/figlet.rb
|
35
|
+
- lib/text/metaphone.rb
|
36
|
+
- lib/text/version.rb
|
37
|
+
- lib/text/levenshtein.rb
|
38
|
+
- test/preamble.rb
|
39
|
+
- test/test_double_metaphone.rb
|
40
|
+
- test/data/chunky.flf
|
41
|
+
- test/data/porter_stemming_input.txt
|
42
|
+
- test/data/metaphone.txt
|
43
|
+
- test/data/double_metaphone.csv
|
44
|
+
- test/data/big.flf
|
45
|
+
- test/data/chunky.txt
|
46
|
+
- test/data/porter_stemming_output.txt
|
47
|
+
- test/data/big.txt
|
48
|
+
- test/data/metaphone_buggy.txt
|
49
|
+
- test/test_levenshtein.rb
|
50
|
+
- test/test_soundex.rb
|
51
|
+
- test/test_porter_stemming.rb
|
52
|
+
- test/test_metaphone.rb
|
53
|
+
- test/test_figlet.rb
|
54
|
+
- README.rdoc
|
55
|
+
- Rakefile
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: http://github.com/threedaymonk/text
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: "0"
|
76
|
+
version:
|
77
|
+
requirements: []
|
78
|
+
|
79
|
+
rubyforge_project: text
|
80
|
+
rubygems_version: 1.3.5
|
81
|
+
signing_key:
|
82
|
+
specification_version: 3
|
83
|
+
summary: A collection of text algorithms
|
84
|
+
test_files: []
|
85
|
+
|