text 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +28 -0
- data/Rakefile +48 -0
- data/lib/text.rb +7 -0
- data/lib/text/double_metaphone.rb +356 -0
- data/lib/text/figlet.rb +17 -0
- data/lib/text/figlet/font.rb +117 -0
- data/lib/text/figlet/smusher.rb +64 -0
- data/lib/text/figlet/typesetter.rb +68 -0
- data/lib/text/levenshtein.rb +65 -0
- data/lib/text/metaphone.rb +97 -0
- data/lib/text/porter_stemming.rb +171 -0
- data/lib/text/soundex.rb +61 -0
- data/lib/text/version.rb +9 -0
- data/test/data/big.flf +2204 -0
- data/test/data/big.txt +8 -0
- data/test/data/chunky.flf +512 -0
- data/test/data/chunky.txt +5 -0
- data/test/data/double_metaphone.csv +1218 -0
- data/test/data/metaphone.txt +51 -0
- data/test/data/metaphone_buggy.txt +52 -0
- data/test/data/porter_stemming_input.txt +23531 -0
- data/test/data/porter_stemming_output.txt +23531 -0
- data/test/preamble.rb +10 -0
- data/test/test_double_metaphone.rb +23 -0
- data/test/test_figlet.rb +17 -0
- data/test/test_levenshtein.rb +80 -0
- data/test/test_metaphone.rb +39 -0
- data/test/test_porter_stemming.rb +16 -0
- data/test/test_soundex.rb +27 -0
- metadata +85 -0
data/test/test_double_metaphone.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Choose a CSV reader for the fixture file: FasterCSV when the gem can be
# loaded, otherwise the standard library's CSV. METHOD lists the receiver, the
# method to call on it, and the arguments that follow the file path. It is
# frozen so that no test can consume or alter it by accident.
begin
  require 'rubygems' rescue nil
  require 'fastercsv'
  METHOD = [ FasterCSV, :foreach, { :col_sep => ', ' } ].freeze
rescue LoadError
  require 'csv'
  METHOD = [ CSV, :open, 'r', ', ' ].freeze
end

# Checks Text::Metaphone.double_metaphone against the rows of
# data/double_metaphone.csv.
class DoubleMetaphoneTest < Test::Unit::TestCase

  # Column 0 of each row is the input; columns 1 and 2 are the expected primary
  # and secondary codes. When no secondary code is returned, the primary code
  # is compared against column 2 instead.
  def test_cases
    # Destructure instead of shifting: METHOD is a constant and has to stay
    # intact if this method is ever run more than once in the same process.
    reader, reader_method, *extra_args = METHOD
    reader.send(reader_method, File.rel('data', 'double_metaphone.csv'), *extra_args) do |row|
      primary, secondary = Text::Metaphone.double_metaphone(row[0])

      assert_equal row[1], primary
      assert_equal row[2], secondary.nil? ? primary : secondary
    end
  end

end
|
data/test/test_figlet.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Renders short strings through Text::Figlet and compares the result with the
# stored fixture files.
class FigletTest < Test::Unit::TestCase

  # "Hello World" set in big.flf with the typesetter's default options.
  def test_hello_world
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'big.flf'))
    )
    expected = File.read(File.rel('data', 'big.txt'))
    assert_equal expected, typesetter['Hello World']
  end

  # "Chunky Bacon" set in chunky.flf with the :smush option switched off.
  def test_no_smushing
    typesetter = Text::Figlet::Typesetter.new(
      Text::Figlet::Font.new(File.rel('data', 'chunky.flf')),
      :smush => false
    )
    expected = File.read(File.rel('data', 'chunky.txt'))
    assert_equal expected, typesetter['Chunky Bacon']
  end

end
|
data/test/test_levenshtein.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Exercises the `distance` method (presumably provided by the included
# Text::Levenshtein module) over ASCII, empty, UTF-8 and ISO-8859-1 inputs.
class LevenshteinTest < Test::Unit::TestCase

  include Text::Levenshtein

  # Each entry is [first string, second string, expected distance], grouped by
  # the kind of input being exercised.
  TEST_CASES = {
    :easy => [
      ['test', 'test', 0],
      ['test', 'tent', 1],
      ['gumbo', 'gambol', 2],
      ['kitten', 'sitting', 3]
    ],
    :empty => [
      ['foo', '', 3],
      ['', '', 0],
      ['a', '', 1]
    ],
    :utf8 => [
      ["f\303\266o", 'foo', 1],
      ["fran\303\247ais", 'francais', 1],
      ["fran\303\247ais", "fran\303\246ais", 1],
      [
        "\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<<
        "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<<
        "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",
        2
      ] # Japanese
    ],
    :iso_8859_1 => [
      ["f\366o", 'foo', 1],
      ["fran\347ais", 'francais', 1],
      ["fran\347ais", "fran\346ais", 1]
    ],
    :edge => [
      ['a', 'a', 0],
      ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26]
    ]
  }

  # Asserts the expected distance for every triple in the named group, in both
  # argument orders.
  def assert_set(name)
    TEST_CASES[name].each do |s, t, x|
      assert_equal x, distance(s, t)
      assert_equal x, distance(t, s)
    end
  end

  # Runs the block with $KCODE set to k and puts the previous value back
  # afterwards. The restore lives in an ensure clause so that a failing
  # assertion inside the block cannot leak the temporary setting into the
  # tests that run after it.
  # NOTE(review): $KCODE is only honoured by Ruby 1.8; later interpreters
  # reportedly ignore the assignment -- confirm before relying on it.
  def with_kcode(k)
    old_kcode = $KCODE
    $KCODE = k
    yield
  ensure
    $KCODE = old_kcode
  end

  def test_easy_cases
    assert_set(:easy)
  end

  def test_empty_cases
    assert_set(:empty)
  end

  def test_edge_cases
    assert_set(:edge)
  end

  # Multibyte strings are compared with $KCODE set to 'U'.
  def test_utf8_cases
    with_kcode('U') do
      assert_set(:utf8)
    end
  end

  # Single-byte strings are compared with $KCODE set to 'NONE'.
  def test_iso_8859_1_cases
    with_kcode('NONE') do
      assert_set(:iso_8859_1)
    end
  end

end
|
data/test/test_metaphone.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Metaphone.metaphone, with and without the :buggy option, against
# fixture files and a few hand-written cases.
class MetaphoneTest < Test::Unit::TestCase

  # Every input/expected pair loaded from data/metaphone.txt.
  def test_cases
    fixture = YAML.load(File.read(File.rel('data', 'metaphone.txt')))
    fixture.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word)
    end
  end

  # Every input/expected pair loaded from data/metaphone_buggy.txt, run with
  # :buggy=>true.
  def test_cases_for_buggy_implementation
    fixture = YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt')))
    fixture.each do |word, code|
      assert_equal code, Text::Metaphone.metaphone(word, :buggy=>true)
    end
  end

  # Punctuation scattered through a word must give the same code as the
  # clean word.
  def test_junk
    clean = Text::Metaphone.metaphone('foobar')
    noisy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&')
    assert_equal clean, noisy

    clean = Text::Metaphone.metaphone('foobar', :buggy=>true)
    noisy = Text::Metaphone.metaphone('%^@#$^f%^&o%^o@b#a@#r%^^&', :buggy=>true)
    assert_equal clean, noisy
  end

  # Upper-case input must give the same code as lower-case input.
  def test_caps
    lower = Text::Metaphone.metaphone('foobar')
    upper = Text::Metaphone.metaphone('FOOBAR')
    assert_equal lower, upper

    lower = Text::Metaphone.metaphone('foobar', :buggy=>true)
    upper = Text::Metaphone.metaphone('FOOBAR', :buggy=>true)
    assert_equal lower, upper
  end

  # Space-separated words produce space-separated codes.
  def test_string
    assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz')
    assert_equal 'N WT', Text::Metaphone.metaphone('gnu what')
    assert_equal 'F BR BS', Text::Metaphone.metaphone('foo bar baz', :buggy=>true)
    assert_equal 'N WT', Text::Metaphone.metaphone('gnu what', :buggy=>true)
  end

end
|
data/test/test_porter_stemming.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
# Checks Text::PorterStemming.stem against paired input and output word lists.
class PorterStemmingTest < Test::Unit::TestCase

  # Reads the file at the given path segments (resolved through File.rel) and
  # returns its contents split on newlines.
  def slurp(*path)
    contents = File.read(File.rel(*path))
    contents.split(/\n/)
  end

  # Line N of the input file must stem to line N of the output file.
  def test_cases
    words = slurp('data', 'porter_stemming_input.txt')
    stems = slurp('data', 'porter_stemming_output.txt')
    words.zip(stems).each do |word, expected_output|
      assert_equal expected_output, Text::PorterStemming.stem(word)
    end
  end

end
|
data/test/test_soundex.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Checks Text::Soundex.soundex against a table of names and expected codes.
class SoundexTest < Test::Unit::TestCase

  # The table is an inline YAML mapping of name => code; names are listed in
  # pairs that share a code.
  def test_cases
    table = YAML.load(%{

Euler: E460
Ellery: E460
Gauss: G200
Ghosh: G200
Hilbert: H416
Heilbronn: H416
Knuth: K530
Kant: K530
Lloyd: L300
Ladd: L300
Lukasiewicz: L222
Lissajous: L222

})
    table.each do |name, code|
      assert_equal code, Text::Soundex.soundex(name)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: text
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.13
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Battley
|
8
|
+
- Michael Neumann
|
9
|
+
- Tim Fletcher
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
|
14
|
+
date: 2009-10-11 00:00:00 +01:00
|
15
|
+
default_executable:
|
16
|
+
dependencies: []
|
17
|
+
|
18
|
+
description: "A collection of text algorithms: Levenshtein, Soundex, Metaphone, Double Metaphone, Figlet, Porter Stemming"
|
19
|
+
email: pbattley@gmail.com
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- README.rdoc
|
26
|
+
files:
|
27
|
+
- lib/text.rb
|
28
|
+
- lib/text/figlet/typesetter.rb
|
29
|
+
- lib/text/figlet/smusher.rb
|
30
|
+
- lib/text/figlet/font.rb
|
31
|
+
- lib/text/porter_stemming.rb
|
32
|
+
- lib/text/double_metaphone.rb
|
33
|
+
- lib/text/soundex.rb
|
34
|
+
- lib/text/figlet.rb
|
35
|
+
- lib/text/metaphone.rb
|
36
|
+
- lib/text/version.rb
|
37
|
+
- lib/text/levenshtein.rb
|
38
|
+
- test/preamble.rb
|
39
|
+
- test/test_double_metaphone.rb
|
40
|
+
- test/data/chunky.flf
|
41
|
+
- test/data/porter_stemming_input.txt
|
42
|
+
- test/data/metaphone.txt
|
43
|
+
- test/data/double_metaphone.csv
|
44
|
+
- test/data/big.flf
|
45
|
+
- test/data/chunky.txt
|
46
|
+
- test/data/porter_stemming_output.txt
|
47
|
+
- test/data/big.txt
|
48
|
+
- test/data/metaphone_buggy.txt
|
49
|
+
- test/test_levenshtein.rb
|
50
|
+
- test/test_soundex.rb
|
51
|
+
- test/test_porter_stemming.rb
|
52
|
+
- test/test_metaphone.rb
|
53
|
+
- test/test_figlet.rb
|
54
|
+
- README.rdoc
|
55
|
+
- Rakefile
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: http://github.com/threedaymonk/text
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: "0"
|
76
|
+
version:
|
77
|
+
requirements: []
|
78
|
+
|
79
|
+
rubyforge_project: text
|
80
|
+
rubygems_version: 1.3.5
|
81
|
+
signing_key:
|
82
|
+
specification_version: 3
|
83
|
+
summary: A collection of text algorithms
|
84
|
+
test_files: []
|
85
|
+
|