text 0.1.14 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text.rb +1 -24
- data/lib/text/double_metaphone.rb +5 -5
- data/lib/text/levenshtein.rb +6 -4
- data/lib/text/metaphone.rb +10 -10
- data/lib/text/porter_stemming.rb +12 -12
- data/lib/text/soundex.rb +61 -61
- data/lib/text/util.rb +19 -0
- data/lib/text/version.rb +2 -2
- data/test/preamble.rb +1 -2
- data/test/test_double_metaphone.rb +2 -1
- data/test/test_levenshtein.rb +2 -1
- data/test/test_metaphone.rb +2 -1
- data/test/test_porter_stemming.rb +3 -2
- data/test/test_soundex.rb +2 -1
- metadata +17 -21
- data/lib/text/figlet.rb +0 -17
- data/lib/text/figlet/font.rb +0 -119
- data/lib/text/figlet/smusher.rb +0 -65
- data/lib/text/figlet/typesetter.rb +0 -69
- data/test/test_figlet.rb +0 -18
data/lib/text.rb
CHANGED
@@ -1,30 +1,7 @@
|
|
1
|
+
require 'text/util'
|
1
2
|
require 'text/double_metaphone'
|
2
3
|
require 'text/levenshtein'
|
3
4
|
require 'text/metaphone'
|
4
5
|
require 'text/porter_stemming'
|
5
6
|
require 'text/soundex'
|
6
7
|
require 'text/version'
|
7
|
-
|
8
|
-
module Text
|
9
|
-
def self.is_19?
|
10
|
-
RUBY_VERSION[0, 3] == "1.9"
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.encoding_of(string)
|
14
|
-
if is_19?
|
15
|
-
string.encoding.to_s
|
16
|
-
else
|
17
|
-
$KCODE
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.raise_19_incompat
|
22
|
-
if is_19?
|
23
|
-
raise "Text::Figlet is not compatible with Ruby 1.9 at this time"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
if !Text.is_19?
|
29
|
-
require 'text/figlet'
|
30
|
-
end
|
data/lib/text/double_metaphone.rb
CHANGED
@@ -56,7 +56,7 @@ module Metaphone
|
|
56
56
|
end
|
57
57
|
when 'B'
|
58
58
|
return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
|
59
|
-
when 'Ç'
|
59
|
+
when 'Ç'
|
60
60
|
return :S, :S, 1
|
61
61
|
when 'C'
|
62
62
|
if pos > 1 &&
|
@@ -114,7 +114,7 @@ module Metaphone
|
|
114
114
|
else
|
115
115
|
if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
|
116
116
|
return :K, :K, 3
|
117
|
-
else
|
117
|
+
else
|
118
118
|
return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
|
119
119
|
end
|
120
120
|
end
|
@@ -202,7 +202,7 @@ module Metaphone
|
|
202
202
|
end
|
203
203
|
else
|
204
204
|
current = ('J' == str[pos + 1, 1] ? 2 : 1)
|
205
|
-
|
205
|
+
|
206
206
|
if 0 == pos && 'JOSE' != str[pos, 4]
|
207
207
|
return :J, :A, current
|
208
208
|
else
|
@@ -243,7 +243,7 @@ module Metaphone
|
|
243
243
|
end
|
244
244
|
when 'N'
|
245
245
|
return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
|
246
|
-
when 'Ñ'
|
246
|
+
when 'Ñ'
|
247
247
|
return :N, :N, 1
|
248
248
|
when 'P'
|
249
249
|
if 'H' == str[pos + 1, 1]
|
@@ -255,7 +255,7 @@ module Metaphone
|
|
255
255
|
return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
|
256
256
|
when 'R'
|
257
257
|
current = ('R' == str[pos + 1, 1] ? 2 : 1)
|
258
|
-
|
258
|
+
|
259
259
|
if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
|
260
260
|
return nil, :R, current
|
261
261
|
else
|
data/lib/text/levenshtein.rb
CHANGED
@@ -11,6 +11,8 @@
|
|
11
11
|
# Author: Paul Battley (pbattley@gmail.com)
|
12
12
|
#
|
13
13
|
|
14
|
+
require "text/util"
|
15
|
+
|
14
16
|
module Text # :nodoc:
|
15
17
|
module Levenshtein
|
16
18
|
|
@@ -19,10 +21,10 @@ module Levenshtein
|
|
19
21
|
# as ISO-8859-*.
|
20
22
|
#
|
21
23
|
# The strings will be treated as UTF-8 if $KCODE is set appropriately (i.e. 'u').
|
22
|
-
# Otherwise, the comparison will be performed byte-by-byte. There is no specific support
|
24
|
+
# Otherwise, the comparison will be performed byte-by-byte. There is no specific support
|
23
25
|
# for Shift-JIS or EUC strings.
|
24
26
|
#
|
25
|
-
# When using Unicode text, be aware that this algorithm does not perform normalisation.
|
27
|
+
# When using Unicode text, be aware that this algorithm does not perform normalisation.
|
26
28
|
# If there is a possibility of different normalised forms being used, normalisation
|
27
29
|
# should be performed beforehand.
|
28
30
|
#
|
@@ -41,7 +43,7 @@ module Levenshtein
|
|
41
43
|
m = t.length
|
42
44
|
return m if (0 == n)
|
43
45
|
return n if (0 == m)
|
44
|
-
|
46
|
+
|
45
47
|
d = (0..m).to_a
|
46
48
|
x = nil
|
47
49
|
|
@@ -65,4 +67,4 @@ module Levenshtein
|
|
65
67
|
|
66
68
|
extend self
|
67
69
|
end
|
68
|
-
end
|
70
|
+
end
|
data/lib/text/metaphone.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
#
|
1
|
+
#
|
2
2
|
# An implementation of the Metaphone phonetic coding system in Ruby.
|
3
|
-
#
|
3
|
+
#
|
4
4
|
# Metaphone encodes names into a phonetic form such that similar-sounding names
|
5
5
|
# have the same or similar Metaphone encodings.
|
6
|
-
#
|
6
|
+
#
|
7
7
|
# The original system was described by Lawrence Philips in Computer Language
|
8
8
|
# Vol. 7 No. 12, December 1990, pp 39-43.
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# As there are multiple implementations of Metaphone, each with their own
|
11
11
|
# quirks, I have based this on my interpretation of the algorithm specification.
|
12
12
|
# Even LP's original BASIC implementation appears to contain bugs (specifically
|
13
13
|
# with the handling of CC and MB), when compared to his explanation of the
|
14
14
|
# algorithm.
|
15
|
-
#
|
15
|
+
#
|
16
16
|
# I have also compared this implementation with that found in PHP's standard
|
17
17
|
# library, which appears to mimic the behaviour of LP's original BASIC
|
18
18
|
# implementation. For compatibility, these rules can also be used by passing
|
19
19
|
# :buggy=>true to the methods.
|
20
|
-
#
|
20
|
+
#
|
21
21
|
# Author: Paul Battley (pbattley@gmail.com)
|
22
22
|
#
|
23
23
|
|
@@ -25,10 +25,10 @@ module Text # :nodoc:
|
|
25
25
|
module Metaphone
|
26
26
|
|
27
27
|
module Rules # :nodoc:all
|
28
|
-
|
28
|
+
|
29
29
|
# Metaphone rules. These are simply applied in order.
|
30
30
|
#
|
31
|
-
STANDARD = [
|
31
|
+
STANDARD = [
|
32
32
|
# Regexp, replacement
|
33
33
|
[ /([bcdfhjklmnpqrstvwxyz])\1+/,
|
34
34
|
'\1' ], # Remove doubled consonants except g.
|
@@ -61,7 +61,7 @@ module Metaphone
|
|
61
61
|
[ /v/, 'F' ],
|
62
62
|
[ /(?!^)[aeiou]+/, '' ],
|
63
63
|
]
|
64
|
-
|
64
|
+
|
65
65
|
# The rules for the 'buggy' alternate implementation used by PHP etc.
|
66
66
|
#
|
67
67
|
BUGGY = STANDARD.dup
|
@@ -79,7 +79,7 @@ module Metaphone
|
|
79
79
|
def metaphone(str, options={})
|
80
80
|
return str.strip.split(/\s+/).map { |w| metaphone_word(w, options) }.join(' ')
|
81
81
|
end
|
82
|
-
|
82
|
+
|
83
83
|
private
|
84
84
|
|
85
85
|
def metaphone_word(w, options={})
|
data/lib/text/porter_stemming.rb
CHANGED
@@ -20,7 +20,7 @@ module PorterStemming
|
|
20
20
|
'ousness' => 'ous', 'aliti' => 'al',
|
21
21
|
'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
|
22
22
|
}
|
23
|
-
|
23
|
+
|
24
24
|
STEP_3_LIST = {
|
25
25
|
'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
|
26
26
|
'ical' => 'ic', 'ful' => '', 'ness' => ''
|
@@ -54,7 +54,7 @@ module PorterStemming
|
|
54
54
|
ance |
|
55
55
|
ence |
|
56
56
|
er |
|
57
|
-
ic |
|
57
|
+
ic |
|
58
58
|
able |
|
59
59
|
ible |
|
60
60
|
ant |
|
@@ -78,30 +78,30 @@ module PorterStemming
|
|
78
78
|
MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o # [cc]vvcc[vv] is m=1
|
79
79
|
MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o # [cc]vvccvvcc... is m>1
|
80
80
|
VOWEL_IN_STEM = /^(#{CC})?#{V}/o # vowel in stem
|
81
|
-
|
81
|
+
|
82
82
|
def self.stem(word)
|
83
83
|
|
84
84
|
# make a copy of the given object and convert it to a string.
|
85
85
|
word = word.dup.to_str
|
86
|
-
|
86
|
+
|
87
87
|
return word if word.length < 3
|
88
|
-
|
88
|
+
|
89
89
|
# now map initial y to Y so that the patterns never treat it as vowel
|
90
90
|
word[0] = 'Y' if word[0] == ?y
|
91
|
-
|
91
|
+
|
92
92
|
# Step 1a
|
93
93
|
if word =~ /(ss|i)es$/
|
94
94
|
word = $` + $1
|
95
|
-
elsif word =~ /([^s])s$/
|
95
|
+
elsif word =~ /([^s])s$/
|
96
96
|
word = $` + $1
|
97
97
|
end
|
98
98
|
|
99
99
|
# Step 1b
|
100
100
|
if word =~ /eed$/
|
101
|
-
word.chop! if $` =~ MGR0
|
101
|
+
word.chop! if $` =~ MGR0
|
102
102
|
elsif word =~ /(ed|ing)$/
|
103
103
|
stem = $`
|
104
|
-
if stem =~ VOWEL_IN_STEM
|
104
|
+
if stem =~ VOWEL_IN_STEM
|
105
105
|
word = stem
|
106
106
|
case word
|
107
107
|
when /(at|bl|iz)$/ then word << "e"
|
@@ -111,9 +111,9 @@ module PorterStemming
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
|
114
|
-
if word =~ /y$/
|
114
|
+
if word =~ /y$/
|
115
115
|
stem = $`
|
116
|
-
word = stem + "i" if stem =~ VOWEL_IN_STEM
|
116
|
+
word = stem + "i" if stem =~ VOWEL_IN_STEM
|
117
117
|
end
|
118
118
|
|
119
119
|
# Step 2
|
@@ -149,7 +149,7 @@ module PorterStemming
|
|
149
149
|
end
|
150
150
|
|
151
151
|
# Step 5
|
152
|
-
if word =~ /e$/
|
152
|
+
if word =~ /e$/
|
153
153
|
stem = $`
|
154
154
|
if (stem =~ MGR1) ||
|
155
155
|
(stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
|
data/lib/text/soundex.rb
CHANGED
@@ -1,61 +1,61 @@
|
|
1
|
-
#
|
2
|
-
# Ruby implementation of the Soundex algorithm,
|
3
|
-
# as described by Knuth in volume 3 of The Art of Computer Programming.
|
4
|
-
#
|
5
|
-
# Author: Michael Neumann (neumann@s-direktnet.de)
|
6
|
-
#
|
7
|
-
|
8
|
-
module Text # :nodoc:
|
9
|
-
module Soundex
|
10
|
-
|
11
|
-
def soundex(str_or_arr)
|
12
|
-
case str_or_arr
|
13
|
-
when String
|
14
|
-
soundex_str(str_or_arr)
|
15
|
-
when Array
|
16
|
-
str_or_arr.collect{|ele| soundex_str(ele)}
|
17
|
-
else
|
18
|
-
nil
|
19
|
-
end
|
20
|
-
end
|
21
|
-
module_function :soundex
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
#
|
26
|
-
# returns nil if the value couldn't be calculated (empty-string, wrong-character)
|
27
|
-
# do not change the parameter "str"
|
28
|
-
#
|
29
|
-
def soundex_str(str)
|
30
|
-
return nil if str.empty?
|
31
|
-
|
32
|
-
str = str.upcase
|
33
|
-
last_code = get_code(str[0,1])
|
34
|
-
soundex_code = str[0,1]
|
35
|
-
|
36
|
-
for index in 1...(str.size) do
|
37
|
-
return soundex_code if soundex_code.size == 4
|
38
|
-
|
39
|
-
code = get_code(str[index,1])
|
40
|
-
|
41
|
-
if code == "0" then
|
42
|
-
last_code = nil
|
43
|
-
elsif code == nil then
|
44
|
-
return nil
|
45
|
-
elsif code != last_code then
|
46
|
-
soundex_code += code
|
47
|
-
last_code = code
|
48
|
-
end
|
49
|
-
end # for
|
50
|
-
|
51
|
-
return soundex_code + "000"[0,4-soundex_code.size]
|
52
|
-
end
|
53
|
-
module_function :soundex_str
|
54
|
-
|
55
|
-
def get_code(char)
|
56
|
-
char.tr! "AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556"
|
57
|
-
end
|
58
|
-
module_function :get_code
|
59
|
-
|
60
|
-
end # module Soundex
|
61
|
-
end # module Text
|
1
|
+
#
|
2
|
+
# Ruby implementation of the Soundex algorithm,
|
3
|
+
# as described by Knuth in volume 3 of The Art of Computer Programming.
|
4
|
+
#
|
5
|
+
# Author: Michael Neumann (neumann@s-direktnet.de)
|
6
|
+
#
|
7
|
+
|
8
|
+
module Text # :nodoc:
|
9
|
+
module Soundex
|
10
|
+
|
11
|
+
def soundex(str_or_arr)
|
12
|
+
case str_or_arr
|
13
|
+
when String
|
14
|
+
soundex_str(str_or_arr)
|
15
|
+
when Array
|
16
|
+
str_or_arr.collect{|ele| soundex_str(ele)}
|
17
|
+
else
|
18
|
+
nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
module_function :soundex
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
#
|
26
|
+
# returns nil if the value couldn't be calculated (empty-string, wrong-character)
|
27
|
+
# do not change the parameter "str"
|
28
|
+
#
|
29
|
+
def soundex_str(str)
|
30
|
+
return nil if str.empty?
|
31
|
+
|
32
|
+
str = str.upcase
|
33
|
+
last_code = get_code(str[0,1])
|
34
|
+
soundex_code = str[0,1]
|
35
|
+
|
36
|
+
for index in 1...(str.size) do
|
37
|
+
return soundex_code if soundex_code.size == 4
|
38
|
+
|
39
|
+
code = get_code(str[index,1])
|
40
|
+
|
41
|
+
if code == "0" then
|
42
|
+
last_code = nil
|
43
|
+
elsif code == nil then
|
44
|
+
return nil
|
45
|
+
elsif code != last_code then
|
46
|
+
soundex_code += code
|
47
|
+
last_code = code
|
48
|
+
end
|
49
|
+
end # for
|
50
|
+
|
51
|
+
return soundex_code + "000"[0,4-soundex_code.size]
|
52
|
+
end
|
53
|
+
module_function :soundex_str
|
54
|
+
|
55
|
+
def get_code(char)
|
56
|
+
char.tr! "AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556"
|
57
|
+
end
|
58
|
+
module_function :get_code
|
59
|
+
|
60
|
+
end # module Soundex
|
61
|
+
end # module Text
|
data/lib/text/util.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Text
|
2
|
+
def self.is_19?
|
3
|
+
RUBY_VERSION[0, 3] == "1.9"
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.encoding_of(string)
|
7
|
+
if is_19?
|
8
|
+
string.encoding.to_s
|
9
|
+
else
|
10
|
+
$KCODE
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.raise_19_incompat
|
15
|
+
if is_19?
|
16
|
+
raise "Text::Figlet is not compatible with Ruby 1.9 at this time"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/text/version.rb
CHANGED
data/test/preamble.rb
CHANGED
data/test/test_double_metaphone.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/double_metaphone"
|
2
3
|
|
3
4
|
require 'csv'
|
4
5
|
|
@@ -7,7 +8,7 @@ class DoubleMetaphoneTest < Test::Unit::TestCase
|
|
7
8
|
def test_cases
|
8
9
|
CSV.open(File.rel('data', 'double_metaphone.csv'), 'r').to_a.each do |row|
|
9
10
|
primary, secondary = Text::Metaphone.double_metaphone(row[0])
|
10
|
-
|
11
|
+
|
11
12
|
assert_equal row[1], primary
|
12
13
|
assert_equal row[2], secondary.nil?? primary : secondary
|
13
14
|
end
|
data/test/test_levenshtein.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/levenshtein"
|
2
3
|
|
3
4
|
class LevenshteinTest < Test::Unit::TestCase
|
4
5
|
|
@@ -53,7 +54,7 @@ class LevenshteinTest < Test::Unit::TestCase
|
|
53
54
|
|
54
55
|
def with_encoding(kcode, encoding)
|
55
56
|
if Text.is_19?
|
56
|
-
old_encoding = Encoding.default_internal
|
57
|
+
old_encoding = Encoding.default_internal
|
57
58
|
Encoding.default_internal = encoding
|
58
59
|
yield
|
59
60
|
Encoding.default_internal = old_encoding
|
data/test/test_metaphone.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/metaphone"
|
2
3
|
require 'yaml'
|
3
4
|
|
4
5
|
class MetaphoneTest < Test::Unit::TestCase
|
@@ -8,7 +9,7 @@ class MetaphoneTest < Test::Unit::TestCase
|
|
8
9
|
assert_equal expected_output, Text::Metaphone.metaphone(input)
|
9
10
|
end
|
10
11
|
end
|
11
|
-
|
12
|
+
|
12
13
|
def test_cases_for_buggy_implementation
|
13
14
|
YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt'))).each do |input, expected_output|
|
14
15
|
assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true)
|
data/test/test_porter_stemming.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/porter_stemming"
|
2
3
|
|
3
4
|
class PorterStemmingTest < Test::Unit::TestCase
|
4
5
|
|
5
6
|
def slurp(*path)
|
6
7
|
File.read(File.rel(*path)).split(/\n/)
|
7
8
|
end
|
8
|
-
|
9
|
+
|
9
10
|
def test_cases
|
10
11
|
cases = slurp('data', 'porter_stemming_input.txt').zip(slurp('data', 'porter_stemming_output.txt'))
|
11
12
|
cases.each do |word, expected_output|
|
@@ -13,4 +14,4 @@ class PorterStemmingTest < Test::Unit::TestCase
|
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
16
|
-
end
|
17
|
+
end
|
data/test/test_soundex.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Battley
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2010-
|
14
|
+
date: 2010-03-03 00:00:00 +00:00
|
15
15
|
default_executable:
|
16
16
|
dependencies: []
|
17
17
|
|
@@ -24,33 +24,29 @@ extensions: []
|
|
24
24
|
extra_rdoc_files:
|
25
25
|
- README.rdoc
|
26
26
|
files:
|
27
|
-
- lib/text.rb
|
28
|
-
- lib/text/figlet/typesetter.rb
|
29
|
-
- lib/text/figlet/smusher.rb
|
30
|
-
- lib/text/figlet/font.rb
|
31
|
-
- lib/text/porter_stemming.rb
|
32
27
|
- lib/text/double_metaphone.rb
|
33
|
-
- lib/text/soundex.rb
|
34
|
-
- lib/text/figlet.rb
|
28
|
+
- lib/text/levenshtein.rb
|
35
29
|
- lib/text/metaphone.rb
|
30
|
+
- lib/text/porter_stemming.rb
|
31
|
+
- lib/text/soundex.rb
|
32
|
+
- lib/text/util.rb
|
36
33
|
- lib/text/version.rb
|
37
|
-
- lib/text/levenshtein.rb
|
38
|
-
- test/preamble.rb
|
39
|
-
- test/test_double_metaphone.rb
|
40
|
-
- test/data/chunky.flf
|
41
|
-
- test/data/porter_stemming_input.txt
|
42
|
-
- test/data/metaphone.txt
|
43
|
-
- test/data/double_metaphone.csv
|
34
|
+
- lib/text.rb
|
44
35
|
- test/data/big.flf
|
45
|
-
- test/data/chunky.txt
|
46
|
-
- test/data/porter_stemming_output.txt
|
47
36
|
- test/data/big.txt
|
37
|
+
- test/data/chunky.flf
|
38
|
+
- test/data/chunky.txt
|
39
|
+
- test/data/double_metaphone.csv
|
40
|
+
- test/data/metaphone.txt
|
48
41
|
- test/data/metaphone_buggy.txt
|
42
|
+
- test/data/porter_stemming_input.txt
|
43
|
+
- test/data/porter_stemming_output.txt
|
44
|
+
- test/preamble.rb
|
45
|
+
- test/test_double_metaphone.rb
|
49
46
|
- test/test_levenshtein.rb
|
50
|
-
- test/test_soundex.rb
|
51
|
-
- test/test_porter_stemming.rb
|
52
47
|
- test/test_metaphone.rb
|
53
|
-
- test/test_figlet.rb
|
48
|
+
- test/test_porter_stemming.rb
|
49
|
+
- test/test_soundex.rb
|
54
50
|
- README.rdoc
|
55
51
|
- Rakefile
|
56
52
|
has_rdoc: true
|
data/lib/text/figlet.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Ruby implementation of the Figlet program (http://www.figlet.org/).
|
3
|
-
#
|
4
|
-
# Author: Tim Fletcher (twoggle@gmail.com)
|
5
|
-
#
|
6
|
-
# Usage:
|
7
|
-
#
|
8
|
-
# big_font = Text::Figlet::Font.new('big.flf')
|
9
|
-
#
|
10
|
-
# figlet = Text::Figlet::Typesetter.new(big_font)
|
11
|
-
#
|
12
|
-
# puts figlet['hello world']
|
13
|
-
#
|
14
|
-
#
|
15
|
-
require 'text/figlet/font'
|
16
|
-
require 'text/figlet/smusher'
|
17
|
-
require 'text/figlet/typesetter'
|
data/lib/text/figlet/font.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class UnknownFontFormat < StandardError
|
5
|
-
end
|
6
|
-
|
7
|
-
class Font
|
8
|
-
def initialize(filename, load_german = true)
|
9
|
-
Text.raise_19_incompat
|
10
|
-
|
11
|
-
file = File.open(filename, 'rb')
|
12
|
-
|
13
|
-
header = file.gets.strip.split(/ /)
|
14
|
-
|
15
|
-
raise UnknownFontFormat if 'flf2a' != header[0][0, 5]
|
16
|
-
|
17
|
-
@hard_blank = header.shift[-1, 1]
|
18
|
-
@height = header.shift.to_i
|
19
|
-
@baseline = header.shift
|
20
|
-
@max_length = header.shift
|
21
|
-
@old_layout = header.shift.to_i
|
22
|
-
@comment_count = header.shift.to_i
|
23
|
-
@right_to_left = header.shift
|
24
|
-
@right_to_left = !@right_to_left.nil? && @right_to_left.to_i == 1
|
25
|
-
|
26
|
-
@load_german, @characters = load_german, {}
|
27
|
-
|
28
|
-
load_comments file
|
29
|
-
load_ascii_characters file
|
30
|
-
load_german_characters file
|
31
|
-
load_extended_characters file
|
32
|
-
|
33
|
-
file.close
|
34
|
-
end
|
35
|
-
|
36
|
-
def [](char)
|
37
|
-
@characters[char]
|
38
|
-
end
|
39
|
-
|
40
|
-
def has_char?(char)
|
41
|
-
@characters.has_key? char
|
42
|
-
end
|
43
|
-
|
44
|
-
attr_reader :height, :hard_blank, :old_layout
|
45
|
-
|
46
|
-
def right_to_left?
|
47
|
-
@right_to_left
|
48
|
-
end
|
49
|
-
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def load_comments(file)
|
54
|
-
@comment_count.times { file.gets.strip }
|
55
|
-
end
|
56
|
-
|
57
|
-
def load_ascii_characters(file)
|
58
|
-
(32..126).each { |i| @characters[i] = load_char(file) }
|
59
|
-
end
|
60
|
-
|
61
|
-
def load_german_characters(file)
|
62
|
-
[91, 92, 93, 123, 124, 125, 126].each do |i|
|
63
|
-
if @load_german
|
64
|
-
unless char = load_char(file)
|
65
|
-
return
|
66
|
-
end
|
67
|
-
@characters[i] = char
|
68
|
-
else
|
69
|
-
skip_char file
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def load_extended_characters(file)
|
75
|
-
until file.eof?
|
76
|
-
i = file.gets.strip.split(/ /).first
|
77
|
-
if i.empty?
|
78
|
-
next
|
79
|
-
elsif /^\-0x/i =~ i # comment
|
80
|
-
skip_char file
|
81
|
-
else
|
82
|
-
if /^0x/i =~ i
|
83
|
-
i = i[2, 1].hex
|
84
|
-
elsif '0' == i[0] && '0' != i || '-0' == i[0, 2]
|
85
|
-
i = i.oct
|
86
|
-
end
|
87
|
-
unless char = load_char(file)
|
88
|
-
return
|
89
|
-
end
|
90
|
-
@characters[i] = char
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def load_char(file)
|
96
|
-
char = []
|
97
|
-
@height.times do
|
98
|
-
return false if file.eof?
|
99
|
-
line = file.gets.rstrip
|
100
|
-
if match = /(.){1,2}$/.match(line)
|
101
|
-
line.gsub! match[1], ''
|
102
|
-
end
|
103
|
-
line << "\x00"
|
104
|
-
char << line
|
105
|
-
end
|
106
|
-
return char
|
107
|
-
end
|
108
|
-
|
109
|
-
def skip_char(file)
|
110
|
-
@height.times do
|
111
|
-
return if file.eof?
|
112
|
-
return if file.gets.strip.nil?
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|
117
|
-
|
118
|
-
end # module Figlet
|
119
|
-
end # module Text
|
data/lib/text/figlet/smusher.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class Smusher
|
5
|
-
|
6
|
-
def initialize(font)
|
7
|
-
Text.raise_19_incompat
|
8
|
-
@font = font
|
9
|
-
end
|
10
|
-
|
11
|
-
def [](result)
|
12
|
-
todo = false
|
13
|
-
|
14
|
-
@font.height.times do |j|
|
15
|
-
result[j] = result[j].sub(pattern) { todo, x = callback(todo, $1, $2); x }
|
16
|
-
end
|
17
|
-
@font.height.times do |j|
|
18
|
-
result[j] = if todo
|
19
|
-
result[j].sub(/\s\x00(?!$)|\x00\s/, '').sub(/\x00(?!$)/, '')
|
20
|
-
else
|
21
|
-
result[j].sub(/\x00(?!$)/, '')
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def pattern
|
27
|
-
@pattern ||= /([^#{@font.hard_blank}\x00\s])\x00([^#{@font.hard_blank}\x00\s])/
|
28
|
-
end
|
29
|
-
|
30
|
-
def symbols
|
31
|
-
@@symbols ||= {
|
32
|
-
24 => '|/\\[]{}()<>',
|
33
|
-
8 => {'[' => ']', ']' => '[', '{' => '}', '}' => '{', '(' => ')', ')' => '('},
|
34
|
-
16 => {"/\\" => '|', "\\/" => 'Y', '><' => 'X'}
|
35
|
-
}
|
36
|
-
end
|
37
|
-
|
38
|
-
def old_layout?(n)
|
39
|
-
@font.old_layout & n > 0
|
40
|
-
end
|
41
|
-
|
42
|
-
def callback(s, a, b)
|
43
|
-
combined = a + b
|
44
|
-
|
45
|
-
if old_layout?(1) && a == b
|
46
|
-
return true, a
|
47
|
-
elsif old_layout?(2) && ('_' == a && symbols[24].include?(b) || '_' == b && symbols[24].include?(a))
|
48
|
-
return true, a
|
49
|
-
elsif old_layout?(4) && ((left = symbols[24].index(a)) && (right = symbols[24].index(b)))
|
50
|
-
return true, (right > left ? b : a)
|
51
|
-
elsif old_layout?(8) && (symbols[8].has_key?(b) && symbols[8][b] == a)
|
52
|
-
return true, '|'
|
53
|
-
elsif old_layout?(16) && symbols[16].has_key?(combined)
|
54
|
-
return true, symbols[16][combined]
|
55
|
-
elsif old_layout?(32) && (a == b && @font.hard_blank == a)
|
56
|
-
return true, @font.hard_blank
|
57
|
-
else
|
58
|
-
return s, "#{a}\00#{b}"
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
end # module Figlet
|
65
|
-
end # module Text
|
data/lib/text/figlet/typesetter.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class Typesetter
|
5
|
-
|
6
|
-
def initialize(font, options = nil)
|
7
|
-
Text.raise_19_incompat
|
8
|
-
@font = font
|
9
|
-
@options = options || {}
|
10
|
-
@smush = @options.has_key?(:smush) ? @options[:smush] : true
|
11
|
-
end
|
12
|
-
|
13
|
-
def [](str)
|
14
|
-
result = []
|
15
|
-
str.length.times do |i|
|
16
|
-
char = str[i]
|
17
|
-
unless @font.has_char?(char)
|
18
|
-
if @font.has_char?(0)
|
19
|
-
char = 0
|
20
|
-
else
|
21
|
-
next
|
22
|
-
end
|
23
|
-
end
|
24
|
-
@font.height.times do |j|
|
25
|
-
line = @font[char][j]
|
26
|
-
if result[j].nil?
|
27
|
-
result[j] = line
|
28
|
-
else
|
29
|
-
result[j] = @font.right_to_left?? (line + result[j]) : (result[j] + line)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
if @font.old_layout > -1 && i > 0
|
33
|
-
diff = -1
|
34
|
-
@font.height.times do |j|
|
35
|
-
if match = /\S(\s*\x00\s*)\S/.match(result[j])
|
36
|
-
len = match[1].length
|
37
|
-
diff = (diff == -1 ? len : min(diff, len))
|
38
|
-
end
|
39
|
-
end
|
40
|
-
diff -= 1
|
41
|
-
if diff > 0
|
42
|
-
@font.height.times do |j|
|
43
|
-
if match = /\x00(\s{0,#{diff}})/.match(result[j])
|
44
|
-
b = diff - match[1].length
|
45
|
-
result[j] = result[j].sub(/\s{0,#{b}}\x00\s{#{match[1].length}}/, "\0")
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
smush[result] if @smush
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return result.join("\n").gsub(/\0/, '').gsub(@font.hard_blank, ' ')
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def min(a, b)
|
59
|
-
a > b ? b : a
|
60
|
-
end
|
61
|
-
|
62
|
-
def smush
|
63
|
-
@smusher ||= Smusher.new(@font)
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
|
68
|
-
end # module Figlet
|
69
|
-
end # module Text
|
data/test/test_figlet.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
-
|
3
|
-
class FigletTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
if !Text.is_19?
|
6
|
-
def test_hello_world
|
7
|
-
font = Text::Figlet::Font.new(File.rel('data', 'big.flf'))
|
8
|
-
figlet = Text::Figlet::Typesetter.new(font)
|
9
|
-
assert_equal File.read(File.rel('data', 'big.txt')), figlet['Hello World']
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_no_smushing
|
13
|
-
font = Text::Figlet::Font.new(File.rel('data', 'chunky.flf'))
|
14
|
-
figlet = Text::Figlet::Typesetter.new(font, :smush => false)
|
15
|
-
assert_equal File.read(File.rel('data', 'chunky.txt')), figlet['Chunky Bacon']
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|