text 0.1.14 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/text.rb +1 -24
- data/lib/text/double_metaphone.rb +5 -5
- data/lib/text/levenshtein.rb +6 -4
- data/lib/text/metaphone.rb +10 -10
- data/lib/text/porter_stemming.rb +12 -12
- data/lib/text/soundex.rb +61 -61
- data/lib/text/util.rb +19 -0
- data/lib/text/version.rb +2 -2
- data/test/preamble.rb +1 -2
- data/test/test_double_metaphone.rb +2 -1
- data/test/test_levenshtein.rb +2 -1
- data/test/test_metaphone.rb +2 -1
- data/test/test_porter_stemming.rb +3 -2
- data/test/test_soundex.rb +2 -1
- metadata +17 -21
- data/lib/text/figlet.rb +0 -17
- data/lib/text/figlet/font.rb +0 -119
- data/lib/text/figlet/smusher.rb +0 -65
- data/lib/text/figlet/typesetter.rb +0 -69
- data/test/test_figlet.rb +0 -18
data/lib/text.rb
CHANGED
@@ -1,30 +1,7 @@
|
|
1
|
+
require 'text/util'
|
1
2
|
require 'text/double_metaphone'
|
2
3
|
require 'text/levenshtein'
|
3
4
|
require 'text/metaphone'
|
4
5
|
require 'text/porter_stemming'
|
5
6
|
require 'text/soundex'
|
6
7
|
require 'text/version'
|
7
|
-
|
8
|
-
module Text
|
9
|
-
def self.is_19?
|
10
|
-
RUBY_VERSION[0, 3] == "1.9"
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.encoding_of(string)
|
14
|
-
if is_19?
|
15
|
-
string.encoding.to_s
|
16
|
-
else
|
17
|
-
$KCODE
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.raise_19_incompat
|
22
|
-
if is_19?
|
23
|
-
raise "Text::Figlet is not compatible with Ruby 1.9 at this time"
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
if !Text.is_19?
|
29
|
-
require 'text/figlet'
|
30
|
-
end
|
data/lib/text/double_metaphone.rb
CHANGED
@@ -56,7 +56,7 @@ module Metaphone
|
|
56
56
|
end
|
57
57
|
when 'B'
|
58
58
|
return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
|
59
|
-
when 'Ç'
|
59
|
+
when 'Ç'
|
60
60
|
return :S, :S, 1
|
61
61
|
when 'C'
|
62
62
|
if pos > 1 &&
|
@@ -114,7 +114,7 @@ module Metaphone
|
|
114
114
|
else
|
115
115
|
if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
|
116
116
|
return :K, :K, 3
|
117
|
-
else
|
117
|
+
else
|
118
118
|
return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
|
119
119
|
end
|
120
120
|
end
|
@@ -202,7 +202,7 @@ module Metaphone
|
|
202
202
|
end
|
203
203
|
else
|
204
204
|
current = ('J' == str[pos + 1, 1] ? 2 : 1)
|
205
|
-
|
205
|
+
|
206
206
|
if 0 == pos && 'JOSE' != str[pos, 4]
|
207
207
|
return :J, :A, current
|
208
208
|
else
|
@@ -243,7 +243,7 @@ module Metaphone
|
|
243
243
|
end
|
244
244
|
when 'N'
|
245
245
|
return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
|
246
|
-
when 'Ñ'
|
246
|
+
when 'Ñ'
|
247
247
|
return :N, :N, 1
|
248
248
|
when 'P'
|
249
249
|
if 'H' == str[pos + 1, 1]
|
@@ -255,7 +255,7 @@ module Metaphone
|
|
255
255
|
return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
|
256
256
|
when 'R'
|
257
257
|
current = ('R' == str[pos + 1, 1] ? 2 : 1)
|
258
|
-
|
258
|
+
|
259
259
|
if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
|
260
260
|
return nil, :R, current
|
261
261
|
else
|
data/lib/text/levenshtein.rb
CHANGED
@@ -11,6 +11,8 @@
|
|
11
11
|
# Author: Paul Battley (pbattley@gmail.com)
|
12
12
|
#
|
13
13
|
|
14
|
+
require "text/util"
|
15
|
+
|
14
16
|
module Text # :nodoc:
|
15
17
|
module Levenshtein
|
16
18
|
|
@@ -19,10 +21,10 @@ module Levenshtein
|
|
19
21
|
# as ISO-8859-*.
|
20
22
|
#
|
21
23
|
# The strings will be treated as UTF-8 if $KCODE is set appropriately (i.e. 'u').
|
22
|
-
# Otherwise, the comparison will be performed byte-by-byte. There is no specific support
|
24
|
+
# Otherwise, the comparison will be performed byte-by-byte. There is no specific support
|
23
25
|
# for Shift-JIS or EUC strings.
|
24
26
|
#
|
25
|
-
# When using Unicode text, be aware that this algorithm does not perform normalisation.
|
27
|
+
# When using Unicode text, be aware that this algorithm does not perform normalisation.
|
26
28
|
# If there is a possibility of different normalised forms being used, normalisation
|
27
29
|
# should be performed beforehand.
|
28
30
|
#
|
@@ -41,7 +43,7 @@ module Levenshtein
|
|
41
43
|
m = t.length
|
42
44
|
return m if (0 == n)
|
43
45
|
return n if (0 == m)
|
44
|
-
|
46
|
+
|
45
47
|
d = (0..m).to_a
|
46
48
|
x = nil
|
47
49
|
|
@@ -65,4 +67,4 @@ module Levenshtein
|
|
65
67
|
|
66
68
|
extend self
|
67
69
|
end
|
68
|
-
end
|
70
|
+
end
|
data/lib/text/metaphone.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
#
|
1
|
+
#
|
2
2
|
# An implementation of the Metaphone phonetic coding system in Ruby.
|
3
|
-
#
|
3
|
+
#
|
4
4
|
# Metaphone encodes names into a phonetic form such that similar-sounding names
|
5
5
|
# have the same or similar Metaphone encodings.
|
6
|
-
#
|
6
|
+
#
|
7
7
|
# The original system was described by Lawrence Philips in Computer Language
|
8
8
|
# Vol. 7 No. 12, December 1990, pp 39-43.
|
9
|
-
#
|
9
|
+
#
|
10
10
|
# As there are multiple implementations of Metaphone, each with their own
|
11
11
|
# quirks, I have based this on my interpretation of the algorithm specification.
|
12
12
|
# Even LP's original BASIC implementation appears to contain bugs (specifically
|
13
13
|
# with the handling of CC and MB), when compared to his explanation of the
|
14
14
|
# algorithm.
|
15
|
-
#
|
15
|
+
#
|
16
16
|
# I have also compared this implementation with that found in PHP's standard
|
17
17
|
# library, which appears to mimic the behaviour of LP's original BASIC
|
18
18
|
# implementation. For compatibility, these rules can also be used by passing
|
19
19
|
# :buggy=>true to the methods.
|
20
|
-
#
|
20
|
+
#
|
21
21
|
# Author: Paul Battley (pbattley@gmail.com)
|
22
22
|
#
|
23
23
|
|
@@ -25,10 +25,10 @@ module Text # :nodoc:
|
|
25
25
|
module Metaphone
|
26
26
|
|
27
27
|
module Rules # :nodoc:all
|
28
|
-
|
28
|
+
|
29
29
|
# Metaphone rules. These are simply applied in order.
|
30
30
|
#
|
31
|
-
STANDARD = [
|
31
|
+
STANDARD = [
|
32
32
|
# Regexp, replacement
|
33
33
|
[ /([bcdfhjklmnpqrstvwxyz])\1+/,
|
34
34
|
'\1' ], # Remove doubled consonants except g.
|
@@ -61,7 +61,7 @@ module Metaphone
|
|
61
61
|
[ /v/, 'F' ],
|
62
62
|
[ /(?!^)[aeiou]+/, '' ],
|
63
63
|
]
|
64
|
-
|
64
|
+
|
65
65
|
# The rules for the 'buggy' alternate implementation used by PHP etc.
|
66
66
|
#
|
67
67
|
BUGGY = STANDARD.dup
|
@@ -79,7 +79,7 @@ module Metaphone
|
|
79
79
|
def metaphone(str, options={})
|
80
80
|
return str.strip.split(/\s+/).map { |w| metaphone_word(w, options) }.join(' ')
|
81
81
|
end
|
82
|
-
|
82
|
+
|
83
83
|
private
|
84
84
|
|
85
85
|
def metaphone_word(w, options={})
|
data/lib/text/porter_stemming.rb
CHANGED
@@ -20,7 +20,7 @@ module PorterStemming
|
|
20
20
|
'ousness' => 'ous', 'aliti' => 'al',
|
21
21
|
'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
|
22
22
|
}
|
23
|
-
|
23
|
+
|
24
24
|
STEP_3_LIST = {
|
25
25
|
'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
|
26
26
|
'ical' => 'ic', 'ful' => '', 'ness' => ''
|
@@ -54,7 +54,7 @@ module PorterStemming
|
|
54
54
|
ance |
|
55
55
|
ence |
|
56
56
|
er |
|
57
|
-
ic |
|
57
|
+
ic |
|
58
58
|
able |
|
59
59
|
ible |
|
60
60
|
ant |
|
@@ -78,30 +78,30 @@ module PorterStemming
|
|
78
78
|
MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o # [cc]vvcc[vv] is m=1
|
79
79
|
MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o # [cc]vvccvvcc... is m>1
|
80
80
|
VOWEL_IN_STEM = /^(#{CC})?#{V}/o # vowel in stem
|
81
|
-
|
81
|
+
|
82
82
|
def self.stem(word)
|
83
83
|
|
84
84
|
# make a copy of the given object and convert it to a string.
|
85
85
|
word = word.dup.to_str
|
86
|
-
|
86
|
+
|
87
87
|
return word if word.length < 3
|
88
|
-
|
88
|
+
|
89
89
|
# now map initial y to Y so that the patterns never treat it as vowel
|
90
90
|
word[0] = 'Y' if word[0] == ?y
|
91
|
-
|
91
|
+
|
92
92
|
# Step 1a
|
93
93
|
if word =~ /(ss|i)es$/
|
94
94
|
word = $` + $1
|
95
|
-
elsif word =~ /([^s])s$/
|
95
|
+
elsif word =~ /([^s])s$/
|
96
96
|
word = $` + $1
|
97
97
|
end
|
98
98
|
|
99
99
|
# Step 1b
|
100
100
|
if word =~ /eed$/
|
101
|
-
word.chop! if $` =~ MGR0
|
101
|
+
word.chop! if $` =~ MGR0
|
102
102
|
elsif word =~ /(ed|ing)$/
|
103
103
|
stem = $`
|
104
|
-
if stem =~ VOWEL_IN_STEM
|
104
|
+
if stem =~ VOWEL_IN_STEM
|
105
105
|
word = stem
|
106
106
|
case word
|
107
107
|
when /(at|bl|iz)$/ then word << "e"
|
@@ -111,9 +111,9 @@ module PorterStemming
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
|
114
|
-
if word =~ /y$/
|
114
|
+
if word =~ /y$/
|
115
115
|
stem = $`
|
116
|
-
word = stem + "i" if stem =~ VOWEL_IN_STEM
|
116
|
+
word = stem + "i" if stem =~ VOWEL_IN_STEM
|
117
117
|
end
|
118
118
|
|
119
119
|
# Step 2
|
@@ -149,7 +149,7 @@ module PorterStemming
|
|
149
149
|
end
|
150
150
|
|
151
151
|
# Step 5
|
152
|
-
if word =~ /e$/
|
152
|
+
if word =~ /e$/
|
153
153
|
stem = $`
|
154
154
|
if (stem =~ MGR1) ||
|
155
155
|
(stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
|
data/lib/text/soundex.rb
CHANGED
@@ -1,61 +1,61 @@
|
|
1
|
-
#
|
2
|
-
# Ruby implementation of the Soundex algorithm,
|
3
|
-
# as described by Knuth in volume 3 of The Art of Computer Programming.
|
4
|
-
#
|
5
|
-
# Author: Michael Neumann (neumann@s-direktnet.de)
|
6
|
-
#
|
7
|
-
|
8
|
-
module Text # :nodoc:
|
9
|
-
module Soundex
|
10
|
-
|
11
|
-
def soundex(str_or_arr)
|
12
|
-
case str_or_arr
|
13
|
-
when String
|
14
|
-
soundex_str(str_or_arr)
|
15
|
-
when Array
|
16
|
-
str_or_arr.collect{|ele| soundex_str(ele)}
|
17
|
-
else
|
18
|
-
nil
|
19
|
-
end
|
20
|
-
end
|
21
|
-
module_function :soundex
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
#
|
26
|
-
# returns nil if the value couldn't be calculated (empty-string, wrong-character)
|
27
|
-
# do not change the parameter "str"
|
28
|
-
#
|
29
|
-
def soundex_str(str)
|
30
|
-
return nil if str.empty?
|
31
|
-
|
32
|
-
str = str.upcase
|
33
|
-
last_code = get_code(str[0,1])
|
34
|
-
soundex_code = str[0,1]
|
35
|
-
|
36
|
-
for index in 1...(str.size) do
|
37
|
-
return soundex_code if soundex_code.size == 4
|
38
|
-
|
39
|
-
code = get_code(str[index,1])
|
40
|
-
|
41
|
-
if code == "0" then
|
42
|
-
last_code = nil
|
43
|
-
elsif code == nil then
|
44
|
-
return nil
|
45
|
-
elsif code != last_code then
|
46
|
-
soundex_code += code
|
47
|
-
last_code = code
|
48
|
-
end
|
49
|
-
end # for
|
50
|
-
|
51
|
-
return soundex_code + "000"[0,4-soundex_code.size]
|
52
|
-
end
|
53
|
-
module_function :soundex_str
|
54
|
-
|
55
|
-
def get_code(char)
|
56
|
-
char.tr! "AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556"
|
57
|
-
end
|
58
|
-
module_function :get_code
|
59
|
-
|
60
|
-
end # module Soundex
|
61
|
-
end # module Text
|
1
|
+
#
|
2
|
+
# Ruby implementation of the Soundex algorithm,
|
3
|
+
# as described by Knuth in volume 3 of The Art of Computer Programming.
|
4
|
+
#
|
5
|
+
# Author: Michael Neumann (neumann@s-direktnet.de)
|
6
|
+
#
|
7
|
+
|
8
|
+
module Text # :nodoc:
|
9
|
+
module Soundex
|
10
|
+
|
11
|
+
def soundex(str_or_arr)
|
12
|
+
case str_or_arr
|
13
|
+
when String
|
14
|
+
soundex_str(str_or_arr)
|
15
|
+
when Array
|
16
|
+
str_or_arr.collect{|ele| soundex_str(ele)}
|
17
|
+
else
|
18
|
+
nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
module_function :soundex
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
#
|
26
|
+
# returns nil if the value couldn't be calculated (empty-string, wrong-character)
|
27
|
+
# do not change the parameter "str"
|
28
|
+
#
|
29
|
+
def soundex_str(str)
|
30
|
+
return nil if str.empty?
|
31
|
+
|
32
|
+
str = str.upcase
|
33
|
+
last_code = get_code(str[0,1])
|
34
|
+
soundex_code = str[0,1]
|
35
|
+
|
36
|
+
for index in 1...(str.size) do
|
37
|
+
return soundex_code if soundex_code.size == 4
|
38
|
+
|
39
|
+
code = get_code(str[index,1])
|
40
|
+
|
41
|
+
if code == "0" then
|
42
|
+
last_code = nil
|
43
|
+
elsif code == nil then
|
44
|
+
return nil
|
45
|
+
elsif code != last_code then
|
46
|
+
soundex_code += code
|
47
|
+
last_code = code
|
48
|
+
end
|
49
|
+
end # for
|
50
|
+
|
51
|
+
return soundex_code + "000"[0,4-soundex_code.size]
|
52
|
+
end
|
53
|
+
module_function :soundex_str
|
54
|
+
|
55
|
+
def get_code(char)
|
56
|
+
char.tr! "AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556"
|
57
|
+
end
|
58
|
+
module_function :get_code
|
59
|
+
|
60
|
+
end # module Soundex
|
61
|
+
end # module Text
|
data/lib/text/util.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Text
|
2
|
+
def self.is_19?
|
3
|
+
RUBY_VERSION[0, 3] == "1.9"
|
4
|
+
end
|
5
|
+
|
6
|
+
def self.encoding_of(string)
|
7
|
+
if is_19?
|
8
|
+
string.encoding.to_s
|
9
|
+
else
|
10
|
+
$KCODE
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.raise_19_incompat
|
15
|
+
if is_19?
|
16
|
+
raise "Text::Figlet is not compatible with Ruby 1.9 at this time"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/text/version.rb
CHANGED
data/test/preamble.rb
CHANGED
data/test/test_double_metaphone.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/double_metaphone"
|
2
3
|
|
3
4
|
require 'csv'
|
4
5
|
|
@@ -7,7 +8,7 @@ class DoubleMetaphoneTest < Test::Unit::TestCase
|
|
7
8
|
def test_cases
|
8
9
|
CSV.open(File.rel('data', 'double_metaphone.csv'), 'r').to_a.each do |row|
|
9
10
|
primary, secondary = Text::Metaphone.double_metaphone(row[0])
|
10
|
-
|
11
|
+
|
11
12
|
assert_equal row[1], primary
|
12
13
|
assert_equal row[2], secondary.nil?? primary : secondary
|
13
14
|
end
|
data/test/test_levenshtein.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/levenshtein"
|
2
3
|
|
3
4
|
class LevenshteinTest < Test::Unit::TestCase
|
4
5
|
|
@@ -53,7 +54,7 @@ class LevenshteinTest < Test::Unit::TestCase
|
|
53
54
|
|
54
55
|
def with_encoding(kcode, encoding)
|
55
56
|
if Text.is_19?
|
56
|
-
old_encoding = Encoding.default_internal
|
57
|
+
old_encoding = Encoding.default_internal
|
57
58
|
Encoding.default_internal = encoding
|
58
59
|
yield
|
59
60
|
Encoding.default_internal = old_encoding
|
data/test/test_metaphone.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/metaphone"
|
2
3
|
require 'yaml'
|
3
4
|
|
4
5
|
class MetaphoneTest < Test::Unit::TestCase
|
@@ -8,7 +9,7 @@ class MetaphoneTest < Test::Unit::TestCase
|
|
8
9
|
assert_equal expected_output, Text::Metaphone.metaphone(input)
|
9
10
|
end
|
10
11
|
end
|
11
|
-
|
12
|
+
|
12
13
|
def test_cases_for_buggy_implementation
|
13
14
|
YAML.load(File.read(File.rel('data', 'metaphone_buggy.txt'))).each do |input, expected_output|
|
14
15
|
assert_equal expected_output, Text::Metaphone.metaphone(input, :buggy=>true)
|
data/test/test_porter_stemming.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
require "text/porter_stemming"
|
2
3
|
|
3
4
|
class PorterStemmingTest < Test::Unit::TestCase
|
4
5
|
|
5
6
|
def slurp(*path)
|
6
7
|
File.read(File.rel(*path)).split(/\n/)
|
7
8
|
end
|
8
|
-
|
9
|
+
|
9
10
|
def test_cases
|
10
11
|
cases = slurp('data', 'porter_stemming_input.txt').zip(slurp('data', 'porter_stemming_output.txt'))
|
11
12
|
cases.each do |word, expected_output|
|
@@ -13,4 +14,4 @@ class PorterStemmingTest < Test::Unit::TestCase
|
|
13
14
|
end
|
14
15
|
end
|
15
16
|
|
16
|
-
end
|
17
|
+
end
|
data/test/test_soundex.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.14
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Battley
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2010-
|
14
|
+
date: 2010-03-03 00:00:00 +00:00
|
15
15
|
default_executable:
|
16
16
|
dependencies: []
|
17
17
|
|
@@ -24,33 +24,29 @@ extensions: []
|
|
24
24
|
extra_rdoc_files:
|
25
25
|
- README.rdoc
|
26
26
|
files:
|
27
|
-
- lib/text.rb
|
28
|
-
- lib/text/figlet/typesetter.rb
|
29
|
-
- lib/text/figlet/smusher.rb
|
30
|
-
- lib/text/figlet/font.rb
|
31
|
-
- lib/text/porter_stemming.rb
|
32
27
|
- lib/text/double_metaphone.rb
|
33
|
-
- lib/text/soundex.rb
|
34
|
-
- lib/text/figlet.rb
|
28
|
+
- lib/text/levenshtein.rb
|
35
29
|
- lib/text/metaphone.rb
|
30
|
+
- lib/text/porter_stemming.rb
|
31
|
+
- lib/text/soundex.rb
|
32
|
+
- lib/text/util.rb
|
36
33
|
- lib/text/version.rb
|
37
|
-
- lib/text/levenshtein.rb
|
38
|
-
- test/preamble.rb
|
39
|
-
- test/test_double_metaphone.rb
|
40
|
-
- test/data/chunky.flf
|
41
|
-
- test/data/porter_stemming_input.txt
|
42
|
-
- test/data/metaphone.txt
|
43
|
-
- test/data/double_metaphone.csv
|
34
|
+
- lib/text.rb
|
44
35
|
- test/data/big.flf
|
45
|
-
- test/data/chunky.txt
|
46
|
-
- test/data/porter_stemming_output.txt
|
47
36
|
- test/data/big.txt
|
37
|
+
- test/data/chunky.flf
|
38
|
+
- test/data/chunky.txt
|
39
|
+
- test/data/double_metaphone.csv
|
40
|
+
- test/data/metaphone.txt
|
48
41
|
- test/data/metaphone_buggy.txt
|
42
|
+
- test/data/porter_stemming_input.txt
|
43
|
+
- test/data/porter_stemming_output.txt
|
44
|
+
- test/preamble.rb
|
45
|
+
- test/test_double_metaphone.rb
|
49
46
|
- test/test_levenshtein.rb
|
50
|
-
- test/test_soundex.rb
|
51
|
-
- test/test_porter_stemming.rb
|
52
47
|
- test/test_metaphone.rb
|
53
|
-
- test/test_figlet.rb
|
48
|
+
- test/test_porter_stemming.rb
|
49
|
+
- test/test_soundex.rb
|
54
50
|
- README.rdoc
|
55
51
|
- Rakefile
|
56
52
|
has_rdoc: true
|
data/lib/text/figlet.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Ruby implementation of the Figlet program (http://www.figlet.org/).
|
3
|
-
#
|
4
|
-
# Author: Tim Fletcher (twoggle@gmail.com)
|
5
|
-
#
|
6
|
-
# Usage:
|
7
|
-
#
|
8
|
-
# big_font = Text::Figlet::Font.new('big.flf')
|
9
|
-
#
|
10
|
-
# figlet = Text::Figlet::Typesetter.new(big_font)
|
11
|
-
#
|
12
|
-
# puts figlet['hello world']
|
13
|
-
#
|
14
|
-
#
|
15
|
-
require 'text/figlet/font'
|
16
|
-
require 'text/figlet/smusher'
|
17
|
-
require 'text/figlet/typesetter'
|
data/lib/text/figlet/font.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class UnknownFontFormat < StandardError
|
5
|
-
end
|
6
|
-
|
7
|
-
class Font
|
8
|
-
def initialize(filename, load_german = true)
|
9
|
-
Text.raise_19_incompat
|
10
|
-
|
11
|
-
file = File.open(filename, 'rb')
|
12
|
-
|
13
|
-
header = file.gets.strip.split(/ /)
|
14
|
-
|
15
|
-
raise UnknownFontFormat if 'flf2a' != header[0][0, 5]
|
16
|
-
|
17
|
-
@hard_blank = header.shift[-1, 1]
|
18
|
-
@height = header.shift.to_i
|
19
|
-
@baseline = header.shift
|
20
|
-
@max_length = header.shift
|
21
|
-
@old_layout = header.shift.to_i
|
22
|
-
@comment_count = header.shift.to_i
|
23
|
-
@right_to_left = header.shift
|
24
|
-
@right_to_left = !@right_to_left.nil? && @right_to_left.to_i == 1
|
25
|
-
|
26
|
-
@load_german, @characters = load_german, {}
|
27
|
-
|
28
|
-
load_comments file
|
29
|
-
load_ascii_characters file
|
30
|
-
load_german_characters file
|
31
|
-
load_extended_characters file
|
32
|
-
|
33
|
-
file.close
|
34
|
-
end
|
35
|
-
|
36
|
-
def [](char)
|
37
|
-
@characters[char]
|
38
|
-
end
|
39
|
-
|
40
|
-
def has_char?(char)
|
41
|
-
@characters.has_key? char
|
42
|
-
end
|
43
|
-
|
44
|
-
attr_reader :height, :hard_blank, :old_layout
|
45
|
-
|
46
|
-
def right_to_left?
|
47
|
-
@right_to_left
|
48
|
-
end
|
49
|
-
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def load_comments(file)
|
54
|
-
@comment_count.times { file.gets.strip }
|
55
|
-
end
|
56
|
-
|
57
|
-
def load_ascii_characters(file)
|
58
|
-
(32..126).each { |i| @characters[i] = load_char(file) }
|
59
|
-
end
|
60
|
-
|
61
|
-
def load_german_characters(file)
|
62
|
-
[91, 92, 93, 123, 124, 125, 126].each do |i|
|
63
|
-
if @load_german
|
64
|
-
unless char = load_char(file)
|
65
|
-
return
|
66
|
-
end
|
67
|
-
@characters[i] = char
|
68
|
-
else
|
69
|
-
skip_char file
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def load_extended_characters(file)
|
75
|
-
until file.eof?
|
76
|
-
i = file.gets.strip.split(/ /).first
|
77
|
-
if i.empty?
|
78
|
-
next
|
79
|
-
elsif /^\-0x/i =~ i # comment
|
80
|
-
skip_char file
|
81
|
-
else
|
82
|
-
if /^0x/i =~ i
|
83
|
-
i = i[2, 1].hex
|
84
|
-
elsif '0' == i[0] && '0' != i || '-0' == i[0, 2]
|
85
|
-
i = i.oct
|
86
|
-
end
|
87
|
-
unless char = load_char(file)
|
88
|
-
return
|
89
|
-
end
|
90
|
-
@characters[i] = char
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def load_char(file)
|
96
|
-
char = []
|
97
|
-
@height.times do
|
98
|
-
return false if file.eof?
|
99
|
-
line = file.gets.rstrip
|
100
|
-
if match = /(.){1,2}$/.match(line)
|
101
|
-
line.gsub! match[1], ''
|
102
|
-
end
|
103
|
-
line << "\x00"
|
104
|
-
char << line
|
105
|
-
end
|
106
|
-
return char
|
107
|
-
end
|
108
|
-
|
109
|
-
def skip_char(file)
|
110
|
-
@height.times do
|
111
|
-
return if file.eof?
|
112
|
-
return if file.gets.strip.nil?
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|
117
|
-
|
118
|
-
end # module Figlet
|
119
|
-
end # module Text
|
data/lib/text/figlet/smusher.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class Smusher
|
5
|
-
|
6
|
-
def initialize(font)
|
7
|
-
Text.raise_19_incompat
|
8
|
-
@font = font
|
9
|
-
end
|
10
|
-
|
11
|
-
def [](result)
|
12
|
-
todo = false
|
13
|
-
|
14
|
-
@font.height.times do |j|
|
15
|
-
result[j] = result[j].sub(pattern) { todo, x = callback(todo, $1, $2); x }
|
16
|
-
end
|
17
|
-
@font.height.times do |j|
|
18
|
-
result[j] = if todo
|
19
|
-
result[j].sub(/\s\x00(?!$)|\x00\s/, '').sub(/\x00(?!$)/, '')
|
20
|
-
else
|
21
|
-
result[j].sub(/\x00(?!$)/, '')
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def pattern
|
27
|
-
@pattern ||= /([^#{@font.hard_blank}\x00\s])\x00([^#{@font.hard_blank}\x00\s])/
|
28
|
-
end
|
29
|
-
|
30
|
-
def symbols
|
31
|
-
@@symbols ||= {
|
32
|
-
24 => '|/\\[]{}()<>',
|
33
|
-
8 => {'[' => ']', ']' => '[', '{' => '}', '}' => '{', '(' => ')', ')' => '('},
|
34
|
-
16 => {"/\\" => '|', "\\/" => 'Y', '><' => 'X'}
|
35
|
-
}
|
36
|
-
end
|
37
|
-
|
38
|
-
def old_layout?(n)
|
39
|
-
@font.old_layout & n > 0
|
40
|
-
end
|
41
|
-
|
42
|
-
def callback(s, a, b)
|
43
|
-
combined = a + b
|
44
|
-
|
45
|
-
if old_layout?(1) && a == b
|
46
|
-
return true, a
|
47
|
-
elsif old_layout?(2) && ('_' == a && symbols[24].include?(b) || '_' == b && symbols[24].include?(a))
|
48
|
-
return true, a
|
49
|
-
elsif old_layout?(4) && ((left = symbols[24].index(a)) && (right = symbols[24].index(b)))
|
50
|
-
return true, (right > left ? b : a)
|
51
|
-
elsif old_layout?(8) && (symbols[8].has_key?(b) && symbols[8][b] == a)
|
52
|
-
return true, '|'
|
53
|
-
elsif old_layout?(16) && symbols[16].has_key?(combined)
|
54
|
-
return true, symbols[16][combined]
|
55
|
-
elsif old_layout?(32) && (a == b && @font.hard_blank == a)
|
56
|
-
return true, @font.hard_blank
|
57
|
-
else
|
58
|
-
return s, "#{a}\00#{b}"
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
end # module Figlet
|
65
|
-
end # module Text
|
data/lib/text/figlet/typesetter.rb
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
module Text
|
2
|
-
module Figlet
|
3
|
-
|
4
|
-
class Typesetter
|
5
|
-
|
6
|
-
def initialize(font, options = nil)
|
7
|
-
Text.raise_19_incompat
|
8
|
-
@font = font
|
9
|
-
@options = options || {}
|
10
|
-
@smush = @options.has_key?(:smush) ? @options[:smush] : true
|
11
|
-
end
|
12
|
-
|
13
|
-
def [](str)
|
14
|
-
result = []
|
15
|
-
str.length.times do |i|
|
16
|
-
char = str[i]
|
17
|
-
unless @font.has_char?(char)
|
18
|
-
if @font.has_char?(0)
|
19
|
-
char = 0
|
20
|
-
else
|
21
|
-
next
|
22
|
-
end
|
23
|
-
end
|
24
|
-
@font.height.times do |j|
|
25
|
-
line = @font[char][j]
|
26
|
-
if result[j].nil?
|
27
|
-
result[j] = line
|
28
|
-
else
|
29
|
-
result[j] = @font.right_to_left?? (line + result[j]) : (result[j] + line)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
if @font.old_layout > -1 && i > 0
|
33
|
-
diff = -1
|
34
|
-
@font.height.times do |j|
|
35
|
-
if match = /\S(\s*\x00\s*)\S/.match(result[j])
|
36
|
-
len = match[1].length
|
37
|
-
diff = (diff == -1 ? len : min(diff, len))
|
38
|
-
end
|
39
|
-
end
|
40
|
-
diff -= 1
|
41
|
-
if diff > 0
|
42
|
-
@font.height.times do |j|
|
43
|
-
if match = /\x00(\s{0,#{diff}})/.match(result[j])
|
44
|
-
b = diff - match[1].length
|
45
|
-
result[j] = result[j].sub(/\s{0,#{b}}\x00\s{#{match[1].length}}/, "\0")
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
smush[result] if @smush
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return result.join("\n").gsub(/\0/, '').gsub(@font.hard_blank, ' ')
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
def min(a, b)
|
59
|
-
a > b ? b : a
|
60
|
-
end
|
61
|
-
|
62
|
-
def smush
|
63
|
-
@smusher ||= Smusher.new(@font)
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
|
68
|
-
end # module Figlet
|
69
|
-
end # module Text
|
data/test/test_figlet.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
-
|
3
|
-
class FigletTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
if !Text.is_19?
|
6
|
-
def test_hello_world
|
7
|
-
font = Text::Figlet::Font.new(File.rel('data', 'big.flf'))
|
8
|
-
figlet = Text::Figlet::Typesetter.new(font)
|
9
|
-
assert_equal File.read(File.rel('data', 'big.txt')), figlet['Hello World']
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_no_smushing
|
13
|
-
font = Text::Figlet::Font.new(File.rel('data', 'chunky.flf'))
|
14
|
-
figlet = Text::Figlet::Typesetter.new(font, :smush => false)
|
15
|
-
assert_equal File.read(File.rel('data', 'chunky.txt')), figlet['Chunky Bacon']
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|