text 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,117 @@
1
module Text
  module Figlet

    # Raised when a font file is empty or its first line does not start with
    # the 'flf2a' signature.
    class UnknownFontFormat < StandardError
    end

    # A FIGlet font loaded from a file.
    #
    # Glyphs are stored in a Hash keyed by Integer character code. Each glyph
    # is an Array of +height+ row Strings; every row has its endmark
    # characters removed and a single "\x00" appended, which the rest of the
    # library uses as the boundary marker between adjacent glyphs.
    class Font
      # Character codes that are overwritten by the seven glyphs following the
      # ASCII block when +load_german+ is true.
      GERMAN_SLOTS = [91, 92, 93, 123, 124, 125, 126].freeze

      attr_reader :height, :hard_blank, :old_layout

      # Loads the font stored in +filename+.
      #
      # filename::    path of the font file; it is read in binary mode.
      # load_german:: when true (the default) the seven glyphs that follow the
      #               ASCII block replace the glyphs for GERMAN_SLOTS; when
      #               false they are skipped.
      #
      # Raises UnknownFontFormat if the header line is missing or does not
      # begin with 'flf2a'.
      def initialize(filename, load_german = true)
        @load_german, @characters = load_german, {}

        # Block form guarantees the handle is closed even when loading raises.
        File.open(filename, 'rb') do |file|
          load_header file
          load_comments file
          load_ascii_characters file
          load_german_characters file
          load_extended_characters file
        end
      end

      # Returns the glyph rows stored for the Integer code +char+, or nil.
      def [](char)
        @characters[char]
      end

      # True when a glyph is stored for the Integer code +char+.
      def has_char?(char)
        @characters.has_key? char
      end

      # True when the seventh header field is present and equals 1.
      def right_to_left?
        @right_to_left
      end


      private

      # Parses the space-separated header line into instance variables.
      def load_header(file)
        # gets returns nil for an empty file; to_s turns that into an empty
        # header so we raise the documented error instead of NoMethodError.
        header = file.gets.to_s.strip.split(/ /)

        raise UnknownFontFormat if header.empty? || 'flf2a' != header[0][0, 5]

        # The hard blank is the last character of the signature field.
        @hard_blank = header.shift[-1, 1]
        @height = header.shift.to_i
        @baseline = header.shift
        @max_length = header.shift
        @old_layout = header.shift.to_i
        @comment_count = header.shift.to_i
        direction = header.shift
        @right_to_left = !direction.nil? && direction.to_i == 1
      end

      # Discards the number of comment lines announced by the header.
      def load_comments(file)
        @comment_count.times { file.gets }
      end

      # Reads one glyph for each code from 32 to 126.
      def load_ascii_characters(file)
        (32..126).each { |code| @characters[code] = load_char(file) }
      end

      # Reads (or skips, when load_german is false) the seven glyphs that
      # follow the ASCII block. Stops early if the file ends.
      def load_german_characters(file)
        GERMAN_SLOTS.each do |code|
          if @load_german
            char = load_char(file)
            return unless char
            @characters[code] = char
          else
            skip_char file
          end
        end
      end

      # Reads the remaining "code tag" glyphs: a tag line whose first
      # space-separated token is the character code, followed by the glyph.
      def load_extended_characters(file)
        until file.eof?
          tag = file.gets.strip.split(/ /).first
          # A blank line yields no token at all (nil), not an empty string.
          next if tag.nil? || tag.empty?

          if /^\-0x/i =~ tag
            # Negative hexadecimal tags are not loaded; drop their glyph.
            skip_char file
            next
          end

          code = parse_code(tag)
          char = load_char(file)
          return unless char
          @characters[code] = char
        end
      end

      # Converts a code tag to an Integer: 0x-prefixed tags are hexadecimal,
      # tags with a leading 0 (other than "0" itself) or leading -0 are octal,
      # and everything else is decimal.
      def parse_code(tag)
        if /^0x/i =~ tag
          tag.hex
        elsif ('0' == tag[0, 1] && '0' != tag) || '-0' == tag[0, 2]
          tag.oct
        else
          tag.to_i
        end
      end

      # Reads +height+ rows and returns them as an Array, or false if the file
      # ends before the glyph is complete.
      def load_char(file)
        char = []
        @height.times do
          return false if file.eof?
          line = file.gets.rstrip
          unless line.empty?
            # The endmark is whatever character ends the row. Only the
            # trailing run of it is removed, so the same character appearing
            # inside the glyph is preserved.
            endmark = line[-1, 1]
            line.chop! while line[-1, 1] == endmark
          end
          line << "\x00"
          char << line
        end
        char
      end

      # Consumes up to +height+ rows without storing them.
      def skip_char(file)
        @height.times do
          return if file.eof?
          file.gets
        end
      end

    end

  end # module Figlet
end # module Text
@@ -0,0 +1,64 @@
1
module Text
  module Figlet

    # Merges ("smushes") the boundary between two adjacent glyphs.
    #
    # Rows are expected to contain a "\x00" marker at each glyph boundary. The
    # bits of +font.old_layout+ select which smushing rules are active
    # (1, 2, 4, 8, 16, 32).
    class Smusher

      # Lookup tables keyed by rule bit(s):
      # 24:: symbols taking part in rules 2 and 4, ordered in pairs after '|'
      # 8::  opposite bracket pairs
      # 16:: two-character combinations and their replacement
      SYMBOLS = {
        24 => '|/\\[]{}()<>'.freeze,
        8  => {'[' => ']', ']' => '[', '{' => '}', '}' => '{', '(' => ')', ')' => '('}.freeze,
        16 => {"/\\" => '|', "\\/" => 'Y', '><' => 'X'}.freeze
      }.freeze

      # font:: object responding to +height+, +hard_blank+ and +old_layout+.
      def initialize(font)
        @font = font
      end

      # Smushes the first glyph boundary of every row of +result+ in place.
      #
      # First pass: where two visible characters touch across the marker, ask
      # #callback whether they can be merged. Second pass: drop the marker
      # (unless it ends the row); if any row was merged, also drop one
      # whitespace character next to the marker so the rows stay aligned.
      def [](result)
        todo = false

        @font.height.times do |j|
          result[j] = result[j].sub(pattern) do
            todo, merged = callback(todo, Regexp.last_match(1), Regexp.last_match(2))
            merged
          end
        end

        @font.height.times do |j|
          row = result[j]
          row = row.sub(/\s\x00(?!$)|\x00\s/, '') if todo
          result[j] = row.sub(/\x00(?!$)/, '')
        end
      end

      # Regexp matching two characters that touch across a marker, neither
      # being the hard blank, the marker or whitespace.
      def pattern
        @pattern ||= begin
          # Escape the hard blank: fonts may use a character that is special
          # inside a character class (backslash, ']', '^', '-').
          blank = Regexp.escape(@font.hard_blank)
          /([^#{blank}\x00\s])\x00([^#{blank}\x00\s])/
        end
      end

      # The rule lookup tables (see SYMBOLS).
      def symbols
        SYMBOLS
      end

      # True when rule bit +n+ is set in the font's old_layout value.
      def old_layout?(n)
        @font.old_layout & n > 0
      end

      # Decides how characters +a+ (left) and +b+ (right) combine.
      #
      # Returns a two-element Array: [true, merged_char] when an active rule
      # applies, otherwise [s, "a\x00b"] so the caller's flag +s+ is kept and
      # the pair is left untouched.
      def callback(s, a, b)
        combined = a + b

        if old_layout?(1) && a == b
          return true, a
        elsif old_layout?(2) && underscore_pair?(a, b)
          # The underscore is the character that gets replaced.
          return true, ('_' == a ? b : a)
        elsif old_layout?(4) && (winner = hierarchy_winner(a, b))
          return true, winner
        elsif old_layout?(8) && (symbols[8].has_key?(b) && symbols[8][b] == a)
          return true, '|'
        elsif old_layout?(16) && symbols[16].has_key?(combined)
          return true, symbols[16][combined]
        elsif old_layout?(32) && (a == b && @font.hard_blank == a)
          return true, @font.hard_blank
        else
          return s, "#{a}\00#{b}"
        end
      end


      private

      # True when one character is '_' and the other is a rule-2 symbol.
      def underscore_pair?(a, b)
        '_' == a && symbols[24].include?(b) || '_' == b && symbols[24].include?(a)
      end

      # Returns the character from the later hierarchy class, or nil when
      # either character is not a hierarchy symbol or both belong to the same
      # class (e.g. '[' and ']'), in which case rule 4 does not apply.
      def hierarchy_winner(a, b)
        left = symbols[24].index(a)
        right = symbols[24].index(b)
        return nil unless left && right

        # Index 0 is '|'; after it the symbols come in pairs, one per class.
        left_class = (left + 1) / 2
        right_class = (right + 1) / 2
        return nil if left_class == right_class

        right_class > left_class ? b : a
      end

    end

  end # module Figlet
end # module Text
@@ -0,0 +1,68 @@
1
module Text
  module Figlet

    # Renders a string with the glyphs of a font.
    class Typesetter

      # font::    object responding to +[]+, +has_char?+, +height+,
      #           +hard_blank+, +old_layout+ and +right_to_left?+.
      # options:: optional Hash; :smush (default true) controls whether
      #           touching glyphs are merged by a Smusher.
      def initialize(font, options = nil)
        @font = font
        @options = options || {}
        @smush = @options.fetch(:smush, true)
      end

      # Returns +str+ rendered as +font.height+ rows joined by "\n".
      #
      # Characters without a glyph are replaced by glyph 0 when the font has
      # one and are skipped otherwise. Boundary markers are removed and hard
      # blanks become spaces in the returned String.
      def [](str)
        result = []
        char_codes(str).each_with_index do |char, i|
          unless @font.has_char?(char)
            next unless @font.has_char?(0)
            char = 0
          end

          append_glyph(result, @font[char])

          if @font.old_layout > -1 && i > 0
            kern result
            smush[result] if @smush
          end
        end
        result.join("\n").gsub(/\0/, '').gsub(@font.hard_blank, ' ')
      end


      private

      # Returns the Integer character codes of +str+. Fonts are keyed by
      # Integer code, so one-character Strings cannot be used for lookup.
      # Strings that cannot report a valid encoding are read byte by byte.
      def char_codes(str)
        if str.respond_to?(:valid_encoding?) && str.valid_encoding?
          str.codepoints.to_a
        else
          str.unpack('C*')
        end
      end

      # Adds each row of +glyph+ to the matching row of +result+, before the
      # existing text for right-to-left fonts and after it otherwise.
      def append_glyph(result, glyph)
        @font.height.times do |j|
          line = glyph[j]
          if result[j].nil?
            result[j] = line
          elsif @font.right_to_left?
            result[j] = line + result[j]
          else
            result[j] = result[j] + line
          end
        end
      end

      # Removes whitespace around the first boundary marker of every row.
      # The amount removed is one less than the smallest gap found, where a
      # gap's length includes the marker itself.
      def kern(result)
        gaps = []
        @font.height.times do |j|
          if match = /\S(\s*\x00\s*)\S/.match(result[j])
            gaps << match[1].length
          end
        end
        return if gaps.empty?

        diff = gaps.min - 1
        return unless diff > 0

        @font.height.times do |j|
          if match = /\x00(\s{0,#{diff}})/.match(result[j])
            after = match[1].length
            before = diff - after
            # "\0" in double quotes is the NUL marker, not a backreference.
            result[j] = result[j].sub(/\s{0,#{before}}\x00\s{#{after}}/, "\0")
          end
        end
      end

      # Lazily built Smusher for this font.
      def smush
        @smusher ||= Smusher.new(@font)
      end

    end

  end # module Figlet
end # module Text
@@ -0,0 +1,65 @@
1
+ #
2
+ # Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
3
+ #
4
+ # The Levenshtein distance is a measure of how similar two strings s and t are,
5
+ # calculated as the number of deletions/insertions/substitutions needed to
6
+ # transform s into t. The greater the distance, the more the strings differ.
7
+ #
8
+ # The Levenshtein distance is also sometimes referred to as the
9
+ # easier-to-pronounce-and-spell 'edit distance'.
10
+ #
11
+ # Author: Paul Battley (pbattley@gmail.com)
12
+ #
13
+
14
module Text # :nodoc:
module Levenshtein

  # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
  #
  # On Rubies whose strings carry an encoding, each string is compared by
  # code point when its bytes are valid in its own encoding, and byte-by-byte
  # otherwise. On older Rubies the strings are treated as UTF-8 if $KCODE is
  # set appropriately (i.e. 'u') and as bytes if not.
  #
  # When using Unicode text, be aware that this algorithm does not perform
  # normalisation. If there is a possibility of different normalised forms
  # being used, normalisation should be performed beforehand.
  #
  # Returns the distance as an Integer.
  #
  def distance(str1, str2)
    s = code_units(str1)
    t = code_units(str2)
    n = s.length
    m = t.length
    return m if n.zero?
    return n if m.zero?

    # d holds the previous row of the edit matrix; e is the cell to the left
    # of the one being computed, so only one row is kept in memory.
    d = (0..m).to_a
    x = nil

    n.times do |i|
      e = i + 1
      m.times do |j|
        cost = (s[i] == t[j]) ? 0 : 1
        x = [
          d[j + 1] + 1, # insertion
          e + 1,        # deletion
          d[j] + cost   # substitution
        ].min
        d[j] = e
        e = x
      end
      d[m] = x
    end

    x
  end

  private

  # Splits +str+ into the Integers that are compared against each other.
  def code_units(str)
    if str.respond_to?(:valid_encoding?)
      # $KCODE is always nil here, so decide per string instead.
      str.valid_encoding? ? str.codepoints.to_a : str.unpack('C*')
    else
      str.unpack($KCODE =~ /^U/i ? 'U*' : 'C*')
    end
  end

  extend self
end
end
@@ -0,0 +1,97 @@
1
+ #
2
+ # An implementation of the Metaphone phonetic coding system in Ruby.
3
+ #
4
+ # Metaphone encodes names into a phonetic form such that similar-sounding names
5
+ # have the same or similar Metaphone encodings.
6
+ #
7
+ # The original system was described by Lawrence Philips in Computer Language
8
+ # Vol. 7 No. 12, December 1990, pp 39-43.
9
+ #
10
+ # As there are multiple implementations of Metaphone, each with their own
11
+ # quirks, I have based this on my interpretation of the algorithm specification.
12
+ # Even LP's original BASIC implementation appears to contain bugs (specifically
13
+ # with the handling of CC and MB), when compared to his explanation of the
14
+ # algorithm.
15
+ #
16
+ # I have also compared this implementation with that found in PHP's standard
17
+ # library, which appears to mimic the behaviour of LP's original BASIC
18
+ # implementation. For compatibility, these rules can also be used by passing
19
+ # :buggy=>true to the methods.
20
+ #
21
+ # Author: Paul Battley (pbattley@gmail.com)
22
+ #
23
+
24
module Text # :nodoc:
module Metaphone

  module Rules # :nodoc:all

    # Metaphone rules. These are simply applied in order.
    #
    # Replacements are upper case while the patterns only match lower case,
    # so text produced by one rule is never rewritten by a later one.
    #
    # The table and its entries are frozen so they cannot be altered at
    # run time.
    #
    STANDARD = [
      # Regexp, replacement
      [ /([bcdfhjklmnpqrstvwxyz])\1+/,
                         '\1' ],  # Remove doubled consonants except g.
                                  # [PHP] remove c from regexp.
      [ /^ae/,            'E' ],
      [ /^[gkp]n/,        'N' ],
      [ /^wr/,            'R' ],
      [ /^x/,             'S' ],
      [ /^wh/,            'W' ],
      [ /mb$/,            'M' ],  # [PHP] remove $ from regexp.
      [ /(?!^)sch/,      'SK' ],
      [ /th/,             '0' ],
      [ /t?ch|sh/,        'X' ],
      [ /c(?=ia)/,        'X' ],
      [ /[st](?=i[ao])/,  'X' ],
      [ /s?c(?=[iey])/,   'S' ],
      [ /[cq]/,           'K' ],
      [ /dg(?=[iey])/,    'J' ],
      [ /d/,              'T' ],
      [ /g(?=h[^aeiou])/, ''  ],
      [ /gn(ed)?/,        'N' ],
      [ /([^g]|^)g(?=[iey])/,
                        '\1J' ],
      [ /g+/,             'K' ],
      [ /ph/,             'F' ],
      [ /([aeiou])h(?=\b|[^aeiou])/,
                         '\1' ],
      [ /[wy](?![aeiou])/, '' ],
      [ /z/,              'S' ],
      [ /v/,              'F' ],
      [ /(?!^)[aeiou]+/,  ''  ],
    ].each { |rule| rule.freeze }.freeze

    # The rules for the 'buggy' alternate implementation used by PHP etc.
    # Identical to STANDARD except for entries 0 and 6.
    #
    BUGGY = STANDARD.dup
    BUGGY[0] = [ /([bdfhjklmnpqrstvwxyz])\1+/, '\1' ].freeze
    BUGGY[6] = [ /mb/, 'M' ].freeze
    BUGGY.freeze
  end

  # Returns the Metaphone representation of a string. If the string contains
  # multiple words, each word in turn is converted into its Metaphone
  # representation and the results are joined with single spaces. Note that
  # only the letters A-Z are supported, so any language-specific processing
  # should be done beforehand.
  #
  # If the :buggy option is set, alternate 'buggy' rules are used.
  #
  def metaphone(str, options={})
    str.strip.split(/\s+/).map { |w| metaphone_word(w, options) }.join(' ')
  end

private

  # Encodes a single word; see #metaphone.
  def metaphone_word(w, options={})
    # Normalise case and remove everything that is not a-z
    s = w.downcase.gsub(/[^a-z]/, '')
    # Apply the Metaphone rules
    rules = options[:buggy] ? Rules::BUGGY : Rules::STANDARD
    rules.each { |rx, rep| s.gsub!(rx, rep) }
    s.upcase
  end

  extend self

end
end
@@ -0,0 +1,171 @@
1
+ #
2
+ # This is the Porter Stemming algorithm, ported to Ruby from the
3
+ # version coded up in Perl. It's easy to follow against the rules
4
+ # in the original paper in:
5
+ #
6
+ # Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
7
+ # no. 3, pp 130-137,
8
+ #
9
+ # Taken from http://www.tartarus.org/~martin/PorterStemmer (Public Domain)
10
+ #
11
module Text # :nodoc:
module PorterStemming

  # Step 2 suffix => replacement.
  STEP_2_LIST = {
    'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance',
    'izer' => 'ize', 'bli' => 'ble',
    'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous',
    'ization' => 'ize', 'ation' => 'ate',
    'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful',
    'ousness' => 'ous', 'aliti' => 'al',
    'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
  }.freeze

  # Step 3 suffix => replacement.
  STEP_3_LIST = {
    'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
    'ical' => 'ic', 'ful' => '', 'ness' => ''
  }.freeze

  # Suffixes handled by step 2 (the keys of STEP_2_LIST).
  SUFFIX_1_REGEXP = /(
                    ational  |
                    tional   |
                    enci     |
                    anci     |
                    izer     |
                    bli      |
                    alli     |
                    entli    |
                    eli      |
                    ousli    |
                    ization  |
                    ation    |
                    ator     |
                    alism    |
                    iveness  |
                    fulness  |
                    ousness  |
                    aliti    |
                    iviti    |
                    biliti   |
                    logi)$/x

  # Suffixes removed by step 4.
  SUFFIX_2_REGEXP = /(
                      al       |
                      ance     |
                      ence     |
                      er       |
                      ic       |
                      able     |
                      ible     |
                      ant      |
                      ement    |
                      ment     |
                      ent      |
                      ou       |
                      ism      |
                      ate      |
                      iti      |
                      ous      |
                      ive      |
                      ize)$/x

  # Suffixes handled by step 3 (the keys of STEP_3_LIST).
  STEP_3_REGEXP = /(icate|ative|alize|iciti|ical|ful|ness)$/

  C = "[^aeiou]".freeze             # consonant
  V = "[aeiouy]".freeze             # vowel
  CC = "#{C}(?>[^aeiouy]*)".freeze  # consonant sequence
  VV = "#{V}(?>[aeiou]*)".freeze    # vowel sequence

  MGR0 = /^(#{CC})?#{VV}#{CC}/                # [cc]vvcc... is m>0
  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/       # [cc]vvcc[vv] is m=1
  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/      # [cc]vvccvvcc... is m>1
  VOWEL_IN_STEM = /^(#{CC})?#{V}/             # vowel in stem
  CVC = /^#{CC}#{V}[^aeiouwxy]$/              # cc-v-c, last c not w, x or y

  # Returns the Porter stem of +word+ as a new String; +word+ itself is not
  # modified. Words shorter than three characters are returned unchanged
  # (as a copy). Only lower-case a-z input is matched by the rules.
  def self.stem(word)
    # make a copy of the given object and convert it to a string.
    word = word.dup.to_str

    return word if word.length < 3

    # now map initial y to Y so that the patterns never treat it as vowel
    word[0] = 'Y' if word[0] == ?y

    word = step_1a(word)
    word = step_1b(word)
    word = step_1c(word)
    word = step_2(word)
    word = step_3(word)
    word = step_4(word)
    word = step_5(word)

    # and turn initial Y back to y
    word[0] = 'y' if word[0] == ?Y

    word
  end

  # Step 1a: plurals. "sses" -> "ss", "ies" -> "i", and a final "s" that
  # does not follow another "s" is dropped.
  def self.step_1a(word)
    if word =~ /(ss|i)es$/
      Regexp.last_match.pre_match + Regexp.last_match(1)
    elsif word =~ /([^s])s$/
      Regexp.last_match.pre_match + Regexp.last_match(1)
    else
      word
    end
  end

  # Step 1b: "eed" -> "ee" when the stem has m>0; otherwise "ed"/"ing" are
  # removed when the stem contains a vowel, followed by a tidy-up of the
  # new ending.
  def self.step_1b(word)
    if word =~ /eed$/
      word.chop! if Regexp.last_match.pre_match =~ MGR0
    elsif word =~ /(ed|ing)$/
      stem = Regexp.last_match.pre_match
      if stem =~ VOWEL_IN_STEM
        word = stem
        case word
        when /(at|bl|iz)$/             then word << "e"
        when /([^aeiouylsz])\1$/       then word.chop!
        when CVC                       then word << "e"
        end
      end
    end
    word
  end

  # Step 1c: final "y" -> "i" when the stem contains a vowel.
  def self.step_1c(word)
    return word unless word =~ /y$/
    stem = Regexp.last_match.pre_match
    stem =~ VOWEL_IN_STEM ? stem + "i" : word
  end

  # Step 2: replace a STEP_2_LIST suffix when the stem has m>0.
  def self.step_2(word)
    return word unless word =~ SUFFIX_1_REGEXP
    # Capture both parts before the next match overwrites the match data.
    stem = Regexp.last_match.pre_match
    suffix = Regexp.last_match(1)
    stem =~ MGR0 ? stem + STEP_2_LIST[suffix] : word
  end

  # Step 3: replace a STEP_3_LIST suffix when the stem has m>0.
  def self.step_3(word)
    return word unless word =~ STEP_3_REGEXP
    stem = Regexp.last_match.pre_match
    suffix = Regexp.last_match(1)
    stem =~ MGR0 ? stem + STEP_3_LIST[suffix] : word
  end

  # Step 4: remove a SUFFIX_2_REGEXP suffix, or "ion" after "s"/"t", when
  # the remaining stem has m>1. The "ion" rule is only tried when no
  # SUFFIX_2_REGEXP suffix is present.
  def self.step_4(word)
    if word =~ SUFFIX_2_REGEXP
      stem = Regexp.last_match.pre_match
    elsif word =~ /(s|t)(ion)$/
      stem = Regexp.last_match.pre_match + Regexp.last_match(1)
    else
      return word
    end
    stem =~ MGR1 ? stem : word
  end

  # Step 5: drop a final "e" (m>1, or m=1 and the stem is not cc-v-c), then
  # reduce a final "ll" to "l" when m>1.
  def self.step_5(word)
    if word =~ /e$/
      stem = Regexp.last_match.pre_match
      word = stem if (stem =~ MGR1) || (stem =~ MEQ1 && stem !~ CVC)
    end

    word.chop! if word =~ /ll$/ && word =~ MGR1

    word
  end

  private_class_method :step_1a, :step_1b, :step_1c,
                       :step_2, :step_3, :step_4, :step_5

end
end