Text 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,73 +1,57 @@
1
- --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0.10
3
- specification_version: 1
1
+ --- !ruby/object:Gem::Specification
4
2
  name: Text
5
- version: !ruby/object:Gem::Version
6
- version: 1.1.2
7
- date: 2007-02-20 00:00:00 +00:00
8
- summary: A collection of text algorithms
9
- require_paths:
10
- - lib
11
- email:
12
- homepage: http://text.rubyforge.org/
13
- rubyforge_project: text
14
- description:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Paul Battley
9
+ - Michael Neumann
10
+ - Tim Fletcher
15
11
  autorequire:
16
- default_executable:
17
12
  bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
25
- platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
- authors:
30
- - Paul Battley, Michael Neumann, Tim Fletcher
31
- files:
32
- - lib/text.rb
33
- - lib/text/double_metaphone.rb
34
- - lib/text/figlet.rb
35
- - lib/text/levenshtein.rb
36
- - lib/text/metaphone.rb
37
- - lib/text/porter_stemming.rb
38
- - lib/text/soundex.rb
39
- - lib/text/figlet/font.rb
40
- - lib/text/figlet/smusher.rb
41
- - lib/text/figlet/typesetter.rb
42
- - test/preamble.rb
43
- - test/test_double_metaphone.rb
44
- - test/test_figlet.rb
45
- - test/test_levenshtein.rb
46
- - test/test_metaphone.rb
47
- - test/test_porter_stemming.rb
48
- - test/test_soundex.rb
49
- - test/data/big.flf
50
- - test/data/big.txt
51
- - test/data/chunky.flf
52
- - test/data/chunky.txt
53
- - test/data/double_metaphone.csv
54
- - test/data/metaphone.txt
55
- - test/data/metaphone_buggy.txt
56
- - test/data/porter_stemming_input.txt
57
- - test/data/porter_stemming_output.txt
58
- - README.rdoc
59
- - rakefile.rb
60
- test_files: []
61
-
62
- rdoc_options: []
63
-
64
- extra_rdoc_files: []
65
-
13
+ cert_chain: []
14
+ date: 2012-06-07 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: text
18
+ requirement: &12920520 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ! '>='
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.0
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: *12920520
27
+ description: ! 'This gem is now just a legacy shim with one dependency: text'
28
+ email: pbattley@gmail.com
66
29
  executables: []
67
-
68
30
  extensions: []
69
-
31
+ extra_rdoc_files: []
32
+ files: []
33
+ homepage: http://github.com/threedaymonk/text
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
70
51
  requirements: []
71
-
72
- dependencies: []
73
-
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.11
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Legacy shim. Use 'text' instead.
57
+ test_files: []
@@ -1,28 +0,0 @@
1
- = Text
2
-
3
- A collection of text algorithms.
4
-
5
-
6
- = Usage
7
-
8
- require 'text'
9
-
10
- font = Text::Figlet::Font.new('big.flf')
11
- figlet = Text::Figlet::Typesetter.new(font)
12
- figlet['Hello World'] # => '...'
13
-
14
- Text::Levenshtein.distance('test', 'test') # => 0
15
- Text::Levenshtein.distance('test', 'tent') # => 1
16
-
17
- Text::Metaphone.metaphone('BRIAN') # => 'BRN'
18
- Text::Metaphone.double_metaphone('Coburn') # => ['KPRN', nil]
19
- Text::Metaphone.double_metaphone('Angier') # => ['ANJ', 'ANJR']
20
-
21
- Text::Soundex.soundex('Knuth') # => 'K530'
22
-
23
- Text::PorterStemming.stem('abatements') # => 'abat'
24
-
25
-
26
- = License
27
-
28
- Same as Ruby.
@@ -1,6 +0,0 @@
1
- require 'text/double_metaphone'
2
- require 'text/figlet'
3
- require 'text/levenshtein'
4
- require 'text/metaphone'
5
- require 'text/porter_stemming'
6
- require 'text/soundex'
@@ -1,356 +0,0 @@
1
- #
2
- # Ruby implementation of the Double Metaphone algorithm by Lawrence Philips,
3
- # originally published in the June 2000 issue of C/C++ Users Journal.
4
- #
5
- # Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/
6
- #
7
- # Author: Tim Fletcher (twoggle@gmail.com)
8
- #
9
-
10
- module Text # :nodoc:
11
- module Metaphone
12
-
13
- # Returns the primary and secondary double metaphone tokens
14
- # (the secondary will be nil if equal to the primary).
15
- def double_metaphone(str)
16
- primary, secondary, current = [], [], 0
17
- original, length, last = "#{str} ".upcase, str.length, str.length - 1
18
- if /^GN|KN|PN|WR|PS$/ =~ original[0, 2]
19
- current += 1
20
- end
21
- if 'X' == original[0, 1]
22
- primary << :S
23
- secondary << :S
24
- current += 1
25
- end
26
- while primary.length < 4 || secondary.length < 4
27
- break if current > str.length
28
- a, b, c = double_metaphone_lookup(original, current, length, last)
29
- primary << a if a
30
- secondary << b if b
31
- current += c if c
32
- end
33
- primary, secondary = primary.to_s[0, 4], secondary.to_s[0, 4]
34
- return primary, (primary == secondary ? nil : secondary)
35
- end
36
-
37
-
38
- private
39
-
40
- def slavo_germanic?(str)
41
- /W|K|CZ|WITZ/ =~ str
42
- end
43
-
44
- def vowel?(str)
45
- /^A|E|I|O|U|Y$/ =~ str
46
- end
47
-
48
- def double_metaphone_lookup(str, pos, length, last)
49
- case str[pos, 1]
50
- when /^A|E|I|O|U|Y$/
51
- if 0 == pos
52
- return :A, :A, 1
53
- else
54
- return nil, nil, 1
55
- end
56
- when 'B'
57
- return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
58
- when 'Ç'
59
- return :S, :S, 1
60
- when 'C'
61
- if pos > 1 &&
62
- !vowel?(str[pos - 2, 1]) &&
63
- 'ACH' == str[pos - 1, 3] &&
64
- str[pos + 2, 1] != 'I' && (
65
- str[pos + 2, 1] != 'E' ||
66
- str[pos - 2, 6] =~ /^(B|M)ACHER$/
67
- ) then
68
- return :K, :K, 2
69
- elsif 0 == pos && 'CAESAR' == str[pos, 6]
70
- return :S, :S, 2
71
- elsif 'CHIA' == str[pos, 4]
72
- return :K, :K, 2
73
- elsif 'CH' == str[pos, 2]
74
- if pos > 0 && 'CHAE' == str[pos, 4]
75
- return :K, :X, 2
76
- elsif 0 == pos && (
77
- ['HARAC', 'HARIS'].include?(str[pos + 1, 5]) ||
78
- ['HOR', 'HYM', 'HIA', 'HEM'].include?(str[pos + 1, 3])
79
- ) && str[0, 5] != 'CHORE' then
80
- return :K, :K, 2
81
- elsif ['VAN ','VON '].include?(str[0, 4]) ||
82
- 'SCH' == str[0, 3] ||
83
- ['ORCHES','ARCHIT','ORCHID'].include?(str[pos - 2, 6]) ||
84
- ['T','S'].include?(str[pos + 2, 1]) || (
85
- ((0 == pos) || ['A','O','U','E'].include?(str[pos - 1, 1])) &&
86
- ['L','R','N','M','B','H','F','V','W',' '].include?(str[pos + 2, 1])
87
- ) then
88
- return :K, :K, 2
89
- elsif pos > 0
90
- return ('MC' == str[0, 2] ? 'K' : 'X'), 'K', 2
91
- else
92
- return :X, :X, 2
93
- end
94
- elsif 'CZ' == str[pos, 2] && 'WICZ' != str[pos - 2, 4]
95
- return :S, :X, 2
96
- elsif 'CIA' == str[pos + 1, 3]
97
- return :X, :X, 3
98
- elsif 'CC' == str[pos, 2] && !(1 == pos && 'M' == str[0, 1])
99
- if /^I|E|H$/ =~ str[pos + 2, 1] && 'HU' != str[pos + 2, 2]
100
- if (1 == pos && 'A' == str[pos - 1, 1]) ||
101
- /^UCCE(E|S)$/ =~ str[pos - 1, 5] then
102
- return :KS, :KS, 3
103
- else
104
- return :X, :X, 3
105
- end
106
- else
107
- return :K, :K, 2
108
- end
109
- elsif /^C(K|G|Q)$/ =~ str[pos, 2]
110
- return :K, :K, 2
111
- elsif /^C(I|E|Y)$/ =~ str[pos, 2]
112
- return :S, (/^CI(O|E|A)$/ =~ str[pos, 3] ? :X : :S), 2
113
- else
114
- if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
115
- return :K, :K, 3
116
- else
117
- return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
118
- end
119
- end
120
- when 'D'
121
- if 'DG' == str[pos, 2]
122
- if /^I|E|Y$/ =~ str[pos + 2, 1]
123
- return :J, :J, 3
124
- else
125
- return :TK, :TK, 2
126
- end
127
- else
128
- return :T, :T, (/^D(T|D)$/ =~ str[pos, 2] ? 2 : 1)
129
- end
130
- when 'F'
131
- return :F, :F, ('F' == str[pos + 1, 1] ? 2 : 1)
132
- when 'G'
133
- if 'H' == str[pos + 1, 1]
134
- if pos > 0 && !vowel?(str[pos - 1, 1])
135
- return :K, :K, 2
136
- elsif 0 == pos
137
- if 'I' == str[pos + 2, 1]
138
- return :J, :J, 2
139
- else
140
- return :K, :K, 2
141
- end
142
- elsif (pos > 1 && /^B|H|D$/ =~ str[pos - 2, 1]) ||
143
- (pos > 2 && /^B|H|D$/ =~ str[pos - 3, 1]) ||
144
- (pos > 3 && /^B|H$/ =~ str[pos - 4, 1])
145
- return nil, nil, 2
146
- else
147
- if (pos > 2 && 'U' == str[pos - 1, 1] && /^C|G|L|R|T$/ =~ str[pos - 3, 1])
148
- return :F, :F, 2
149
- elsif pos > 0 && 'I' != str[pos - 1, 1]
150
- return :K, :K, 2
151
- else
152
- return nil, nil, 2
153
- end
154
- end
155
- elsif 'N' == str[pos + 1, 1]
156
- if 1 == pos && vowel?(str[0, 1]) && !slavo_germanic?(str)
157
- return :KN, :N, 2
158
- else
159
- if 'EY' != str[pos + 2, 2] && 'Y' != str[pos + 1, 1] && !slavo_germanic?(str)
160
- return :N, :KN, 2
161
- else
162
- return :KN, :KN, 2
163
- end
164
- end
165
- elsif 'LI' == str[pos + 1, 2] && !slavo_germanic?(str)
166
- return :KL, :L, 2
167
- elsif 0 == pos && ('Y' == str[pos + 1, 1] || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/ =~ str[pos + 1, 2])
168
- return :K, :J, 2
169
- elsif (('ER' == str[pos + 1, 2] || 'Y' == str[pos + 1, 1]) &&
170
- /^(D|R|M)ANGER$/ !~ str[0, 6] &&
171
- /^E|I$/ !~ str[pos - 1, 1] &&
172
- /^(R|O)GY$/ !~ str[pos - 1, 3])
173
- return :K, :J, 2
174
- elsif /^E|I|Y$/ =~ str[pos + 1, 1] || /^(A|O)GGI$/ =~ str[pos - 1, 4]
175
- if (/^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]) || 'ET' == str[pos + 1, 2]
176
- return :K, :K, 2
177
- else
178
- if 'IER ' == str[pos + 1, 4]
179
- return :J, :J, 2
180
- else
181
- return :J, :K, 2
182
- end
183
- end
184
- elsif 'G' == str[pos + 1, 1]
185
- return :K, :K, 2
186
- else
187
- return :K, :K, 1
188
- end
189
- when 'H'
190
- if (0 == pos || vowel?(str[pos - 1, 1])) && vowel?(str[pos + 1, 1])
191
- return :H, :H, 2
192
- else
193
- return nil, nil, 1
194
- end
195
- when 'J'
196
- if 'JOSE' == str[pos, 4] || 'SAN ' == str[0, 4]
197
- if (0 == pos && ' ' == str[pos + 4, 1]) || 'SAN ' == str[0, 4]
198
- return :H, :H, 1
199
- else
200
- return :J, :H, 1
201
- end
202
- else
203
- current = ('J' == str[pos + 1, 1] ? 2 : 1)
204
-
205
- if 0 == pos && 'JOSE' != str[pos, 4]
206
- return :J, :A, current
207
- else
208
- if vowel?(str[pos - 1, 1]) && !slavo_germanic?(str) && /^A|O$/ =~ str[pos + 1, 1]
209
- return :J, :H, current
210
- else
211
- if last == pos
212
- return :J, nil, current
213
- else
214
- if /^L|T|K|S|N|M|B|Z$/ !~ str[pos + 1, 1] && /^S|K|L$/ !~ str[pos - 1, 1]
215
- return :J, :J, current
216
- else
217
- return nil, nil, current
218
- end
219
- end
220
- end
221
- end
222
- end
223
- when 'K'
224
- return :K, :K, ('K' == str[pos + 1, 1] ? 2 : 1)
225
- when 'L'
226
- if 'L' == str[pos + 1, 1]
227
- if (((length - 3) == pos && /^(ILL(O|A)|ALLE)$/ =~ str[pos - 1, 4]) ||
228
- ((/^(A|O)S$/ =~ str[last - 1, 2] || /^A|O$/ =~ str[last, 1]) && 'ALLE' == str[pos - 1, 4]))
229
- return :L, nil, 2
230
- else
231
- return :L, :L, 2
232
- end
233
- else
234
- return :L, :L, 1
235
- end
236
- when 'M'
237
- if ('UMB' == str[pos - 1, 3] &&
238
- ((last - 1) == pos || 'ER' == str[pos + 2, 2])) || 'M' == str[pos + 1, 1]
239
- return :M, :M, 2
240
- else
241
- return :M, :M, 1
242
- end
243
- when 'N'
244
- return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
245
- when 'Ñ'
246
- return :N, :N, 1
247
- when 'P'
248
- if 'H' == str[pos + 1, 1]
249
- return :F, :F, 2
250
- else
251
- return :P, :P, (/^P|B$/ =~ str[pos + 1, 1] ? 2 : 1)
252
- end
253
- when 'Q'
254
- return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
255
- when 'R'
256
- current = ('R' == str[pos + 1, 1] ? 2 : 1)
257
-
258
- if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
259
- return nil, :R, current
260
- else
261
- return :R, :R, current
262
- end
263
- when 'S'
264
- if /^(I|Y)SL$/ =~ str[pos - 1, 3]
265
- return nil, nil, 1
266
- elsif 0 == pos && 'SUGAR' == str[pos, 5]
267
- return :X, :S, 1
268
- elsif 'SH' == str[pos, 2]
269
- if /^H(EIM|OEK|OLM|OLZ)$/ =~ str[pos + 1, 4]
270
- return :S, :S, 2
271
- else
272
- return :X, :X, 2
273
- end
274
- elsif /^SI(O|A)$/ =~ str[pos, 3] || 'SIAN' == str[pos, 4]
275
- return :S, (slavo_germanic?(str) ? :S : :X), 3
276
- elsif (0 == pos && /^M|N|L|W$/ =~ str[pos + 1, 1]) || 'Z' == str[pos + 1, 1]
277
- return :S, :X, ('Z' == str[pos + 1, 1] ? 2 : 1)
278
- elsif 'SC' == str[pos, 2]
279
- if 'H' == str[pos + 2, 1]
280
- if /^OO|ER|EN|UY|ED|EM$/ =~ str[pos + 3, 2]
281
- return (/^E(R|N)$/ =~ str[pos + 3, 2] ? :X : :SK), :SK, 3
282
- else
283
- return :X, ((0 == pos && !vowel?(str[3, 1]) && ('W' != str[pos + 3, 1])) ? :S : :X), 3
284
- end
285
- elsif /^I|E|Y$/ =~ str[pos + 2, 1]
286
- return :S, :S, 3
287
- else
288
- return :SK, :SK, 3
289
- end
290
- else
291
- return (last == pos && /^(A|O)I$/ =~ str[pos - 2, 2] ? nil : 'S'), 'S', (/^S|Z$/ =~ str[pos + 1, 1] ? 2 : 1)
292
- end
293
- when 'T'
294
- if 'TION' == str[pos, 4]
295
- return :X, :X, 3
296
- elsif /^T(IA|CH)$/ =~ str[pos, 3]
297
- return :X, :X, 3
298
- elsif 'TH' == str[pos, 2] || 'TTH' == str[pos, 3]
299
- if /^(O|A)M$/ =~ str[pos + 2, 2] || /^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]
300
- return :T, :T, 2
301
- else
302
- return 0, :T, 2
303
- end
304
- else
305
- return :T, :T, (/^T|D$/ =~ str[pos + 1, 1] ? 2 : 1)
306
- end
307
- when 'V'
308
- return :F, :F, ('V' == str[pos + 1, 1] ? 2 : 1)
309
- when 'W'
310
- if 'WR' == str[pos, 2]
311
- return :R, :R, 2
312
- end
313
- pri, sec = nil, nil
314
-
315
- if 0 == pos && (vowel?(str[pos + 1, 1]) || 'WH' == str[pos, 2])
316
- pri = :A
317
- sec = vowel?(str[pos + 1, 1]) ? :F : :A
318
- end
319
-
320
- if (last == pos && vowel?(str[pos - 1, 1])) || 'SCH' == str[0, 3] ||
321
- /^EWSKI|EWSKY|OWSKI|OWSKY$/ =~ str[pos - 1, 5]
322
- return pri, "#{sec}F".intern, 1
323
- elsif /^WI(C|T)Z$/ =~ str[pos, 4]
324
- return "#{pri}TS".intern, "#{sec}FX".intern, 4
325
- else
326
- return pri, sec, 1
327
- end
328
- when 'X'
329
- current = (/^C|X$/ =~ str[pos + 1, 1] ? 2 : 1)
330
-
331
- if !(last == pos && (/^(I|E)AU$/ =~ str[pos - 3, 3] || /^(A|O)U$/ =~ str[pos - 2, 2]))
332
- return :KS, :KS, current
333
- else
334
- return nil, nil, current
335
- end
336
- when 'Z'
337
- if 'H' == str[pos + 1, 1]
338
- return :J, :J, 2
339
- else
340
- current = ('Z' == str[pos + 1, 1] ? 2 : 1)
341
-
342
- if /^Z(O|I|A)$/ =~ str[pos + 1, 2] || (slavo_germanic?(str) && (pos > 0 && 'T' != str[pos - 1, 1]))
343
- return :S, :TS, current
344
- else
345
- return :S, :S, current
346
- end
347
- end
348
- else
349
- return nil, nil, 1
350
- end
351
- end # def double_metaphone_lookup
352
-
353
- extend self
354
-
355
- end # module Metaphone
356
- end # module Text