Text 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,73 +1,57 @@
1
- --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0.10
3
- specification_version: 1
1
+ --- !ruby/object:Gem::Specification
4
2
  name: Text
5
- version: !ruby/object:Gem::Version
6
- version: 1.1.2
7
- date: 2007-02-20 00:00:00 +00:00
8
- summary: A collection of text algorithms
9
- require_paths:
10
- - lib
11
- email:
12
- homepage: http://text.rubyforge.org/
13
- rubyforge_project: text
14
- description:
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Paul Battley
9
+ - Michael Neumann
10
+ - Tim Fletcher
15
11
  autorequire:
16
- default_executable:
17
12
  bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
25
- platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
- authors:
30
- - Paul Battley, Michael Neumann, Tim Fletcher
31
- files:
32
- - lib/text.rb
33
- - lib/text/double_metaphone.rb
34
- - lib/text/figlet.rb
35
- - lib/text/levenshtein.rb
36
- - lib/text/metaphone.rb
37
- - lib/text/porter_stemming.rb
38
- - lib/text/soundex.rb
39
- - lib/text/figlet/font.rb
40
- - lib/text/figlet/smusher.rb
41
- - lib/text/figlet/typesetter.rb
42
- - test/preamble.rb
43
- - test/test_double_metaphone.rb
44
- - test/test_figlet.rb
45
- - test/test_levenshtein.rb
46
- - test/test_metaphone.rb
47
- - test/test_porter_stemming.rb
48
- - test/test_soundex.rb
49
- - test/data/big.flf
50
- - test/data/big.txt
51
- - test/data/chunky.flf
52
- - test/data/chunky.txt
53
- - test/data/double_metaphone.csv
54
- - test/data/metaphone.txt
55
- - test/data/metaphone_buggy.txt
56
- - test/data/porter_stemming_input.txt
57
- - test/data/porter_stemming_output.txt
58
- - README.rdoc
59
- - rakefile.rb
60
- test_files: []
61
-
62
- rdoc_options: []
63
-
64
- extra_rdoc_files: []
65
-
13
+ cert_chain: []
14
+ date: 2012-06-07 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: text
18
+ requirement: &12920520 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ! '>='
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.0
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: *12920520
27
+ description: ! 'This gem is now just a legacy shim with one dependency: text'
28
+ email: pbattley@gmail.com
66
29
  executables: []
67
-
68
30
  extensions: []
69
-
31
+ extra_rdoc_files: []
32
+ files: []
33
+ homepage: http://github.com/threedaymonk/text
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
70
51
  requirements: []
71
-
72
- dependencies: []
73
-
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.11
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Legacy shim. Use 'text' instead.
57
+ test_files: []
@@ -1,28 +0,0 @@
1
- = Text
2
-
3
- A collection of text algorithms.
4
-
5
-
6
- = Usage
7
-
8
- require 'text'
9
-
10
- font = Text::Figlet::Font.new('big.flf')
11
- figlet = Text::Figlet::Typesetter.new(font)
12
- figlet['Hello World'] # => '...'
13
-
14
- Text::Levenshtein.distance('test', 'test') # => 0
15
- Text::Levenshtein.distance('test', 'tent') # => 1
16
-
17
- Text::Metaphone.metaphone('BRIAN') # => 'BRN'
18
- Text::Metaphone.double_metaphone('Coburn') # => ['KPRN', nil]
19
- Text::Metaphone.double_metaphone('Angier') # => ['ANJ', 'ANJR']
20
-
21
- Text::Soundex.soundex('Knuth') # => 'K530'
22
-
23
- Text::PorterStemming.stem('abatements') # => 'abat'
24
-
25
-
26
- = License
27
-
28
- Same as Ruby.
@@ -1,6 +0,0 @@
1
- require 'text/double_metaphone'
2
- require 'text/figlet'
3
- require 'text/levenshtein'
4
- require 'text/metaphone'
5
- require 'text/porter_stemming'
6
- require 'text/soundex'
@@ -1,356 +0,0 @@
1
- #
2
- # Ruby implementation of the Double Metaphone algorithm by Lawrence Philips,
3
- # originally published in the June 2000 issue of C/C++ Users Journal.
4
- #
5
- # Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/
6
- #
7
- # Author: Tim Fletcher (twoggle@gmail.com)
8
- #
9
-
10
module Text # :nodoc:
  module Metaphone

    # Computes the Double Metaphone codes of +str+.
    #
    # str - the word to encode. It must respond to #length and is
    #       interpolated into a string, upcased and padded with one trailing
    #       space before being scanned.
    #
    # Returns a two-element Array [primary, secondary]. Each code is a String
    # of at most four characters; secondary is nil when it equals primary.
    #
    #   double_metaphone('Coburn') # => ['KPRN', nil]
    #   double_metaphone('Angier') # => ['ANJ', 'ANJR']
    def double_metaphone(str)
      primary, secondary, current = [], [], 0
      original, length, last = "#{str} ".upcase, str.length, str.length - 1

      # Skip the first letter of these two-letter prefixes.
      current += 1 if /^GN|KN|PN|WR|PS$/ =~ original[0, 2]

      # A leading X is coded as S.
      if 'X' == original[0, 1]
        primary << :S
        secondary << :S
        current += 1
      end

      # Collect tokens until both lists hold at least four of them or the
      # input (including its padding space) has been consumed.
      while primary.length < 4 || secondary.length < 4
        break if current > str.length
        a, b, c = double_metaphone_lookup(original, current, length, last)
        primary << a if a
        secondary << b if b
        current += c if c
      end

      # Array#join concatenates the tokens (Symbols, Strings and the Integer 0
      # returned for TH). Array#to_s is not usable here: since Ruby 1.9 it is
      # an alias of #inspect and would yield text such as "[:K, :P, ...".
      primary, secondary = primary.join[0, 4], secondary.join[0, 4]
      return primary, (primary == secondary ? nil : secondary)
    end


  private

    # Truthy when +str+ contains W, K, CZ or WITZ anywhere.
    def slavo_germanic?(str)
      /W|K|CZ|WITZ/ =~ str
    end

    # Truthy when +str+ is exactly one of the letters A, E, I, O, U, Y.
    # Returns nil for nil (callers may pass a slice taken past the end).
    def vowel?(str)
      /\A[AEIOUY]\z/ =~ str
    end

    # Encodes the letter of +str+ found at index +pos+.
    #
    # str    - the upcased word followed by one padding space.
    # pos    - index of the character to encode.
    # length - length of the word without the padding space.
    # last   - index of the last character of the word (length - 1).
    #
    # Returns three values: the token to append to the primary code (or nil),
    # the token to append to the secondary code (or nil), and the number of
    # characters consumed.
    #
    # Note: patterns written as /^A|B|C$/ anchor only their first and last
    # alternatives. That is harmless here because every slice they are
    # matched against is no longer than the alternatives themselves.
    def double_metaphone_lookup(str, pos, length, last)
      case str[pos, 1]
        when /^A|E|I|O|U|Y$/
          # Vowels are only coded at the start of the word.
          if 0 == pos
            return :A, :A, 1
          else
            return nil, nil, 1
          end
        when 'B'
          return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
        when 'Ç'
          return :S, :S, 1
        when 'C'
          if pos > 1 &&
             !vowel?(str[pos - 2, 1]) &&
             'ACH' == str[pos - 1, 3] &&
             str[pos + 2, 1] != 'I' && (
               str[pos + 2, 1] != 'E' ||
               str[pos - 2, 6] =~ /^(B|M)ACHER$/
             ) then
            return :K, :K, 2
          elsif 0 == pos && 'CAESAR' == str[pos, 6]
            return :S, :S, 2
          elsif 'CHIA' == str[pos, 4]
            return :K, :K, 2
          elsif 'CH' == str[pos, 2]
            if pos > 0 && 'CHAE' == str[pos, 4]
              return :K, :X, 2
            elsif 0 == pos && (
                    ['HARAC', 'HARIS'].include?(str[pos + 1, 5]) ||
                    ['HOR', 'HYM', 'HIA', 'HEM'].include?(str[pos + 1, 3])
                  ) && str[0, 5] != 'CHORE' then
              return :K, :K, 2
            elsif ['VAN ','VON '].include?(str[0, 4]) ||
                  'SCH' == str[0, 3] ||
                  ['ORCHES','ARCHIT','ORCHID'].include?(str[pos - 2, 6]) ||
                  ['T','S'].include?(str[pos + 2, 1]) || (
                    ((0 == pos) || ['A','O','U','E'].include?(str[pos - 1, 1])) &&
                    ['L','R','N','M','B','H','F','V','W',' '].include?(str[pos + 2, 1])
                  ) then
              return :K, :K, 2
            elsif pos > 0
              return ('MC' == str[0, 2] ? 'K' : 'X'), 'K', 2
            else
              return :X, :X, 2
            end
          elsif 'CZ' == str[pos, 2] && 'WICZ' != str[pos - 2, 4]
            return :S, :X, 2
          elsif 'CIA' == str[pos + 1, 3]
            return :X, :X, 3
          elsif 'CC' == str[pos, 2] && !(1 == pos && 'M' == str[0, 1])
            if /^I|E|H$/ =~ str[pos + 2, 1] && 'HU' != str[pos + 2, 2]
              if (1 == pos && 'A' == str[pos - 1, 1]) ||
                 /^UCCE(E|S)$/ =~ str[pos - 1, 5] then
                return :KS, :KS, 3
              else
                return :X, :X, 3
              end
            else
              return :K, :K, 2
            end
          elsif /^C(K|G|Q)$/ =~ str[pos, 2]
            return :K, :K, 2
          elsif /^C(I|E|Y)$/ =~ str[pos, 2]
            return :S, (/^CI(O|E|A)$/ =~ str[pos, 3] ? :X : :S), 2
          else
            if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
              return :K, :K, 3
            else
              return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
            end
          end
        when 'D'
          if 'DG' == str[pos, 2]
            if /^I|E|Y$/ =~ str[pos + 2, 1]
              return :J, :J, 3
            else
              return :TK, :TK, 2
            end
          else
            return :T, :T, (/^D(T|D)$/ =~ str[pos, 2] ? 2 : 1)
          end
        when 'F'
          return :F, :F, ('F' == str[pos + 1, 1] ? 2 : 1)
        when 'G'
          if 'H' == str[pos + 1, 1]
            if pos > 0 && !vowel?(str[pos - 1, 1])
              return :K, :K, 2
            elsif 0 == pos
              if 'I' == str[pos + 2, 1]
                return :J, :J, 2
              else
                return :K, :K, 2
              end
            elsif (pos > 1 && /^B|H|D$/ =~ str[pos - 2, 1]) ||
                  (pos > 2 && /^B|H|D$/ =~ str[pos - 3, 1]) ||
                  (pos > 3 && /^B|H$/ =~ str[pos - 4, 1])
              return nil, nil, 2
            else
              if (pos > 2 && 'U' == str[pos - 1, 1] && /^C|G|L|R|T$/ =~ str[pos - 3, 1])
                return :F, :F, 2
              elsif pos > 0 && 'I' != str[pos - 1, 1]
                return :K, :K, 2
              else
                return nil, nil, 2
              end
            end
          elsif 'N' == str[pos + 1, 1]
            if 1 == pos && vowel?(str[0, 1]) && !slavo_germanic?(str)
              return :KN, :N, 2
            else
              if 'EY' != str[pos + 2, 2] && 'Y' != str[pos + 1, 1] && !slavo_germanic?(str)
                return :N, :KN, 2
              else
                return :KN, :KN, 2
              end
            end
          elsif 'LI' == str[pos + 1, 2] && !slavo_germanic?(str)
            return :KL, :L, 2
          elsif 0 == pos && ('Y' == str[pos + 1, 1] || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/ =~ str[pos + 1, 2])
            return :K, :J, 2
          elsif (('ER' == str[pos + 1, 2] || 'Y' == str[pos + 1, 1]) &&
                 /^(D|R|M)ANGER$/ !~ str[0, 6] &&
                 /^E|I$/ !~ str[pos - 1, 1] &&
                 /^(R|O)GY$/ !~ str[pos - 1, 3])
            return :K, :J, 2
          elsif /^E|I|Y$/ =~ str[pos + 1, 1] || /^(A|O)GGI$/ =~ str[pos - 1, 4]
            if (/^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]) || 'ET' == str[pos + 1, 2]
              return :K, :K, 2
            else
              if 'IER ' == str[pos + 1, 4]
                return :J, :J, 2
              else
                return :J, :K, 2
              end
            end
          elsif 'G' == str[pos + 1, 1]
            return :K, :K, 2
          else
            return :K, :K, 1
          end
        when 'H'
          if (0 == pos || vowel?(str[pos - 1, 1])) && vowel?(str[pos + 1, 1])
            return :H, :H, 2
          else
            return nil, nil, 1
          end
        when 'J'
          if 'JOSE' == str[pos, 4] || 'SAN ' == str[0, 4]
            if (0 == pos && ' ' == str[pos + 4, 1]) || 'SAN ' == str[0, 4]
              return :H, :H, 1
            else
              return :J, :H, 1
            end
          else
            current = ('J' == str[pos + 1, 1] ? 2 : 1)

            if 0 == pos && 'JOSE' != str[pos, 4]
              return :J, :A, current
            else
              if vowel?(str[pos - 1, 1]) && !slavo_germanic?(str) && /^A|O$/ =~ str[pos + 1, 1]
                return :J, :H, current
              else
                if last == pos
                  return :J, nil, current
                else
                  if /^L|T|K|S|N|M|B|Z$/ !~ str[pos + 1, 1] && /^S|K|L$/ !~ str[pos - 1, 1]
                    return :J, :J, current
                  else
                    return nil, nil, current
                  end
                end
              end
            end
          end
        when 'K'
          return :K, :K, ('K' == str[pos + 1, 1] ? 2 : 1)
        when 'L'
          if 'L' == str[pos + 1, 1]
            if (((length - 3) == pos && /^(ILL(O|A)|ALLE)$/ =~ str[pos - 1, 4]) ||
                ((/^(A|O)S$/ =~ str[last - 1, 2] || /^A|O$/ =~ str[last, 1]) && 'ALLE' == str[pos - 1, 4]))
              return :L, nil, 2
            else
              return :L, :L, 2
            end
          else
            return :L, :L, 1
          end
        when 'M'
          if ('UMB' == str[pos - 1, 3] &&
              ((last - 1) == pos || 'ER' == str[pos + 2, 2])) || 'M' == str[pos + 1, 1]
            return :M, :M, 2
          else
            return :M, :M, 1
          end
        when 'N'
          return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
        when 'Ñ'
          return :N, :N, 1
        when 'P'
          if 'H' == str[pos + 1, 1]
            return :F, :F, 2
          else
            return :P, :P, (/^P|B$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'Q'
          return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
        when 'R'
          current = ('R' == str[pos + 1, 1] ? 2 : 1)

          if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
            return nil, :R, current
          else
            return :R, :R, current
          end
        when 'S'
          if /^(I|Y)SL$/ =~ str[pos - 1, 3]
            return nil, nil, 1
          elsif 0 == pos && 'SUGAR' == str[pos, 5]
            return :X, :S, 1
          elsif 'SH' == str[pos, 2]
            if /^H(EIM|OEK|OLM|OLZ)$/ =~ str[pos + 1, 4]
              return :S, :S, 2
            else
              return :X, :X, 2
            end
          elsif /^SI(O|A)$/ =~ str[pos, 3] || 'SIAN' == str[pos, 4]
            return :S, (slavo_germanic?(str) ? :S : :X), 3
          elsif (0 == pos && /^M|N|L|W$/ =~ str[pos + 1, 1]) || 'Z' == str[pos + 1, 1]
            return :S, :X, ('Z' == str[pos + 1, 1] ? 2 : 1)
          elsif 'SC' == str[pos, 2]
            if 'H' == str[pos + 2, 1]
              if /^OO|ER|EN|UY|ED|EM$/ =~ str[pos + 3, 2]
                return (/^E(R|N)$/ =~ str[pos + 3, 2] ? :X : :SK), :SK, 3
              else
                return :X, ((0 == pos && !vowel?(str[3, 1]) && ('W' != str[pos + 3, 1])) ? :S : :X), 3
              end
            elsif /^I|E|Y$/ =~ str[pos + 2, 1]
              return :S, :S, 3
            else
              return :SK, :SK, 3
            end
          else
            return (last == pos && /^(A|O)I$/ =~ str[pos - 2, 2] ? nil : 'S'), 'S', (/^S|Z$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'T'
          if 'TION' == str[pos, 4]
            return :X, :X, 3
          elsif /^T(IA|CH)$/ =~ str[pos, 3]
            return :X, :X, 3
          elsif 'TH' == str[pos, 2] || 'TTH' == str[pos, 3]
            if /^(O|A)M$/ =~ str[pos + 2, 2] || /^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]
              return :T, :T, 2
            else
              # The Integer 0 is truthy in Ruby, so the caller appends it;
              # it becomes the character "0" in the joined primary code.
              return 0, :T, 2
            end
          else
            return :T, :T, (/^T|D$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'V'
          return :F, :F, ('V' == str[pos + 1, 1] ? 2 : 1)
        when 'W'
          if 'WR' == str[pos, 2]
            return :R, :R, 2
          end
          pri, sec = nil, nil

          if 0 == pos && (vowel?(str[pos + 1, 1]) || 'WH' == str[pos, 2])
            pri = :A
            sec = vowel?(str[pos + 1, 1]) ? :F : :A
          end

          if (last == pos && vowel?(str[pos - 1, 1])) || 'SCH' == str[0, 3] ||
             /^EWSKI|EWSKY|OWSKI|OWSKY$/ =~ str[pos - 1, 5]
            return pri, "#{sec}F".intern, 1
          elsif /^WI(C|T)Z$/ =~ str[pos, 4]
            return "#{pri}TS".intern, "#{sec}FX".intern, 4
          else
            return pri, sec, 1
          end
        when 'X'
          current = (/^C|X$/ =~ str[pos + 1, 1] ? 2 : 1)

          if !(last == pos && (/^(I|E)AU$/ =~ str[pos - 3, 3] || /^(A|O)U$/ =~ str[pos - 2, 2]))
            return :KS, :KS, current
          else
            return nil, nil, current
          end
        when 'Z'
          if 'H' == str[pos + 1, 1]
            return :J, :J, 2
          else
            current = ('Z' == str[pos + 1, 1] ? 2 : 1)

            if /^Z(O|I|A)$/ =~ str[pos + 1, 2] || (slavo_germanic?(str) && (pos > 0 && 'T' != str[pos - 1, 1]))
              return :S, :TS, current
            else
              return :S, :S, current
            end
          end
        else
          # Any other character (including the padding space) produces no
          # token and consumes one position.
          return nil, nil, 1
      end
    end # def double_metaphone_lookup

    # Make the methods callable on the module itself
    # (Text::Metaphone.double_metaphone).
    extend self

  end # module Metaphone
end # module Text