amatch 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,124 @@
1
+ # amatch - Approximate Matching Extension for Ruby
2
+
3
+ ## Description
4
+
5
+ This is a collection of classes that can be used for Approximate
6
+ matching, searching, and comparing of Strings. They implement algorithms
7
+ that compute the Levenshtein edit distance, Sellers edit distance, the
8
+ Hamming distance, the longest common subsequence length, the longest common
9
+ substring length, the pair distance metric, and the Jaro-Winkler metric.
10
+
11
+ ## Installation
12
+
13
+ To install this extension as a gem type
14
+
15
+ # gem install amatch
16
+
17
+ into the shell.
18
+
19
+ ## Download
20
+
21
+ The homepage of this library is located at
22
+
23
+ * https://github.com/flori/amatch
24
+
25
+ ## Examples
26
+
27
+ require 'amatch'
28
+ # => true
29
+ include Amatch
30
+ # => Object
31
+
32
+ m = Sellers.new("pattern")
33
+ # => #<Amatch::Sellers:0x40366324>
34
+ m.match("pattren")
35
+ # => 2.0
36
+ m.substitution = m.insertion = 3
37
+ # => 3
38
+ m.match("pattren")
39
+ # => 4.0
40
+ m.reset_weights
41
+ # => #<Amatch::Sellers:0x40366324>
42
+ m.match(["pattren","parent"])
43
+ # => [2.0, 4.0]
44
+ m.search("abcpattrendef")
45
+ # => 2.0
46
+
47
+ m = Levenshtein.new("pattern")
48
+ # => #<Amatch::Levenshtein:0x4035919c>
49
+ m.match("pattren")
50
+ # => 2
51
+ m.search("abcpattrendef")
52
+ # => 2
53
+ "pattern language".levenshtein_similar("language of patterns")
54
+ # => 0.2
55
+
56
+ m = Amatch::DamerauLevenshtein.new("pattern")
57
+ # => #<Amatch::DamerauLevenshtein:0x007fc3483dd278>
58
+ m.match("pattren")
59
+ # => 1
60
+ "pattern language".damerau_levenshtein_similar("language of patterns")
61
+ # => 0.19999999999999996
62
+
63
+ m = Hamming.new("pattern")
64
+ # => #<Amatch::Hamming:0x40350858>
65
+ m.match("pattren")
66
+ # => 2
67
+ "pattern language".hamming_similar("language of patterns")
68
+ # => 0.1
69
+
70
+ m = PairDistance.new("pattern")
71
+ # => #<Amatch::PairDistance:0x40349be8>
72
+ m.match("pattr en")
73
+ # => 0.545454545454545
74
+ m.match("pattr en", nil)
75
+ # => 0.461538461538462
76
+ m.match("pattr en", /t+/)
77
+ # => 0.285714285714286
78
+ "pattern language".pair_distance_similar("language of patterns")
79
+ # => 0.928571428571429
80
+
81
+ m = LongestSubsequence.new("pattern")
82
+ # => #<Amatch::LongestSubsequence:0x4033e900>
83
+ m.match("pattren")
84
+ # => 6
85
+ "pattern language".longest_subsequence_similar("language of patterns")
86
+ # => 0.4
87
+
88
+ m = LongestSubstring.new("pattern")
89
+ # => #<Amatch::LongestSubstring:0x403378d0>
90
+ m.match("pattren")
91
+ # => 4
92
+ "pattern language".longest_substring_similar("language of patterns")
93
+ # => 0.4
94
+
95
+ m = Jaro.new("pattern")
96
+ # => #<Amatch::Jaro:0x363b70>
97
+ m.match("paTTren")
98
+ # => 0.952380952380952
99
+ m.ignore_case = false
100
+ m.match("paTTren")
101
+ # => 0.742857142857143
102
+ "pattern language".jaro_similar("language of patterns")
103
+ # => 0.672222222222222
104
+
105
+ m = JaroWinkler.new("pattern")
106
+ # => #<Amatch::JaroWinkler:0x3530b8>
107
+ m.match("paTTren")
108
+ # => 0.971428571712403
109
+ m.ignore_case = false
110
+ m.match("paTTren")
111
+ # => 0.79428571505206
112
+ m.scaling_factor = 0.05
113
+ m.match("pattren")
114
+ # => 0.961904762046678
115
+ "pattern language".jarowinkler_similar("language of patterns")
116
+ # => 0.672222222222222
117
+
118
+ ## Author
119
+
120
+ Florian Frank mailto:flori@ping.de
121
+
122
+ ## License
123
+
124
+ Apache License, Version 2.0 – See the COPYING file in the source archive.
data/Rakefile CHANGED
@@ -13,22 +13,15 @@ Amatch is a library for approximate string matching and searching in strings.
13
13
  Several algorithms can be used to do this, and it's also possible to compute a
14
14
  similarity metric number between 0.0 and 1.0 for two given strings.
15
15
  EOT
16
- executables << 'agrep.rb'
16
+ executables << 'agrep' << 'dupfind'
17
17
  bindir 'bin'
18
18
  test_dir 'tests'
19
- ignore '.*.sw[pon]', 'pkg', 'Gemfile.lock', '.AppleDouble', '.rbx', '.bundle'
19
+ ignore '.*.sw[pon]', 'pkg', 'Gemfile.lock', '.AppleDouble', '.rbx', 'Makefile'
20
20
  title "#{name.camelize} - Approximate Matching"
21
- readme 'README.rdoc'
21
+ readme 'README.md'
22
22
  require_paths %w[lib ext]
23
23
  dependency 'tins', '~>1.0'
24
+ dependency 'mize'
24
25
  development_dependency 'test-unit', '~>3.0'
25
- licenses << 'GPL'
26
-
27
- install_library do
28
- libdir = CONFIG["sitelibdir"]
29
- src, = Dir['ext/amatch.*'].reject { |x| x =~ /\.[co]$/ }
30
- install(src, File.join(libdir, File.basename(src)), :verbose => true)
31
- mkdir_p dst = File.join(libdir, 'amatch')
32
- install('lib/amatch/version.rb', File.join(dst, 'version.rb'), :verbose => true)
33
- end
26
+ licenses << 'Apache-2.0'
34
27
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.1
1
+ 0.4.0
Binary file
@@ -15,15 +15,21 @@ end
15
15
 
16
16
  class Amatch::Levenshtein
17
17
  def search_relative(strings)
18
- search(strings).to_f / pattern.size
18
+ if Array === strings
19
+ search(strings).map { |s| s.to_f / pattern.size }
20
+ else
21
+ search(strings).to_f / pattern.size
22
+ end
19
23
  end
20
24
  end
21
25
 
26
+ $algorithm = 'Levenshtein'
22
27
  $distance = 1
23
28
  $mode = :search
24
29
  begin
25
30
  parser = GetoptLong.new
26
31
  options = [
32
+ [ '--algorithm', '-a', GetoptLong::REQUIRED_ARGUMENT ],
27
33
  [ '--distance', '-d', GetoptLong::REQUIRED_ARGUMENT ],
28
34
  [ '--relative', '-r', GetoptLong::NO_ARGUMENT ],
29
35
  [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
@@ -33,6 +39,8 @@ begin
33
39
  parser.each_option do |name, arg|
34
40
  name = name.sub(/^--/, '')
35
41
  case name
42
+ when 'algorithm'
43
+ $algorithm = arg
36
44
  when 'distance'
37
45
  $distance = arg.to_f
38
46
  when 'relative'
@@ -48,7 +56,7 @@ rescue
48
56
  end
49
57
  pattern = ARGV.shift or usage('Pattern needed!', options)
50
58
 
51
- matcher = Amatch::Levenshtein.new(pattern)
59
+ matcher = Amatch.const_get($algorithm).new(pattern)
52
60
  size = 0
53
61
  start = Time.new
54
62
  if ARGV.size > 0 then
@@ -56,9 +64,12 @@ if ARGV.size > 0 then
56
64
  File.stat(filename).file? or next
57
65
  size += File.size(filename)
58
66
  begin
59
- File.open(filename, 'r').each_line do |line|
60
- if matcher.__send__($mode, line) <= $distance
61
- puts "#{filename}:#{line}"
67
+ File.open(filename, 'r').each_line.each_slice(1000) do |lines|
68
+ results = matcher.__send__($mode, lines)
69
+ lines.zip(results) do |line, r|
70
+ if r <= $distance
71
+ puts "#{filename}:#{line}"
72
+ end
62
73
  end
63
74
  end
64
75
  rescue
@@ -66,10 +77,13 @@ if ARGV.size > 0 then
66
77
  end
67
78
  end
68
79
  else
69
- STDIN.each_line do |line|
70
- size += line.size
71
- if matcher.__send__($mode, line) <= $distance
72
- puts line
80
+ STDIN.each_line.each_slice(1000) do |lines|
81
+ size += lines.size
82
+ results = matcher.__send__($mode, lines)
83
+ lines.zip(results) do |line, r|
84
+ if r <= $distance
85
+ puts line
86
+ end
73
87
  end
74
88
  end
75
89
  end
@@ -0,0 +1,153 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'tins/go'
4
+ include Tins::GO
5
+ require 'tins/minimize'
6
+ class Array
7
+ include Tins::Minimize
8
+ end
9
+ require 'amatch'
10
+ begin
11
+ require 'infobar'
12
+ rescue LoadError
13
+ warn "Please install gem infobar to run this executable!"
14
+ exit 1
15
+ end
16
+
17
+ def usage
18
+ puts <<EOT
19
+ Usage: #{File.basename($0)} [OPTIONS] FILE
20
+
21
+ -a ALGO Amatch matching algorithm
22
+ -p LIMIT more than p similarity to be a match
23
+ -R NUMBER skip NUMBER mismatch for building ranges
24
+ -r NUMBER minimum length to be counted as a range
25
+ -i compute a PNG per file
26
+
27
+ Report bugs to <flori@ping.de>.
28
+ EOT
29
+ exit 0
30
+ end
31
+
32
+ class FindDuplicates
33
+ def initialize(algo, p_lim, filename)
34
+ @algo, @p_lim, @filename = algo, p_lim, filename
35
+ end
36
+
37
+ attr_reader :filename
38
+
39
+ attr_reader :algo
40
+
41
+ attr_reader :p_lim
42
+
43
+ memoize method:
44
+ def lines
45
+ File.readlines(filename)
46
+ end
47
+
48
+ memoize method:
49
+ def matrix
50
+ result = lines.with_infobar(label: filename, output: STDERR).map do |l1|
51
+ +infobar
52
+ a = algo.new(l1)
53
+ r = a.similar(lines)
54
+ r.map! { |s| s >= p_lim ? ?1 : ?0 }
55
+ r.join
56
+ end
57
+ infobar.finish
58
+ infobar.newline
59
+ result
60
+ end
61
+
62
+ def pbm(output: $>)
63
+ output << <<HEADER
64
+ P1
65
+ #{matrix.size} #{matrix.size}
66
+ HEADER
67
+ output << matrix.map { |line| line.each_char.to_a * ' ' } * ?\n
68
+ self
69
+ end
70
+
71
+ def png(output: $>)
72
+ IO.popen("pnmtopng", 'w+') do |conv|
73
+ pbm(output: conv)
74
+ conv.close_write
75
+ output.write(conv.read)
76
+ end
77
+ self
78
+ end
79
+
80
+ def create_image
81
+ suffix = Regexp.quote(File.extname(filename))
82
+ f = filename.sub(/(#{suffix}|)\z/, '.png')
83
+ File.open(f, 'wb') do |output|
84
+ png(output: output)
85
+ infobar.puts "Writing output to #{f.inspect}."
86
+ end
87
+ self
88
+ end
89
+
90
+ def similar_ranges(min_range: 3, skip_range: 0)
91
+ set = 0
92
+ ranges = { set => [] }
93
+ m = matrix
94
+ n = m.size
95
+ skip_count = 0
96
+ n.downto(1) do |h|
97
+ (n - h + 1).upto(n - 1) do |k|
98
+ i = k
99
+ j = k - (n - h + 1)
100
+ if m[i][j] == ?1
101
+ skip_count = 0
102
+ ranges[set] << [ i, j ]
103
+ elsif !ranges[set].empty? && skip_count < skip_range
104
+ skip_count += 1
105
+ else
106
+ skip_count = 0
107
+ ranges[set].empty? or ranges[set += 1] = []
108
+ end
109
+ end
110
+ skip_count = 0
111
+ ranges[set].empty? or ranges[set += 1] = []
112
+ end
113
+ ranges.each { |_, r|
114
+ r.flatten!
115
+ r.sort!
116
+ r.map! { |x| x + 1 }
117
+ r.minimize!
118
+ r.reject! { |s| s.size < min_range }
119
+ }.reject! { |_, r| r.empty? }
120
+ unions = []
121
+ while !ranges.empty?
122
+ _, r = ranges.first
123
+ equivalent = ranges.reject { |_, v| (v & r).empty? }
124
+ unions << equivalent.values.flatten.uniq
125
+ ranges.delete_if { |k, _| equivalent.keys.include?(k) }
126
+ end
127
+ unions.each do |r|
128
+ r.map! do |x|
129
+ "#{filename}:#{x.begin}-#{x.end}"
130
+ end
131
+ end
132
+ unions
133
+ end
134
+ end
135
+
136
+ opts = go 'a:p:R:r:ih'
137
+
138
+ usage if opts[?h]
139
+ algo = Amatch.const_get(opts[?a] || 'Levenshtein')
140
+ p_lim = (opts[?p] || 0.95).to_f
141
+ min_range = (opts[?r] || 3).to_i
142
+ skip_range = opts[?R].to_i
143
+ ARGV.empty? and usage
144
+
145
+ filenames = ARGV.inject([]) { |s, f| s.concat(Dir[f]) }
146
+ for filename in filenames
147
+ finder = FindDuplicates.new(algo, p_lim, filename)
148
+ opts[?i] and finder.create_image
149
+ for s in finder.similar_ranges(min_range: min_range, skip_range: skip_range)
150
+ infobar.reset
151
+ puts s, ?\n
152
+ end
153
+ end
@@ -3,24 +3,8 @@
3
3
  #include <ctype.h>
4
4
  #include "common.h"
5
5
 
6
- /*
7
- * Document-method: pattern
8
- *
9
- * call-seq: pattern -> pattern string
10
- *
11
- * Returns the current pattern string of this instance.
12
- */
13
-
14
- /*
15
- * Document-method: pattern=
16
- *
17
- * call-seq: pattern=(pattern)
18
- *
19
- * Sets the current pattern string of this instance to <code>pattern</code>.
20
- */
21
-
22
-
23
- static VALUE rb_mAmatch, rb_mAmatchStringMethods, rb_cLevenshtein, rb_cSellers, rb_cHamming,
6
+ static VALUE rb_mAmatch, rb_mAmatchStringMethods, rb_cLevenshtein,
7
+ rb_cDamerauLevenshtein, rb_cSellers, rb_cHamming,
24
8
  rb_cPairDistance, rb_cLongestSubsequence, rb_cLongestSubstring,
25
9
  rb_cJaro, rb_cJaroWinkler;
26
10
 
@@ -230,9 +214,11 @@ DEF_ITERATE_STRINGS(JaroWinkler)
230
214
  */
231
215
 
232
216
  #define COMPUTE_LEVENSHTEIN_DISTANCE \
233
- for (i = 1, c = 0, p = 1; i <= a_len; i++) { \
217
+ c = 0; \
218
+ p = 0; \
219
+ for (i = 1; i <= a_len; i++) { \
234
220
  c = i % 2; /* current row */ \
235
- p = (i + 1) % 2; /* previous row */ \
221
+ p = (i - 1) % 2; /* previous row */ \
236
222
  v[c][0] = i; /* first column */ \
237
223
  for (j = 1; j <= b_len; j++) { \
238
224
  /* Bellman's principle of optimality: */ \
@@ -245,8 +231,6 @@ DEF_ITERATE_STRINGS(JaroWinkler)
245
231
  } \
246
232
  v[c][j] = weight; \
247
233
  } \
248
- p = c; \
249
- c = (c + 1) % 2; \
250
234
  }
251
235
 
252
236
  static VALUE Levenshtein_match(General *amatch, VALUE string)
@@ -269,7 +253,7 @@ static VALUE Levenshtein_match(General *amatch, VALUE string)
269
253
 
270
254
  COMPUTE_LEVENSHTEIN_DISTANCE
271
255
 
272
- result = INT2FIX(v[p][b_len]);
256
+ result = INT2FIX(v[c][b_len]);
273
257
 
274
258
  xfree(v[0]);
275
259
  xfree(v[1]);
@@ -287,6 +271,7 @@ static VALUE Levenshtein_similar(General *amatch, VALUE string)
287
271
 
288
272
  Check_Type(string, T_STRING);
289
273
  DONT_OPTIMIZE
274
+
290
275
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
291
276
  if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
292
277
  v[0] = ALLOC_N(int, b_len + 1);
@@ -299,12 +284,14 @@ static VALUE Levenshtein_similar(General *amatch, VALUE string)
299
284
  COMPUTE_LEVENSHTEIN_DISTANCE
300
285
 
301
286
  if (b_len > a_len) {
302
- result = rb_float_new(1.0 - ((double) v[p][b_len]) / b_len);
287
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / b_len);
303
288
  } else {
304
- result = rb_float_new(1.0 - ((double) v[p][b_len]) / a_len);
289
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / a_len);
305
290
  }
291
+
306
292
  xfree(v[0]);
307
293
  xfree(v[1]);
294
+
308
295
  return result;
309
296
  }
310
297
 
@@ -327,26 +314,159 @@ static VALUE Levenshtein_search(General *amatch, VALUE string)
327
314
  COMPUTE_LEVENSHTEIN_DISTANCE
328
315
 
329
316
  for (i = 0, min = a_len; i <= b_len; i++) {
330
- if (v[p][i] < min) min = v[p][i];
317
+ if (v[c][i] < min) min = v[c][i];
331
318
  }
332
319
 
333
320
  result = INT2FIX(min);
334
321
 
335
322
  xfree(v[0]);
336
323
  xfree(v[1]);
337
-
324
+
325
+ return result;
326
+ }
327
+
328
+ /*
329
+ * DamerauLevenshtein edit distances are computed here:
330
+ */
331
+
332
+ #define COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE \
333
+ c = 0; \
334
+ p = 0; \
335
+ pp = 0; \
336
+ for (i = 1; i <= a_len; i++) { \
337
+ c = i % 3; /* current row */ \
338
+ p = (i - 1) % 3; /* previous row */ \
339
+ pp = (i - 2) % 3; /* previous previous row */ \
340
+ v[c][0] = i; /* first column */ \
341
+ for (j = 1; j <= b_len; j++) { \
342
+ /* Bellman's principle of optimality: */ \
343
+ weight = v[p][j - 1] + (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : 1); \
344
+ if (weight > v[p][j] + 1) { \
345
+ weight = v[p][j] + 1; \
346
+ } \
347
+ if (weight > v[c][j - 1] + 1) { \
348
+ weight = v[c][j - 1] + 1; \
349
+ } \
350
+ if (i > 2 && j > 2 && a_ptr[i - 1] == b_ptr[j - 2] && a_ptr[i - 2] == b_ptr[j - 1]) {\
351
+ if (weight > v[pp][j - 2]) { \
352
+ weight = v[pp][j - 2] + (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : 1); \
353
+ } \
354
+ } \
355
+ v[c][j] = weight; \
356
+ } \
357
+ }
358
+
359
+ static VALUE DamerauLevenshtein_match(General *amatch, VALUE string)
360
+ {
361
+ VALUE result;
362
+ char *a_ptr, *b_ptr;
363
+ int a_len, b_len;
364
+ int *v[3], weight;
365
+ int i, j, c, p, pp;
366
+
367
+ Check_Type(string, T_STRING);
368
+ DONT_OPTIMIZE
369
+
370
+ v[0] = ALLOC_N(int, b_len + 1);
371
+ v[1] = ALLOC_N(int, b_len + 1);
372
+ v[2] = ALLOC_N(int, b_len + 1);
373
+ for (i = 0; i <= b_len; i++) {
374
+ v[0][i] = i;
375
+ v[1][i] = i;
376
+ v[2][i] = i;
377
+ }
378
+
379
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
380
+
381
+ result = INT2FIX(v[c][b_len]);
382
+
383
+ xfree(v[0]);
384
+ xfree(v[1]);
385
+ xfree(v[2]);
386
+
338
387
  return result;
339
388
  }
340
389
 
390
+ static VALUE DamerauLevenshtein_similar(General *amatch, VALUE string)
391
+ {
392
+ VALUE result;
393
+ char *a_ptr, *b_ptr;
394
+ int a_len, b_len;
395
+ int *v[3], weight;
396
+ int i, j, c, p, pp;
397
+
398
+ Check_Type(string, T_STRING);
399
+ DONT_OPTIMIZE
400
+
401
+ if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
402
+ if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
403
+ v[0] = ALLOC_N(int, b_len + 1);
404
+ v[1] = ALLOC_N(int, b_len + 1);
405
+ v[2] = ALLOC_N(int, b_len + 1);
406
+ for (i = 0; i <= b_len; i++) {
407
+ v[0][i] = i;
408
+ v[1][i] = i;
409
+ v[2][i] = i;
410
+ }
411
+
412
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
413
+
414
+ if (b_len > a_len) {
415
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / b_len);
416
+ } else {
417
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / a_len);
418
+ }
419
+
420
+ xfree(v[0]);
421
+ xfree(v[1]);
422
+ xfree(v[2]);
423
+
424
+ return result;
425
+ }
426
+
427
+ static VALUE DamerauLevenshtein_search(General *amatch, VALUE string)
428
+ {
429
+ VALUE result;
430
+ char *a_ptr, *b_ptr;
431
+ int a_len, b_len;
432
+ int *v[3], weight, min;
433
+ int i, j, c, p, pp;
434
+
435
+ Check_Type(string, T_STRING);
436
+ DONT_OPTIMIZE
437
+
438
+ v[0] = ALLOC_N(int, b_len + 1);
439
+ v[1] = ALLOC_N(int, b_len + 1);
440
+ v[2] = ALLOC_N(int, b_len + 1);
441
+ MEMZERO(v[0], int, b_len + 1);
442
+ MEMZERO(v[1], int, b_len + 1);
443
+ MEMZERO(v[2], int, b_len + 1);
444
+
445
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
446
+
447
+ for (i = 0, min = a_len; i <= b_len; i++) {
448
+ if (v[c][i] < min) min = v[c][i];
449
+ }
450
+
451
+ result = INT2FIX(min);
452
+
453
+ xfree(v[0]);
454
+ xfree(v[1]);
455
+ xfree(v[2]);
456
+
457
+ return result;
458
+ }
341
459
 
342
460
  /*
343
461
  * Sellers edit distances are computed here:
344
462
  */
345
463
 
346
464
  #define COMPUTE_SELLERS_DISTANCE \
347
- for (i = 1, c = 0, p = 1; i <= a_len; i++) { \
465
+ c = 0; \
466
+ p = 0; \
467
+ for (i = 1; i <= a_len; i++) { \
348
468
  c = i % 2; /* current row */ \
349
- p = (i + 1) % 2; /* previous row */ \
469
+ p = (i - 1) % 2; /* previous row */ \
350
470
  v[c][0] = i * amatch->deletion; /* first column */ \
351
471
  for (j = 1; j <= b_len; j++) { \
352
472
  /* Bellman's principle of optimality: */ \
@@ -361,7 +481,6 @@ static VALUE Levenshtein_search(General *amatch, VALUE string)
361
481
  v[c][j] = weight; \
362
482
  } \
363
483
  p = c; \
364
- c = (c + 1) % 2; \
365
484
  }
366
485
 
367
486
  static VALUE Sellers_match(Sellers *amatch, VALUE string)
@@ -411,9 +530,10 @@ static VALUE Sellers_similar(Sellers *amatch, VALUE string)
411
530
  max_weight = amatch->deletion;
412
531
  }
413
532
  }
414
-
533
+
415
534
  Check_Type(string, T_STRING);
416
535
  DONT_OPTIMIZE
536
+
417
537
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
418
538
  if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
419
539
  v[0] = ALLOC_N(double, b_len + 1);
@@ -459,7 +579,7 @@ static VALUE Sellers_search(Sellers *amatch, VALUE string)
459
579
  result = rb_float_new(min);
460
580
  xfree(v[0]);
461
581
  xfree(v[1]);
462
-
582
+
463
583
  return result;
464
584
  }
465
585
 
@@ -470,7 +590,7 @@ static VALUE Sellers_search(Sellers *amatch, VALUE string)
470
590
  static VALUE PairDistance_match(PairDistance *amatch, VALUE string, VALUE regexp, int use_regexp)
471
591
  {
472
592
  double result;
473
- VALUE tokens, string_tokens;
593
+ VALUE string_tokens, tokens;
474
594
  PairArray *pattern_pair_array, *pair_array;
475
595
 
476
596
  Check_Type(string, T_STRING);
@@ -518,7 +638,7 @@ static VALUE Hamming_match(General *amatch, VALUE string)
518
638
  char *a_ptr, *b_ptr;
519
639
  int a_len, b_len;
520
640
  int i, result;
521
-
641
+
522
642
  Check_Type(string, T_STRING);
523
643
  OPTIMIZE_TIME
524
644
  COMPUTE_HAMMING_DISTANCE
@@ -530,7 +650,7 @@ static VALUE Hamming_similar(General *amatch, VALUE string)
530
650
  char *a_ptr, *b_ptr;
531
651
  int a_len, b_len;
532
652
  int i, result;
533
-
653
+
534
654
  Check_Type(string, T_STRING);
535
655
  OPTIMIZE_TIME
536
656
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
@@ -570,7 +690,7 @@ static VALUE LongestSubsequence_match(General *amatch, VALUE string)
570
690
  char *a_ptr, *b_ptr;
571
691
  int a_len, b_len;
572
692
  int result, c, p, i, j, *l[2];
573
-
693
+
574
694
  Check_Type(string, T_STRING);
575
695
  OPTIMIZE_TIME
576
696
 
@@ -584,7 +704,7 @@ static VALUE LongestSubsequence_similar(General *amatch, VALUE string)
584
704
  char *a_ptr, *b_ptr;
585
705
  int a_len, b_len;
586
706
  int result, c, p, i, j, *l[2];
587
-
707
+
588
708
  Check_Type(string, T_STRING);
589
709
  OPTIMIZE_TIME
590
710
 
@@ -624,7 +744,7 @@ static VALUE LongestSubstring_match(General *amatch, VALUE string)
624
744
  char *a_ptr, *b_ptr;
625
745
  int a_len, b_len;
626
746
  int result, c, p, i, j, *l[2];
627
-
747
+
628
748
  Check_Type(string, T_STRING);
629
749
  OPTIMIZE_TIME
630
750
  if (a_len == 0 || b_len == 0) return INT2FIX(0);
@@ -637,7 +757,7 @@ static VALUE LongestSubstring_similar(General *amatch, VALUE string)
637
757
  char *a_ptr, *b_ptr;
638
758
  int a_len, b_len;
639
759
  int result, c, p, i, j, *l[2];
640
-
760
+
641
761
  Check_Type(string, T_STRING);
642
762
  OPTIMIZE_TIME
643
763
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
@@ -769,7 +889,7 @@ static VALUE JaroWinkler_match(JaroWinkler *amatch, VALUE string)
769
889
  * Ruby API
770
890
  */
771
891
 
772
- /*
892
+ /*
773
893
  * Document-class: Amatch::Levenshtein
774
894
  *
775
895
  * The Levenshtein edit distance is defined as the minimal costs involved to
@@ -802,7 +922,7 @@ DEF_CONSTRUCTOR(Levenshtein, General)
802
922
 
803
923
  /*
804
924
  * call-seq: match(strings) -> results
805
- *
925
+ *
806
926
  * Uses this Amatch::Levenshtein instance to match Amatch::Levenshtein#pattern
807
927
  * against <code>strings</code>. It returns the number of operations, the Levenshtein
808
928
  * distance. <code>strings</code> has to be either a String or an Array of
@@ -810,14 +930,14 @@ DEF_CONSTRUCTOR(Levenshtein, General)
810
930
  * Floats respectively.
811
931
  */
812
932
  static VALUE rb_Levenshtein_match(VALUE self, VALUE strings)
813
- {
933
+ {
814
934
  GET_STRUCT(General)
815
935
  return General_iterate_strings(amatch, strings, Levenshtein_match);
816
936
  }
817
937
 
818
938
  /*
819
939
  * call-seq: similar(strings) -> results
820
- *
940
+ *
821
941
  * Uses this Amatch::Levenshtein instance to match Amatch::Levenshtein#pattern
822
942
  * against <code>strings</code>, and compute a Levenshtein distance metric
823
943
  * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
@@ -826,14 +946,14 @@ static VALUE rb_Levenshtein_match(VALUE self, VALUE strings)
826
946
  * respectively.
827
947
  */
828
948
  static VALUE rb_Levenshtein_similar(VALUE self, VALUE strings)
829
- {
949
+ {
830
950
  GET_STRUCT(General)
831
951
  return General_iterate_strings(amatch, strings, Levenshtein_similar);
832
952
  }
833
953
 
834
954
  /*
835
955
  * call-seq: levenshtein_similar(strings) -> results
836
- *
956
+ *
837
957
  * If called on a String, this string is used as a Amatch::Levenshtein#pattern
838
958
  * to match against <code>strings</code>. It returns a Levenshtein distance
839
959
  * metric number between 0.0 for very unsimilar strings and 1.0 for an exact
@@ -849,7 +969,7 @@ static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
849
969
 
850
970
  /*
851
971
  * call-seq: search(strings) -> results
852
- *
972
+ *
853
973
  * searches Amatch::Levenshtein#pattern in <code>strings</code> and returns the
854
974
  * edit distance (the sum of character operations) as a Fixnum value, by greedy
855
975
  * trimming prefixes or postfixes of the match. <code>strings</code> has
@@ -857,12 +977,105 @@ static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
857
977
  * <code>results</code> is either a Fixnum or an Array of Fixnums respectively.
858
978
  */
859
979
  static VALUE rb_Levenshtein_search(VALUE self, VALUE strings)
860
- {
980
+ {
861
981
  GET_STRUCT(General)
862
982
  return General_iterate_strings(amatch, strings, Levenshtein_search);
863
983
  }
864
984
 
865
- /*
985
+ /*
986
+ * Document-class: Amatch::DamerauLevenshtein
987
+ * XXX
988
+ * The DamerauLevenshtein edit distance is defined as the minimal costs
989
+ * involved to transform one string into another by using four elementary
990
+ * operations: deletion, insertion and substitution of a character, and transposition of two adjacent characters. To
991
+ * transform "water" into "wine", for instance, you have to substitute "a" ->
992
+ * "i": "witer", "t" -> "n": "winer" and delete "r": "wine". The edit distance
993
+ * between "water" and "wine" is 3, because you have to apply three
994
+ * operations. The edit distance between "wine" and "wine" is 0 of course: no
995
+ * operation is necessary for the transformation -- they're already the same
996
+ * string. It's easy to see that more similar strings have smaller edit
997
+ * distances than strings that differ a lot.
998
+ */
999
+
1000
+ DEF_RB_FREE(DamerauLevenshtein, General)
1001
+
1002
+ /*
1003
+ * call-seq: new(pattern)
1004
+ * XXX
1005
+ * Creates a new Amatch::DamerauLevenshtein instance from <code>pattern</code>.
1006
+ */
1007
+ static VALUE rb_DamerauLevenshtein_initialize(VALUE self, VALUE pattern)
1008
+ {
1009
+ GET_STRUCT(General)
1010
+ General_pattern_set(amatch, pattern);
1011
+ return self;
1012
+ }
1013
+
1014
+ DEF_CONSTRUCTOR(DamerauLevenshtein, General)
1015
+
1016
+ /*
1017
+ * call-seq: match(strings) -> results
1018
+ * XXX
1019
+ * Uses this Amatch::DamerauLevenshtein instance to match Amatch::DamerauLevenshtein#pattern
1020
+ * against <code>strings</code>. It returns the number of operations, the Damerau-Levenshtein
1021
+ * distance. <code>strings</code> has to be either a String or an Array of
1022
+ * Strings. The returned <code>results</code> is either a Fixnum or an Array of
1023
+ * Fixnums respectively.
1024
+ */
1025
+ static VALUE rb_DamerauLevenshtein_match(VALUE self, VALUE strings)
1026
+ {
1027
+ GET_STRUCT(General)
1028
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_match);
1029
+ }
1030
+
1031
+ /*
1032
+ * call-seq: similar(strings) -> results
1033
+ * XXX
1034
+ * Uses this Amatch::DamerauLevenshtein instance to match Amatch::DamerauLevenshtein#pattern
1035
+ * against <code>strings</code>, and compute a DamerauLevenshtein distance metric
1036
+ * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
1037
+ * <code>strings</code> has to be either a String or an Array of Strings. The
1038
+ * returned <code>results</code> is either a Float or an Array of Floats
1039
+ * respectively.
1040
+ */
1041
+ static VALUE rb_DamerauLevenshtein_similar(VALUE self, VALUE strings)
1042
+ {
1043
+ GET_STRUCT(General)
1044
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_similar);
1045
+ }
1046
+
1047
+ /*
1048
+ * call-seq: damerau_levenshtein_similar(strings) -> results
1049
+ * XXX
1050
+ * If called on a String, this string is used as a Amatch::DamerauLevenshtein#pattern
1051
+ * to match against <code>strings</code>. It returns a DamerauLevenshtein distance
1052
+ * metric number between 0.0 for very unsimilar strings and 1.0 for an exact
1053
+ * match. <code>strings</code> has to be either a String or an Array of
1054
+ * Strings. The returned <code>results</code> is either a Float or an Array of
1055
+ * Floats respectively.
1056
+ */
1057
+ static VALUE rb_str_damerau_levenshtein_similar(VALUE self, VALUE strings)
1058
+ {
1059
+ VALUE amatch = rb_DamerauLevenshtein_new(rb_cDamerauLevenshtein, self);
1060
+ return rb_DamerauLevenshtein_similar(amatch, strings);
1061
+ }
1062
+
1063
+ /*
1064
+ * call-seq: search(strings) -> results
1065
+ * XXX
1066
+ * searches Amatch::DamerauLevenshtein#pattern in <code>strings</code> and returns the
1067
+ * edit distance (the sum of character operations) as a Fixnum value, by greedy
1068
+ * trimming prefixes or postfixes of the match. <code>strings</code> has
1069
+ * to be either a String or an Array of Strings. The returned
1070
+ * <code>results</code> is either a Fixnum or an Array of Fixnums respectively.
1071
+ */
1072
+ static VALUE rb_DamerauLevenshtein_search(VALUE self, VALUE strings)
1073
+ {
1074
+ GET_STRUCT(General)
1075
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_search);
1076
+ }
1077
+
1078
+ /*
866
1079
  * Document-class: Amatch::Sellers
867
1080
  *
868
1081
  * The Sellers edit distance is very similar to the Levenshtein edit distance.
@@ -981,14 +1194,14 @@ DEF_CONSTRUCTOR(Sellers, Sellers)
981
1194
  * Document-method: pattern=
982
1195
  *
983
1196
  * call-seq: pattern=(pattern)
984
- *
1197
+ *
985
1198
  * Sets the current pattern string of this Amatch::Sellers instance to
986
1199
  * <code>pattern</code>.
987
1200
  */
988
1201
 
989
1202
  /*
990
1203
  * call-seq: match(strings) -> results
991
- *
1204
+ *
992
1205
  * Uses this Amatch::Sellers instance to match Sellers#pattern against
993
1206
  * <code>strings</code>, while taking into account the given weights. It
994
1207
  * returns the number of weighted character operations, the Sellers distance.
@@ -997,14 +1210,14 @@ DEF_CONSTRUCTOR(Sellers, Sellers)
997
1210
  * respectively.
998
1211
  */
999
1212
  static VALUE rb_Sellers_match(VALUE self, VALUE strings)
1000
- {
1213
+ {
1001
1214
  GET_STRUCT(Sellers)
1002
1215
  return Sellers_iterate_strings(amatch, strings, Sellers_match);
1003
1216
  }
1004
1217
 
1005
1218
  /*
1006
1219
  * call-seq: similar(strings) -> results
1007
- *
1220
+ *
1008
1221
  * Uses this Amatch::Sellers instance to match Amatch::Sellers#pattern
1009
1222
  * against <code>strings</code> (taking into account the given weights), and
1010
1223
  * compute a Sellers distance metric number between 0.0 for very unsimilar
@@ -1014,7 +1227,7 @@ static VALUE rb_Sellers_match(VALUE self, VALUE strings)
1014
1227
  * respectively.
1015
1228
  */
1016
1229
  static VALUE rb_Sellers_similar(VALUE self, VALUE strings)
1017
- {
1230
+ {
1018
1231
  GET_STRUCT(Sellers)
1019
1232
  return Sellers_iterate_strings(amatch, strings, Sellers_similar);
1020
1233
  }
@@ -1029,12 +1242,12 @@ static VALUE rb_Sellers_similar(VALUE self, VALUE strings)
1029
1242
  * <code>results</code> is either a Float or an Array of Floats respectively.
1030
1243
  */
1031
1244
  static VALUE rb_Sellers_search(VALUE self, VALUE strings)
1032
- {
1245
+ {
1033
1246
  GET_STRUCT(Sellers)
1034
1247
  return Sellers_iterate_strings(amatch, strings, Sellers_search);
1035
1248
  }
1036
1249
 
1037
- /*
1250
+ /*
1038
1251
  * Document-class: Amatch::PairDistance
1039
1252
  *
1040
1253
  * The pair distance between two strings is based on the number of adjacent
@@ -1045,7 +1258,7 @@ static VALUE rb_Sellers_search(VALUE self, VALUE strings)
1045
1258
  * are more dissimilar. The advantage of considering adjacent characters, is to
1046
1259
  * take account not only of the characters, but also of the character ordering
1047
1260
  * in the original strings.
1048
- *
1261
+ *
1049
1262
  * This metric is very capable to find similarities in natural languages.
1050
1263
  * It is explained in more detail in Simon White's article "How to Strike a
1051
1264
  * Match", located at this url:
@@ -1072,7 +1285,7 @@ DEF_CONSTRUCTOR(PairDistance, PairDistance)
1072
1285
 
1073
1286
  /*
1074
1287
  * call-seq: match(strings, regexp = /\s+/) -> results
1075
- *
1288
+ *
1076
1289
  * Uses this Amatch::PairDistance instance to match PairDistance#pattern against
1077
1290
  * <code>strings</code>. It returns the pair distance measure, that is a
1078
1291
  * returned value of 1.0 is an exact match, partial matches are lower
@@ -1088,7 +1301,7 @@ DEF_CONSTRUCTOR(PairDistance, PairDistance)
1088
1301
  * Array of Floats respectively.
1089
1302
  */
1090
1303
  static VALUE rb_PairDistance_match(int argc, VALUE *argv, VALUE self)
1091
- {
1304
+ {
1092
1305
  VALUE result, strings, regexp = Qnil;
1093
1306
  int use_regexp;
1094
1307
  GET_STRUCT(PairDistance)
@@ -1146,7 +1359,7 @@ static VALUE rb_str_pair_distance_similar(int argc, VALUE *argv, VALUE self)
1146
1359
  }
1147
1360
  }
1148
1361
 
1149
- /*
1362
+ /*
1150
1363
  * Document-class: Amatch::Hamming
1151
1364
  *
1152
1365
  * This class computes the Hamming distance between two strings.
@@ -1176,7 +1389,7 @@ DEF_CONSTRUCTOR(Hamming, General)
1176
1389
 
1177
1390
  /*
1178
1391
  * call-seq: match(strings) -> results
1179
- *
1392
+ *
1180
1393
  * Uses this Amatch::Hamming instance to match Amatch::Hamming#pattern against
1181
1394
  * <code>strings</code>, that is compute the hamming distance between
1182
1395
  * <code>pattern</code> and <code>strings</code>. <code>strings</code> has to
@@ -1184,7 +1397,7 @@ DEF_CONSTRUCTOR(Hamming, General)
1184
1397
  * is either a Fixnum or an Array of Fixnums respectively.
1185
1398
  */
1186
1399
  static VALUE rb_Hamming_match(VALUE self, VALUE strings)
1187
- {
1400
+ {
1188
1401
  GET_STRUCT(General)
1189
1402
  return General_iterate_strings(amatch, strings, Hamming_match);
1190
1403
  }
@@ -1200,7 +1413,7 @@ static VALUE rb_Hamming_match(VALUE self, VALUE strings)
1200
1413
  * respectively.
1201
1414
  */
1202
1415
  static VALUE rb_Hamming_similar(VALUE self, VALUE strings)
1203
- {
1416
+ {
1204
1417
  GET_STRUCT(General)
1205
1418
  return General_iterate_strings(amatch, strings, Hamming_similar);
1206
1419
  }
@@ -1222,7 +1435,7 @@ static VALUE rb_str_hamming_similar(VALUE self, VALUE strings)
1222
1435
  }
1223
1436
 
1224
1437
 
1225
- /*
1438
+ /*
1226
1439
  * Document-class: Amatch::LongestSubsequence
1227
1440
  *
1228
1441
  * This class computes the length of the longest subsequence common to two
@@ -1252,7 +1465,7 @@ DEF_CONSTRUCTOR(LongestSubsequence, General)
1252
1465
 
1253
1466
  /*
1254
1467
  * call-seq: match(strings) -> results
1255
- *
1468
+ *
1256
1469
  * Uses this Amatch::LongestSubsequence instance to match
1257
1470
  * LongestSubsequence#pattern against <code>strings</code>, that is compute the
1258
1471
  * length of the longest common subsequence. <code>strings</code> has to be
@@ -1260,14 +1473,14 @@ DEF_CONSTRUCTOR(LongestSubsequence, General)
1260
1473
  * is either a Fixnum or an Array of Fixnums respectively.
1261
1474
  */
1262
1475
  static VALUE rb_LongestSubsequence_match(VALUE self, VALUE strings)
1263
- {
1476
+ {
1264
1477
  GET_STRUCT(General)
1265
1478
  return General_iterate_strings(amatch, strings, LongestSubsequence_match);
1266
1479
  }
1267
1480
 
1268
1481
  /*
1269
1482
  * call-seq: similar(strings) -> results
1270
- *
1483
+ *
1271
1484
  * Uses this Amatch::LongestSubsequence instance to match
1272
1485
  * Amatch::LongestSubsequence#pattern against <code>strings</code>, and compute
1273
1486
  * a longest subsequence distance metric number between 0.0 for very dissimilar
@@ -1276,7 +1489,7 @@ static VALUE rb_LongestSubsequence_match(VALUE self, VALUE strings)
1276
1489
  * a Float or an Array of Floats respectively.
1277
1490
  */
1278
1491
  static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
1279
- {
1492
+ {
1280
1493
  GET_STRUCT(General)
1281
1494
  return General_iterate_strings(amatch, strings, LongestSubsequence_similar);
1282
1495
  }
@@ -1292,12 +1505,12 @@ static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
1292
1505
  * is either a Float or an Array of Floats respectively.
1293
1506
  */
1294
1507
  static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
1295
- {
1508
+ {
1296
1509
  VALUE amatch = rb_LongestSubsequence_new(rb_cLongestSubsequence, self);
1297
1510
  return rb_LongestSubsequence_similar(amatch, strings);
1298
1511
  }
1299
1512
 
1300
- /*
1513
+ /*
1301
1514
  * Document-class: Amatch::LongestSubstring
1302
1515
  *
1303
1516
  * The longest common substring is the longest substring, that is part of
@@ -1308,7 +1521,7 @@ static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
1308
1521
  * The longest common substring between 'string' and 'string' is 'string'
1309
1522
  * again, thus the longest common substring length is 6. The longest common
1310
1523
  * substring between 'string' and 'storing' is 'ring', thus the longest common
1311
- * substring length is 4.
1524
+ * substring length is 4.
1312
1525
  */
1313
1526
 
1314
1527
  DEF_RB_FREE(LongestSubstring, General)
@@ -1329,7 +1542,7 @@ DEF_CONSTRUCTOR(LongestSubstring, General)
1329
1542
 
1330
1543
  /*
1331
1544
  * call-seq: match(strings) -> results
1332
- *
1545
+ *
1333
1546
  * Uses this Amatch::LongestSubstring instance to match
1334
1547
  * LongestSubstring#pattern against <code>strings</code>, that is compute the
1335
1548
  * length of the longest common substring. <code>strings</code> has to be
@@ -1344,7 +1557,7 @@ static VALUE rb_LongestSubstring_match(VALUE self, VALUE strings)
1344
1557
 
1345
1558
  /*
1346
1559
  * call-seq: similar(strings) -> results
1347
- *
1560
+ *
1348
1561
  * Uses this Amatch::LongestSubstring instance to match
1349
1562
  * Amatch::LongestSubstring#pattern against <code>strings</code>, and compute a
1350
1563
  * longest substring distance metric number between 0.0 for very dissimilar
@@ -1370,11 +1583,11 @@ static VALUE rb_LongestSubstring_similar(VALUE self, VALUE strings)
1370
1583
  * is either a Float or an Array of Floats respectively.
1371
1584
  */
1372
1585
  static VALUE rb_str_longest_substring_similar(VALUE self, VALUE strings)
1373
- {
1586
+ {
1374
1587
  VALUE amatch = rb_LongestSubstring_new(rb_cLongestSubstring, self);
1375
1588
  return rb_LongestSubstring_similar(amatch, strings);
1376
1589
  }
1377
-
1590
+
1378
1591
  /*
1379
1592
  * Document-class: Amatch::Jaro
1380
1593
  *
@@ -1571,6 +1784,17 @@ void Init_amatch_ext()
1571
1784
  rb_define_method(rb_cLevenshtein, "similar", rb_Levenshtein_similar, 1);
1572
1785
  rb_define_method(rb_mAmatchStringMethods, "levenshtein_similar", rb_str_levenshtein_similar, 1);
1573
1786
 
1787
+ /* DamerauLevenshtein */
1788
+ rb_cDamerauLevenshtein = rb_define_class_under(rb_mAmatch, "DamerauLevenshtein", rb_cObject);
1789
+ rb_define_alloc_func(rb_cDamerauLevenshtein, rb_DamerauLevenshtein_s_allocate);
1790
+ rb_define_method(rb_cDamerauLevenshtein, "initialize", rb_DamerauLevenshtein_initialize, 1);
1791
+ rb_define_method(rb_cDamerauLevenshtein, "pattern", rb_General_pattern, 0);
1792
+ rb_define_method(rb_cDamerauLevenshtein, "pattern=", rb_General_pattern_set, 1);
1793
+ rb_define_method(rb_cDamerauLevenshtein, "match", rb_DamerauLevenshtein_match, 1);
1794
+ rb_define_method(rb_cDamerauLevenshtein, "search", rb_DamerauLevenshtein_search, 1);
1795
+ rb_define_method(rb_cDamerauLevenshtein, "similar", rb_DamerauLevenshtein_similar, 1);
1796
+ rb_define_method(rb_mAmatchStringMethods, "damerau_levenshtein_similar", rb_str_damerau_levenshtein_similar, 1);
1797
+
1574
1798
  /* Sellers */
1575
1799
  rb_cSellers = rb_define_class_under(rb_mAmatch, "Sellers", rb_cObject);
1576
1800
  rb_define_alloc_func(rb_cSellers, rb_Sellers_s_allocate);