amatch 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ # amatch - Approximate Matching Extension for Ruby
2
+
3
+ ## Description
4
+
5
+ This is a collection of classes that can be used for Approximate
6
+ matching, searching, and comparing of Strings. They implement algorithms
7
+ that compute the Levenshtein edit distance, Sellers edit distance, the
8
+ Hamming distance, the longest common subsequence length, the longest common
9
+ substring length, the pair distance metric, and the Jaro-Winkler metric.
10
+
11
+ ## Installation
12
+
13
+ To install this extension as a gem type
14
+
15
+ # gem install amatch
16
+
17
+ into the shell.
18
+
19
+ ## Download
20
+
21
+ The homepage of this library is located at
22
+
23
+ * https://github.com/flori/amatch
24
+
25
+ ## Examples
26
+
27
+ require 'amatch'
28
+ # => true
29
+ include Amatch
30
+ # => Object
31
+
32
+ m = Sellers.new("pattern")
33
+ # => #<Amatch::Sellers:0x40366324>
34
+ m.match("pattren")
35
+ # => 2.0
36
+ m.substitution = m.insertion = 3
37
+ # => 3
38
+ m.match("pattren")
39
+ # => 4.0
40
+ m.reset_weights
41
+ # => #<Amatch::Sellers:0x40366324>
42
+ m.match(["pattren","parent"])
43
+ # => [2.0, 4.0]
44
+ m.search("abcpattrendef")
45
+ # => 2.0
46
+
47
+ m = Levenshtein.new("pattern")
48
+ # => #<Amatch::Levenshtein:0x4035919c>
49
+ m.match("pattren")
50
+ # => 2
51
+ m.search("abcpattrendef")
52
+ # => 2
53
+ "pattern language".levenshtein_similar("language of patterns")
54
+ # => 0.2
55
+
56
+ m = Amatch::DamerauLevenshtein.new("pattern")
57
+ # => #<Amatch::DamerauLevenshtein:0x007fc3483dd278>
58
+ m.match("pattren")
59
+ # => 1
60
+ "pattern language".damerau_levenshtein_similar("language of patterns")
61
+ # => 0.19999999999999996
62
+
63
+ m = Hamming.new("pattern")
64
+ # => #<Amatch::Hamming:0x40350858>
65
+ m.match("pattren")
66
+ # => 2
67
+ "pattern language".hamming_similar("language of patterns")
68
+ # => 0.1
69
+
70
+ m = PairDistance.new("pattern")
71
+ # => #<Amatch::PairDistance:0x40349be8>
72
+ m.match("pattr en")
73
+ # => 0.545454545454545
74
+ m.match("pattr en", nil)
75
+ # => 0.461538461538462
76
+ m.match("pattr en", /t+/)
77
+ # => 0.285714285714286
78
+ "pattern language".pair_distance_similar("language of patterns")
79
+ # => 0.928571428571429
80
+
81
+ m = LongestSubsequence.new("pattern")
82
+ # => #<Amatch::LongestSubsequence:0x4033e900>
83
+ m.match("pattren")
84
+ # => 6
85
+ "pattern language".longest_subsequence_similar("language of patterns")
86
+ # => 0.4
87
+
88
+ m = LongestSubstring.new("pattern")
89
+ # => #<Amatch::LongestSubstring:0x403378d0>
90
+ m.match("pattren")
91
+ # => 4
92
+ "pattern language".longest_substring_similar("language of patterns")
93
+ # => 0.4
94
+
95
+ m = Jaro.new("pattern")
96
+ # => #<Amatch::Jaro:0x363b70>
97
+ m.match("paTTren")
98
+ # => 0.952380952380952
99
+ m.ignore_case = false
100
+ m.match("paTTren")
101
+ # => 0.742857142857143
102
+ "pattern language".jaro_similar("language of patterns")
103
+ # => 0.672222222222222
104
+
105
+ m = JaroWinkler.new("pattern")
106
+ # => #<Amatch::JaroWinkler:0x3530b8>
107
+ m.match("paTTren")
108
+ # => 0.971428571712403
109
+ m.ignore_case = false
110
+ m.match("paTTren")
111
+ # => 0.79428571505206
112
+ m.scaling_factor = 0.05
113
+ m.match("pattren")
114
+ # => 0.961904762046678
115
+ "pattern language".jarowinkler_similar("language of patterns")
116
+ # => 0.672222222222222
117
+
118
+ ## Author
119
+
120
+ Florian Frank mailto:flori@ping.de
121
+
122
+ ## License
123
+
124
+ Apache License, Version 2.0 – See the COPYING file in the source archive.
data/Rakefile CHANGED
@@ -13,22 +13,15 @@ Amatch is a library for approximate string matching and searching in strings.
13
13
  Several algorithms can be used to do this, and it's also possible to compute a
14
14
  similarity metric number between 0.0 and 1.0 for two given strings.
15
15
  EOT
16
- executables << 'agrep.rb'
16
+ executables << 'agrep' << 'dupfind'
17
17
  bindir 'bin'
18
18
  test_dir 'tests'
19
- ignore '.*.sw[pon]', 'pkg', 'Gemfile.lock', '.AppleDouble', '.rbx', '.bundle'
19
+ ignore '.*.sw[pon]', 'pkg', 'Gemfile.lock', '.AppleDouble', '.rbx', 'Makefile'
20
20
  title "#{name.camelize} - Approximate Matching"
21
- readme 'README.rdoc'
21
+ readme 'README.md'
22
22
  require_paths %w[lib ext]
23
23
  dependency 'tins', '~>1.0'
24
+ dependency 'mize'
24
25
  development_dependency 'test-unit', '~>3.0'
25
- licenses << 'GPL'
26
-
27
- install_library do
28
- libdir = CONFIG["sitelibdir"]
29
- src, = Dir['ext/amatch.*'].reject { |x| x =~ /\.[co]$/ }
30
- install(src, File.join(libdir, File.basename(src)), :verbose => true)
31
- mkdir_p dst = File.join(libdir, 'amatch')
32
- install('lib/amatch/version.rb', File.join(dst, 'version.rb'), :verbose => true)
33
- end
26
+ licenses << 'Apache-2.0'
34
27
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.1
1
+ 0.4.0
Binary file
@@ -15,15 +15,21 @@ end
15
15
 
16
16
  class Amatch::Levenshtein
17
17
  def search_relative(strings)
18
- search(strings).to_f / pattern.size
18
+ if Array === strings
19
+ search(strings).map { |s| s.to_f / pattern.size }
20
+ else
21
+ search(strings).to_f / pattern.size
22
+ end
19
23
  end
20
24
  end
21
25
 
26
+ $algorithm = 'Levenshtein'
22
27
  $distance = 1
23
28
  $mode = :search
24
29
  begin
25
30
  parser = GetoptLong.new
26
31
  options = [
32
+ [ '--algorithm', '-a', GetoptLong::REQUIRED_ARGUMENT ],
27
33
  [ '--distance', '-d', GetoptLong::REQUIRED_ARGUMENT ],
28
34
  [ '--relative', '-r', GetoptLong::NO_ARGUMENT ],
29
35
  [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
@@ -33,6 +39,8 @@ begin
33
39
  parser.each_option do |name, arg|
34
40
  name = name.sub(/^--/, '')
35
41
  case name
42
+ when 'algorithm'
43
+ $algorithm = arg
36
44
  when 'distance'
37
45
  $distance = arg.to_f
38
46
  when 'relative'
@@ -48,7 +56,7 @@ rescue
48
56
  end
49
57
  pattern = ARGV.shift or usage('Pattern needed!', options)
50
58
 
51
- matcher = Amatch::Levenshtein.new(pattern)
59
+ matcher = Amatch.const_get($algorithm).new(pattern)
52
60
  size = 0
53
61
  start = Time.new
54
62
  if ARGV.size > 0 then
@@ -56,9 +64,12 @@ if ARGV.size > 0 then
56
64
  File.stat(filename).file? or next
57
65
  size += File.size(filename)
58
66
  begin
59
- File.open(filename, 'r').each_line do |line|
60
- if matcher.__send__($mode, line) <= $distance
61
- puts "#{filename}:#{line}"
67
+ File.open(filename, 'r').each_line.each_slice(1000) do |lines|
68
+ results = matcher.__send__($mode, lines)
69
+ lines.zip(results) do |line, r|
70
+ if r <= $distance
71
+ puts "#{filename}:#{line}"
72
+ end
62
73
  end
63
74
  end
64
75
  rescue
@@ -66,10 +77,13 @@ if ARGV.size > 0 then
66
77
  end
67
78
  end
68
79
  else
69
- STDIN.each_line do |line|
70
- size += line.size
71
- if matcher.__send__($mode, line) <= $distance
72
- puts line
80
+ STDIN.each_line.each_slice(1000) do |lines|
81
+ size += lines.size
82
+ results = matcher.__send__($mode, lines)
83
+ lines.zip(results) do |line, r|
84
+ if r <= $distance
85
+ puts line
86
+ end
73
87
  end
74
88
  end
75
89
  end
@@ -0,0 +1,153 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require 'tins/go'
4
+ include Tins::GO
5
+ require 'tins/minimize'
6
+ class Array
7
+ include Tins::Minimize
8
+ end
9
+ require 'amatch'
10
+ begin
11
+ require 'infobar'
12
+ rescue LoadError
13
+ warn "Please install gem infobar to run this executable!"
14
+ exit 1
15
+ end
16
+
17
+ def usage
18
+ puts <<EOT
19
+ Usage: #{File.basename($0)} [OPTIONS] FILE
20
+
21
+ -a ALGO Amatch matching algorithm
22
+ -p LIMIT more than p similarity to be a match
23
+ -R NUMBER skip NUMBER mismatch for building ranges
24
+ -r NUMBER minimum length to be counted as a range
25
+ -i compute a PNG per file
26
+
27
+ Report bugs to <flori@ping.de>.
28
+ EOT
29
+ exit 0
30
+ end
31
+
32
+ class FindDuplicates
33
+ def initialize(algo, p_lim, filename)
34
+ @algo, @p_lim, @filename = algo, p_lim, filename
35
+ end
36
+
37
+ attr_reader :filename
38
+
39
+ attr_reader :algo
40
+
41
+ attr_reader :p_lim
42
+
43
+ memoize method:
44
+ def lines
45
+ File.readlines(filename)
46
+ end
47
+
48
+ memoize method:
49
+ def matrix
50
+ result = lines.with_infobar(label: filename, output: STDERR).map do |l1|
51
+ +infobar
52
+ a = algo.new(l1)
53
+ r = a.similar(lines)
54
+ r.map! { |s| s >= p_lim ? ?1 : ?0 }
55
+ r.join
56
+ end
57
+ infobar.finish
58
+ infobar.newline
59
+ result
60
+ end
61
+
62
+ def pbm(output: $>)
63
+ output << <<HEADER
64
+ P1
65
+ #{matrix.size} #{matrix.size}
66
+ HEADER
67
+ output << matrix.map { |line| line.each_char.to_a * ' ' } * ?\n
68
+ self
69
+ end
70
+
71
+ def png(output: $>)
72
+ IO.popen("pnmtopng", 'w+') do |conv|
73
+ pbm(output: conv)
74
+ conv.close_write
75
+ output.write(conv.read)
76
+ end
77
+ self
78
+ end
79
+
80
+ def create_image
81
+ suffix = Regexp.quote(File.extname(filename))
82
+ f = filename.sub(/(#{suffix}|)\z/, '.png')
83
+ File.open(f, 'wb') do |output|
84
+ png(output: output)
85
+ infobar.puts "Writing output to #{f.inspect}."
86
+ end
87
+ self
88
+ end
89
+
90
+ def similar_ranges(min_range: 3, skip_range: 0)
91
+ set = 0
92
+ ranges = { set => [] }
93
+ m = matrix
94
+ n = m.size
95
+ skip_count = 0
96
+ n.downto(1) do |h|
97
+ (n - h + 1).upto(n - 1) do |k|
98
+ i = k
99
+ j = k - (n - h + 1)
100
+ if m[i][j] == ?1
101
+ skip_count = 0
102
+ ranges[set] << [ i, j ]
103
+ elsif !ranges[set].empty? && skip_count < skip_range
104
+ skip_count += 1
105
+ else
106
+ skip_count = 0
107
+ ranges[set].empty? or ranges[set += 1] = []
108
+ end
109
+ end
110
+ skip_count = 0
111
+ ranges[set].empty? or ranges[set += 1] = []
112
+ end
113
+ ranges.each { |_, r|
114
+ r.flatten!
115
+ r.sort!
116
+ r.map! { |x| x + 1 }
117
+ r.minimize!
118
+ r.reject! { |s| s.size < min_range }
119
+ }.reject! { |_, r| r.empty? }
120
+ unions = []
121
+ while !ranges.empty?
122
+ _, r = ranges.first
123
+ equivalent = ranges.reject { |_, v| (v & r).empty? }
124
+ unions << equivalent.values.flatten.uniq
125
+ ranges.delete_if { |k, _| equivalent.keys.include?(k) }
126
+ end
127
+ unions.each do |r|
128
+ r.map! do |x|
129
+ "#{filename}:#{x.begin}-#{x.end}"
130
+ end
131
+ end
132
+ unions
133
+ end
134
+ end
135
+
136
+ opts = go 'a:p:R:r:ih'
137
+
138
+ usage if opts[?h]
139
+ algo = Amatch.const_get(opts[?a] || 'Levenshtein')
140
+ p_lim = (opts[?p] || 0.95).to_f
141
+ min_range = (opts[?r] || 3).to_i
142
+ skip_range = opts[?R].to_i
143
+ ARGV.empty? and usage
144
+
145
+ filenames = ARGV.inject([]) { |s, f| s.concat(Dir[f]) }
146
+ for filename in filenames
147
+ finder = FindDuplicates.new(algo, p_lim, filename)
148
+ opts[?i] and finder.create_image
149
+ for s in finder.similar_ranges(min_range: min_range, skip_range: skip_range)
150
+ infobar.reset
151
+ puts s, ?\n
152
+ end
153
+ end
@@ -3,24 +3,8 @@
3
3
  #include <ctype.h>
4
4
  #include "common.h"
5
5
 
6
- /*
7
- * Document-method: pattern
8
- *
9
- * call-seq: pattern -> pattern string
10
- *
11
- * Returns the current pattern string of this instance.
12
- */
13
-
14
- /*
15
- * Document-method: pattern=
16
- *
17
- * call-seq: pattern=(pattern)
18
- *
19
- * Sets the current pattern string of this instance to <code>pattern</code>.
20
- */
21
-
22
-
23
- static VALUE rb_mAmatch, rb_mAmatchStringMethods, rb_cLevenshtein, rb_cSellers, rb_cHamming,
6
+ static VALUE rb_mAmatch, rb_mAmatchStringMethods, rb_cLevenshtein,
7
+ rb_cDamerauLevenshtein, rb_cSellers, rb_cHamming,
24
8
  rb_cPairDistance, rb_cLongestSubsequence, rb_cLongestSubstring,
25
9
  rb_cJaro, rb_cJaroWinkler;
26
10
 
@@ -230,9 +214,11 @@ DEF_ITERATE_STRINGS(JaroWinkler)
230
214
  */
231
215
 
232
216
  #define COMPUTE_LEVENSHTEIN_DISTANCE \
233
- for (i = 1, c = 0, p = 1; i <= a_len; i++) { \
217
+ c = 0; \
218
+ p = 0; \
219
+ for (i = 1; i <= a_len; i++) { \
234
220
  c = i % 2; /* current row */ \
235
- p = (i + 1) % 2; /* previous row */ \
221
+ p = (i - 1) % 2; /* previous row */ \
236
222
  v[c][0] = i; /* first column */ \
237
223
  for (j = 1; j <= b_len; j++) { \
238
224
  /* Bellman's principle of optimality: */ \
@@ -245,8 +231,6 @@ DEF_ITERATE_STRINGS(JaroWinkler)
245
231
  } \
246
232
  v[c][j] = weight; \
247
233
  } \
248
- p = c; \
249
- c = (c + 1) % 2; \
250
234
  }
251
235
 
252
236
  static VALUE Levenshtein_match(General *amatch, VALUE string)
@@ -269,7 +253,7 @@ static VALUE Levenshtein_match(General *amatch, VALUE string)
269
253
 
270
254
  COMPUTE_LEVENSHTEIN_DISTANCE
271
255
 
272
- result = INT2FIX(v[p][b_len]);
256
+ result = INT2FIX(v[c][b_len]);
273
257
 
274
258
  xfree(v[0]);
275
259
  xfree(v[1]);
@@ -287,6 +271,7 @@ static VALUE Levenshtein_similar(General *amatch, VALUE string)
287
271
 
288
272
  Check_Type(string, T_STRING);
289
273
  DONT_OPTIMIZE
274
+
290
275
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
291
276
  if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
292
277
  v[0] = ALLOC_N(int, b_len + 1);
@@ -299,12 +284,14 @@ static VALUE Levenshtein_similar(General *amatch, VALUE string)
299
284
  COMPUTE_LEVENSHTEIN_DISTANCE
300
285
 
301
286
  if (b_len > a_len) {
302
- result = rb_float_new(1.0 - ((double) v[p][b_len]) / b_len);
287
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / b_len);
303
288
  } else {
304
- result = rb_float_new(1.0 - ((double) v[p][b_len]) / a_len);
289
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / a_len);
305
290
  }
291
+
306
292
  xfree(v[0]);
307
293
  xfree(v[1]);
294
+
308
295
  return result;
309
296
  }
310
297
 
@@ -327,26 +314,159 @@ static VALUE Levenshtein_search(General *amatch, VALUE string)
327
314
  COMPUTE_LEVENSHTEIN_DISTANCE
328
315
 
329
316
  for (i = 0, min = a_len; i <= b_len; i++) {
330
- if (v[p][i] < min) min = v[p][i];
317
+ if (v[c][i] < min) min = v[c][i];
331
318
  }
332
319
 
333
320
  result = INT2FIX(min);
334
321
 
335
322
  xfree(v[0]);
336
323
  xfree(v[1]);
337
-
324
+
325
+ return result;
326
+ }
327
+
328
+ /*
329
+ * DamerauLevenshtein edit distances are computed here:
330
+ */
331
+
332
+ #define COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE \
333
+ c = 0; \
334
+ p = 0; \
335
+ pp = 0; \
336
+ for (i = 1; i <= a_len; i++) { \
337
+ c = i % 3; /* current row */ \
338
+ p = (i - 1) % 3; /* previous row */ \
339
+ pp = (i - 2) % 3; /* previous previous row */ \
340
+ v[c][0] = i; /* first column */ \
341
+ for (j = 1; j <= b_len; j++) { \
342
+ /* Bellman's principle of optimality: */ \
343
+ weight = v[p][j - 1] + (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : 1); \
344
+ if (weight > v[p][j] + 1) { \
345
+ weight = v[p][j] + 1; \
346
+ } \
347
+ if (weight > v[c][j - 1] + 1) { \
348
+ weight = v[c][j - 1] + 1; \
349
+ } \
350
+ if (i > 2 && j > 2 && a_ptr[i - 1] == b_ptr[j - 2] && a_ptr[i - 2] == b_ptr[j - 1]) {\
351
+ if (weight > v[pp][j - 2]) { \
352
+ weight = v[pp][j - 2] + (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : 1); \
353
+ } \
354
+ } \
355
+ v[c][j] = weight; \
356
+ } \
357
+ }
358
+
359
+ static VALUE DamerauLevenshtein_match(General *amatch, VALUE string)
360
+ {
361
+ VALUE result;
362
+ char *a_ptr, *b_ptr;
363
+ int a_len, b_len;
364
+ int *v[3], weight;
365
+ int i, j, c, p, pp;
366
+
367
+ Check_Type(string, T_STRING);
368
+ DONT_OPTIMIZE
369
+
370
+ v[0] = ALLOC_N(int, b_len + 1);
371
+ v[1] = ALLOC_N(int, b_len + 1);
372
+ v[2] = ALLOC_N(int, b_len + 1);
373
+ for (i = 0; i <= b_len; i++) {
374
+ v[0][i] = i;
375
+ v[1][i] = i;
376
+ v[2][i] = i;
377
+ }
378
+
379
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
380
+
381
+ result = INT2FIX(v[c][b_len]);
382
+
383
+ xfree(v[0]);
384
+ xfree(v[1]);
385
+ xfree(v[2]);
386
+
338
387
  return result;
339
388
  }
340
389
 
390
+ static VALUE DamerauLevenshtein_similar(General *amatch, VALUE string)
391
+ {
392
+ VALUE result;
393
+ char *a_ptr, *b_ptr;
394
+ int a_len, b_len;
395
+ int *v[3], weight;
396
+ int i, j, c, p, pp;
397
+
398
+ Check_Type(string, T_STRING);
399
+ DONT_OPTIMIZE
400
+
401
+ if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
402
+ if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
403
+ v[0] = ALLOC_N(int, b_len + 1);
404
+ v[1] = ALLOC_N(int, b_len + 1);
405
+ v[2] = ALLOC_N(int, b_len + 1);
406
+ for (i = 0; i <= b_len; i++) {
407
+ v[0][i] = i;
408
+ v[1][i] = i;
409
+ v[2][i] = i;
410
+ }
411
+
412
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
413
+
414
+ if (b_len > a_len) {
415
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / b_len);
416
+ } else {
417
+ result = rb_float_new(1.0 - ((double) v[c][b_len]) / a_len);
418
+ }
419
+
420
+ xfree(v[0]);
421
+ xfree(v[1]);
422
+ xfree(v[2]);
423
+
424
+ return result;
425
+ }
426
+
427
+ static VALUE DamerauLevenshtein_search(General *amatch, VALUE string)
428
+ {
429
+ VALUE result;
430
+ char *a_ptr, *b_ptr;
431
+ int a_len, b_len;
432
+ int *v[3], weight, min;
433
+ int i, j, c, p, pp;
434
+
435
+ Check_Type(string, T_STRING);
436
+ DONT_OPTIMIZE
437
+
438
+ v[0] = ALLOC_N(int, b_len + 1);
439
+ v[1] = ALLOC_N(int, b_len + 1);
440
+ v[2] = ALLOC_N(int, b_len + 1);
441
+ MEMZERO(v[0], int, b_len + 1);
442
+ MEMZERO(v[1], int, b_len + 1);
443
+ MEMZERO(v[2], int, b_len + 1);
444
+
445
+ COMPUTE_DAMERAU_LEVENSHTEIN_DISTANCE
446
+
447
+ for (i = 0, min = a_len; i <= b_len; i++) {
448
+ if (v[c][i] < min) min = v[c][i];
449
+ }
450
+
451
+ result = INT2FIX(min);
452
+
453
+ xfree(v[0]);
454
+ xfree(v[1]);
455
+ xfree(v[2]);
456
+
457
+ return result;
458
+ }
341
459
 
342
460
  /*
343
461
  * Sellers edit distances are computed here:
344
462
  */
345
463
 
346
464
  #define COMPUTE_SELLERS_DISTANCE \
347
- for (i = 1, c = 0, p = 1; i <= a_len; i++) { \
465
+ c = 0; \
466
+ p = 0; \
467
+ for (i = 1; i <= a_len; i++) { \
348
468
  c = i % 2; /* current row */ \
349
- p = (i + 1) % 2; /* previous row */ \
469
+ p = (i - 1) % 2; /* previous row */ \
350
470
  v[c][0] = i * amatch->deletion; /* first column */ \
351
471
  for (j = 1; j <= b_len; j++) { \
352
472
  /* Bellman's principle of optimality: */ \
@@ -361,7 +481,6 @@ static VALUE Levenshtein_search(General *amatch, VALUE string)
361
481
  v[c][j] = weight; \
362
482
  } \
363
483
  p = c; \
364
- c = (c + 1) % 2; \
365
484
  }
366
485
 
367
486
  static VALUE Sellers_match(Sellers *amatch, VALUE string)
@@ -411,9 +530,10 @@ static VALUE Sellers_similar(Sellers *amatch, VALUE string)
411
530
  max_weight = amatch->deletion;
412
531
  }
413
532
  }
414
-
533
+
415
534
  Check_Type(string, T_STRING);
416
535
  DONT_OPTIMIZE
536
+
417
537
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
418
538
  if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
419
539
  v[0] = ALLOC_N(double, b_len + 1);
@@ -459,7 +579,7 @@ static VALUE Sellers_search(Sellers *amatch, VALUE string)
459
579
  result = rb_float_new(min);
460
580
  xfree(v[0]);
461
581
  xfree(v[1]);
462
-
582
+
463
583
  return result;
464
584
  }
465
585
 
@@ -470,7 +590,7 @@ static VALUE Sellers_search(Sellers *amatch, VALUE string)
470
590
  static VALUE PairDistance_match(PairDistance *amatch, VALUE string, VALUE regexp, int use_regexp)
471
591
  {
472
592
  double result;
473
- VALUE tokens, string_tokens;
593
+ VALUE string_tokens, tokens;
474
594
  PairArray *pattern_pair_array, *pair_array;
475
595
 
476
596
  Check_Type(string, T_STRING);
@@ -518,7 +638,7 @@ static VALUE Hamming_match(General *amatch, VALUE string)
518
638
  char *a_ptr, *b_ptr;
519
639
  int a_len, b_len;
520
640
  int i, result;
521
-
641
+
522
642
  Check_Type(string, T_STRING);
523
643
  OPTIMIZE_TIME
524
644
  COMPUTE_HAMMING_DISTANCE
@@ -530,7 +650,7 @@ static VALUE Hamming_similar(General *amatch, VALUE string)
530
650
  char *a_ptr, *b_ptr;
531
651
  int a_len, b_len;
532
652
  int i, result;
533
-
653
+
534
654
  Check_Type(string, T_STRING);
535
655
  OPTIMIZE_TIME
536
656
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
@@ -570,7 +690,7 @@ static VALUE LongestSubsequence_match(General *amatch, VALUE string)
570
690
  char *a_ptr, *b_ptr;
571
691
  int a_len, b_len;
572
692
  int result, c, p, i, j, *l[2];
573
-
693
+
574
694
  Check_Type(string, T_STRING);
575
695
  OPTIMIZE_TIME
576
696
 
@@ -584,7 +704,7 @@ static VALUE LongestSubsequence_similar(General *amatch, VALUE string)
584
704
  char *a_ptr, *b_ptr;
585
705
  int a_len, b_len;
586
706
  int result, c, p, i, j, *l[2];
587
-
707
+
588
708
  Check_Type(string, T_STRING);
589
709
  OPTIMIZE_TIME
590
710
 
@@ -624,7 +744,7 @@ static VALUE LongestSubstring_match(General *amatch, VALUE string)
624
744
  char *a_ptr, *b_ptr;
625
745
  int a_len, b_len;
626
746
  int result, c, p, i, j, *l[2];
627
-
747
+
628
748
  Check_Type(string, T_STRING);
629
749
  OPTIMIZE_TIME
630
750
  if (a_len == 0 || b_len == 0) return INT2FIX(0);
@@ -637,7 +757,7 @@ static VALUE LongestSubstring_similar(General *amatch, VALUE string)
637
757
  char *a_ptr, *b_ptr;
638
758
  int a_len, b_len;
639
759
  int result, c, p, i, j, *l[2];
640
-
760
+
641
761
  Check_Type(string, T_STRING);
642
762
  OPTIMIZE_TIME
643
763
  if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
@@ -769,7 +889,7 @@ static VALUE JaroWinkler_match(JaroWinkler *amatch, VALUE string)
769
889
  * Ruby API
770
890
  */
771
891
 
772
- /*
892
+ /*
773
893
  * Document-class: Amatch::Levenshtein
774
894
  *
775
895
  * The Levenshtein edit distance is defined as the minimal costs involved to
@@ -802,7 +922,7 @@ DEF_CONSTRUCTOR(Levenshtein, General)
802
922
 
803
923
  /*
804
924
  * call-seq: match(strings) -> results
805
- *
925
+ *
806
926
  * Uses this Amatch::Levenshtein instance to match Amatch::Levenshtein#pattern
807
927
  * against <code>strings</code>. It returns the number of operations, the Levenshtein
808
928
  * distance. <code>strings</code> has to be either a String or an Array of
@@ -810,14 +930,14 @@ DEF_CONSTRUCTOR(Levenshtein, General)
810
930
  * Floats respectively.
811
931
  */
812
932
  static VALUE rb_Levenshtein_match(VALUE self, VALUE strings)
813
- {
933
+ {
814
934
  GET_STRUCT(General)
815
935
  return General_iterate_strings(amatch, strings, Levenshtein_match);
816
936
  }
817
937
 
818
938
  /*
819
939
  * call-seq: similar(strings) -> results
820
- *
940
+ *
821
941
  * Uses this Amatch::Levenshtein instance to match Amatch::Levenshtein#pattern
822
942
  * against <code>strings</code>, and compute a Levenshtein distance metric
823
943
  * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
@@ -826,14 +946,14 @@ static VALUE rb_Levenshtein_match(VALUE self, VALUE strings)
826
946
  * respectively.
827
947
  */
828
948
  static VALUE rb_Levenshtein_similar(VALUE self, VALUE strings)
829
- {
949
+ {
830
950
  GET_STRUCT(General)
831
951
  return General_iterate_strings(amatch, strings, Levenshtein_similar);
832
952
  }
833
953
 
834
954
  /*
835
955
  * call-seq: levenshtein_similar(strings) -> results
836
- *
956
+ *
837
957
  * If called on a String, this string is used as a Amatch::Levenshtein#pattern
838
958
  * to match against <code>strings</code>. It returns a Levenshtein distance
839
959
  * metric number between 0.0 for very unsimilar strings and 1.0 for an exact
@@ -849,7 +969,7 @@ static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
849
969
 
850
970
  /*
851
971
  * call-seq: search(strings) -> results
852
- *
972
+ *
853
973
  * searches Amatch::Levenshtein#pattern in <code>strings</code> and returns the
854
974
  * edit distance (the sum of character operations) as a Fixnum value, by greedy
855
975
  * trimming prefixes or postfixes of the match. <code>strings</code> has
@@ -857,12 +977,105 @@ static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
857
977
  * <code>results</code> is either a Fixnum or an Array of Fixnums respectively.
858
978
  */
859
979
  static VALUE rb_Levenshtein_search(VALUE self, VALUE strings)
860
- {
980
+ {
861
981
  GET_STRUCT(General)
862
982
  return General_iterate_strings(amatch, strings, Levenshtein_search);
863
983
  }
864
984
 
865
- /*
985
+ /*
986
+ * Document-class: Amatch::DamerauLevenshtein
987
+ *
988
+ * The DamerauLevenshtein edit distance is defined as the minimal costs
989
+ * involved to transform one string into another by using four elementary
990
+ * operations: deletion, insertion and substitution of a character, plus transposition of two adjacent characters. To
991
+ * transform "water" into "wine", for instance, you have to substitute "a" ->
992
+ * "i": "witer", "t" -> "n": "winer" and delete "r": "wine". The edit distance
993
+ * between "water" and "wine" is 3, because you have to apply three
994
+ * operations. The edit distance between "wine" and "wine" is 0 of course: no
995
+ * operation is necessary for the transformation -- they're already the same
996
+ * string. It's easy to see that more similar strings have smaller edit
997
+ * distances than strings that differ a lot.
998
+ */
999
+
1000
+ DEF_RB_FREE(DamerauLevenshtein, General)
1001
+
1002
+ /*
1003
+ * call-seq: new(pattern)
1004
+ *
1005
+ * Creates a new Amatch::DamerauLevenshtein instance from <code>pattern</code>.
1006
+ */
1007
+ static VALUE rb_DamerauLevenshtein_initialize(VALUE self, VALUE pattern)
1008
+ {
1009
+ GET_STRUCT(General)
1010
+ General_pattern_set(amatch, pattern);
1011
+ return self;
1012
+ }
1013
+
1014
+ DEF_CONSTRUCTOR(DamerauLevenshtein, General)
1015
+
1016
+ /*
1017
+ * call-seq: match(strings) -> results
1018
+ *
1019
+ * Uses this Amatch::DamerauLevenshtein instance to match Amatch::DamerauLevenshtein#pattern
1020
+ * against <code>strings</code>. It returns the number of operations, the edit
1021
+ * distance. <code>strings</code> has to be either a String or an Array of
1022
+ * Strings. The returned <code>results</code> is either a Fixnum or an Array of
1023
+ * Fixnums respectively.
1024
+ */
1025
+ static VALUE rb_DamerauLevenshtein_match(VALUE self, VALUE strings)
1026
+ {
1027
+ GET_STRUCT(General)
1028
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_match);
1029
+ }
1030
+
1031
+ /*
1032
+ * call-seq: similar(strings) -> results
1033
+ *
1034
+ * Uses this Amatch::DamerauLevenshtein instance to match Amatch::DamerauLevenshtein#pattern
1035
+ * against <code>strings</code>, and compute a DamerauLevenshtein distance metric
1036
+ * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
1037
+ * <code>strings</code> has to be either a String or an Array of Strings. The
1038
+ * returned <code>results</code> is either a Float or an Array of Floats
1039
+ * respectively.
1040
+ */
1041
+ static VALUE rb_DamerauLevenshtein_similar(VALUE self, VALUE strings)
1042
+ {
1043
+ GET_STRUCT(General)
1044
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_similar);
1045
+ }
1046
+
1047
+ /*
1048
+ * call-seq: damerau_levenshtein_similar(strings) -> results
1049
+ *
1050
+ * If called on a String, this string is used as a Amatch::DamerauLevenshtein#pattern
1051
+ * to match against <code>strings</code>. It returns a DamerauLevenshtein distance
1052
+ * metric number between 0.0 for very unsimilar strings and 1.0 for an exact
1053
+ * match. <code>strings</code> has to be either a String or an Array of
1054
+ * Strings. The returned <code>results</code> is either a Float or an Array of
1055
+ * Floats respectively.
1056
+ */
1057
+ static VALUE rb_str_damerau_levenshtein_similar(VALUE self, VALUE strings)
1058
+ {
1059
+ VALUE amatch = rb_DamerauLevenshtein_new(rb_cDamerauLevenshtein, self);
1060
+ return rb_DamerauLevenshtein_similar(amatch, strings);
1061
+ }
1062
+
1063
+ /*
1064
+ * call-seq: search(strings) -> results
1065
+ *
1066
+ * searches Amatch::DamerauLevenshtein#pattern in <code>strings</code> and returns the
1067
+ * edit distance (the sum of character operations) as a Fixnum value, by greedy
1068
+ * trimming prefixes or postfixes of the match. <code>strings</code> has
1069
+ * to be either a String or an Array of Strings. The returned
1070
+ * <code>results</code> is either a Fixnum or an Array of Fixnums respectively.
1071
+ */
1072
+ static VALUE rb_DamerauLevenshtein_search(VALUE self, VALUE strings)
1073
+ {
1074
+ GET_STRUCT(General)
1075
+ return General_iterate_strings(amatch, strings, DamerauLevenshtein_search);
1076
+ }
1077
+
1078
+ /*
866
1079
  * Document-class: Amatch::Sellers
867
1080
  *
868
1081
  * The Sellers edit distance is very similar to the Levenshtein edit distance.
@@ -981,14 +1194,14 @@ DEF_CONSTRUCTOR(Sellers, Sellers)
981
1194
  * Document-method: pattern=
982
1195
  *
983
1196
  * call-seq: pattern=(pattern)
984
- *
1197
+ *
985
1198
  * Sets the current pattern string of this Amatch::Sellers instance to
986
1199
  * <code>pattern</code>.
987
1200
  */
988
1201
 
989
1202
  /*
990
1203
  * call-seq: match(strings) -> results
991
- *
1204
+ *
992
1205
  * Uses this Amatch::Sellers instance to match Sellers#pattern against
993
1206
  * <code>strings</code>, while taking into account the given weights. It
994
1207
  * returns the number of weighted character operations, the Sellers distance.
@@ -997,14 +1210,14 @@ DEF_CONSTRUCTOR(Sellers, Sellers)
997
1210
  * respectively.
998
1211
  */
999
1212
  static VALUE rb_Sellers_match(VALUE self, VALUE strings)
1000
- {
1213
+ {
1001
1214
  GET_STRUCT(Sellers)
1002
1215
  return Sellers_iterate_strings(amatch, strings, Sellers_match);
1003
1216
  }
1004
1217
 
1005
1218
  /*
1006
1219
  * call-seq: similar(strings) -> results
1007
- *
1220
+ *
1008
1221
  * Uses this Amatch::Sellers instance to match Amatch::Sellers#pattern
1009
1222
  * against <code>strings</code> (taking into account the given weights), and
1010
1223
  * compute a Sellers distance metric number between 0.0 for very unsimilar
@@ -1014,7 +1227,7 @@ static VALUE rb_Sellers_match(VALUE self, VALUE strings)
1014
1227
  * respectively.
1015
1228
  */
1016
1229
  static VALUE rb_Sellers_similar(VALUE self, VALUE strings)
1017
- {
1230
+ {
1018
1231
  GET_STRUCT(Sellers)
1019
1232
  return Sellers_iterate_strings(amatch, strings, Sellers_similar);
1020
1233
  }
@@ -1029,12 +1242,12 @@ static VALUE rb_Sellers_similar(VALUE self, VALUE strings)
1029
1242
  * <code>results</code> is either a Float or an Array of Floats respectively.
1030
1243
  */
1031
1244
  static VALUE rb_Sellers_search(VALUE self, VALUE strings)
1032
- {
1245
+ {
1033
1246
  GET_STRUCT(Sellers)
1034
1247
  return Sellers_iterate_strings(amatch, strings, Sellers_search);
1035
1248
  }
1036
1249
 
1037
- /*
1250
+ /*
1038
1251
  * Document-class: Amatch::PairDistance
1039
1252
  *
1040
1253
  * The pair distance between two strings is based on the number of adjacent
@@ -1045,7 +1258,7 @@ static VALUE rb_Sellers_search(VALUE self, VALUE strings)
1045
1258
  * are more dissimilar. The advantage of considering adjacent characters, is to
1046
1259
  * take account not only of the characters, but also of the character ordering
1047
1260
  * in the original strings.
1048
- *
1261
+ *
1049
1262
  * This metric is very capable to find similarities in natural languages.
1050
1263
  * It is explained in more detail in Simon White's article "How to Strike a
1051
1264
  * Match", located at this url:
@@ -1072,7 +1285,7 @@ DEF_CONSTRUCTOR(PairDistance, PairDistance)
1072
1285
 
1073
1286
  /*
1074
1287
  * call-seq: match(strings, regexp = /\s+/) -> results
1075
- *
1288
+ *
1076
1289
  * Uses this Amatch::PairDistance instance to match PairDistance#pattern against
1077
1290
  * <code>strings</code>. It returns the pair distance measure, that is a
1078
1291
  * returned value of 1.0 is an exact match, partial matches are lower
@@ -1088,7 +1301,7 @@ DEF_CONSTRUCTOR(PairDistance, PairDistance)
1088
1301
  * Array of Floats respectively.
1089
1302
  */
1090
1303
  static VALUE rb_PairDistance_match(int argc, VALUE *argv, VALUE self)
1091
- {
1304
+ {
1092
1305
  VALUE result, strings, regexp = Qnil;
1093
1306
  int use_regexp;
1094
1307
  GET_STRUCT(PairDistance)
@@ -1146,7 +1359,7 @@ static VALUE rb_str_pair_distance_similar(int argc, VALUE *argv, VALUE self)
1146
1359
  }
1147
1360
  }
1148
1361
 
1149
- /*
1362
+ /*
1150
1363
  * Document-class: Amatch::Hamming
1151
1364
  *
1152
1365
  * This class computes the Hamming distance between two strings.
@@ -1176,7 +1389,7 @@ DEF_CONSTRUCTOR(Hamming, General)
1176
1389
 
1177
1390
  /*
1178
1391
  * call-seq: match(strings) -> results
1179
- *
1392
+ *
1180
1393
  * Uses this Amatch::Hamming instance to match Amatch::Hamming#pattern against
1181
1394
  * <code>strings</code>, that is compute the hamming distance between
1182
1395
  * <code>pattern</code> and <code>strings</code>. <code>strings</code> has to
@@ -1184,7 +1397,7 @@ DEF_CONSTRUCTOR(Hamming, General)
1184
1397
  * is either a Fixnum or an Array of Fixnums respectively.
1185
1398
  */
1186
1399
  static VALUE rb_Hamming_match(VALUE self, VALUE strings)
1187
- {
1400
+ {
1188
1401
  GET_STRUCT(General)
1189
1402
  return General_iterate_strings(amatch, strings, Hamming_match);
1190
1403
  }
@@ -1200,7 +1413,7 @@ static VALUE rb_Hamming_match(VALUE self, VALUE strings)
1200
1413
  * respectively.
1201
1414
  */
1202
1415
  static VALUE rb_Hamming_similar(VALUE self, VALUE strings)
1203
- {
1416
+ {
1204
1417
  GET_STRUCT(General)
1205
1418
  return General_iterate_strings(amatch, strings, Hamming_similar);
1206
1419
  }
@@ -1222,7 +1435,7 @@ static VALUE rb_str_hamming_similar(VALUE self, VALUE strings)
1222
1435
  }
1223
1436
 
1224
1437
 
1225
- /*
1438
+ /*
1226
1439
  * Document-class: Amatch::LongestSubsequence
1227
1440
  *
1228
1441
  * This class computes the length of the longest subsequence common to two
@@ -1252,7 +1465,7 @@ DEF_CONSTRUCTOR(LongestSubsequence, General)
1252
1465
 
1253
1466
  /*
1254
1467
  * call-seq: match(strings) -> results
1255
- *
1468
+ *
1256
1469
  * Uses this Amatch::LongestSubsequence instance to match
1257
1470
  * LongestSubsequence#pattern against <code>strings</code>, that is compute the
1258
1471
  * length of the longest common subsequence. <code>strings</code> has to be
@@ -1260,14 +1473,14 @@ DEF_CONSTRUCTOR(LongestSubsequence, General)
1260
1473
  * is either a Fixnum or an Array of Fixnums respectively.
1261
1474
  */
1262
1475
  static VALUE rb_LongestSubsequence_match(VALUE self, VALUE strings)
1263
- {
1476
+ {
1264
1477
  GET_STRUCT(General)
1265
1478
  return General_iterate_strings(amatch, strings, LongestSubsequence_match);
1266
1479
  }
1267
1480
 
1268
1481
  /*
1269
1482
  * call-seq: similar(strings) -> results
1270
- *
1483
+ *
1271
1484
  * Uses this Amatch::LongestSubsequence instance to match
1272
1485
  * Amatch::LongestSubsequence#pattern against <code>strings</code>, and compute
1273
1486
  * a longest substring distance metric number between 0.0 for very unsimilar
@@ -1276,7 +1489,7 @@ static VALUE rb_LongestSubsequence_match(VALUE self, VALUE strings)
1276
1489
  * a Fixnum or an Array of Fixnums
1277
1490
  */
1278
1491
  static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
1279
- {
1492
+ {
1280
1493
  GET_STRUCT(General)
1281
1494
  return General_iterate_strings(amatch, strings, LongestSubsequence_similar);
1282
1495
  }
@@ -1292,12 +1505,12 @@ static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
1292
1505
  * is either a Float or an Array of Floats respectively.
1293
1506
  */
1294
1507
  static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
1295
- {
1508
+ {
1296
1509
  VALUE amatch = rb_LongestSubsequence_new(rb_cLongestSubsequence, self);
1297
1510
  return rb_LongestSubsequence_similar(amatch, strings);
1298
1511
  }
1299
1512
 
1300
- /*
1513
+ /*
1301
1514
  * Document-class: Amatch::LongestSubstring
1302
1515
  *
1303
1516
  * The longest common substring is the longest substring, that is part of
@@ -1308,7 +1521,7 @@ static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
1308
1521
  * The longest common substring between 'string' and 'string' is 'string'
1309
1522
  * again, thus the longest common substring length is 6. The longest common
1310
1523
  * substring between 'string' and 'storing' is 'ring', thus the longest common
1311
- * substring length is 4.
1524
+ * substring length is 4.
1312
1525
  */
1313
1526
 
1314
1527
  DEF_RB_FREE(LongestSubstring, General)
@@ -1329,7 +1542,7 @@ DEF_CONSTRUCTOR(LongestSubstring, General)
1329
1542
 
1330
1543
  /*
1331
1544
  * call-seq: match(strings) -> results
1332
- *
1545
+ *
1333
1546
  * Uses this Amatch::LongestSubstring instance to match
1334
1547
  * LongestSubstring#pattern against <code>strings</code>, that is compute the
1335
1548
  * length of the longest common substring. <code>strings</code> has to be
@@ -1344,7 +1557,7 @@ static VALUE rb_LongestSubstring_match(VALUE self, VALUE strings)
1344
1557
 
1345
1558
  /*
1346
1559
  * call-seq: similar(strings) -> results
1347
- *
1560
+ *
1348
1561
  * Uses this Amatch::LongestSubstring instance to match
1349
1562
  * Amatch::LongestSubstring#pattern against <code>strings</code>, and compute a
1350
1563
  * longest substring distance metric number between 0.0 for very unsimilar
@@ -1370,11 +1583,11 @@ static VALUE rb_LongestSubstring_similar(VALUE self, VALUE strings)
1370
1583
  * is either a Float or an Array of Floats respectively.
1371
1584
  */
1372
1585
  static VALUE rb_str_longest_substring_similar(VALUE self, VALUE strings)
1373
- {
1586
+ {
1374
1587
  VALUE amatch = rb_LongestSubstring_new(rb_cLongestSubstring, self);
1375
1588
  return rb_LongestSubstring_similar(amatch, strings);
1376
1589
  }
1377
-
1590
+
1378
1591
  /*
1379
1592
  * Document-class: Amatch::Jaro
1380
1593
  *
@@ -1571,6 +1784,17 @@ void Init_amatch_ext()
1571
1784
  rb_define_method(rb_cLevenshtein, "similar", rb_Levenshtein_similar, 1);
1572
1785
  rb_define_method(rb_mAmatchStringMethods, "levenshtein_similar", rb_str_levenshtein_similar, 1);
1573
1786
 
1787
+ /* DamerauLevenshtein */
1788
+ rb_cDamerauLevenshtein = rb_define_class_under(rb_mAmatch, "DamerauLevenshtein", rb_cObject);
1789
+ rb_define_alloc_func(rb_cDamerauLevenshtein, rb_DamerauLevenshtein_s_allocate);
1790
+ rb_define_method(rb_cDamerauLevenshtein, "initialize", rb_DamerauLevenshtein_initialize, 1);
1791
+ rb_define_method(rb_cDamerauLevenshtein, "pattern", rb_General_pattern, 0);
1792
+ rb_define_method(rb_cDamerauLevenshtein, "pattern=", rb_General_pattern_set, 1);
1793
+ rb_define_method(rb_cDamerauLevenshtein, "match", rb_DamerauLevenshtein_match, 1);
1794
+ rb_define_method(rb_cDamerauLevenshtein, "search", rb_DamerauLevenshtein_search, 1);
1795
+ rb_define_method(rb_cDamerauLevenshtein, "similar", rb_DamerauLevenshtein_similar, 1);
1796
+ rb_define_method(rb_mAmatchStringMethods, "damerau_levenshtein_similar", rb_str_damerau_levenshtein_similar, 1);
1797
+
1574
1798
  /* Sellers */
1575
1799
  rb_cSellers = rb_define_class_under(rb_mAmatch, "Sellers", rb_cObject);
1576
1800
  rb_define_alloc_func(rb_cSellers, rb_Sellers_s_allocate);