jaro_winkler 1.2.7 → 1.2.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5de1821108ec68b1c02ad1b83731e669d15a5e20
4
- data.tar.gz: 4686731f92f9f7bb3b790943f07bc148b63ad0ad
3
+ metadata.gz: 13eaf9c47df66ba5c9883611ee7a1c8468cc9e7a
4
+ data.tar.gz: 7c43db047cfb1aaade4c0d6f2c907ed19f8a79ab
5
5
  SHA512:
6
- metadata.gz: 6ffa1c7caa063549e0b271601a6c049c679e8fcb6eacf9297f7c83276117fa43ecebd268e96f5f4ca378501edfba383d1ea4f8f8c6f06f50afd01254609c18e4
7
- data.tar.gz: c042a318557e8539e42b41cdd3d5f776e68655a8a89357a90a92bc29f9a18e5bbbf37f7c86e76d43b942e64e78d7c35eaac3e3bad936d25524d4e1322098565c
6
+ metadata.gz: 57421be340741b44879c3104689363a7ca2897014a6e9cd0c3fcaa524b29abf1a5db370cab72e4de0299f26ca2e14357ce24f7b8b7cafce97ca1527c27c46798
7
+ data.tar.gz: 8db803506546b4a99dd7e267bc561c90664282b4c8dc253f22695a0a8cc55df64e559be3c7e68cb26e05fd4ca08626481a82c82128f8616e0a2b378b29012413
data/README.md CHANGED
@@ -14,7 +14,7 @@ gem install jaro_winkler
14
14
  require 'jaro_winkler'
15
15
  JaroWinkler.distance "MARTHA", "MARHTA"
16
16
  # => 0.9611
17
- JaroWinkler.distance "MARTHA", "marhta", case_match: true
17
+ JaroWinkler.distance "MARTHA", "marhta", ignore_case: true
18
18
  # => 0.9611
19
19
  JaroWinkler.distance "MARTHA", "MARHTA", weight: 0.2
20
20
  # => 0.9778
@@ -30,7 +30,7 @@ JaroWinkler.r_distance "MARTHA", "MARHTA" # Pure Ruby
30
30
 
31
31
  Name | Type | Default | Note
32
32
  ----------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------
33
- case_match | boolean | false | All lower case characters are converted to upper case prior to the comparison.
33
+ ignore_case | boolean | false | All lower case characters are converted to upper case prior to the comparison.
34
34
  weight | number | 0.1 | A constant scaling factor for how much the score is adjusted upwards for having common prefixes.
35
35
  threshold | number | 0.7 | The prefix bonus is only added when the compared strings have a Jaro distance above the threshold.
36
36
 
@@ -5,7 +5,7 @@
5
5
 
6
6
  Option* option_new(){
7
7
  Option *opt = calloc(1, sizeof(Option));
8
- opt->case_match = 0;
8
+ opt->ignore_case = 0;
9
9
  opt->weight = 0.1;
10
10
  opt->threshold = 0.7;
11
11
  return opt;
@@ -42,7 +42,7 @@ double c_distance(char *s1, int byte_len1, char *s2, int byte_len2, Option *opt)
42
42
  int ary_1_len, ary_2_len;
43
43
  unsigned long long *ary_1 = codepoints(s1, byte_len1, &ary_1_len), *ary_2 = codepoints(s2, byte_len2, &ary_2_len);
44
44
 
45
- if(opt->case_match){
45
+ if(opt->ignore_case){
46
46
  for(int i = 0; i < ary_1_len; ++i) if(ary_1[i] < 256 && islower(ary_1[i])) ary_1[i] -= 32;
47
47
  for(int i = 0; i < ary_2_len; ++i) if(ary_2[i] < 256 && islower(ary_2[i])) ary_2[i] -= 32;
48
48
  }
@@ -2,7 +2,7 @@
2
2
  #define DISTANCE_H 1
3
3
 
4
4
  typedef struct{
5
- char case_match;
5
+ char ignore_case;
6
6
  double weight, threshold;
7
7
  } Option;
8
8
 
@@ -15,11 +15,11 @@ VALUE rb_distance(int argc, VALUE *argv, VALUE self){
15
15
  if(TYPE(opt) == T_HASH){
16
16
  VALUE weight = rb_hash_aref(opt, ID2SYM(rb_intern("weight")));
17
17
  VALUE threshold = rb_hash_aref(opt, ID2SYM(rb_intern("threshold")));
18
- VALUE case_match = rb_hash_aref(opt, ID2SYM(rb_intern("case_match")));
18
+ VALUE ignore_case = rb_hash_aref(opt, ID2SYM(rb_intern("ignore_case")));
19
19
  if(!NIL_P(weight)) c_opt->weight = NUM2DBL(weight);
20
20
  if(c_opt->weight > 0.25) rb_raise(rb_eRuntimeError, "Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1.");
21
21
  if(!NIL_P(threshold)) c_opt->threshold = NUM2DBL(threshold);
22
- if(!NIL_P(case_match)) c_opt->case_match = (TYPE(case_match) == T_FALSE || NIL_P(case_match)) ? 0 : 1;
22
+ if(!NIL_P(ignore_case)) c_opt->ignore_case = (TYPE(ignore_case) == T_FALSE || NIL_P(ignore_case)) ? 0 : 1;
23
23
  }
24
24
  VALUE ret = rb_float_new(c_distance(StringValuePtr(s1), RSTRING_LEN(s1), StringValuePtr(s2), RSTRING_LEN(s2), c_opt));
25
25
  free(c_opt);
@@ -42,10 +42,10 @@ module JaroWinkler
42
42
  end
43
43
 
44
44
  def r_distance s1, s2, options = {}
45
- options = {weight: 0.1, threshold: 0.7, case_match: false}.merge options
46
- weight, threshold, case_match = options[:weight], options[:threshold], options[:case_match]
45
+ options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
46
+ weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
47
47
  raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
48
- s1, s2 = s1.upcase, s2.upcase if case_match
48
+ s1, s2 = s1.upcase, s2.upcase if ignore_case
49
49
  distance = jaro_distance(s1, s2)
50
50
  prefix = 0
51
51
  max_length = [4, s1.length, s2.length].min
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.2.7"
2
+ VERSION = "1.2.8"
3
3
  end
@@ -31,18 +31,32 @@ describe JaroWinkler do
31
31
  end
32
32
 
33
33
  it 'works with UTF-8' do
34
- expect(c_distance('變形金剛4:絕跡重生', '變形金剛4: 絕跡重生')).to eq c_distance('0123456789', '01234x56789')
34
+ expect(c_distance('變形金剛4:絕跡重生', '變形金剛4: 絕跡重生')).to be_within(0.0001).of(0.9818)
35
+ expect(c_distance('連勝文', '連勝丼')).to be_within(0.0001).of(0.8222)
36
+ expect(c_distance('馬英九', '馬英丸')).to be_within(0.0001).of(0.8222)
35
37
  end
36
38
 
37
- it 'can ignore case' do
38
- expect(r_distance('MARTHA', 'marhta', case_match: true)).to be_within(0.0001).of(0.9611)
39
- expect(c_distance('MARTHA', 'marhta', case_match: true)).to be_within(0.0001).of(0.9611)
39
+ it 'sets ignore_case' do
40
+ params = 'MARTHA', 'marhta', {ignore_case: true}
41
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9611)
42
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9611)
40
43
  end
41
44
 
42
- it 'can set weight' do
43
- expect(r_distance('MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
44
- expect(c_distance('MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
45
- expect{ r_distance('MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
46
- expect{ c_distance('MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
45
+ it 'sets weight' do
46
+ params = 'MARTHA', 'MARHTA', {weight: 0.2}
47
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9778)
48
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9778)
49
+ end
50
+
51
+ it 'sets threshold' do
52
+ params = 'MARTHA', 'MARHTA', {threshold: 0.99}
53
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9445)
54
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9445)
55
+ end
56
+
57
+ it 'throws exception when weight exceeding 0.25' do
58
+ params = 'MARTHA', 'MARHTA', {weight: 0.26}
59
+ expect{ r_distance(*params) }.to raise_error
60
+ expect{ c_distance(*params) }.to raise_error
47
61
  end
48
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang