jaro_winkler 1.2.7 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5de1821108ec68b1c02ad1b83731e669d15a5e20
4
- data.tar.gz: 4686731f92f9f7bb3b790943f07bc148b63ad0ad
3
+ metadata.gz: 13eaf9c47df66ba5c9883611ee7a1c8468cc9e7a
4
+ data.tar.gz: 7c43db047cfb1aaade4c0d6f2c907ed19f8a79ab
5
5
  SHA512:
6
- metadata.gz: 6ffa1c7caa063549e0b271601a6c049c679e8fcb6eacf9297f7c83276117fa43ecebd268e96f5f4ca378501edfba383d1ea4f8f8c6f06f50afd01254609c18e4
7
- data.tar.gz: c042a318557e8539e42b41cdd3d5f776e68655a8a89357a90a92bc29f9a18e5bbbf37f7c86e76d43b942e64e78d7c35eaac3e3bad936d25524d4e1322098565c
6
+ metadata.gz: 57421be340741b44879c3104689363a7ca2897014a6e9cd0c3fcaa524b29abf1a5db370cab72e4de0299f26ca2e14357ce24f7b8b7cafce97ca1527c27c46798
7
+ data.tar.gz: 8db803506546b4a99dd7e267bc561c90664282b4c8dc253f22695a0a8cc55df64e559be3c7e68cb26e05fd4ca08626481a82c82128f8616e0a2b378b29012413
data/README.md CHANGED
@@ -14,7 +14,7 @@ gem install jaro_winkler
14
14
  require 'jaro_winkler'
15
15
  JaroWinkler.distance "MARTHA", "MARHTA"
16
16
  # => 0.9611
17
- JaroWinkler.distance "MARTHA", "marhta", case_match: true
17
+ JaroWinkler.distance "MARTHA", "marhta", ignore_case: true
18
18
  # => 0.9611
19
19
  JaroWinkler.distance "MARTHA", "MARHTA", weight: 0.2
20
20
  # => 0.9778
@@ -30,7 +30,7 @@ JaroWinkler.r_distance "MARTHA", "MARHTA" # Pure Ruby
30
30
 
31
31
  Name | Type | Default | Note
32
32
  ----------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------
33
- case_match | boolean | false | All lower case characters are converted to upper case prior to the comparison.
33
+ ignore_case | boolean | false | All lower case characters are converted to upper case prior to the comparison.
34
34
  weight | number | 0.1 | A constant scaling factor for how much the score is adjusted upwards for having common prefixes.
35
35
  threshold | number | 0.7 | The prefix bonus is only added when the compared strings have a Jaro distance above the threshold.
36
36
 
@@ -5,7 +5,7 @@
5
5
 
6
6
  Option* option_new(){
7
7
  Option *opt = calloc(1, sizeof(Option));
8
- opt->case_match = 0;
8
+ opt->ignore_case = 0;
9
9
  opt->weight = 0.1;
10
10
  opt->threshold = 0.7;
11
11
  return opt;
@@ -42,7 +42,7 @@ double c_distance(char *s1, int byte_len1, char *s2, int byte_len2, Option *opt)
42
42
  int ary_1_len, ary_2_len;
43
43
  unsigned long long *ary_1 = codepoints(s1, byte_len1, &ary_1_len), *ary_2 = codepoints(s2, byte_len2, &ary_2_len);
44
44
 
45
- if(opt->case_match){
45
+ if(opt->ignore_case){
46
46
  for(int i = 0; i < ary_1_len; ++i) if(ary_1[i] < 256 && islower(ary_1[i])) ary_1[i] -= 32;
47
47
  for(int i = 0; i < ary_2_len; ++i) if(ary_2[i] < 256 && islower(ary_2[i])) ary_2[i] -= 32;
48
48
  }
@@ -2,7 +2,7 @@
2
2
  #define DISTANCE_H 1
3
3
 
4
4
  typedef struct{
5
- char case_match;
5
+ char ignore_case;
6
6
  double weight, threshold;
7
7
  } Option;
8
8
 
@@ -15,11 +15,11 @@ VALUE rb_distance(int argc, VALUE *argv, VALUE self){
15
15
  if(TYPE(opt) == T_HASH){
16
16
  VALUE weight = rb_hash_aref(opt, ID2SYM(rb_intern("weight")));
17
17
  VALUE threshold = rb_hash_aref(opt, ID2SYM(rb_intern("threshold")));
18
- VALUE case_match = rb_hash_aref(opt, ID2SYM(rb_intern("case_match")));
18
+ VALUE ignore_case = rb_hash_aref(opt, ID2SYM(rb_intern("ignore_case")));
19
19
  if(!NIL_P(weight)) c_opt->weight = NUM2DBL(weight);
20
20
  if(c_opt->weight > 0.25) rb_raise(rb_eRuntimeError, "Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1.");
21
21
  if(!NIL_P(threshold)) c_opt->threshold = NUM2DBL(threshold);
22
- if(!NIL_P(case_match)) c_opt->case_match = (TYPE(case_match) == T_FALSE || NIL_P(case_match)) ? 0 : 1;
22
+ if(!NIL_P(ignore_case)) c_opt->ignore_case = (TYPE(ignore_case) == T_FALSE || NIL_P(ignore_case)) ? 0 : 1;
23
23
  }
24
24
  VALUE ret = rb_float_new(c_distance(StringValuePtr(s1), RSTRING_LEN(s1), StringValuePtr(s2), RSTRING_LEN(s2), c_opt));
25
25
  free(c_opt);
@@ -42,10 +42,10 @@ module JaroWinkler
42
42
  end
43
43
 
44
44
  def r_distance s1, s2, options = {}
45
- options = {weight: 0.1, threshold: 0.7, case_match: false}.merge options
46
- weight, threshold, case_match = options[:weight], options[:threshold], options[:case_match]
45
+ options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
46
+ weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
47
47
  raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
48
- s1, s2 = s1.upcase, s2.upcase if case_match
48
+ s1, s2 = s1.upcase, s2.upcase if ignore_case
49
49
  distance = jaro_distance(s1, s2)
50
50
  prefix = 0
51
51
  max_length = [4, s1.length, s2.length].min
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.2.7"
2
+ VERSION = "1.2.8"
3
3
  end
@@ -31,18 +31,32 @@ describe JaroWinkler do
31
31
  end
32
32
 
33
33
  it 'works with UTF-8' do
34
- expect(c_distance('變形金剛4:絕跡重生', '變形金剛4: 絕跡重生')).to eq c_distance('0123456789', '01234x56789')
34
+ expect(c_distance('變形金剛4:絕跡重生', '變形金剛4: 絕跡重生')).to be_within(0.0001).of(0.9818)
35
+ expect(c_distance('連勝文', '連勝丼')).to be_within(0.0001).of(0.8222)
36
+ expect(c_distance('馬英九', '馬英丸')).to be_within(0.0001).of(0.8222)
35
37
  end
36
38
 
37
- it 'can ignore case' do
38
- expect(r_distance('MARTHA', 'marhta', case_match: true)).to be_within(0.0001).of(0.9611)
39
- expect(c_distance('MARTHA', 'marhta', case_match: true)).to be_within(0.0001).of(0.9611)
39
+ it 'sets ignore_case' do
40
+ params = 'MARTHA', 'marhta', {ignore_case: true}
41
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9611)
42
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9611)
40
43
  end
41
44
 
42
- it 'can set weight' do
43
- expect(r_distance('MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
44
- expect(c_distance('MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
45
- expect{ r_distance('MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
46
- expect{ c_distance('MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
45
+ it 'sets weight' do
46
+ params = 'MARTHA', 'MARHTA', {weight: 0.2}
47
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9778)
48
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9778)
49
+ end
50
+
51
+ it 'sets threshold' do
52
+ params = 'MARTHA', 'MARHTA', {threshold: 0.99}
53
+ expect(r_distance(*params)).to be_within(0.0001).of(0.9445)
54
+ expect(c_distance(*params)).to be_within(0.0001).of(0.9445)
55
+ end
56
+
57
+ it 'throws exception when weight exceeding 0.25' do
58
+ params = 'MARTHA', 'MARHTA', {weight: 0.26}
59
+ expect{ r_distance(*params) }.to raise_error
60
+ expect{ c_distance(*params) }.to raise_error
47
61
  end
48
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang