object_similarity 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/object_similarity.rb +119 -0
- data/object_similarity.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36c6f28929e8a3a8035b56a74310a4e99efbab3a
|
4
|
+
data.tar.gz: 91a84506444429a663d541733bb57d303fb6b69b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 960af75b973afc0ea9ebc7f6a1b632777a1f8c83e9f165872cb1cd83f6b58c0db476373ecb2ac5f5e6e799a53ff96fa917f9e37fa5875da5faf8c4b00fa0a3b5
|
7
|
+
data.tar.gz: fd7dbaa0d11c657f7ec8055b33650570c2350626cc034fca564ee9feea0c240831163a05eef345e376e3ba972ce8f014930ec9fe73b5e483aa059d93a830c7ca
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
|
3
|
+
module ObjectSimilarity
  # Raised by a field scorer when a value has to be left out of a score
  # (see FieldScorer#skip_value?). ObjectScorer rescues it per field.
  #
  # Inherits from StandardError rather than Exception so that it is an
  # ordinary application error and is handled by a plain `rescue`.
  class SkipException < StandardError
  end

  # Base class for scoring a single field of a reference object against the
  # same field of other objects.
  #
  # Subclasses must implement `distance(other_object)`; this class does not
  # define it. They may override `skip_value?` to exclude certain values.
  class FieldScorer
    attr_reader :field, :weight

    # field   - name of the method called on objects to read the value.
    # object  - the reference object the others are compared against.
    # weight  - multiplier applied to the distance; nil means 1.
    # options - Hash; a truthy :value entry is used instead of reading the
    #           field from `object`. nil is treated as an empty Hash.
    def initialize(field, object, weight, options = {})
      @field = field
      @object = object
      @weight = weight || 1
      @options = options || {}
    end

    # Returns the subclass-defined distance to other_object multiplied by
    # the weight.
    def weighted_distance(other_object)
      distance(other_object) * @weight
    end

    # Returns the reference value: options[:value] when truthy, otherwise the
    # field read from the reference object. The result is memoized with
    # `||=`, so a nil/false result is recomputed on every call.
    #
    # Raises SkipException when skip_value? rejects the value.
    def value
      @value ||= begin
        candidate = @options[:value] || get_value(@object)
        raise SkipException if skip_value?(candidate)

        candidate
      end
    end

    # Reads the field from `object` via `send` (so non-public readers are
    # reachable too) and returns it.
    #
    # Raises SkipException when skip_value? rejects the value.
    def get_value(object)
      fetched = object.send(@field)
      raise SkipException if skip_value?(fetched)

      fetched
    end

    # Hook for subclasses: return true for values that must not be scored.
    # The base implementation never skips.
    def skip_value?(value)
      false
    end
  end

  # Distance is 0 when both values are == and 1 otherwise.
  class ExactFieldScorer < FieldScorer
    def distance(other_object)
      value == get_value(other_object) ? 0 : 1
    end
  end

  # Distance is the absolute difference of the two values. nil values are
  # skipped (SkipException) instead of being subtracted.
  class NearnessFieldScorer < FieldScorer
    def skip_value?(value)
      value.nil?
    end

    def distance(other_object)
      (value - get_value(other_object)).abs
    end
  end

  # Distance is the Levenshtein edit distance between the downcased values.
  class InsensitiveLevenshteinNearnessFieldScorer < NearnessFieldScorer
    def distance(other_object)
      levenshtein(value.downcase, get_value(other_object).downcase)
    end

    private

    # Uses Text::Levenshtein when the caller has loaded it; this file does
    # not require it, so otherwise fall back to a pure-Ruby two-row
    # dynamic-programming implementation over codepoints.
    def levenshtein(left, right)
      return ::Text::Levenshtein.distance(left, right) if defined?(::Text::Levenshtein)

      source = left.codepoints.to_a
      target = right.codepoints.to_a
      previous = (0..target.length).to_a

      source.each_with_index do |source_char, row|
        current = [row + 1]
        target.each_with_index do |target_char, col|
          substitution = previous[col] + (source_char == target_char ? 0 : 1)
          current << [previous[col + 1] + 1, current[col] + 1, substitution].min
        end
        previous = current
      end

      previous.last
    end
  end

  # Combines several field scorers into one score for a reference object.
  class ObjectScorer
    # type          - scoring type; "#{type}_score" must be a method this
    #                 object responds to (e.g. :euclidean_distance).
    # object        - the reference object.
    # field_scorers - pairs of field => scorer definition. A definition is
    #                 either a FieldScorer subclass or a name such as :exact
    #                 or :insensitive_levenshtein_nearness.
    # field_weight  - Hash of field => weight (nil is treated as empty).
    # field_options - Hash of field => options Hash (nil is treated as empty).
    #
    # Raises ArgumentError for an unknown scoring type.
    def initialize(type, object, field_scorers, field_weight, field_options = {})
      @type = type
      raise ArgumentError, "Invalid scoring type: #{@type.inspect}" unless respond_to?("#{@type}_score")
      @object = object

      weights = field_weight || {}
      options = field_options || {}

      @field_scorers = field_scorers.map do |field, scorer_definition|
        field_scorer_class(scorer_definition).new(field, object, weights[field], options[field] || {})
      end
    end

    # Scores other_object with the scoring type chosen at construction.
    def score(other_object)
      send("#{@type}_score", other_object)
    end

    # Returns the sum of the squared weighted distances of all fields.
    # Skipped fields contribute 0.
    #
    # NOTE(review): no square root is taken, so this is the squared
    # Euclidean distance. Left as-is because callers may rely on the
    # current values - confirm before changing.
    def euclidean_distance_score(other_object)
      @field_scorers.inject(0) do |sum, scorer|
        sum + squared_weighted_distance(scorer, other_object)
      end
    end

    # Returns euclidean_distance_score divided by the square root of the
    # number of field scorers, or 0.0 when there are none (avoids 0/0 = NaN).
    def normalized_euclidean_distance_score(other_object)
      return 0.0 if @field_scorers.empty?

      euclidean_distance_score(other_object) / Math.sqrt(@field_scorers.size)
    end

    # Prints one line per field to stdout: "<field>: <distance> * <weight>",
    # or "<field>: SKIPPED" when the field raised SkipException.
    def print_distances_report(other_object)
      @field_scorers.each do |scorer|
        print "#{scorer.field}: "
        begin
          puts "#{scorer.distance(other_object)} * #{scorer.weight}"
        rescue SkipException
          puts "SKIPPED"
        end
      end
    end

    private

    # Squared weighted distance for one field; 0 when the field is skipped.
    # (The original code questioned whether 0 is the right contribution for
    # a skipped field; that behaviour is kept unchanged.)
    def squared_weighted_distance(scorer, other_object)
      scorer.weighted_distance(other_object)**2
    rescue SkipException
      0
    end

    # Resolves a scorer definition to a FieldScorer subclass.
    #
    # A FieldScorer subclass is returned unchanged. Anything else is turned
    # into a class name: :insensitive_levenshtein_nearness becomes
    # ObjectSimilarity::InsensitiveLevenshteinNearnessFieldScorer.
    #
    # Raises NameError when no such class exists in ObjectSimilarity.
    def field_scorer_class(scorer_definition)
      # Must test for a class, not an instance: the caller invokes `.new`.
      return scorer_definition if scorer_definition.is_a?(Class) && scorer_definition <= FieldScorer

      partial_class_name = scorer_definition.to_s.capitalize.gsub(/_[a-z]/i) do |match|
        match[1].upcase
      end

      ObjectSimilarity.const_get("#{partial_class_name}FieldScorer", false)
    end
  end
end
|
119
|
+
|
data/object_similarity.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: object_similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Underwood
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
A ruby library to calculate similarity between ruby objects
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- Gemfile
|
21
21
|
- README.md
|
22
|
+
- lib/object_similarity.rb
|
22
23
|
- object_similarity.gemspec
|
23
24
|
homepage: https://github.com/cheerfulstoic/object_similarity/
|
24
25
|
licenses:
|