object_similarity 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/object_similarity.rb +119 -0
- data/object_similarity.gemspec +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36c6f28929e8a3a8035b56a74310a4e99efbab3a
|
4
|
+
data.tar.gz: 91a84506444429a663d541733bb57d303fb6b69b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 960af75b973afc0ea9ebc7f6a1b632777a1f8c83e9f165872cb1cd83f6b58c0db476373ecb2ac5f5e6e799a53ff96fa917f9e37fa5875da5faf8c4b00fa0a3b5
|
7
|
+
data.tar.gz: fd7dbaa0d11c657f7ec8055b33650570c2350626cc034fca564ee9feea0c240831163a05eef345e376e3ba972ce8f014930ec9fe73b5e483aa059d93a830c7ca
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
|
3
|
+
# Scores how far apart two objects are by comparing a configurable set of
# their fields. Each field is handled by a FieldScorer subclass, and an
# ObjectScorer combines the weighted per-field distances into one number.
module ObjectSimilarity
  # Raised by a field scorer when a field's value must be left out of the
  # score (see FieldScorer#skip_value?). It derives from StandardError so that
  # ordinary `rescue` clauses can see it.
  class SkipException < StandardError
  end

  # Base class for per-field scorers. It holds the reference object, the field
  # to read, and the weight applied to that field's distance.
  #
  # Subclasses must define `distance(other_object)`; this base class does not
  # provide one. They may override `skip_value?` to exclude values.
  class FieldScorer
    attr_reader :field, :weight

    # field   - name of the method called on each object to read the value.
    # object  - the reference object other objects are compared against.
    # weight  - multiplier for this field's distance; nil means 1.
    # options - per-field options; a truthy :value entry is used instead of
    #           reading the field from the reference object.
    def initialize(field, object, weight, options = {})
      @field = field
      @object = object
      @weight = weight || 1
      @options = options || {}
    end

    # Returns the subclass-defined distance to other_object, multiplied by
    # the weight.
    def weighted_distance(other_object)
      distance(other_object) * @weight
    end

    # Returns the reference value for this field (memoized when truthy).
    #
    # Raises SkipException when the value is one skip_value? rejects.
    def value
      @value ||= begin
        candidate = @options[:value] || get_value(@object)
        raise SkipException if skip_value?(candidate)

        candidate
      end
    end

    # Reads this scorer's field from the given object.
    #
    # Raises SkipException when the value is one skip_value? rejects.
    def get_value(object)
      field_value = object.send(@field)
      raise SkipException if skip_value?(field_value)

      field_value
    end

    # Whether the given value should be excluded from scoring. The base
    # implementation never skips.
    def skip_value?(value)
      false
    end
  end

  # Distance is 0 when both objects have equal field values, 1 otherwise.
  class ExactFieldScorer < FieldScorer
    def distance(other_object)
      value == get_value(other_object) ? 0 : 1
    end
  end

  # Distance is the absolute difference between the two field values.
  # nil values are skipped.
  class NearnessFieldScorer < FieldScorer
    def skip_value?(value)
      value.nil?
    end

    def distance(other_object)
      (value - get_value(other_object)).abs
    end
  end

  # Distance is the Levenshtein (edit) distance between the two field values
  # after downcasing both. nil values are skipped (inherited behaviour).
  class InsensitiveLevenshteinNearnessFieldScorer < NearnessFieldScorer
    # Uses Text::Levenshtein when that constant is already loaded; otherwise
    # falls back to the built-in implementation below, so this scorer does
    # not raise NameError when nothing has defined Text::Levenshtein.
    def distance(other_object)
      reference = value.downcase
      candidate = get_value(other_object).downcase

      if defined?(Text::Levenshtein)
        Text::Levenshtein.distance(reference, candidate)
      else
        levenshtein_distance(reference, candidate)
      end
    end

    private

    # Edit distance between two strings, compared code point by code point.
    # Only two rows of the edit matrix are kept at any time.
    def levenshtein_distance(source, target)
      source_points = source.codepoints.to_a
      target_points = target.codepoints.to_a
      return target_points.size if source_points.empty?
      return source_points.size if target_points.empty?

      previous_row = (0..target_points.size).to_a
      source_points.each_with_index do |source_point, row|
        current_row = [row + 1]
        target_points.each_with_index do |target_point, column|
          substitution_cost = source_point == target_point ? 0 : 1
          current_row << [
            previous_row[column + 1] + 1,              # deletion
            current_row[column] + 1,                   # insertion
            previous_row[column] + substitution_cost   # substitution
          ].min
        end
        previous_row = current_row
      end
      previous_row.last
    end
  end

  # Combines several field scorers into one score between a reference object
  # and any other object.
  class ObjectScorer
    # type          - scoring strategy; "#{type}_score" must be a method this
    #                 object responds to (:euclidean_distance or
    #                 :normalized_euclidean_distance).
    # object        - the reference object.
    # field_scorers - maps each field name to a scorer definition: a
    #                 FieldScorer subclass, a FieldScorer instance (its class
    #                 is used), or an underscored name such as :exact that
    #                 resolves to ObjectSimilarity::ExactFieldScorer.
    # field_weight  - maps field name to weight; nil is treated as empty.
    # field_options - maps field name to an options hash; nil is treated as
    #                 empty.
    #
    # Raises ArgumentError for an unknown scoring type.
    def initialize(type, object, field_scorers, field_weight, field_options = {})
      @type = type
      unless respond_to?("#{@type}_score")
        raise ArgumentError, "Invalid scoring type: #{@type.inspect}"
      end

      @object = object
      weights = field_weight || {}
      options = field_options || {}

      @field_scorers = field_scorers.map do |field, scorer_definition|
        field_scorer_class(scorer_definition).new(field, object, weights[field], options[field] || {})
      end
    end

    # Scores other_object with the strategy chosen at construction time.
    def score(other_object)
      send("#{@type}_score", other_object)
    end

    # Returns the sum of the squared weighted distances over all fields.
    # Skipped fields contribute 0.
    #
    # NOTE(review): no square root is taken, so this is the squared Euclidean
    # distance. It is left unchanged so existing numeric results are
    # preserved - confirm whether that is intended.
    def euclidean_distance_score(other_object)
      @field_scorers.reduce(0) do |sum, scorer|
        sum + squared_weighted_distance(scorer, other_object)
      end
    end

    # Returns euclidean_distance_score divided by the square root of the
    # number of field scorers, or 0.0 when there are no field scorers
    # (avoiding a division by zero that would yield NaN).
    def normalized_euclidean_distance_score(other_object)
      return 0.0 if @field_scorers.empty?

      euclidean_distance_score(other_object) / Math.sqrt(@field_scorers.size)
    end

    # Prints one line per field to standard output: the unweighted distance
    # and the weight, or SKIPPED when the field raised SkipException.
    def print_distances_report(other_object)
      @field_scorers.each do |scorer|
        print "#{scorer.field}: "
        begin
          puts "#{scorer.distance(other_object)} * #{scorer.weight}"
        rescue SkipException
          puts "SKIPPED"
        end
      end
    end

    private

    # Squared weighted distance for one field; 0 when the field is skipped.
    def squared_weighted_distance(scorer, other_object)
      scorer.weighted_distance(other_object)**2
    rescue SkipException
      0
    end

    # Resolves a scorer definition to a FieldScorer subclass.
    #
    # Raises NameError when a name does not match a constant in
    # ObjectSimilarity.
    def field_scorer_class(scorer_definition)
      # A scorer class is used directly; the caller instantiates it.
      return scorer_definition if scorer_definition.is_a?(Class) && scorer_definition <= FieldScorer
      # An instance only tells us which class to build.
      return scorer_definition.class if scorer_definition.is_a?(FieldScorer)

      # :insensitive_levenshtein_nearness => "InsensitiveLevenshteinNearness"
      partial_class_name = scorer_definition.to_s.capitalize.gsub(/_[a-z]/i) do |match|
        match[1].upcase
      end

      ObjectSimilarity.const_get("#{partial_class_name}FieldScorer", false)
    end
  end
end
|
119
|
+
|
data/object_similarity.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: object_similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Underwood
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |
|
14
14
|
A ruby library to calculate similarity between ruby objects
|
@@ -19,6 +19,7 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- Gemfile
|
21
21
|
- README.md
|
22
|
+
- lib/object_similarity.rb
|
22
23
|
- object_similarity.gemspec
|
23
24
|
homepage: https://github.com/cheerfulstoic/object_similarity/
|
24
25
|
licenses:
|