object_similarity 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5824ac6f614151b457b42bcd8388f011c4d5018e
4
- data.tar.gz: 8103be5eaad7b60442e670a869785b8192ea18ce
3
+ metadata.gz: 36c6f28929e8a3a8035b56a74310a4e99efbab3a
4
+ data.tar.gz: 91a84506444429a663d541733bb57d303fb6b69b
5
5
  SHA512:
6
- metadata.gz: 3f74a0e0716c85fee4aa00851a915a08b7ae1f8bcf2c06adc2453f40af4f06f42b8f0205cde6e17654bd277ed81230c73cba6efe92ac55a5a617508b3130a647
7
- data.tar.gz: 14904116c305df07043931522bb537bfd6252996afdc3b3976d3bc7858d81f97cc0d7db78a896c13905bfde823d11c96857b6116862056f1fc31486dc1a77e81
6
+ metadata.gz: 960af75b973afc0ea9ebc7f6a1b632777a1f8c83e9f165872cb1cd83f6b58c0db476373ecb2ac5f5e6e799a53ff96fa917f9e37fa5875da5faf8c4b00fa0a3b5
7
+ data.tar.gz: fd7dbaa0d11c657f7ec8055b33650570c2350626cc034fca564ee9feea0c240831163a05eef345e376e3ba972ce8f014930ec9fe73b5e483aa059d93a830c7ca
@@ -0,0 +1,119 @@
1
+ require 'matrix'
2
+
3
# Scores how far apart two Ruby objects are by comparing a configurable set
# of fields, each with its own scoring strategy and weight.
module ObjectSimilarity
  # Raised while reading a field value to signal that the field cannot be
  # scored for the objects at hand and should be left out of the total.
  #
  # Inherits from StandardError (not Exception) so that it follows Ruby's
  # convention for application-level errors.
  class SkipException < StandardError
  end

  # Base class for per-field scorers. A scorer is bound to one field of a
  # reference object; subclasses implement #distance to compare that field
  # against the same field on another object.
  class FieldScorer
    attr_reader :field, :weight

    # @param field   [Symbol, String] name of the method that reads the field
    # @param object  [Object] reference object the scorer is bound to
    # @param weight  [Numeric, nil] multiplier applied to the raw distance;
    #   defaults to 1 when nil
    # @param options [Hash] per-field options; +:value+ overrides the value
    #   read from +object+
    def initialize(field, object, weight, options = {})
      @field = field
      @object = object
      @weight = weight || 1
      @options = options
    end

    # @param other_object [Object] object to compare against
    # @return [Numeric] the raw distance multiplied by the field weight
    # @raise [SkipException] when either value must be skipped
    def weighted_distance(other_object)
      distance(other_object) * @weight
    end

    # Raw (unweighted) distance between the reference value and the value
    # of the same field on +other_object+. Subclasses must override this.
    #
    # @raise [NotImplementedError] always, in the base class
    def distance(_other_object)
      raise NotImplementedError, "#{self.class} must implement #distance"
    end

    # Value of the field on the reference object, or the +:value+ option
    # when one was given. Memoized with `||=`, so a nil/false value is
    # simply recomputed on the next call.
    #
    # @raise [SkipException] when the value is rejected by #skip_value?
    def value
      @value ||= (@options[:value] || get_value(@object)).tap do |candidate|
        raise SkipException if skip_value?(candidate)
      end
    end

    # Reads the field from +object+ by sending it the field name.
    #
    # @raise [SkipException] when the value is rejected by #skip_value?
    def get_value(object)
      object.send(@field).tap do |candidate|
        raise SkipException if skip_value?(candidate)
      end
    end

    # Whether +value+ should exclude this field from scoring. The base
    # implementation never skips; subclasses may override.
    def skip_value?(value)
      false
    end
  end

  # Distance is 0 when both values are equal (==) and 1 otherwise.
  class ExactFieldScorer < FieldScorer
    def distance(other_object)
      value == get_value(other_object) ? 0 : 1
    end
  end

  # Distance is the absolute difference between the two values. Fields whose
  # value is nil on either object are skipped.
  class NearnessFieldScorer < FieldScorer
    def skip_value?(value)
      value.nil?
    end

    def distance(other_object)
      (value - get_value(other_object)).abs
    end
  end

  # Distance is the Levenshtein edit distance between the downcased values.
  #
  # NOTE(review): Text::Levenshtein is not defined or required anywhere in
  # this file and the gem metadata declares no dependencies - presumably it
  # comes from the `text` gem, which the caller must load; confirm.
  class InsensitiveLevenshteinNearnessFieldScorer < NearnessFieldScorer
    def distance(other_object)
      Text::Levenshtein.distance(value.downcase, get_value(other_object).downcase)
    end
  end

  # Combines several field scorers, all bound to one reference object, into
  # a single score against another object.
  class ObjectScorer
    # @param type          [Symbol, String] scoring strategy; a public
    #   "<type>_score" method must exist (e.g. :euclidean_distance)
    # @param object        [Object] reference object
    # @param field_scorers [Hash] field name => scorer definition; a
    #   definition is either a FieldScorer subclass or an underscored name
    #   such as :exact or :insensitive_levenshtein_nearness
    # @param field_weight  [Hash] field name => weight (missing => 1)
    # @param field_options [Hash] field name => options hash for the scorer
    # @raise [ArgumentError] when +type+ names no scoring method
    def initialize(type, object, field_scorers, field_weight, field_options = {})
      @type = type
      raise ArgumentError, "Invalid scoring type: #{@type.inspect}" unless respond_to?("#{@type}_score")
      @object = object

      @field_scorers = field_scorers.map do |field, scorer_definition|
        field_scorer_class(scorer_definition).new(field, object, field_weight[field], field_options[field] || {})
      end
    end

    # Scores +other_object+ with the strategy chosen at construction.
    # public_send mirrors the respond_to? guard in #initialize, which only
    # accepts public methods.
    def score(other_object)
      public_send("#{@type}_score", other_object)
    end

    # Sum of the squared weighted distances over all fields. Note that no
    # square root is taken, so this is the squared Euclidean distance.
    # Skipped fields contribute 0.
    def euclidean_distance_score(other_object)
      @field_scorers.inject(0) do |sum, scorer|
        sum + squared_weighted_distance(scorer, other_object)
      end
    end

    # #euclidean_distance_score divided by the square root of the number of
    # field scorers. Returns 0.0 when there are no field scorers, instead of
    # the NaN that 0 / 0.0 would produce.
    def normalized_euclidean_distance_score(other_object)
      return 0.0 if @field_scorers.empty?

      euclidean_distance_score(other_object) / Math.sqrt(@field_scorers.size)
    end

    # Prints one line per field to standard output: the raw distance and the
    # weight, or SKIPPED when the field could not be scored.
    def print_distances_report(other_object)
      @field_scorers.each do |scorer|
        print "#{scorer.field}: "
        begin
          puts "#{scorer.distance(other_object)} * #{scorer.weight}"
        rescue SkipException
          puts "SKIPPED"
        end
      end
    end

    private

    # Squared weighted distance for one field; 0 when the field is skipped.
    def squared_weighted_distance(scorer, other_object)
      scorer.weighted_distance(other_object) ** 2
    rescue SkipException
      0
    end

    # Resolves a scorer definition to a FieldScorer subclass.
    #
    # A class that is FieldScorer or one of its subclasses is returned
    # as-is. Anything else is converted to a string, camel-cased and looked
    # up as "<Name>FieldScorer" inside the ObjectSimilarity namespace.
    #
    # @raise [NameError] when no matching scorer class exists
    def field_scorer_class(scorer_definition)
      return scorer_definition if scorer_definition.is_a?(Class) && scorer_definition <= FieldScorer

      partial_class_name = scorer_definition.to_s.capitalize.gsub(/_[a-z]/i) do |match|
        match[1].upcase
      end

      # Look the constant up on the namespace itself rather than passing a
      # "::A::B" path to const_get, which older Rubies do not accept.
      ObjectSimilarity.const_get("#{partial_class_name}FieldScorer")
    end
  end
end
119
+
@@ -3,7 +3,7 @@ $:.unshift lib unless $:.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "object_similarity"
6
- s.version = '0.0.1'
6
+ s.version = '0.1.0'
7
7
  s.required_ruby_version = ">= 1.9.1"
8
8
 
9
9
  s.authors = "Brian Underwood"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: object_similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brian Underwood
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-09 00:00:00.000000000 Z
11
+ date: 2014-11-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  A ruby library to calculate similarity between ruby objects
@@ -19,6 +19,7 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - Gemfile
21
21
  - README.md
22
+ - lib/object_similarity.rb
22
23
  - object_similarity.gemspec
23
24
  homepage: https://github.com/cheerfulstoic/object_similarity/
24
25
  licenses: