twins 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/twins.rb +52 -0
- data/lib/twins/utilities.rb +46 -0
- data/lib/twins/version.rb +3 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 20e9e6421c19c6abccbb7f121b1ba3a36ae56a7e
|
4
|
+
data.tar.gz: 660fdd54b8e6acb0d2a947c329e0293d147919af
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e2dababf31cb5d844ae4ff9588758e27538fad29fd086354a6f43e66640647f0c8339c838a17c34515339bd6191286ea6618e4fd042a82bee70140980c905710
|
7
|
+
data.tar.gz: fd4442cd82dbc5d41df4d605d724c03c3070e8e363dd4b0b97b54454e298b059289de248e1c6284b6109efdb1812f580f2977af6bb48a00dbb1da2c08b05aefc
|
data/lib/twins.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'active_support/core_ext'
|
2
|
+
require 'twins/utilities'
|
3
|
+
|
4
|
+
module Twins

  # Consolidates a collection of similar elements into a single Hash by
  # picking, for every key, the best candidate among the elements' values.
  #
  # Elements that are not Hashes are first converted to Hashes built from
  # their instance variables (String keys, without the leading "@").
  #
  # For each key found in any element:
  # * when every element that holds the key holds a Hash, those Hashes are
  #   consolidated recursively (a Hash found under options[:priority][key]
  #   is forwarded as the nested priority);
  # * otherwise, when options[:priority] has an entry for the key, the first
  #   value with the shortest Twins::Utilities.distance to that entry wins;
  # * otherwise the value is chosen by Twins::Utilities.mode.
  #
  # @param collection [Enumerable] A collection of Hash objects, or of
  #   objects that all share the class of the first element
  # @param options [Hash]
  # @option options [Hash] :priority preferred value for each key
  # @return [Hash, Nil] a Hash with indifferent access, or nil when the
  #   collection has no truthy element
  # @raise [ArgumentError] when the elements are neither all Hashes nor all
  #   instances of the first element's class
  def consolidate(collection, options = {})
    return nil unless collection.any?

    if collection.all? { |e| e.is_a?(Hash) }
      # noop
    elsif collection.all? { |e| e.is_a?(collection.first.class) }
      collection = collection.map do |element|
        Hash[element.instance_variables.map { |name| [name.to_s.sub(/\A@/, ''), element.instance_variable_get(name)] }]
      end
    else
      raise ArgumentError, "The collection's elements must all be of the same Class"
    end

    options = options.with_indifferent_access
    priorities = options[:priority]
    priorities = nil unless priorities.respond_to?(:[])
    consolidated = {}

    # A key's result depends only on the whole collection, so each distinct
    # key is resolved exactly once, in order of first appearance. Resolving
    # it again for every element used to overwrite the recursive result.
    collection.flat_map(&:keys).uniq.each do |key|
      # Filter elements without a given key to avoid unintentionally nil values
      values = collection.select { |element| element.key?(key) }.map { |element| element[key] }
      target = priorities[key] if priorities

      consolidated[key] =
        if values.all? { |value| value.is_a?(Hash) }
          # Recursively consolidate nested hashes, using only the elements
          # that actually carry the key
          consolidate(values, target.is_a?(Hash) ? { priority: target } : {})
        elsif target
          # Compute each element's distance from the given priority
          distances = values.map { |value| Twins::Utilities.distance(target, value) }

          # The best candidate is the first element with the shortest distance
          values[distances.index(distances.min)]
        else
          # The best candidate is the mode or the first one
          Twins::Utilities.mode(values)
        end
    end

    consolidated.with_indifferent_access
  end
  module_function :consolidate
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'amatch'
|
2
|
+
|
3
|
+
module Twins
  module Utilities

    # Finds the most frequent element of a collection. When several elements
    # are equally frequent, the one appearing first in the collection wins.
    #
    # @param collection [Enumerable]
    # @return [Object, nil] the mode, or nil when the collection is empty
    def mode(collection)
      most_frequent = collection.group_by { |n| n }.values.max_by(&:size)

      # max_by yields nil for an empty collection; return nil instead of
      # failing on nil.first
      most_frequent && most_frequent.first
    end
    module_function :mode

    # Distance between two Strings or two Numerics;
    # the lower the result, the shorter the distance.
    #
    # For Strings the result of string_distance is negated, so that a higher
    # string_distance score ranks as a shorter distance. For Numerics the
    # result is the absolute difference.
    #
    # @param a [String, Numeric]
    # @param b [String, Numeric]
    # @return [Numeric]
    # @raise [ArgumentError] unless both arguments are Strings or both are
    #   Numerics
    def distance(a, b)
      if a.is_a?(String) && b.is_a?(String)
        Twins::Utilities.string_distance(a, b) * -1
      elsif a.is_a?(Numeric) && b.is_a?(Numeric)
        Twins::Utilities.numeric_distance(a, b)
      else
        raise ArgumentError, "Distance can only be determined between two elements of kind 'String' or 'Numeric'"
      end
    end
    module_function :distance

    # Scores two Strings with Amatch::LongestSubsequence.
    # NOTE(review): despite the method name this looks like a similarity
    # score (higher means closer), which is why distance negates it;
    # confirm against the Amatch documentation.
    #
    # @param a [String]
    # @param b [String]
    # @return [Numeric]
    # @raise [ArgumentError] unless both arguments are Strings
    def string_distance(a, b)
      unless a.is_a?(String) && b.is_a?(String)
        raise ArgumentError, "Distance can only be determined between two elements of kind 'String'"
      end

      Amatch::LongestSubsequence.new(a).match(b)
    end
    module_function :string_distance

    # Absolute difference between two Numerics.
    #
    # @param a [Numeric]
    # @param b [Numeric]
    # @return [Numeric]
    # @raise [ArgumentError] unless both arguments are Numerics
    def numeric_distance(a, b)
      unless a.is_a?(Numeric) && b.is_a?(Numeric)
        raise ArgumentError, "Distance can only be determined between two elements of kind 'Numeric'"
      end

      (a - b).abs
    end
    module_function :numeric_distance
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: twins
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Philippe Dionne
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.2.11
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.2.11
|
41
|
+
description: Twin sorts through the small differences between multiple objects and
|
42
|
+
smartly consolidates all of them together.
|
43
|
+
email:
|
44
|
+
- dionne.phil@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/twins.rb
|
50
|
+
- lib/twins/utilities.rb
|
51
|
+
- lib/twins/version.rb
|
52
|
+
homepage: https://github.com/phildionne/twins
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project: "[none]"
|
72
|
+
rubygems_version: 2.2.2
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Smartly merge multiple objects together.
|
76
|
+
test_files: []
|
77
|
+
has_rdoc:
|