twins 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/twins.rb +52 -0
- data/lib/twins/utilities.rb +46 -0
- data/lib/twins/version.rb +3 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 20e9e6421c19c6abccbb7f121b1ba3a36ae56a7e
|
4
|
+
data.tar.gz: 660fdd54b8e6acb0d2a947c329e0293d147919af
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e2dababf31cb5d844ae4ff9588758e27538fad29fd086354a6f43e66640647f0c8339c838a17c34515339bd6191286ea6618e4fd042a82bee70140980c905710
|
7
|
+
data.tar.gz: fd4442cd82dbc5d41df4d605d724c03c3070e8e363dd4b0b97b54454e298b059289de248e1c6284b6109efdb1812f580f2977af6bb48a00dbb1da2c08b05aefc
|
data/lib/twins.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'active_support/core_ext'
|
2
|
+
require 'twins/utilities'
|
3
|
+
|
4
|
+
module Twins

  # Merges a collection of similar elements into a single Hash, picking one
  # "best" value per key.
  #
  # Elements that are not Hashes are first converted to Hashes keyed by their
  # instance variable names (with the leading "@" removed). For each key:
  #
  # * when every value found for the key is a Hash, those values are
  #   consolidated recursively (a Hash given as the key's priority is passed
  #   down as the nested call's +:priority+);
  # * when a priority is given for the key, the first value with the shortest
  #   Twins::Utilities.distance to that priority wins;
  # * otherwise the most frequent value wins (the first one on ties).
  #
  # @param collection [Enumerable] A collection of Hash objects, or of objects
  #   that all share the class of the first element
  # @param options [Hash]
  # @option options [Hash] :priority Preferred value for each key; values closest
  #   to it are selected
  # @return [Hash, Nil] A Hash with indifferent access, or nil when the
  #   collection holds no truthy element
  # @raise [ArgumentError] When the elements are neither all Hashes nor all
  #   instances of the first element's class
  def consolidate(collection, options = {})
    return nil unless collection.any?

    if collection.all? { |e| e.is_a?(Hash) }
      # noop
    elsif collection.all? { |e| e.is_a?(collection.first.class) }
      # Represent each object as a Hash of its instance variables
      collection = collection.map do |element|
        Hash[element.instance_variables.map { |name| [name.to_s.sub(/\A@/, ''), element.instance_variable_get(name)] }]
      end
    else
      raise ArgumentError, "The collection's elements must all be of the same Class"
    end

    options = options.with_indifferent_access
    consolidated = Hash.new

    collection.each do |hash|
      hash.each_key do |key|
        # A key's result depends on the whole collection, not on the current
        # element, so it is resolved only once. This also keeps a recursively
        # consolidated nested Hash from being overwritten on a later pass.
        next if consolidated.has_key?(key)

        # Filter elements without a given key to avoid unintentionally nil values
        values = collection.select { |element| element.has_key?(key) }.map { |element| element[key] }
        priority = options[:priority].try(:[], key)

        consolidated[key] =
          if values.all? { |value| value.is_a?(Hash) }
            # Recursively consolidate nested hashes, forwarding nested priorities
            nested_options = priority.is_a?(Hash) ? { priority: priority } : {}
            consolidate(values, nested_options)
          elsif priority
            # Compute each element's distance from the given priority
            distances = values.map { |candidate| Twins::Utilities.distance(priority, candidate) }

            # The best candidate is the first element with the shortest distance
            values[distances.index(distances.min)]
          else
            # The best candidate is the mode or the first one
            Twins::Utilities.mode(values)
          end
      end
    end

    consolidated.with_indifferent_access
  end
  module_function :consolidate
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'amatch'
|
2
|
+
|
3
|
+
module Twins
  # Stateless helpers used to choose the best candidate among several values.
  module Utilities

    # Finds the most frequent element of a collection. When several elements
    # are tied, the one that appears first in the collection is returned.
    #
    # @param collection [Enumerable]
    # @return [Object, nil] The mode, or nil when the collection is empty
    def mode(collection)
      largest_group = collection.group_by { |n| n }.values.max_by(&:size)

      # max_by yields nil for an empty collection; avoid calling #first on it
      largest_group && largest_group.first
    end
    module_function :mode

    # Distance for Strings and Numerics:
    # the lower the result, the shorter the distance
    #
    # @param a [String, Numeric]
    # @param b [String, Numeric]
    # @return [Numeric]
    # @raise [StandardError] Unless both arguments are Strings or both are Numerics
    def distance(a, b)
      if a.is_a?(String) && b.is_a?(String)
        # string_distance grows as the strings get more alike (per the amatch
        # documentation), so it is negated to keep "lower means closer"
        Twins::Utilities.string_distance(a, b) * -1
      elsif a.is_a?(Numeric) && b.is_a?(Numeric)
        Twins::Utilities.numeric_distance(a, b)
      else
        raise StandardError, "Distance can only be determined between two elements of kind 'String' or 'Numeric'"
      end
    end
    module_function :distance

    # Result of matching b against a with Amatch::LongestSubsequence.
    # NOTE(review): per the amatch documentation this is the length of the
    # longest common subsequence, i.e. a similarity score rather than a
    # distance; Twins::Utilities.distance negates it.
    #
    # @param a [String]
    # @param b [String]
    # @return [Numeric]
    # @raise [StandardError] Unless both arguments are Strings
    def string_distance(a, b)
      raise StandardError, "Distance can only be determined between two elements of kind 'String'" unless a.is_a?(String) && b.is_a?(String)
      Amatch::LongestSubsequence.new(a).match(b)
    end
    module_function :string_distance

    # Absolute difference between two numbers.
    #
    # @param a [Numeric]
    # @param b [Numeric]
    # @return [Numeric]
    # @raise [StandardError] Unless both arguments are Numerics
    def numeric_distance(a, b)
      raise StandardError, "Distance can only be determined between two elements of kind 'Numeric'" unless a.is_a?(Numeric) && b.is_a?(Numeric)
      (a - b).abs
    end
    module_function :numeric_distance
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: twins
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Philippe Dionne
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.2.11
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.2.11
|
41
|
+
description: Twins sorts through the small differences between multiple objects and
|
42
|
+
smartly consolidates all of them together.
|
43
|
+
email:
|
44
|
+
- dionne.phil@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- lib/twins.rb
|
50
|
+
- lib/twins/utilities.rb
|
51
|
+
- lib/twins/version.rb
|
52
|
+
homepage: https://github.com/phildionne/twins
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project: "[none]"
|
72
|
+
rubygems_version: 2.2.2
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Smartly merge multiple objects together.
|
76
|
+
test_files: []
|
77
|
+
has_rdoc:
|