identity_parade 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 511f89370f34b95f2bbbf67fcd8b4108f152f81dcb2cdee25dea9faa3f4d0dfa
4
+ data.tar.gz: c5c719d640fffee1a63bd29561141879ddd2a7ec9697b61befa0b9d4f18bf5dc
5
+ SHA512:
6
+ metadata.gz: 52b3957c97a9d951600674773443e7befd39563cbd53a8dc87c5ff38db035904195b8c05c499e31073de29ac5ca7eae2d02a0817937bec6b53d77e314649d622
7
+ data.tar.gz: 68b33f2f09aa840d0ba47966850d865c6d1f11d1212ca08c38108674337c42693be97273863fe6eff4ca3a431753103da463a2752465debd70ed2360f3523656
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ Gemfile.lock
13
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
5
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in identity_parade.gemspec
6
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ # A sample Guardfile
4
+ # More info at https://github.com/guard/guard#readme
5
+
6
+ ## Uncomment and set this to only include directories you want to watch
7
# Restrict watching to the listed directories that actually exist and warn
# about the missing ones. The filter has to run BEFORE the list is handed to
# `directories`; previously `select` was applied to the return value of
# `directories`, so the unfiltered list was always passed in.
# NOTE(review): Guard's `directories` is believed to abort on a non-existent
# directory - confirm against the Guard DSL docs.
present, absent = %w[lib spec].partition { |dir| Dir.exist?(dir) }
absent.each { |dir| UI.warning("Directory #{dir} does not exist") }
directories(present)
14
+
15
+ ## Note: if you are using the `directories` clause above and you are not
16
+ ## watching the project directory ('.'), then you will want to move
17
+ ## the Guardfile to a watched dir and symlink it back, e.g.
18
+ #
19
+ # $ mkdir config
20
+ # $ mv Guardfile config/
21
+ # $ ln -s config/Guardfile .
22
+ #
23
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
24
+
25
+ # Note: The cmd option is now required due to the increasing number of ways
26
+ # rspec may be run, below are examples of the most common uses.
27
+ # * bundler: 'bundle exec rspec'
28
+ # * bundler binstubs: 'bin/rspec'
29
+ # * spring: 'bin/rspec' (This will use spring if running and you have
30
+ # installed the spring binstubs per the docs)
31
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
32
+ # * 'just' rspec: 'rspec'
33
+
34
# Re-run specs through `bundle exec rspec` whenever a watched file changes.
# Each watcher maps a changed path to the spec path that should be run.
guard :rspec, cmd: 'bundle exec rspec' do
  # A change to the spec helper re-runs the whole spec directory.
  watch('spec/spec_helper.rb') { 'spec' }
  # A changed spec file is run as-is.
  watch(%r{^spec/.+_spec\.rb$})
  watch(%r{^app/(.+)\.rb$}) { |hit| "spec/#{hit[1]}_spec.rb" }
  watch(%r{^app/(.*)(\.erb|\.haml|\.slim)$}) { |hit| "spec/#{hit[1]}#{hit[2]}_spec.rb" }
  watch(%r{^lib/(.+)\.rb$}) { |hit| "spec/lib/#{hit[1]}_spec.rb" }
  watch(%r{^app/api/(.+)\.rb$}) { |hit| "spec/api/#{hit[1]}_spec.rb" }
end
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Henning Vogt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # IdentityParade
2
+
3
+
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'identity_parade'
10
+ ```
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install identity_parade
19
+
20
+ ## Usage
21
+
22
+ ``` ruby
23
+ IdentityParade.match(original, possible_duplicate)
24
+ # => 0.95
25
+
26
+ IdentityParade.match?(original, possible_duplicate)
27
+ # => true
28
+ ```
29
+
30
+
31
+ ## Development
32
+
33
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
34
+
35
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
36
+
37
+ ## Contributing
38
+
39
+ Bug reports and pull requests are welcome on GitHub at https://github.com/henvo/identity_parade.
40
+
41
+ ## License
42
+
43
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Define the `spec` rake task.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` without arguments runs the `spec` task.
task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "identity_parade"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
# Open an interactive IRB session with the gem already required above.
require 'irb'
IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
# Abort on errors, on unset variables and on failures inside pipelines.
set -o errexit -o nounset -o pipefail
IFS=$'\n\t'
set -o verbose -o xtrace

bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,40 @@
1
+
2
+ lib = File.expand_path('lib', __dir__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'identity_parade/version'
5
+
6
# Gem specification for identity_parade. The version is read from
# IdentityParade::VERSION, which is required above.
Gem::Specification.new do |spec|
  spec.name          = 'identity_parade'
  spec.version       = IdentityParade::VERSION
  spec.authors       = ['Henning Vogt']
  spec.email         = ['git@henvo.de']

  # The summary used to end mid-sentence ("...near-duplicates of.").
  spec.summary       = 'Find duplicates or near-duplicates of hashes.'
  spec.description   = 'This gem allows you to compare two hashes.'
  spec.homepage      = 'https://github.com/henvo/identity_parade'
  spec.license       = 'MIT'

  # Restrict pushes to rubygems.org; refuse to build on a RubyGems version
  # that cannot express this restriction.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = 'https://rubygems.org'
  else
    raise 'RubyGems 2.0 or newer is required to protect against ' \
          'public gem pushes.'
  end

  # Package every file tracked by git except test, spec and feature files.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_dependency 'activesupport', '~> 5.1'
  spec.add_dependency 'fuzzy-string-match', '~> 1.0', '>= 1.0.1'
  spec.add_dependency 'recursive-open-struct', '~> 1.1'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'pry', '~> 0.11'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'reek', '~> 4.8'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Default settings: no blacklisted keys and a match threshold of 0.5.
# An application can override them by calling IdentityParade.configure
# in the same way.
IdentityParade.configure do |defaults|
  defaults.match_score = 0.5
  defaults.blacklisted_keys = []
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module IdentityParade
  # The initial starting point for a matching. It derives a matcher class
  # from the class of the left value and delegates the scoring to it.
  class Match
    # @param left [Object] the reference value; its class selects the matcher
    # @param right [Object] the value that is compared against +left+
    def initialize(left, right)
      @left = left
      @right = right
    end

    # @return [Numeric, nil] the score of the selected matcher, or +nil+ when
    #   no matcher class exists for the class of the left value
    def score
      # safe_constantize yields nil for unknown constants AND for names that
      # are not valid constant paths (e.g. anonymous classes), where
      # const_defined? would raise a NameError.
      matcher_class = matcher_class_name.safe_constantize
      return unless matcher_class

      matcher_class.new(@left, @right).score
    end

    # @return [Class] the matcher class for the left value
    # @raise [NameError] when that matcher class is not defined
    def matcher
      matcher_class_name.constantize
    end

    # @return [String] the matcher class name derived from the left value's
    #   class, e.g. "IdentityParade::Matchers::StringMatcher" for a String
    def matcher_class_name
      "identity_parade/matchers/#{@left.class}_matcher".classify
    end
  end
end
@@ -0,0 +1,15 @@
1
module IdentityParade
  # The base class for all matchers. It stores the two values to compare;
  # subclasses provide the actual comparison by overriding #score.
  class Matcher
    attr_reader :left, :right

    # @param left [Object] the left value of the comparison
    # @param right [Object] the right value of the comparison
    def initialize(left, right)
      @left = left
      @right = right
    end

    # @raise [NotImplementedError] always; subclasses must override this
    def score
      # Name the offending class so a missing override is easy to locate.
      raise NotImplementedError, "#{self.class} must implement #score"
    end
  end
end
@@ -0,0 +1,14 @@
1
module IdentityParade
  module Matchers
    # Scores two arrays by the share of left elements that are not removed
    # by subtracting the right array (Array#-).
    class ArrayMatcher < Matcher
      # @return [Float, Integer, nil] +nil+ when left is empty, 0 when right
      #   is not an Array, otherwise 1 minus the fraction of left elements
      #   remaining after subtracting right
      def score
        if left.empty?
          nil
        elsif right.is_a?(Array)
          remaining = left - right
          1 - (remaining.size / left.size.to_f)
        else
          0
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module IdentityParade
  module Matchers
    # Matcher selected for Float values on the left; it adds nothing to
    # NumericMatcher and exists so the class-name lookup finds a matcher.
    class FloatMatcher < Matchers::NumericMatcher
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module IdentityParade
  module Matchers
    # This matcher checks the similarity of two hashes. For this purpose, it
    # iterates over all entries of the left hash and scores every value
    # against the value stored under the same key in the right hash.
    class HashMatcher < Matcher
      # @return [Float, nil] the mean of all sub scores, or +nil+ when there
      #   is nothing to score (empty left hash, only blacklisted keys, or
      #   only values without a matcher)
      def score
        # Compute once; also avoids 0 / 0.0 (NaN) for an empty score list.
        scores = sub_scores
        return if scores.empty?

        scores.sum / scores.size.to_f
      end

      # @return [Array<Numeric>] one score per non-blacklisted key of the
      #   left hash; keys missing on the right count as 0, values without a
      #   score (+nil+) are dropped
      def sub_scores
        ignored = blacklisted_keys
        # A right side that is not a Hash shares no keys with the left side.
        comparable = right.is_a?(Hash)

        left.map do |key, value|
          next nil if ignored.include?(key.to_s)

          next 0 unless comparable && right.key?(key)

          IdentityParade::Match.new(value, right[key]).score
        end.compact
      end

      # @return [Array<String>] the list of blacklisted keys, normalized to
      #   strings so that symbol entries in the configuration match as well
      # :reek:UtilityFunction because it's a shorthand
      def blacklisted_keys
        Array(IdentityParade.config.blacklisted_keys).map(&:to_s)
      end

      # @return [Array<String>] the list of permitted keys
      def permitted_keys
        left.keys.map(&:to_s) - blacklisted_keys
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module IdentityParade
  module Matchers
    # Matcher selected for Integer values on the left; it adds nothing to
    # NumericMatcher and exists so the class-name lookup finds a matcher.
    class IntegerMatcher < Matchers::NumericMatcher
    end
  end
end
@@ -0,0 +1,25 @@
1
module IdentityParade
  module Matchers
    # This matcher checks the similarity of two numerics. Both values are
    # converted with #to_f and scored by the ratio of the smaller to the
    # larger magnitude.
    class NumericMatcher < Matcher
      # @return [Integer, Float] 1 for equal values; 0 when right has no
      #   #to_f, when the signs differ or when exactly one value is zero;
      #   otherwise smaller magnitude divided by larger magnitude
      def score
        return 0 unless right.respond_to?(:to_f)
        return 1 if max == min
        # min != max here, so this covers opposite signs and zero vs.
        # non-zero; a plain min / max would be negative in these cases.
        return 0 if min <= 0 && max >= 0

        # Use magnitudes so two negative values also score within 0..1.
        smaller, larger = floats.map(&:abs).minmax
        smaller / larger
      end

      # @return [Float] the larger of both values
      def max
        floats.max
      end

      # @return [Float] the smaller of both values
      def min
        floats.min
      end

      # @return [Array<Float>] left and right converted with #to_f
      def floats
        [left.to_f, right.to_f]
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require 'fuzzystringmatch'
2
+
3
module IdentityParade
  module Matchers
    # Scores two values by passing their string representations to the
    # Jaro-Winkler implementation of the fuzzy-string-match gem.
    class StringMatcher < Matcher
      # @return the result of JaroWinkler#getDistance for left and right,
      #   both converted with #to_s
      def score
        engine = jarow
        engine.getDistance(left.to_s, right.to_s)
      end

      # Builds a new Jaro-Winkler object on every call.
      # @return the object created by JaroWinkler.create(:native)
      def jarow
        FuzzyStringMatch::JaroWinkler.create(:native)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module IdentityParade
  # Version string of the gem; the gemspec reads it as spec.version.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,38 @@
1
+ require 'recursive-open-struct'
2
+ require 'active_support/all'
3
+
4
+ require 'identity_parade/version'
5
+ require 'identity_parade/match'
6
+ require 'identity_parade/matcher'
7
+ require 'identity_parade/matchers/hash_matcher'
8
+ require 'identity_parade/matchers/string_matcher'
9
+ require 'identity_parade/matchers/array_matcher'
10
+ require 'identity_parade/matchers/numeric_matcher'
11
+ require 'identity_parade/matchers/integer_matcher'
12
+ require 'identity_parade/matchers/float_matcher'
13
+
14
# This gem allows comparing two values and scoring how similar they are.
module IdentityParade
  class << self
    # :reek:Attribute because we want to share the config
    # @return [RecursiveOpenStruct] the shared, lazily created configuration
    def config
      @config ||= RecursiveOpenStruct.new
    end

    # Yields the shared configuration so settings can be assigned on it.
    def configure
      yield(config)
    end
  end

  module_function

  # @param left [Object] the reference value
  # @param right [Object] the possible duplicate
  # @return [Numeric, nil] the similarity score, or +nil+ when no score
  #   could be determined
  def match(left, right)
    IdentityParade::Match.new(left, right).score
  end

  # @param left [Object] the reference value
  # @param right [Object] the possible duplicate
  # @return [Boolean] whether the score reaches the configured match_score;
  #   +false+ when no score could be determined
  def match?(left, right)
    score = match(left, right)
    # match returns nil for values without a score; nil >= x would raise.
    return false if score.nil?

    score >= IdentityParade.config.match_score
  end
end
37
+
38
+ require 'identity_parade/initializer'
metadata ADDED
@@ -0,0 +1,185 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: identity_parade
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Henning Vogt
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-04-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fuzzy-string-match
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 1.0.1
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '1.0'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.0.1
47
+ - !ruby/object:Gem::Dependency
48
+ name: recursive-open-struct
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.1'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '1.1'
61
+ - !ruby/object:Gem::Dependency
62
+ name: bundler
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.16'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.16'
75
+ - !ruby/object:Gem::Dependency
76
+ name: pry
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '0.11'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '0.11'
89
+ - !ruby/object:Gem::Dependency
90
+ name: rake
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '10.0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '10.0'
103
+ - !ruby/object:Gem::Dependency
104
+ name: reek
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '4.8'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '4.8'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rspec
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '3.0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '3.0'
131
+ description: This gem allows to compare two hashes.
132
+ email:
133
+ - git@henvo.de
134
+ executables: []
135
+ extensions: []
136
+ extra_rdoc_files: []
137
+ files:
138
+ - ".gitignore"
139
+ - ".rspec"
140
+ - ".travis.yml"
141
+ - Gemfile
142
+ - Guardfile
143
+ - LICENSE.txt
144
+ - README.md
145
+ - Rakefile
146
+ - bin/console
147
+ - bin/setup
148
+ - identity_parade.gemspec
149
+ - lib/identity_parade.rb
150
+ - lib/identity_parade/initializer.rb
151
+ - lib/identity_parade/match.rb
152
+ - lib/identity_parade/matcher.rb
153
+ - lib/identity_parade/matchers/array_matcher.rb
154
+ - lib/identity_parade/matchers/float_matcher.rb
155
+ - lib/identity_parade/matchers/hash_matcher.rb
156
+ - lib/identity_parade/matchers/integer_matcher.rb
157
+ - lib/identity_parade/matchers/numeric_matcher.rb
158
+ - lib/identity_parade/matchers/string_matcher.rb
159
+ - lib/identity_parade/version.rb
160
+ homepage: https://github.com/henvo/identity_parade
161
+ licenses:
162
+ - MIT
163
+ metadata:
164
+ allowed_push_host: https://rubygems.org
165
+ post_install_message:
166
+ rdoc_options: []
167
+ require_paths:
168
+ - lib
169
+ required_ruby_version: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ required_rubygems_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: '0'
179
+ requirements: []
180
+ rubyforge_project:
181
+ rubygems_version: 2.7.6
182
+ signing_key:
183
+ specification_version: 4
184
+ summary: Find duplicates or near-duplicates of.
185
+ test_files: []