activerecord-find_duplicates 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ea01f085526639dec081cbac6e4c7ef60bd0d2116a479cd5cd9c338f4c1c6ee1
4
+ data.tar.gz: af82766b89cc6c5a07b6d48b10b87b49195e4e960521fe85db5d2bebcc2abe44
5
+ SHA512:
6
+ metadata.gz: b54fb2ace3992cf22892abda4f791a26ad2a3956a16a4f8d918b2ec6e57873fa5c56f0305d642935423c10564fbbc59b61973ab9c0d5313066118dff8eeb53c3
7
+ data.tar.gz: 7fd2995428e5d6cdd8948788e964b0888bc374a4f06f67e6e75029e70588a989a7958cd20d4167c211a91c56d9f9269a257f39d2b40f01b914b57d2f87e95108
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ Gemfile.lock
13
+ spec/database.yml
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.3
7
+ before_install: gem install bundler -v 2.0.2
File without changes
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in activerecord-find_duplicates.gemspec
4
+ gemspec
data/License ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (c) 2019 Tyler Rick
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
4
+ associated documentation files (the "Software"), to deal in the Software without restriction,
5
+ including without limitation the rights to use, copy, modify, merge, publish, distribute,
6
+ sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
7
+ furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or substantial
10
+ portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18
+ SOFTWARE.
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,64 @@
1
+ # ActiveRecord::FindDuplicates
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's `Gemfile`:
6
+
7
+ ```ruby
8
+ gem 'activerecord-find_duplicates'
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ General usage is:
14
+ ```ruby
15
+ Model.find_duplicates(on: attr_name)
16
+ ```
17
+
18
+ Use `min:` to set the minimum number of records that must share a value for them to count as duplicates (the default is 2).
19
+
20
+ Example: To find all user records that have a duplicate email address:
21
+ ```ruby
22
+ User.find_duplicates(on: :email)
23
+ # => [#<User:0x000055e7916ff3c8 id: 1, email: "a@a.com">,
24
+ #<User:0x000055e7916ff1e8 id: 2, email: "a@a.com">]
25
+
26
+ ```
27
+
28
+ It is often useful to group the results by the duplicated value, so that each value maps to the set of records sharing it:
29
+ ```ruby
30
+ User.find_duplicates(on: :email).group_by(&:email)
31
+ # => {"a@a.com"=>
32
+ [#<User:0x000055cc1915f0c8 id: 1, email: "a@a.com">,
33
+ #<User:0x000055cc1915ef38 id: 2, email: "a@a.com">]}
34
+ ```
35
+
36
+ You can also chain it on other relations. For example, to find all duplicates *except* those with a null value:
37
+ ```ruby
38
+ User.where('email is not null').find_duplicates(on: :email)
39
+ ```
40
+
41
+ ## Possible uses
42
+
43
+ You discover that a column that should be unique actually contains duplicate values, even though the model has a uniqueness validation:
44
+ ```ruby
45
+ validates :email, uniqueness: true
46
+ ```
47
+ Such a validation is subject to race conditions. The only sure way to prevent duplicate values on a column is to add a unique index/constraint and let your *database* engine enforce the constraint.
48
+
49
+ But before you can add a migration that adds that index, you have to remove all duplicates or you will get:
50
+ ```
51
+ PG::UniqueViolation: ERROR: could not create unique index "index_users_on_email"
52
+ DETAIL: Key (email)=(user@example.com) is duplicated.
53
+ ```
54
+
55
+
56
+ ## Development
57
+
58
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
59
+
60
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
61
+
62
+ ## Contributing
63
+
64
+ Bug reports and pull requests are welcome on GitHub at https://github.com/TylerRick/activerecord-find_duplicates.
@@ -0,0 +1,37 @@
1
+ lib = File.expand_path("lib", __dir__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "activerecord/find_duplicates/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "activerecord-find_duplicates"
7
+ spec.version = Activerecord::FindDuplicates.version
8
+ spec.authors = ["Tyler Rick"]
9
+ spec.email = ["tyler@tylerrick.com"]
10
+ spec.license = "MIT"
11
+
12
+ spec.summary = %q{Easily find all duplicate records)}
13
+ spec.description = spec.summary
14
+ spec.homepage = "https://github.com/TylerRick/merge_params"
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+ spec.metadata["changelog_uri"] = "#{spec.metadata["source_code_uri"]}/blob/master/Changelog.md"
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
24
+ end
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.required_ruby_version = ">= 2.0.0"
30
+ spec.add_dependency "activerecord", [">= 4.2", "< 5.3"]
31
+
32
+ spec.add_development_dependency "bundler", "~> 2.0"
33
+ spec.add_development_dependency "rake"
34
+ spec.add_development_dependency "rspec", "~> 3.0"
35
+ spec.add_development_dependency "sqlite3"
36
+ spec.add_development_dependency "pg"
37
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "activerecord/find_duplicates"
5
+
6
+ # Add fixtures and/or initialization code here to make experimenting with the
7
+ # gem easier. You can also swap IRB for a different console, if you like.
8
+
9
+ # (To use Pry instead, add pry to your Gemfile and uncomment the next two lines.)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail  # abort on errors, on unset variables, and on a failure anywhere in a pipeline
3
+ IFS=$'\n\t'  # split words on newlines and tabs only, never on spaces
4
+ set -vx  # echo each command, both as read and after expansion
5
+
6
+ bundle install  # install the dependencies declared for the bundle
7
+
8
+ # Add any other automated setup steps here.
@@ -0,0 +1,24 @@
1
+ require 'active_record'
2
+
3
+ module ActiveRecord::FindDuplicates
4
+ extend ActiveSupport::Concern
5
+
6
+ module ClassMethods
7
+ # Examples:
8
+ # User.find_duplicates(on: :email).group_by(&:email)
9
+ # User.where('email is not null').find_duplicates(on: :email)
10
+ def find_duplicates(on:, min: 2)
11
+ attr_name = on
12
+ values = group(attr_name).having(
13
+ # If I could figure out how to do "count(*)".gteq(min) with Arel, then it would work with sqlite
14
+ arel_table[Arel.star].count.gteq(min)
15
+ ).count.keys
16
+
17
+ where(attr_name => values)
18
+ end
19
+ end
20
+ end
21
+
22
+ class ActiveRecord::Base
23
+ include ActiveRecord::FindDuplicates
24
+ end
@@ -0,0 +1,7 @@
1
+ module Activerecord
2
+ module FindDuplicates
3
+ def self.version
4
+ "0.1.0"
5
+ end
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-find_duplicates
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Tyler Rick
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '5.3'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.2'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.3'
33
+ - !ruby/object:Gem::Dependency
34
+ name: bundler
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.0'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rspec
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '3.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: sqlite3
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: pg
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ description: Easily find all duplicate records)
104
+ email:
105
+ - tyler@tylerrick.com
106
+ executables: []
107
+ extensions: []
108
+ extra_rdoc_files: []
109
+ files:
110
+ - ".gitignore"
111
+ - ".rspec"
112
+ - ".travis.yml"
113
+ - Changelog.md
114
+ - Gemfile
115
+ - License
116
+ - Rakefile
117
+ - Readme.md
118
+ - activerecord-find_duplicates.gemspec
119
+ - bin/console
120
+ - bin/setup
121
+ - lib/activerecord/find_duplicates.rb
122
+ - lib/activerecord/find_duplicates/version.rb
123
+ homepage: https://github.com/TylerRick/merge_params
124
+ licenses:
125
+ - MIT
126
+ metadata:
127
+ homepage_uri: https://github.com/TylerRick/merge_params
128
+ source_code_uri: https://github.com/TylerRick/merge_params
129
+ changelog_uri: https://github.com/TylerRick/merge_params/blob/master/Changelog.md
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 2.0.0
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubygems_version: 3.0.3
146
+ signing_key:
147
+ specification_version: 4
148
+ summary: Easily find all duplicate records)
149
+ test_files: []