activerecord-find_duplicates 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ea01f085526639dec081cbac6e4c7ef60bd0d2116a479cd5cd9c338f4c1c6ee1
4
+ data.tar.gz: af82766b89cc6c5a07b6d48b10b87b49195e4e960521fe85db5d2bebcc2abe44
5
+ SHA512:
6
+ metadata.gz: b54fb2ace3992cf22892abda4f791a26ad2a3956a16a4f8d918b2ec6e57873fa5c56f0305d642935423c10564fbbc59b61973ab9c0d5313066118dff8eeb53c3
7
+ data.tar.gz: 7fd2995428e5d6cdd8948788e964b0888bc374a4f06f67e6e75029e70588a989a7958cd20d4167c211a91c56d9f9269a257f39d2b40f01b914b57d2f87e95108
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ Gemfile.lock
13
+ spec/database.yml
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.6.3
7
+ before_install: gem install bundler -v 2.0.2
File without changes
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Runtime and development dependencies are declared in activerecord-find_duplicates.gemspec
4
+ gemspec
data/License ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (c) 2019 Tyler Rick
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
4
+ associated documentation files (the "Software"), to deal in the Software without restriction,
5
+ including without limitation the rights to use, copy, modify, merge, publish, distribute,
6
+ sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
7
+ furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or substantial
10
+ portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18
+ SOFTWARE.
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,64 @@
1
+ # ActiveRecord::FindDuplicates
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's `Gemfile`:
6
+
7
+ ```ruby
8
+ gem 'activerecord-find_duplicates'
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ General usage is:
14
+ ```ruby
15
+ Model.find_duplicates(on: attr_name)
16
+ ```
17
+
18
+ Use `min:` to set the minimum number of records that must share a value for them to count as duplicates (default: 2).
19
+
20
+ Example: To find all user records that have a duplicate email address:
21
+ ```ruby
22
+ User.find_duplicates(on: :email)
23
+ # => [#<User:0x000055e7916ff3c8 id: 1, email: "a@a.com">,
24
+ #<User:0x000055e7916ff1e8 id: 2, email: "a@a.com">]
25
+
26
+ ```
27
+
28
+ Often it is useful to group the results by the duplicated value, so that each value maps to the set of records that share it:
29
+ ```ruby
30
+ User.find_duplicates(on: :email).group_by(&:email)
31
+ # => {"a@a.com"=>
32
+ [#<User:0x000055cc1915f0c8 id: 1, email: "a@a.com">,
33
+ #<User:0x000055cc1915ef38 id: 2, email: "a@a.com">]}
34
+ ```
35
+
36
+ You can also chain it on other relations. For example, to find all duplicates *except* those with a null value:
37
+ ```ruby
38
+ User.where('email is not null').find_duplicates(on: :email)
39
+ ```
40
+
41
+ ## Possible uses
42
+
43
+ You realize that a certain column should be unique but actually contains duplicate values, even though you had a uniqueness validation on the model:
44
+ ```ruby
45
+ validates :email, uniqueness: true
46
+ ```
47
+ Such a validation is subject to race conditions. The only sure way to prevent duplicate values on a column is to add a unique index/constraint and let your *database* engine enforce the constraint.
48
+
49
+ But before you can add a migration that adds that index, you have to remove all duplicates or you will get:
50
+ ```
51
+ PG::UniqueViolation: ERROR: could not create unique index "index_users_on_email"
52
+ DETAIL: Key (email)=(user@example.com) is duplicated.
53
+ ```
54
+
55
+
56
+ ## Development
57
+
58
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
59
+
60
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
61
+
62
+ ## Contributing
63
+
64
+ Bug reports and pull requests are welcome on GitHub at https://github.com/TylerRick/activerecord-find_duplicates.
@@ -0,0 +1,37 @@
1
+ lib = File.expand_path("lib", __dir__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "activerecord/find_duplicates/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "activerecord-find_duplicates"
7
+ spec.version = Activerecord::FindDuplicates.version
8
+ spec.authors = ["Tyler Rick"]
9
+ spec.email = ["tyler@tylerrick.com"]
10
+ spec.license = "MIT"
11
+
12
+ spec.summary = %q{Easily find all duplicate records)}
13
+ spec.description = spec.summary
14
+ spec.homepage = "https://github.com/TylerRick/merge_params"
15
+
16
+ spec.metadata["homepage_uri"] = spec.homepage
17
+ spec.metadata["source_code_uri"] = spec.homepage
18
+ spec.metadata["changelog_uri"] = "#{spec.metadata["source_code_uri"]}/blob/master/Changelog.md"
19
+
20
+ # Specify which files should be added to the gem when it is released.
21
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
22
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
23
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
24
+ end
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.required_ruby_version = ">= 2.0.0"
30
+ spec.add_dependency "activerecord", [">= 4.2", "< 5.3"]
31
+
32
+ spec.add_development_dependency "bundler", "~> 2.0"
33
+ spec.add_development_dependency "rake"
34
+ spec.add_development_dependency "rspec", "~> 3.0"
35
+ spec.add_development_dependency "sqlite3"
36
+ spec.add_development_dependency "pg"
37
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "activerecord/find_duplicates" # load this gem so it is available in the session
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__) # start the interactive IRB prompt
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail  # stop on a failing command, an unset variable, or a failing pipeline stage
3
+ IFS=$'\n\t'  # split words on newlines and tabs only, not on spaces
4
+ set -vx  # echo each line as it is read and each command as it is executed
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,24 @@
1
+ require 'active_record'
2
+
3
+ module ActiveRecord::FindDuplicates
4
+ extend ActiveSupport::Concern
5
+
6
+ module ClassMethods
7
+ # Examples:
8
+ # User.find_duplicates(on: :email).group_by(&:email)
9
+ # User.where('email is not null').find_duplicates(on: :email)
10
+ def find_duplicates(on:, min: 2)
11
+ attr_name = on
12
+ values = group(attr_name).having(
13
+ # If I could figure out how to do "count(*)".gteq(min) with Arel, then it would work with sqlite
14
+ arel_table[Arel.star].count.gteq(min)
15
+ ).count.keys
16
+
17
+ where(attr_name => values)
18
+ end
19
+ end
20
+ end
21
+
22
+ class ActiveRecord::Base
23
+ include ActiveRecord::FindDuplicates
24
+ end
@@ -0,0 +1,7 @@
1
+ module Activerecord
2
+ module FindDuplicates
3
+ def self.version
4
+ "0.1.0"
5
+ end
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-find_duplicates
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Tyler Rick
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-07-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '5.3'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.2'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '5.3'
33
+ - !ruby/object:Gem::Dependency
34
+ name: bundler
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.0'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rspec
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '3.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: sqlite3
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: pg
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ description: Easily find all duplicate records)
104
+ email:
105
+ - tyler@tylerrick.com
106
+ executables: []
107
+ extensions: []
108
+ extra_rdoc_files: []
109
+ files:
110
+ - ".gitignore"
111
+ - ".rspec"
112
+ - ".travis.yml"
113
+ - Changelog.md
114
+ - Gemfile
115
+ - License
116
+ - Rakefile
117
+ - Readme.md
118
+ - activerecord-find_duplicates.gemspec
119
+ - bin/console
120
+ - bin/setup
121
+ - lib/activerecord/find_duplicates.rb
122
+ - lib/activerecord/find_duplicates/version.rb
123
+ homepage: https://github.com/TylerRick/merge_params
124
+ licenses:
125
+ - MIT
126
+ metadata:
127
+ homepage_uri: https://github.com/TylerRick/merge_params
128
+ source_code_uri: https://github.com/TylerRick/merge_params
129
+ changelog_uri: https://github.com/TylerRick/merge_params/blob/master/Changelog.md
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 2.0.0
139
+ required_rubygems_version: !ruby/object:Gem::Requirement
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ requirements: []
145
+ rubygems_version: 3.0.3
146
+ signing_key:
147
+ specification_version: 4
148
+ summary: Easily find all duplicate records)
149
+ test_files: []