hash_comparator 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d80094bdb97ca8548a489934126643430d81b52d78c32e39f49eb543a8f5094e
4
- data.tar.gz: 5edab8fa1f3474079f95f9c7eb62143493262aa79125eb1e37b44ff80149910c
3
+ metadata.gz: 54c03d1e58cabe164b3df964dac46cee9f437b0071c0e0f5fca0a05f2188d49e
4
+ data.tar.gz: f9d2999965d9b2588f4ebb90b389596e34c2435f97bb17517300b938caf28209
5
5
  SHA512:
6
- metadata.gz: 215206512d43dc07b147f8d930539e557709219727c67d04a85ec738cf7b1832dc911a7d1393164f2eff3b9967d32db1f7a019f1d8e13c99ef87b3b1a6cf10aa
7
- data.tar.gz: 629862e4672fb94ab7faabee76f7d72e28b5d299341340969be0fcca5f6a6cd3f134d8e1c096f16cc82b5c53c71746e18f51aefc03b54a92c730dc8cde222c6a
6
+ metadata.gz: 8449d47c0c12cc89e2fbb3827790a6103d81d2b2f5b64b863840a284243f13622f24b69e621153970942289b348a186f12f8578f1d3543aa89773cc9baf2814a
7
+ data.tar.gz: 7688d1f04f962e17aaea4c6591899627432842dec49e3e62ac47bd652c2a550c37b9b08fa755de973e6a55f4e2001e41b85dcc72b2a352961af3d918b1afedf2
@@ -6,23 +6,71 @@ require 'hash_comparator/reverse_matcher'
6
6
  module HashComparator
7
7
  module Emails
8
8
  class Analyzer
9
- def self.execute(hash_function:, subject_raw_emails:, target_hashed_emails:, remove_username: false)
10
- if remove_username
11
- subject_raw_emails = Parser.parse(subject_raw_emails)
9
+ def self.find_common_human_readable(hash_function:, subject_raw_emails:, target_hashed_emails:, options: {})
10
+ new(
11
+ hash_function: hash_function,
12
+ subject_raw_emails: subject_raw_emails,
13
+ target_hashed_emails: target_hashed_emails,
14
+ options: options
15
+ ).find_common_human_readable
16
+ end
17
+
18
+ def self.find_common_hashes(hash_function:, subject_raw_emails:, target_hashed_emails:, options: {})
19
+ new(
20
+ hash_function: hash_function,
21
+ subject_raw_emails: subject_raw_emails,
22
+ target_hashed_emails: target_hashed_emails,
23
+ options: options
24
+ ).find_common_hashes
25
+ end
26
+
27
+ def initialize(hash_function:, subject_raw_emails:, target_hashed_emails:, options:)
28
+ @hash_function = hash_function
29
+ @subject_raw_emails = subject_raw_emails
30
+ @target_hashed_emails = target_hashed_emails
31
+ @options = options
32
+ @subject_hashed_emails = []
33
+ end
34
+
35
+ attr_accessor :hash_function, :subject_raw_emails, :subject_hashed_emails, :target_hashed_emails, :options
36
+
37
+ def find_common_human_readable
38
+ common_hashes = find_common_hashes
39
+ reverse_match(common_hashes)
40
+ end
41
+
42
+ def find_common_hashes
43
+ parse
44
+ hash
45
+ compare
46
+ end
47
+
48
+ private
49
+
50
+ def parse
51
+ if options[:parsing]
52
+ @subject_raw_emails = Parser.parse(subject_raw_emails, options[:parsing])
12
53
  end
54
+ end
13
55
 
14
- subject_hashed_emails = Hasher.hash(
56
+ def hash
57
+ @subject_hashed_emails = Hasher.hash(
15
58
  hash_function: hash_function,
16
59
  human_readable_items: subject_raw_emails
17
60
  )
18
- hashed_common_emails = SetComparator.calculate_overlap(
61
+ end
62
+
63
+ def compare
64
+ SetComparator.calculate_overlap(
19
65
  subject_items: subject_hashed_emails,
20
66
  target_items: target_hashed_emails
21
67
  )
68
+ end
22
69
 
70
+ def reverse_match(common_hashes)
23
71
  ReverseMatcher.execute(
24
72
  hash_function: hash_function,
25
- hashed_items: hashed_common_emails,
73
+ hashed_items: common_hashes,
26
74
  human_readable_items: subject_raw_emails
27
75
  )
28
76
  end
@@ -1,11 +1,156 @@
1
1
  module HashComparator
2
2
  module Emails
3
3
  class Parser
4
- def self.parse(emails)
5
- emails.map do |email|
4
+ def self.parse(emails, options = {})
5
+ new(emails, options).parse
6
+ end
7
+
8
+ def initialize(emails, options)
9
+ @emails = emails
10
+ @options = options
11
+ end
12
+
13
+ attr_accessor :emails, :options
14
+
15
+ def parse
16
+ remove_usernames if options[:remove_usernames]
17
+ remove_generic_domains if options[:remove_generic_domains]
18
+
19
+ emails
20
+ end
21
+
22
+ private
23
+
24
+ def remove_usernames
25
+ @emails = emails.map do |email|
6
26
  email.strip.split('@')[1]
7
27
  end
8
28
  end
29
+
30
+ def remove_generic_domains
31
+ @emails = emails.each_with_object([]) do |email, list|
32
+ domain = email.split('@')[-1]
33
+ list << email unless GENERIC_EMAIL_DOMAINS.include?(domain)
34
+ end
35
+ end
9
36
  end
37
+
38
+ # Email Domains
39
+ # Adapted from https://github.com/mailcheck/mailcheck/wiki/List-of-Popular-Domains
40
+ GENERIC_EMAIL_DOMAINS = %w[
41
+ aol.com
42
+ att.net
43
+ comcast.net
44
+ facebook.com
45
+ gmail.com
46
+ gmx.com
47
+ googlemail.com
48
+ google.com
49
+ hotmail.com
50
+ hotmail.co.uk
51
+ mac.com
52
+ me.com
53
+ mail.com
54
+ msn.com
55
+ live.com
56
+ sbcglobal.net
57
+ verizon.net
58
+ yahoo.com
59
+ yahoo.co.uk
60
+ email.com
61
+ games.com
62
+ gmx.net
63
+ hush.com
64
+ hushmail.com
65
+ icloud.com
66
+ inbox.com
67
+ lavabit.com
68
+ love.com
69
+ outlook.com
70
+ pobox.com
71
+ rocketmail.com
72
+ safe-mail.net
73
+ wow.com
74
+ ygm.com
75
+ ymail.com
76
+ zoho.com
77
+ fastmail.fm
78
+ bellsouth.net
79
+ charter.net
80
+ comcast.net
81
+ cox.net
82
+ earthlink.net
83
+ juno.com
84
+ btinternet.com
85
+ virginmedia.com
86
+ blueyonder.co.uk
87
+ freeserve.co.uk
88
+ live.co.uk
89
+ ntlworld.com
90
+ o2.co.uk
91
+ orange.net
92
+ sky.com
93
+ talktalk.co.uk
94
+ tiscali.co.uk
95
+ virgin.net
96
+ wanadoo.co.uk
97
+ bt.com
98
+ sina.com
99
+ qq.com
100
+ naver.com
101
+ hanmail.net
102
+ daum.net
103
+ nate.com
104
+ yahoo.co.jp
105
+ yahoo.co.kr
106
+ yahoo.co.id
107
+ yahoo.co.in
108
+ yahoo.com.sg
109
+ yahoo.com.ph
110
+ hotmail.fr
111
+ live.fr
112
+ laposte.net
113
+ yahoo.fr
114
+ wanadoo.fr
115
+ orange.fr
116
+ gmx.fr
117
+ sfr.fr
118
+ neuf.fr
119
+ free.fr
120
+ gmx.de
121
+ hotmail.de
122
+ live.de
123
+ online.de
124
+ t-online.de
125
+ web.de
126
+ yahoo.de
127
+ mail.ru
128
+ rambler.ru
129
+ yandex.ru
130
+ ya.ru
131
+ list.ru
132
+ hotmail.be
133
+ live.be
134
+ skynet.be
135
+ voo.be
136
+ tvcablenet.be
137
+ telenet.be
138
+ hotmail.com.ar
139
+ live.com.ar
140
+ yahoo.com.ar
141
+ fibertel.com.ar
142
+ speedy.com.ar
143
+ arnet.com.ar
144
+ hotmail.com
145
+ gmail.com
146
+ yahoo.com.mx
147
+ live.com.mx
148
+ yahoo.com
149
+ hotmail.es
150
+ live.com
151
+ hotmail.com.mx
152
+ prodigy.net.mx
153
+ msn.com
154
+ ].freeze
10
155
  end
11
156
  end
@@ -2,7 +2,12 @@ require 'digest'
2
2
 
3
3
  module HashComparator
4
4
  class Hasher
5
- SUPPORTED_HASH_FUNCTIONS = { md5: Digest::MD5 }.freeze
5
+ SUPPORTED_HASH_FUNCTIONS = {
6
+ md5: Digest::MD5,
7
+ sha1: Digest::SHA1,
8
+ sha256: Digest::SHA2,
9
+ sha512: Digest::SHA512
10
+ }.freeze
6
11
 
7
12
  def self.hash(hash_function:, human_readable_items:)
8
13
  new(hash_function: hash_function, human_readable_items: human_readable_items).hash
@@ -24,10 +24,11 @@ module HashComparator
24
24
  hash_function: hash_function,
25
25
  human_readable_items: human_readable_items
26
26
  )
27
- matches = human_readable_items.each_with_index.map do |item, i|
28
- item if hashed_items.include?(subject_hashed_items[i])
29
- end.compact
30
27
 
28
+ matches = human_readable_items.each_with_index.each_with_object([]) do |(item, i), list|
29
+ list << item if hashed_items.include?(subject_hashed_items[i])
30
+ end
31
+
31
32
  matches.uniq.sort
32
33
  end
33
34
  end
@@ -1,3 +1,3 @@
1
1
  module HashComparator
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hash_comparator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tanner Johnson
@@ -14,16 +14,16 @@ dependencies:
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.2
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.2
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -52,10 +52,13 @@ files:
52
52
  - lib/hash_comparator/reverse_matcher.rb
53
53
  - lib/hash_comparator/set_comparator.rb
54
54
  - lib/hash_comparator/version.rb
55
- homepage: https://rubygems.org/gems/hash_comparator
55
+ homepage: https://github.com/tannerljohnson/hash_comparator
56
56
  licenses:
57
57
  - MIT
58
- metadata: {}
58
+ metadata:
59
+ homepage_uri: https://github.com/tannerljohnson/hash_comparator
60
+ source_code_uri: https://github.com/tannerljohnson/hash_comparator
61
+ changelog_uri: https://github.com/tannerljohnson/hash_comparator
59
62
  post_install_message:
60
63
  rdoc_options: []
61
64
  require_paths: