hash_comparator 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d80094bdb97ca8548a489934126643430d81b52d78c32e39f49eb543a8f5094e
4
- data.tar.gz: 5edab8fa1f3474079f95f9c7eb62143493262aa79125eb1e37b44ff80149910c
3
+ metadata.gz: 54c03d1e58cabe164b3df964dac46cee9f437b0071c0e0f5fca0a05f2188d49e
4
+ data.tar.gz: f9d2999965d9b2588f4ebb90b389596e34c2435f97bb17517300b938caf28209
5
5
  SHA512:
6
- metadata.gz: 215206512d43dc07b147f8d930539e557709219727c67d04a85ec738cf7b1832dc911a7d1393164f2eff3b9967d32db1f7a019f1d8e13c99ef87b3b1a6cf10aa
7
- data.tar.gz: 629862e4672fb94ab7faabee76f7d72e28b5d299341340969be0fcca5f6a6cd3f134d8e1c096f16cc82b5c53c71746e18f51aefc03b54a92c730dc8cde222c6a
6
+ metadata.gz: 8449d47c0c12cc89e2fbb3827790a6103d81d2b2f5b64b863840a284243f13622f24b69e621153970942289b348a186f12f8578f1d3543aa89773cc9baf2814a
7
+ data.tar.gz: 7688d1f04f962e17aaea4c6591899627432842dec49e3e62ac47bd652c2a550c37b9b08fa755de973e6a55f4e2001e41b85dcc72b2a352961af3d918b1afedf2
@@ -6,23 +6,71 @@ require 'hash_comparator/reverse_matcher'
6
6
  module HashComparator
7
7
  module Emails
8
8
  class Analyzer
9
- def self.execute(hash_function:, subject_raw_emails:, target_hashed_emails:, remove_username: false)
10
- if remove_username
11
- subject_raw_emails = Parser.parse(subject_raw_emails)
9
+ def self.find_common_human_readable(hash_function:, subject_raw_emails:, target_hashed_emails:, options: {})
10
+ new(
11
+ hash_function: hash_function,
12
+ subject_raw_emails: subject_raw_emails,
13
+ target_hashed_emails: target_hashed_emails,
14
+ options: options
15
+ ).find_common_human_readable
16
+ end
17
+
18
+ def self.find_common_hashes(hash_function:, subject_raw_emails:, target_hashed_emails:, options: {})
19
+ new(
20
+ hash_function: hash_function,
21
+ subject_raw_emails: subject_raw_emails,
22
+ target_hashed_emails: target_hashed_emails,
23
+ options: options
24
+ ).find_common_hashes
25
+ end
26
+
27
+ def initialize(hash_function:, subject_raw_emails:, target_hashed_emails:, options:)
28
+ @hash_function = hash_function
29
+ @subject_raw_emails = subject_raw_emails
30
+ @target_hashed_emails = target_hashed_emails
31
+ @options = options
32
+ @subject_hashed_emails = []
33
+ end
34
+
35
+ attr_accessor :hash_function, :subject_raw_emails, :subject_hashed_emails, :target_hashed_emails, :options
36
+
37
+ def find_common_human_readable
38
+ common_hashes = find_common_hashes
39
+ reverse_match(common_hashes)
40
+ end
41
+
42
+ def find_common_hashes
43
+ parse
44
+ hash
45
+ compare
46
+ end
47
+
48
+ private
49
+
50
+ def parse
51
+ if options[:parsing]
52
+ @subject_raw_emails = Parser.parse(subject_raw_emails, options[:parsing])
12
53
  end
54
+ end
13
55
 
14
- subject_hashed_emails = Hasher.hash(
56
+ def hash
57
+ @subject_hashed_emails = Hasher.hash(
15
58
  hash_function: hash_function,
16
59
  human_readable_items: subject_raw_emails
17
60
  )
18
- hashed_common_emails = SetComparator.calculate_overlap(
61
+ end
62
+
63
+ def compare
64
+ SetComparator.calculate_overlap(
19
65
  subject_items: subject_hashed_emails,
20
66
  target_items: target_hashed_emails
21
67
  )
68
+ end
22
69
 
70
+ def reverse_match(common_hashes)
23
71
  ReverseMatcher.execute(
24
72
  hash_function: hash_function,
25
- hashed_items: hashed_common_emails,
73
+ hashed_items: common_hashes,
26
74
  human_readable_items: subject_raw_emails
27
75
  )
28
76
  end
@@ -1,11 +1,156 @@
1
1
  module HashComparator
2
2
  module Emails
3
3
  class Parser
4
- def self.parse(emails)
5
- emails.map do |email|
4
+ def self.parse(emails, options = {})
5
+ new(emails, options).parse
6
+ end
7
+
8
+ def initialize(emails, options)
9
+ @emails = emails
10
+ @options = options
11
+ end
12
+
13
+ attr_accessor :emails, :options
14
+
15
+ def parse
16
+ remove_usernames if options[:remove_usernames]
17
+ remove_generic_domains if options[:remove_generic_domains]
18
+
19
+ emails
20
+ end
21
+
22
+ private
23
+
24
+ def remove_usernames
25
+ @emails = emails.map do |email|
6
26
  email.strip.split('@')[1]
7
27
  end
8
28
  end
29
+
30
+ def remove_generic_domains
31
+ @emails = emails.each_with_object([]) do |email, list|
32
+ domain = email.split('@')[-1]
33
+ list << email unless GENERIC_EMAIL_DOMAINS.include?(domain)
34
+ end
35
+ end
9
36
  end
37
+
38
+ # Email Domains
39
+ # Adapted from https://github.com/mailcheck/mailcheck/wiki/List-of-Popular-Domains
40
+ GENERIC_EMAIL_DOMAINS = %w[
41
+ aol.com
42
+ att.net
43
+ comcast.net
44
+ facebook.com
45
+ gmail.com
46
+ gmx.com
47
+ googlemail.com
48
+ google.com
49
+ hotmail.com
50
+ hotmail.co.uk
51
+ mac.com
52
+ me.com
53
+ mail.com
54
+ msn.com
55
+ live.com
56
+ sbcglobal.net
57
+ verizon.net
58
+ yahoo.com
59
+ yahoo.co.uk
60
+ email.com
61
+ games.com
62
+ gmx.net
63
+ hush.com
64
+ hushmail.com
65
+ icloud.com
66
+ inbox.com
67
+ lavabit.com
68
+ love.com
69
+ outlook.com
70
+ pobox.com
71
+ rocketmail.com
72
+ safe-mail.net
73
+ wow.com
74
+ ygm.com
75
+ ymail.com
76
+ zoho.com
77
+ fastmail.fm
78
+ bellsouth.net
79
+ charter.net
80
+ comcast.net
81
+ cox.net
82
+ earthlink.net
83
+ juno.com
84
+ btinternet.com
85
+ virginmedia.com
86
+ blueyonder.co.uk
87
+ freeserve.co.uk
88
+ live.co.uk
89
+ ntlworld.com
90
+ o2.co.uk
91
+ orange.net
92
+ sky.com
93
+ talktalk.co.uk
94
+ tiscali.co.uk
95
+ virgin.net
96
+ wanadoo.co.uk
97
+ bt.com
98
+ sina.com
99
+ qq.com
100
+ naver.com
101
+ hanmail.net
102
+ daum.net
103
+ nate.com
104
+ yahoo.co.jp
105
+ yahoo.co.kr
106
+ yahoo.co.id
107
+ yahoo.co.in
108
+ yahoo.com.sg
109
+ yahoo.com.ph
110
+ hotmail.fr
111
+ live.fr
112
+ laposte.net
113
+ yahoo.fr
114
+ wanadoo.fr
115
+ orange.fr
116
+ gmx.fr
117
+ sfr.fr
118
+ neuf.fr
119
+ free.fr
120
+ gmx.de
121
+ hotmail.de
122
+ live.de
123
+ online.de
124
+ t-online.de
125
+ web.de
126
+ yahoo.de
127
+ mail.ru
128
+ rambler.ru
129
+ yandex.ru
130
+ ya.ru
131
+ list.ru
132
+ hotmail.be
133
+ live.be
134
+ skynet.be
135
+ voo.be
136
+ tvcablenet.be
137
+ telenet.be
138
+ hotmail.com.ar
139
+ live.com.ar
140
+ yahoo.com.ar
141
+ fibertel.com.ar
142
+ speedy.com.ar
143
+ arnet.com.ar
144
+ hotmail.com
145
+ gmail.com
146
+ yahoo.com.mx
147
+ live.com.mx
148
+ yahoo.com
149
+ hotmail.es
150
+ live.com
151
+ hotmail.com.mx
152
+ prodigy.net.mx
153
+ msn.com
154
+ ].freeze
10
155
  end
11
156
  end
@@ -2,7 +2,12 @@ require 'digest'
2
2
 
3
3
  module HashComparator
4
4
  class Hasher
5
- SUPPORTED_HASH_FUNCTIONS = { md5: Digest::MD5 }.freeze
5
+ SUPPORTED_HASH_FUNCTIONS = {
6
+ md5: Digest::MD5,
7
+ sha1: Digest::SHA1,
8
+ sha256: Digest::SHA2,
9
+ sha512: Digest::SHA512
10
+ }.freeze
6
11
 
7
12
  def self.hash(hash_function:, human_readable_items:)
8
13
  new(hash_function: hash_function, human_readable_items: human_readable_items).hash
@@ -24,10 +24,11 @@ module HashComparator
24
24
  hash_function: hash_function,
25
25
  human_readable_items: human_readable_items
26
26
  )
27
- matches = human_readable_items.each_with_index.map do |item, i|
28
- item if hashed_items.include?(subject_hashed_items[i])
29
- end.compact
30
27
 
28
+ matches = human_readable_items.each_with_index.each_with_object([]) do |(item, i), list|
29
+ list << item if hashed_items.include?(subject_hashed_items[i])
30
+ end
31
+
31
32
  matches.uniq.sort
32
33
  end
33
34
  end
@@ -1,3 +1,3 @@
1
1
  module HashComparator
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hash_comparator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tanner Johnson
@@ -14,16 +14,16 @@ dependencies:
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.2
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.2
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -52,10 +52,13 @@ files:
52
52
  - lib/hash_comparator/reverse_matcher.rb
53
53
  - lib/hash_comparator/set_comparator.rb
54
54
  - lib/hash_comparator/version.rb
55
- homepage: https://rubygems.org/gems/hash_comparator
55
+ homepage: https://github.com/tannerljohnson/hash_comparator
56
56
  licenses:
57
57
  - MIT
58
- metadata: {}
58
+ metadata:
59
+ homepage_uri: https://github.com/tannerljohnson/hash_comparator
60
+ source_code_uri: https://github.com/tannerljohnson/hash_comparator
61
+ changelog_uri: https://github.com/tannerljohnson/hash_comparator
59
62
  post_install_message:
60
63
  rdoc_options: []
61
64
  require_paths: