email_collector 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/email_collector.gemspec +2 -1
- data/lib/email_collector.rb +13 -12
- data/test/test_email_collector.rb +16 -24
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1c0f717c369b8c13c23ade3df098ac17278594
|
4
|
+
data.tar.gz: 5c4ac0395cf4dc6baa43cf863e52d707c7dbe956
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eca76313ce490b60905cc6766a47d4e02b38d5358dc9081b20efc6f6d944117dffe2f0122ccd7cf8708d2e1961b4e577c0b32020f3ee726efa54d310ca778973
|
7
|
+
data.tar.gz: a9cdf169f7ac4fc8de771fa46432ea1bddf2595759458739f6f34c698a50a4a24393655dc758891055aeb428c14b06fee5a66aa105b377a339fe031a49724c59
|
data/README.md
CHANGED
data/email_collector.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = 'email_collector'
|
7
|
-
gem.version = '0.2.0'
|
7
|
+
gem.version = '0.3.0'
|
8
8
|
gem.authors = ['Alexei Fedotov']
|
9
9
|
gem.email = ['alexei.fedotov@gmail.com']
|
10
10
|
gem.description = %q{Collects emails from Google}
|
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
|
|
21
21
|
|
22
22
|
gem.add_runtime_dependency 'google-search', '1.0.3'
|
23
23
|
gem.add_runtime_dependency 'logger', '1.2.8'
|
24
|
+
gem.required_ruby_version = '>= 2.0'
|
24
25
|
end
|
data/lib/email_collector.rb
CHANGED
@@ -6,15 +6,14 @@ module EmailCollector
|
|
6
6
|
@logger.debug('logger initialized')
|
7
7
|
|
8
8
|
@size = :large
|
9
|
-
def self.
|
10
|
-
@size =
|
9
|
+
def self.size=(s)
|
10
|
+
@size = s
|
11
11
|
end
|
12
12
|
|
13
13
|
@keywords = ['', 'mail', 'mailto', 'email', 'contacts', 'contact', 'address', 'login', 'author', 'googletalk', 'gtalk',
|
14
14
|
'gmail', 'googlemail', 'yahoo', 'hotspot', 'outlook', 'yandex'];
|
15
|
-
|
16
|
-
|
17
|
-
@keywords = keywords
|
15
|
+
def self.keywords=(k)
|
16
|
+
@keywords = k
|
18
17
|
end
|
19
18
|
|
20
19
|
def self.collect(searchReq, domain = nil)
|
@@ -26,8 +25,8 @@ module EmailCollector
|
|
26
25
|
#@logger.debug("domain = #{domain}")
|
27
26
|
|
28
27
|
if (domain)
|
29
|
-
res =
|
30
|
-
res_at =
|
28
|
+
res = google_search("#{searchReq} \"#{domain}\"")
|
29
|
+
res_at = google_search("#{searchReq} \"at #{domain}\"")
|
31
30
|
|
32
31
|
(res + res_at).map do |context|
|
33
32
|
#@logger.debug("context = #{context}")
|
@@ -35,14 +34,14 @@ module EmailCollector
|
|
35
34
|
context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
|
36
35
|
end
|
37
36
|
else
|
38
|
-
|
37
|
+
google_search(searchReq).map do |context|
|
39
38
|
@logger.debug("context = #{context}")
|
40
39
|
context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
|
41
40
|
end
|
42
41
|
end
|
43
42
|
end
|
44
43
|
|
45
|
-
def self.
|
44
|
+
def self.google_search(searchReq)
|
46
45
|
@logger.debug("searching for #{searchReq}")
|
47
46
|
|
48
47
|
Google::Search::Web.new do |search|
|
@@ -57,20 +56,22 @@ module EmailCollector
|
|
57
56
|
end.flatten
|
58
57
|
end
|
59
58
|
|
59
|
+
# Replaces 'at' with @
|
60
60
|
def self.filter_at(s)
|
61
61
|
s.gsub(/\s+/, ' ').gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
|
+
# Replaces ***gmail.com with @gmail.com
|
64
65
|
def self.filter_at_domain(s, domain)
|
65
66
|
s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/, '@' + domain)
|
66
67
|
end
|
67
68
|
|
68
|
-
#
|
69
|
+
# Transforms gmail!com addresses
|
69
70
|
def self.filter_exclam(s)
|
70
71
|
s.gsub(/[!:]/, '.')
|
71
72
|
end
|
72
73
|
|
73
|
-
#
|
74
|
+
# Fixes google-search gem bold outline
|
74
75
|
def self.filter_b(s)
|
75
76
|
s.gsub(/<\/?b>/, '')
|
76
77
|
end
|
data/test/test_email_collector.rb
CHANGED
@@ -2,11 +2,8 @@ require 'minitest/autorun'
|
|
2
2
|
require 'email_collector'
|
3
3
|
|
4
4
|
class EmailCollectorTest < Minitest::Unit::TestCase
|
5
|
+
SKIP_SLOW_TESTS = true
|
5
6
|
@@logger = Logger.new $stderr
|
6
|
-
|
7
|
-
def get_domain(email)
|
8
|
-
return email.gsub(/.*@/, '')
|
9
|
-
end
|
10
7
|
|
11
8
|
def test_filter_at
|
12
9
|
assert_equal "name@domain.com", EmailCollector.filter_at("name at domain.com")
|
@@ -28,11 +25,11 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
28
25
|
PATTERNS = ['wikipedia']
|
29
26
|
|
30
27
|
def test_search
|
31
|
-
return
|
32
|
-
EmailCollector.
|
28
|
+
return if SKIP_SLOW_TESTS
|
29
|
+
EmailCollector.size = :small
|
33
30
|
|
34
31
|
(PATTERNS + EMAILS).each do |pattern|
|
35
|
-
x = EmailCollector.
|
32
|
+
x = EmailCollector.google_search("\"#{pattern}\"").join('').gsub(/ /, '')
|
36
33
|
#@@logger.debug("PATTERN = #{pattern}")
|
37
34
|
#@@logger.debug("x = " << x)
|
38
35
|
#@@logger.debug(x.match(/#{pattern}/i))
|
@@ -41,8 +38,8 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
41
38
|
end
|
42
39
|
|
43
40
|
def test_collect_plain
|
44
|
-
return
|
45
|
-
EmailCollector.
|
41
|
+
return if SKIP_SLOW_TESTS
|
42
|
+
EmailCollector.size = :small
|
46
43
|
|
47
44
|
res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}", get_domain(AUTHOR_EMAIL)).flatten
|
48
45
|
@@logger.debug(res)
|
@@ -50,18 +47,19 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
50
47
|
end
|
51
48
|
|
52
49
|
def test_collect_plain_nodomain
|
53
|
-
return
|
54
|
-
EmailCollector.
|
50
|
+
return if SKIP_SLOW_TESTS
|
51
|
+
EmailCollector.size = :small
|
55
52
|
|
56
53
|
res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}").flatten
|
57
|
-
|
54
|
+
#@@logger.debug(res)
|
58
55
|
assert(res.include? AUTHOR_EMAIL)
|
59
56
|
end
|
60
57
|
|
61
58
|
def test_collect
|
62
|
-
return
|
63
|
-
|
64
|
-
EmailCollector.
|
59
|
+
return if SKIP_SLOW_TESTS
|
60
|
+
|
61
|
+
EmailCollector.size = :small
|
62
|
+
EmailCollector.keywords = ['harmony']
|
65
63
|
|
66
64
|
EMAILS.each do |email|
|
67
65
|
res = EmailCollector.collect("\"#{email}\"")
|
@@ -73,15 +71,9 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
73
71
|
end
|
74
72
|
end
|
75
73
|
|
76
|
-
|
77
|
-
def
|
78
|
-
|
79
|
-
# EmailCollector.set_size(:small)
|
80
|
-
# EmailCollector.set_keywords([''])
|
81
|
-
|
82
|
-
# res = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
|
83
|
-
# res = EmailCollector.collect('site:github.com', 'gmail.com')
|
84
|
-
# @@logger.debug(res)
|
74
|
+
private
|
75
|
+
def get_domain(email)
|
76
|
+
return email.gsub(/.*@/, '')
|
85
77
|
end
|
86
78
|
|
87
79
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexei Fedotov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: google-search
|
@@ -65,7 +65,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
68
|
+
version: '2.0'
|
69
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - ">="
|