email_collector 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/email_collector.gemspec +2 -1
- data/lib/email_collector.rb +13 -12
- data/test/test_email_collector.rb +16 -24
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1c0f717c369b8c13c23ade3df098ac17278594
|
4
|
+
data.tar.gz: 5c4ac0395cf4dc6baa43cf863e52d707c7dbe956
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eca76313ce490b60905cc6766a47d4e02b38d5358dc9081b20efc6f6d944117dffe2f0122ccd7cf8708d2e1961b4e577c0b32020f3ee726efa54d310ca778973
|
7
|
+
data.tar.gz: a9cdf169f7ac4fc8de771fa46432ea1bddf2595759458739f6f34c698a50a4a24393655dc758891055aeb428c14b06fee5a66aa105b377a339fe031a49724c59
|
data/README.md
CHANGED
data/email_collector.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = 'email_collector'
|
7
|
-
gem.version = '0.2.0'
|
7
|
+
gem.version = '0.3.0'
|
8
8
|
gem.authors = ['Alexei Fedotov']
|
9
9
|
gem.email = ['alexei.fedotov@gmail.com']
|
10
10
|
gem.description = %q{Collects emails from Google}
|
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
|
|
21
21
|
|
22
22
|
gem.add_runtime_dependency 'google-search', '1.0.3'
|
23
23
|
gem.add_runtime_dependency 'logger', '1.2.8'
|
24
|
+
gem.required_ruby_version = '>= 2.0'
|
24
25
|
end
|
data/lib/email_collector.rb
CHANGED
@@ -6,15 +6,14 @@ module EmailCollector
|
|
6
6
|
@logger.debug('logger initialized')
|
7
7
|
|
8
8
|
@size = :large
|
9
|
-
def self.
|
10
|
-
@size =
|
9
|
+
def self.size=(s)
|
10
|
+
@size = s
|
11
11
|
end
|
12
12
|
|
13
13
|
@keywords = ['', 'mail', 'mailto', 'email', 'contacts', 'contact', 'address', 'login', 'author', 'googletalk', 'gtalk',
|
14
14
|
'gmail', 'googlemail', 'yahoo', 'hotspot', 'outlook', 'yandex'];
|
15
|
-
|
16
|
-
|
17
|
-
@keywords = keywords
|
15
|
+
def self.keywords=(k)
|
16
|
+
@keywords = k
|
18
17
|
end
|
19
18
|
|
20
19
|
def self.collect(searchReq, domain = nil)
|
@@ -26,8 +25,8 @@ module EmailCollector
|
|
26
25
|
#@logger.debug("domain = #{domain}")
|
27
26
|
|
28
27
|
if (domain)
|
29
|
-
res =
|
30
|
-
res_at =
|
28
|
+
res = google_search("#{searchReq} \"#{domain}\"")
|
29
|
+
res_at = google_search("#{searchReq} \"at #{domain}\"")
|
31
30
|
|
32
31
|
(res + res_at).map do |context|
|
33
32
|
#@logger.debug("context = #{context}")
|
@@ -35,14 +34,14 @@ module EmailCollector
|
|
35
34
|
context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
|
36
35
|
end
|
37
36
|
else
|
38
|
-
|
37
|
+
google_search(searchReq).map do |context|
|
39
38
|
@logger.debug("context = #{context}")
|
40
39
|
context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
|
41
40
|
end
|
42
41
|
end
|
43
42
|
end
|
44
43
|
|
45
|
-
def self.
|
44
|
+
def self.google_search(searchReq)
|
46
45
|
@logger.debug("searching for #{searchReq}")
|
47
46
|
|
48
47
|
Google::Search::Web.new do |search|
|
@@ -57,20 +56,22 @@ module EmailCollector
|
|
57
56
|
end.flatten
|
58
57
|
end
|
59
58
|
|
59
|
+
# Replaces 'at' with @
|
60
60
|
def self.filter_at(s)
|
61
61
|
s.gsub(/\s+/, ' ').gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
|
+
# Replaces ***gmail.com with @gmail.com
|
64
65
|
def self.filter_at_domain(s, domain)
|
65
66
|
s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/, '@' + domain)
|
66
67
|
end
|
67
68
|
|
68
|
-
#
|
69
|
+
# Transforms gmail!com addresses
|
69
70
|
def self.filter_exclam(s)
|
70
71
|
s.gsub(/[!:]/, '.')
|
71
72
|
end
|
72
73
|
|
73
|
-
#
|
74
|
+
# Fixes google-search gem bold outline
|
74
75
|
def self.filter_b(s)
|
75
76
|
s.gsub(/<\/?b>/, '')
|
76
77
|
end
|
@@ -2,11 +2,8 @@ require 'minitest/autorun'
|
|
2
2
|
require 'email_collector'
|
3
3
|
|
4
4
|
class EmailCollectorTest < Minitest::Unit::TestCase
|
5
|
+
SKIP_SLOW_TESTS = true
|
5
6
|
@@logger = Logger.new $stderr
|
6
|
-
|
7
|
-
def get_domain(email)
|
8
|
-
return email.gsub(/.*@/, '')
|
9
|
-
end
|
10
7
|
|
11
8
|
def test_filter_at
|
12
9
|
assert_equal "name@domain.com", EmailCollector.filter_at("name at domain.com")
|
@@ -28,11 +25,11 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
28
25
|
PATTERNS = ['wikipedia']
|
29
26
|
|
30
27
|
def test_search
|
31
|
-
return
|
32
|
-
EmailCollector.
|
28
|
+
return if SKIP_SLOW_TESTS
|
29
|
+
EmailCollector.size = :small
|
33
30
|
|
34
31
|
(PATTERNS + EMAILS).each do |pattern|
|
35
|
-
x = EmailCollector.
|
32
|
+
x = EmailCollector.google_search("\"#{pattern}\"").join('').gsub(/ /, '')
|
36
33
|
#@@logger.debug("PATTERN = #{pattern}")
|
37
34
|
#@@logger.debug("x = " << x)
|
38
35
|
#@@logger.debug(x.match(/#{pattern}/i))
|
@@ -41,8 +38,8 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
41
38
|
end
|
42
39
|
|
43
40
|
def test_collect_plain
|
44
|
-
return
|
45
|
-
EmailCollector.
|
41
|
+
return if SKIP_SLOW_TESTS
|
42
|
+
EmailCollector.size = :small
|
46
43
|
|
47
44
|
res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}", get_domain(AUTHOR_EMAIL)).flatten
|
48
45
|
@@logger.debug(res)
|
@@ -50,18 +47,19 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
50
47
|
end
|
51
48
|
|
52
49
|
def test_collect_plain_nodomain
|
53
|
-
return
|
54
|
-
EmailCollector.
|
50
|
+
return if SKIP_SLOW_TESTS
|
51
|
+
EmailCollector.size = :small
|
55
52
|
|
56
53
|
res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}").flatten
|
57
|
-
|
54
|
+
#@@logger.debug(res)
|
58
55
|
assert(res.include? AUTHOR_EMAIL)
|
59
56
|
end
|
60
57
|
|
61
58
|
def test_collect
|
62
|
-
return
|
63
|
-
|
64
|
-
EmailCollector.
|
59
|
+
return if SKIP_SLOW_TESTS
|
60
|
+
|
61
|
+
EmailCollector.size = :small
|
62
|
+
EmailCollector.keywords = ['harmony']
|
65
63
|
|
66
64
|
EMAILS.each do |email|
|
67
65
|
res = EmailCollector.collect("\"#{email}\"")
|
@@ -73,15 +71,9 @@ class EmailCollectorTest < Minitest::Unit::TestCase
|
|
73
71
|
end
|
74
72
|
end
|
75
73
|
|
76
|
-
|
77
|
-
def
|
78
|
-
|
79
|
-
# EmailCollector.set_size(:small)
|
80
|
-
# EmailCollector.set_keywords([''])
|
81
|
-
|
82
|
-
# res = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
|
83
|
-
# res = EmailCollector.collect('site:github.com', 'gmail.com')
|
84
|
-
# @@logger.debug(res)
|
74
|
+
private
|
75
|
+
def get_domain(email)
|
76
|
+
return email.gsub(/.*@/, '')
|
85
77
|
end
|
86
78
|
|
87
79
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_collector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexei Fedotov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: google-search
|
@@ -65,7 +65,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
68
|
+
version: '2.0'
|
69
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - ">="
|