email_collector 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c1d357c68d40f1688c8cb1203f7235214776c9b
4
- data.tar.gz: 96c74c8954807e708ba0fc4b12e93d24b2b85b0b
3
+ metadata.gz: 9c1c0f717c369b8c13c23ade3df098ac17278594
4
+ data.tar.gz: 5c4ac0395cf4dc6baa43cf863e52d707c7dbe956
5
5
  SHA512:
6
- metadata.gz: aab5408505dd34fe65a4a04cd0742338821e3a230a5475557bee91f34131ba6686069807b5059ad63db5376de2de17df59905a1f0abba125e7d6e14a709b199f
7
- data.tar.gz: 4a206237bea75758ef6c4c34b33ee77d3c97255ae74d14e4015bacb70333443745be61dea7d0ed72621545a0e2620ac049ad17634e4d52781e9a12267e5cd9a2
6
+ metadata.gz: eca76313ce490b60905cc6766a47d4e02b38d5358dc9081b20efc6f6d944117dffe2f0122ccd7cf8708d2e1961b4e577c0b32020f3ee726efa54d310ca778973
7
+ data.tar.gz: a9cdf169f7ac4fc8de771fa46432ea1bddf2595759458739f6f34c698a50a4a24393655dc758891055aeb428c14b06fee5a66aa105b377a339fe031a49724c59
data/README.md CHANGED
@@ -28,6 +28,8 @@ $ irb
28
28
  require 'email_collector'
29
29
  EmailCollector.set_keywords([''])
30
30
  emails = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
31
+ => ["events@yandex.ru", "music@yandex.ru", "company@yandex.ru", "api@yandex.ru"]
32
+
31
33
  ```
32
34
 
33
35
 
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |gem|
6
6
  gem.name = 'email_collector'
7
- gem.version = '0.2.0'
7
+ gem.version = '0.3.0'
8
8
  gem.authors = ['Alexei Fedotov']
9
9
  gem.email = ['alexei.fedotov@gmail.com']
10
10
  gem.description = %q{Collects emails from Google}
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
21
21
 
22
22
  gem.add_runtime_dependency 'google-search', '1.0.3'
23
23
  gem.add_runtime_dependency 'logger', '1.2.8'
24
+ gem.required_ruby_version = '>= 2.0'
24
25
  end
@@ -6,15 +6,14 @@ module EmailCollector
6
6
  @logger.debug('logger initialized')
7
7
 
8
8
  @size = :large
9
- def self.set_size(size)
10
- @size = size
9
+ def self.size=(s)
10
+ @size = s
11
11
  end
12
12
 
13
13
  @keywords = ['', 'mail', 'mailto', 'email', 'contacts', 'contact', 'address', 'login', 'author', 'googletalk', 'gtalk',
14
14
  'gmail', 'googlemail', 'yahoo', 'hotspot', 'outlook', 'yandex'];
15
-
16
- def self.set_keywords(keywords)
17
- @keywords = keywords
15
+ def self.keywords=(k)
16
+ @keywords = k
18
17
  end
19
18
 
20
19
  def self.collect(searchReq, domain = nil)
@@ -26,8 +25,8 @@ module EmailCollector
26
25
  #@logger.debug("domain = #{domain}")
27
26
 
28
27
  if (domain)
29
- res = search("#{searchReq} \"#{domain}\"")
30
- res_at = search("#{searchReq} \"at #{domain}\"")
28
+ res = google_search("#{searchReq} \"#{domain}\"")
29
+ res_at = google_search("#{searchReq} \"at #{domain}\"")
31
30
 
32
31
  (res + res_at).map do |context|
33
32
  #@logger.debug("context = #{context}")
@@ -35,14 +34,14 @@ module EmailCollector
35
34
  context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
36
35
  end
37
36
  else
38
- search(searchReq).map do |context|
37
+ google_search(searchReq).map do |context|
39
38
  @logger.debug("context = #{context}")
40
39
  context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
41
40
  end
42
41
  end
43
42
  end
44
43
 
45
- def self.search(searchReq)
44
+ def self.google_search(searchReq)
46
45
  @logger.debug("searching for #{searchReq}")
47
46
 
48
47
  Google::Search::Web.new do |search|
@@ -57,20 +56,22 @@ module EmailCollector
57
56
  end.flatten
58
57
  end
59
58
 
59
+ # Replaces 'at' with @
60
60
  def self.filter_at(s)
61
61
  s.gsub(/\s+/, ' ').gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
62
62
  end
63
-
63
+
64
+ # Replaces ***gmail.com with @gmail.com
64
65
  def self.filter_at_domain(s, domain)
65
66
  s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/, '@' + domain)
66
67
  end
67
68
 
68
- # Transform gmail!com addresses
69
+ # Transforms gmail!com addresses
69
70
  def self.filter_exclam(s)
70
71
  s.gsub(/[!:]/, '.')
71
72
  end
72
73
 
73
- # One can go fix google-search gem instead
74
+ # Fixes google-search gem bold outline
74
75
  def self.filter_b(s)
75
76
  s.gsub(/<\/?b>/, '')
76
77
  end
@@ -2,11 +2,8 @@ require 'minitest/autorun'
2
2
  require 'email_collector'
3
3
 
4
4
  class EmailCollectorTest < Minitest::Unit::TestCase
5
+ SKIP_SLOW_TESTS = true
5
6
  @@logger = Logger.new $stderr
6
-
7
- def get_domain(email)
8
- return email.gsub(/.*@/, '')
9
- end
10
7
 
11
8
  def test_filter_at
12
9
  assert_equal "name@domain.com", EmailCollector.filter_at("name at domain.com")
@@ -28,11 +25,11 @@ class EmailCollectorTest < Minitest::Unit::TestCase
28
25
  PATTERNS = ['wikipedia']
29
26
 
30
27
  def test_search
31
- return # quick return
32
- EmailCollector.set_size(:small);
28
+ return if SKIP_SLOW_TESTS
29
+ EmailCollector.size = :small
33
30
 
34
31
  (PATTERNS + EMAILS).each do |pattern|
35
- x = EmailCollector.search("\"#{pattern}\"").join('').gsub(/ /, '')
32
+ x = EmailCollector.google_search("\"#{pattern}\"").join('').gsub(/ /, '')
36
33
  #@@logger.debug("PATTERN = #{pattern}")
37
34
  #@@logger.debug("x = " << x)
38
35
  #@@logger.debug(x.match(/#{pattern}/i))
@@ -41,8 +38,8 @@ class EmailCollectorTest < Minitest::Unit::TestCase
41
38
  end
42
39
 
43
40
  def test_collect_plain
44
- return # quick return
45
- EmailCollector.set_size(:small);
41
+ return if SKIP_SLOW_TESTS
42
+ EmailCollector.size = :small
46
43
 
47
44
  res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}", get_domain(AUTHOR_EMAIL)).flatten
48
45
  @@logger.debug(res)
@@ -50,18 +47,19 @@ class EmailCollectorTest < Minitest::Unit::TestCase
50
47
  end
51
48
 
52
49
  def test_collect_plain_nodomain
53
- return # quick return
54
- EmailCollector.set_size(:small);
50
+ return if SKIP_SLOW_TESTS
51
+ EmailCollector.size = :small
55
52
 
56
53
  res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}").flatten
57
- @@logger.debug(res)
54
+ #@@logger.debug(res)
58
55
  assert(res.include? AUTHOR_EMAIL)
59
56
  end
60
57
 
61
58
  def test_collect
62
- return # quick return
63
- EmailCollector.set_size(:small);
64
- EmailCollector.set_keywords(['harmony']);
59
+ return if SKIP_SLOW_TESTS
60
+
61
+ EmailCollector.size = :small
62
+ EmailCollector.keywords = ['harmony']
65
63
 
66
64
  EMAILS.each do |email|
67
65
  res = EmailCollector.collect("\"#{email}\"")
@@ -73,15 +71,9 @@ class EmailCollectorTest < Minitest::Unit::TestCase
73
71
  end
74
72
  end
75
73
 
76
- # This is actually usage example
77
- def test_collect_example
78
- #return # quick return
79
- # EmailCollector.set_size(:small)
80
- # EmailCollector.set_keywords([''])
81
-
82
- # res = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
83
- # res = EmailCollector.collect('site:github.com', 'gmail.com')
84
- # @@logger.debug(res)
74
+ private
75
+ def get_domain(email)
76
+ return email.gsub(/.*@/, '')
85
77
  end
86
78
 
87
79
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexei Fedotov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-22 00:00:00.000000000 Z
11
+ date: 2015-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-search
@@ -65,7 +65,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '2.0'
69
69
  required_rubygems_version: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="