email_collector 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c1d357c68d40f1688c8cb1203f7235214776c9b
4
- data.tar.gz: 96c74c8954807e708ba0fc4b12e93d24b2b85b0b
3
+ metadata.gz: 9c1c0f717c369b8c13c23ade3df098ac17278594
4
+ data.tar.gz: 5c4ac0395cf4dc6baa43cf863e52d707c7dbe956
5
5
  SHA512:
6
- metadata.gz: aab5408505dd34fe65a4a04cd0742338821e3a230a5475557bee91f34131ba6686069807b5059ad63db5376de2de17df59905a1f0abba125e7d6e14a709b199f
7
- data.tar.gz: 4a206237bea75758ef6c4c34b33ee77d3c97255ae74d14e4015bacb70333443745be61dea7d0ed72621545a0e2620ac049ad17634e4d52781e9a12267e5cd9a2
6
+ metadata.gz: eca76313ce490b60905cc6766a47d4e02b38d5358dc9081b20efc6f6d944117dffe2f0122ccd7cf8708d2e1961b4e577c0b32020f3ee726efa54d310ca778973
7
+ data.tar.gz: a9cdf169f7ac4fc8de771fa46432ea1bddf2595759458739f6f34c698a50a4a24393655dc758891055aeb428c14b06fee5a66aa105b377a339fe031a49724c59
data/README.md CHANGED
@@ -28,6 +28,8 @@ $ irb
28
28
  require 'email_collector'
29
29
  EmailCollector.set_keywords([''])
30
30
  emails = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
31
+ => ["events@yandex.ru", "music@yandex.ru", "company@yandex.ru", "api@yandex.ru"]
32
+
31
33
  ```
32
34
 
33
35
 
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |gem|
6
6
  gem.name = 'email_collector'
7
- gem.version = '0.2.0'
7
+ gem.version = '0.3.0'
8
8
  gem.authors = ['Alexei Fedotov']
9
9
  gem.email = ['alexei.fedotov@gmail.com']
10
10
  gem.description = %q{Collects emails from Google}
@@ -21,4 +21,5 @@ Gem::Specification.new do |gem|
21
21
 
22
22
  gem.add_runtime_dependency 'google-search', '1.0.3'
23
23
  gem.add_runtime_dependency 'logger', '1.2.8'
24
+ gem.required_ruby_version = '>= 2.0'
24
25
  end
@@ -6,15 +6,14 @@ module EmailCollector
6
6
  @logger.debug('logger initialized')
7
7
 
8
8
  @size = :large
9
- def self.set_size(size)
10
- @size = size
9
+ def self.size=(s)
10
+ @size = s
11
11
  end
12
12
 
13
13
  @keywords = ['', 'mail', 'mailto', 'email', 'contacts', 'contact', 'address', 'login', 'author', 'googletalk', 'gtalk',
14
14
  'gmail', 'googlemail', 'yahoo', 'hotspot', 'outlook', 'yandex'];
15
-
16
- def self.set_keywords(keywords)
17
- @keywords = keywords
15
+ def self.keywords=(k)
16
+ @keywords = k
18
17
  end
19
18
 
20
19
  def self.collect(searchReq, domain = nil)
@@ -26,8 +25,8 @@ module EmailCollector
26
25
  #@logger.debug("domain = #{domain}")
27
26
 
28
27
  if (domain)
29
- res = search("#{searchReq} \"#{domain}\"")
30
- res_at = search("#{searchReq} \"at #{domain}\"")
28
+ res = google_search("#{searchReq} \"#{domain}\"")
29
+ res_at = google_search("#{searchReq} \"at #{domain}\"")
31
30
 
32
31
  (res + res_at).map do |context|
33
32
  #@logger.debug("context = #{context}")
@@ -35,14 +34,14 @@ module EmailCollector
35
34
  context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
36
35
  end
37
36
  else
38
- search(searchReq).map do |context|
37
+ google_search(searchReq).map do |context|
39
38
  @logger.debug("context = #{context}")
40
39
  context.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
41
40
  end
42
41
  end
43
42
  end
44
43
 
45
- def self.search(searchReq)
44
+ def self.google_search(searchReq)
46
45
  @logger.debug("searching for #{searchReq}")
47
46
 
48
47
  Google::Search::Web.new do |search|
@@ -57,20 +56,22 @@ module EmailCollector
57
56
  end.flatten
58
57
  end
59
58
 
59
+ # Replaces 'at' with @
60
60
  def self.filter_at(s)
61
61
  s.gsub(/\s+/, ' ').gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
62
62
  end
63
-
63
+
64
+ # Replaces ***gmail.com with @gmail.com
64
65
  def self.filter_at_domain(s, domain)
65
66
  s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/, '@' + domain)
66
67
  end
67
68
 
68
- # Transform gmail!com addresses
69
+ # Transforms gmail!com addresses
69
70
  def self.filter_exclam(s)
70
71
  s.gsub(/[!:]/, '.')
71
72
  end
72
73
 
73
- # One can go fix google-search gem instead
74
+ # Fixes google-search gem bold outline
74
75
  def self.filter_b(s)
75
76
  s.gsub(/<\/?b>/, '')
76
77
  end
@@ -2,11 +2,8 @@ require 'minitest/autorun'
2
2
  require 'email_collector'
3
3
 
4
4
  class EmailCollectorTest < Minitest::Unit::TestCase
5
+ SKIP_SLOW_TESTS = true
5
6
  @@logger = Logger.new $stderr
6
-
7
- def get_domain(email)
8
- return email.gsub(/.*@/, '')
9
- end
10
7
 
11
8
  def test_filter_at
12
9
  assert_equal "name@domain.com", EmailCollector.filter_at("name at domain.com")
@@ -28,11 +25,11 @@ class EmailCollectorTest < Minitest::Unit::TestCase
28
25
  PATTERNS = ['wikipedia']
29
26
 
30
27
  def test_search
31
- return # quick return
32
- EmailCollector.set_size(:small);
28
+ return if SKIP_SLOW_TESTS
29
+ EmailCollector.size = :small
33
30
 
34
31
  (PATTERNS + EMAILS).each do |pattern|
35
- x = EmailCollector.search("\"#{pattern}\"").join('').gsub(/ /, '')
32
+ x = EmailCollector.google_search("\"#{pattern}\"").join('').gsub(/ /, '')
36
33
  #@@logger.debug("PATTERN = #{pattern}")
37
34
  #@@logger.debug("x = " << x)
38
35
  #@@logger.debug(x.match(/#{pattern}/i))
@@ -41,8 +38,8 @@ class EmailCollectorTest < Minitest::Unit::TestCase
41
38
  end
42
39
 
43
40
  def test_collect_plain
44
- return # quick return
45
- EmailCollector.set_size(:small);
41
+ return if SKIP_SLOW_TESTS
42
+ EmailCollector.size = :small
46
43
 
47
44
  res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}", get_domain(AUTHOR_EMAIL)).flatten
48
45
  @@logger.debug(res)
@@ -50,18 +47,19 @@ class EmailCollectorTest < Minitest::Unit::TestCase
50
47
  end
51
48
 
52
49
  def test_collect_plain_nodomain
53
- return # quick return
54
- EmailCollector.set_size(:small);
50
+ return if SKIP_SLOW_TESTS
51
+ EmailCollector.size = :small
55
52
 
56
53
  res = EmailCollector.collect_plain("openmeetings #{AUTHOR_EMAIL}").flatten
57
- @@logger.debug(res)
54
+ #@@logger.debug(res)
58
55
  assert(res.include? AUTHOR_EMAIL)
59
56
  end
60
57
 
61
58
  def test_collect
62
- return # quick return
63
- EmailCollector.set_size(:small);
64
- EmailCollector.set_keywords(['harmony']);
59
+ return if SKIP_SLOW_TESTS
60
+
61
+ EmailCollector.size = :small
62
+ EmailCollector.keywords = ['harmony']
65
63
 
66
64
  EMAILS.each do |email|
67
65
  res = EmailCollector.collect("\"#{email}\"")
@@ -73,15 +71,9 @@ class EmailCollectorTest < Minitest::Unit::TestCase
73
71
  end
74
72
  end
75
73
 
76
- # This is actually usage example
77
- def test_collect_example
78
- #return # quick return
79
- # EmailCollector.set_size(:small)
80
- # EmailCollector.set_keywords([''])
81
-
82
- # res = EmailCollector.collect('site:moikrug.ru', 'yandex.ru')
83
- # res = EmailCollector.collect('site:github.com', 'gmail.com')
84
- # @@logger.debug(res)
74
+ private
75
+ def get_domain(email)
76
+ return email.gsub(/.*@/, '')
85
77
  end
86
78
 
87
79
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexei Fedotov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-22 00:00:00.000000000 Z
11
+ date: 2015-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: google-search
@@ -65,7 +65,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '2.0'
69
69
  required_rubygems_version: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="