esearchy 0.2.0.7 → 0.2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/esearchy CHANGED
@@ -31,11 +31,11 @@ require 'getoptlong'
31
31
  require 'esearchy'
32
32
 
33
33
  ESearchy::log = true
34
-
35
34
  @yahoo_key = nil
36
35
  @bing_key = nil
37
36
  @maxhits = nil
38
37
  @docs = true
38
+ @profiling = false
39
39
  @params = {}
40
40
  @list = []
41
41
  @output = nil
@@ -64,6 +64,7 @@ opts = GetoptLong.new(
64
64
  [ '--enable-pgp', GetoptLong::NO_ARGUMENT ],
65
65
  [ '--enable-usenet', GetoptLong::NO_ARGUMENT ],
66
66
  [ '--enable-spider', GetoptLong::NO_ARGUMENT ],
67
+ [ '--profiling', GetoptLong::NO_ARGUMENT ],
67
68
  [ '--disable-google', GetoptLong::NO_ARGUMENT ],
68
69
  [ '--disable-yahoo', GetoptLong::NO_ARGUMENT ],
69
70
  [ '--disable-bing', GetoptLong::NO_ARGUMENT ],
@@ -148,7 +149,7 @@ def print_(list)
148
149
  print_linux(item)
149
150
  end
150
151
  when Array
151
- puts item[0].join " " + "-->" + item[1]
152
+ puts item.join " "
152
153
  end
153
154
  end
154
155
  end
@@ -210,6 +211,10 @@ class Output
210
211
  end
211
212
  end
212
213
 
214
# Stub for an HTML export of `data`, added in this revision. The body is
# empty, so as written it performs no work and returns nil.
# NOTE(review): presumably meant to mirror save_sqlite below — confirm intent.
+ def save_html(data)
215
+
216
+ end
217
+
213
218
  def save_sqlite(data)
214
219
  require 'sqlite3'
215
220
  @db = SQLite3::Database.new(@output)
@@ -257,10 +262,22 @@ def execute(p)
257
262
  end
258
263
  end
259
264
  end
265
+
266
+ if @profiling
267
+ puts "Gathering Profile data from people"
268
+ puts "----------------------------------\n"
269
+ res = ESearchy::Profiling.new(search.people.uniq)
270
+ res.search
271
+ search.people.concat(res.people)
272
+ search.results.concat(res.results)
273
+ end
274
+
260
275
  @output.save(search.results) if @output
261
- puts "-------FINAL RESULTS--------"
276
+ puts "\n-------==< FINAL RESULTS >==--------"
262
277
  print_ search.emails.uniq
263
- print_ search.people.uniq
278
+ search.people.uniq.each do |person, profile|
279
+ puts person.join(" ") + " -> " + profile
280
+ end
264
281
  end
265
282
  end
266
283
 
@@ -326,6 +343,8 @@ opts.each do |opt, arg|
326
343
  puts "\t Enables PGP searches.\n"
327
344
  puts "--enable-usenet"
328
345
  puts "\t Enables Usenet searches.\n"
346
+ puts "--profiling"
347
+ puts "\t Enables People's profiling.\n"
329
348
  puts "--disable-docs"
330
349
  puts "\t Disables searches inside docs.\n"
331
350
  puts "--disable-google"
@@ -358,11 +377,12 @@ opts.each do |opt, arg|
358
377
  #END OF HELP
359
378
  exit(0)
360
379
  when '--enable-all' then
361
- @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
380
+ @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
362
381
  :Spoke, :JigSaw, :Ziggs, :Plaxo]
363
382
  @email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
364
383
  when '--enable-people' then
365
- @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles, :Spoke]
384
+ @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
385
+ :Spoke, :JigSaw, :Ziggs, :Plaxo]
366
386
  when '--enable-emails' then
367
387
  @email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
368
388
  when '--enable-google' then
@@ -398,43 +418,45 @@ opts.each do |opt, arg|
398
418
  when '--enable-usenet' then
399
419
  @email_engines << :Usenet
400
420
  when '--disable-people' then
401
- @people_engines = []
402
- when '--disable-emails' then
403
- @email_engines = []
404
- when '--disable-google' then
405
- @email_engines.delete(:Google)
406
- when '--disable-yahoo' then
407
- @email_engines.delete(:Yahoo)
408
- when '--disable-bing' then
409
- @email_engines.delete(:Bing)
410
- when '--disable-altavisa' then
411
- @email_engines.delete(:Altavista)
412
- when '--disable-spider' then
413
- @email_engines.delete(:Spider)
414
- when '--disable-linkedin' then
415
- @people_engines.delete(:LinkedIn)
416
- when '--disable-gprofiles' then
417
- @people_engines.delete(:GoogleProfiles)
418
- when '--disable-naymz' then
419
- @people_engines.delete(:Naymz)
420
- when '--disable-classmates' then
421
- @email_engines.delete(:Classmates)
422
- when '--disable-ggroups' then
423
- @email_engines.delete(:GoogleGroups)
424
- when '--disable-spoke' then
425
- @people_engines.delete(:Spoke)
426
- when '--disable-jigsaw' then
427
- @people_engines.delete(:JigSaw)
428
- when '--disable-ziggs' then
429
- @people_engines.delete(:Ziggs)
430
- when '--disable-plaxo' then
431
- @people_engines.delete(:Plaxo)
432
- when '--disable-pgp' then
433
- @email_engines.delete(:PGP)
434
- when '--disable-usenet' then
435
- @email_engines.delete(:Usenet)
436
- when '--disable-docs' then
437
- @docs = false
421
+ @people_engines = []
422
+ when '--disable-emails' then
423
+ @email_engines = []
424
+ when '--disable-google' then
425
+ @email_engines.delete(:Google)
426
+ when '--disable-yahoo' then
427
+ @email_engines.delete(:Yahoo)
428
+ when '--disable-bing' then
429
+ @email_engines.delete(:Bing)
430
+ when '--disable-altavisa' then
431
+ @email_engines.delete(:Altavista)
432
+ when '--disable-spider' then
433
+ @email_engines.delete(:Spider)
434
+ when '--disable-linkedin' then
435
+ @people_engines.delete(:LinkedIn)
436
+ when '--disable-gprofiles' then
437
+ @people_engines.delete(:GoogleProfiles)
438
+ when '--disable-naymz' then
439
+ @people_engines.delete(:Naymz)
440
+ when '--disable-classmates' then
441
+ @email_engines.delete(:Classmates)
442
+ when '--disable-ggroups' then
443
+ @email_engines.delete(:GoogleGroups)
444
+ when '--disable-spoke' then
445
+ @people_engines.delete(:Spoke)
446
+ when '--disable-jigsaw' then
447
+ @people_engines.delete(:JigSaw)
448
+ when '--disable-ziggs' then
449
+ @people_engines.delete(:Ziggs)
450
+ when '--disable-plaxo' then
451
+ @people_engines.delete(:Plaxo)
452
+ when '--disable-pgp' then
453
+ @email_engines.delete(:PGP)
454
+ when '--disable-usenet' then
455
+ @email_engines.delete(:Usenet)
456
+ when '--disable-docs' then
457
+ @docs = false
458
+ when '--profiling' then
459
+ @profiling = true
438
460
  when '--query' then
439
461
  @params[:query] = arg
440
462
  when '--company' then
data/lib/esearchy.rb CHANGED
@@ -5,6 +5,7 @@ require 'cgi'
5
5
  require 'json'
6
6
  require 'digest/sha2'
7
7
  require 'zip/zip'
8
+ require 'uri'
8
9
  require 'zip/zipfilesystem'
9
10
  require 'pdf/reader'
10
11
  if RUBY_PLATFORM =~ /mingw|mswin/
@@ -20,5 +21,6 @@ require 'esearchy/socialengines'
20
21
  require 'esearchy/localengines'
21
22
  require 'esearchy/bugmenot'
22
23
  require 'esearchy/docs'
24
+ require 'esearchy/profiling'
23
25
  require 'esearchy/useragent'
24
26
  require 'esearchy/esearchy'
@@ -23,10 +23,10 @@ module ESearchy
23
23
 
24
24
  def crawl_people(html)
25
25
  html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) \|/).each do |profile|
26
- profile = profile[0].to_s
27
- person = profile[1].split(" ")
28
- @people << [ p, profile ]
29
- @results << [person, "P", profile, self.class.to_s.upcase, "N"]
26
+ pf = profile[0].to_s
27
+ p = profile[1].split(" ")
28
+ @people << [ p, pf ]
29
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
30
30
  end
31
31
  end
32
32
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
29
- profile = profile[0].to_s
30
- person = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [person, "P",profile, self.class.to_s.upcase, "N"]
29
+ pf = profile[0].to_s
30
+ p = profile[1].split(" ")
31
+ @people << [ p, pf ]
32
+ @results << [p, "P",profile, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -25,10 +25,10 @@ module ESearchy
25
25
 
26
26
  def crawl_people(text)
27
27
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
28
- profile = profile[0].to_s
28
+ pf = profile[0].to_s
29
29
  p = profile[1].split(" ")
30
- @people << [ p, profile ]
31
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
30
+ @people << [ p, pf ]
31
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
32
32
  end
33
33
  end
34
34
  end
@@ -24,12 +24,12 @@ module ESearchy
24
24
 
25
25
  def crawl_people(html)
26
26
  html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
27
- profile = profile[0].to_s
28
- person = profile[1].split(" ").delete_if do
27
+ pf = profile[0].to_s
28
+ p = profile[1].split(" ").delete_if do
29
29
  |x| x =~ /mr.|mr|ms.|ms|phd.|dr.|dr|phd|phd./i
30
30
  end
31
- @people << [ p, profile ]
32
- @results << [person, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)&#39;/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -22,13 +22,13 @@ module ESearchy
22
22
  @totalhits = totalhits(hits[0][0].gsub(",","").to_i)
23
23
  end
24
24
  end
25
-
25
+
26
26
  def crawl_people(text)
27
27
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
28
- profile = profile[0].to_s
28
+ pf = profile[0].to_s
29
29
  p = profile[1].split(" ")
30
- @people << [ p, profile ]
31
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
30
+ @people << [ p, pf ]
31
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
32
32
  end
33
33
  end
34
34
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -3,7 +3,7 @@ def D m
3
3
  end
4
4
 
5
5
  module ESearchy
6
- VERSION = "0.2.0.7"
6
+ VERSION = "0.2.0.8"
7
7
  @@log = false
8
8
 
9
9
  def self.log
@@ -64,7 +64,7 @@ module ESearchy
64
64
  D "Error: Something went wrong :("
65
65
  end
66
66
  end
67
-
67
+
68
68
  def header
69
69
  begin
70
70
  return self.class::HEADER
@@ -0,0 +1,87 @@
1
module ESearchy
  # Follows the profile URL recorded for each person and mines the fetched
  # page for further information:
  #
  # * Spoke and JigSaw pages are scanned for co-worker links. Each co-worker
  #   is appended to #people as [name_parts, profile_url] and to #results as
  #   [name_parts, "P", profile_url, SOURCE PERSON NAME, "N"].
  # * Google, Naymz and Plaxo pages are scanned for links to other networks;
  #   those are only reported through the D logging helper.
  # * Classmates, LinkedIn and Ziggs profiles are recognised but not crawled.
  #
  # Relies on Net::HTTP, URI and the top-level D helper being loaded by the
  # requiring file (none of them is required here).
  class Profiling
    attr_accessor :people, :results

    # people:: array of [name_parts, profile_url] pairs; a shallow copy is
    #          kept so the caller's array can be modified afterwards.
    def initialize(people)
      @peo = people.clone
      @people = []
      @results = []
    end

    # Crawls the profile of every person passed to the constructor.
    def search
      @peo.each { |person, profile| crawl(person, profile) }
    end

    private

    # Fetches +uri_str+ and returns the response body, following at most
    # +limit+ redirects. Returns nil on any StandardError (bad URL, network
    # failure, non-success status, too many redirects): fetching is
    # best-effort and the caller reports the failure.
    def get_profile(uri_str, limit = 10)
      raise ArgumentError, 'HTTP redirect too deep' if limit == 0
      response = Net::HTTP.get_response(URI.parse(uri_str))
      case response
      when Net::HTTPSuccess then response.body
      when Net::HTTPRedirection
        # Location may be relative; resolve it against the URL just requested.
        get_profile(URI.join(uri_str, response['location']).to_s, limit - 1)
      else
        response.error!
      end
    rescue StandardError
      nil
    end

    # Downloads +profile+ and dispatches on the host found in the URL.
    # person::  array of name parts, used for log messages and as the source
    #           of any co-workers found.
    # profile:: profile URL string.
    def crawl(person, profile)
      text = get_profile(profile)
      unless text
        D "Something went wrong Crawling #{person.join(" ")}'s Profile\n"
        return nil
      end

      case profile
      when /spoke\.com/
        D "Crawling #{person.join(" ")}'s profile for co-workers:"
        cw = text.scan(/<a class="personLinkTag" href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)">([\w\s]*)<\/a>/)
        add_persons(cw, person, "http://www.spoke.com")
      when /classmates?\.com/
        nil # recognised, nothing to crawl
      when /google\.com/
        D "Crawling #{person.join(" ")}'s Google profile for other Social Networks"
        text.scan(/<div class="link"><a class="url" href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" rel="me">([\w\s]*)<\/a>/).each do |url, network|
          D "\t-#{network.strip} : #{url.strip}"
        end
      when /jigsaw\.com/
        D "Crawling #{person.join(" ")}'s JigSaw profile for co-workers:"
        cw = text.scan(/<li><p style="margin-top: 15px"><a href='([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)'>([\w\s]*)<\/a>/)
        add_persons(cw, person, "http://www.jigsaw.com")
      when /linkedin\.com/
        nil # recognised, nothing to crawl
      when /naymz\.com/
        D "Crawling #{person.join(" ")}'s Naymz profile for other Social Networks"
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" rel="external">[\n\s]*([\w\s]*)\n/).each do |url, network|
          D "\t-#{network.strip} : #{url.strip} "
        end
      when /plaxo\.com/
        D "Crawling #{person.join(" ")}'s Plaxo profile for other Social Networks:"
        text.scan(/rel="me nofollow" title="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)">([\w\s]*)<\/a><\/div><\/td>/).each do |url, username|
          # nil (logged as an empty string) when the URL has no "://...." part,
          # instead of raising NoMethodError on a failed match.
          network = url[/:\/\/(.*)\./, 1]
          D "\t-#{network} : #{username} : #{url} "
        end
      when /ziggs\.com/
        nil # recognised, nothing to crawl
      end
    end

    # Records every distinct [path, name] pair in +cowork+ as a new person.
    # cowork:: array of [relative_link, name] matches.
    # per::    name parts of the person whose profile listed the co-workers;
    #          stored upcased and space-joined in the result row.
    # url::    site prefix prepended to each relative link.
    def add_persons(cowork, per, url)
      source = Array(per).join(" ").upcase
      cowork.uniq.each do |path, name|
        pf = url + path.to_s
        name_parts = name.split(" ")
        D "\t-#{name_parts.join(" ")} -> #{pf}"
        @people << [name_parts, pf]
        @results << [name_parts, "P", pf, source, "N"]
      end
    end
  end
end
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esearchy
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 79
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 2
8
9
  - 0
9
- - 7
10
- version: 0.2.0.7
10
+ - 8
11
+ version: 0.2.0.8
11
12
  platform: ruby
12
13
  authors:
13
14
  - Matias P. Brutti
@@ -15,16 +16,18 @@ autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-03-22 00:00:00 -03:00
19
+ date: 2010-05-17 00:00:00 -07:00
19
20
  default_executable:
20
21
  dependencies:
21
22
  - !ruby/object:Gem::Dependency
22
23
  name: pdf-reader
23
24
  prerelease: false
24
25
  requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
25
27
  requirements:
26
28
  - - ">="
27
29
  - !ruby/object:Gem::Version
30
+ hash: 9
28
31
  segments:
29
32
  - 0
30
33
  - 7
@@ -36,9 +39,11 @@ dependencies:
36
39
  name: json
37
40
  prerelease: false
38
41
  requirement: &id002 !ruby/object:Gem::Requirement
42
+ none: false
39
43
  requirements:
40
44
  - - ">="
41
45
  - !ruby/object:Gem::Version
46
+ hash: 1
42
47
  segments:
43
48
  - 1
44
49
  - 1
@@ -50,9 +55,11 @@ dependencies:
50
55
  name: FreedomCoder-rubyzip
51
56
  prerelease: false
52
57
  requirement: &id003 !ruby/object:Gem::Requirement
58
+ none: false
53
59
  requirements:
54
60
  - - ">="
55
61
  - !ruby/object:Gem::Version
62
+ hash: 61
56
63
  segments:
57
64
  - 0
58
65
  - 9
@@ -64,9 +71,11 @@ dependencies:
64
71
  name: spidr
65
72
  prerelease: false
66
73
  requirement: &id004 !ruby/object:Gem::Requirement
74
+ none: false
67
75
  requirements:
68
76
  - - ">="
69
77
  - !ruby/object:Gem::Version
78
+ hash: 21
70
79
  segments:
71
80
  - 0
72
81
  - 2
@@ -95,6 +104,7 @@ files:
95
104
  - lib/esearchy/OtherEngines/spider.rb
96
105
  - lib/esearchy/OtherEngines/usenet.rb
97
106
  - lib/esearchy/otherengines.rb
107
+ - lib/esearchy/profiling.rb
98
108
  - lib/esearchy/SearchEngines/altavista.rb
99
109
  - lib/esearchy/SearchEngines/bing.rb
100
110
  - lib/esearchy/SearchEngines/google.rb
@@ -112,6 +122,7 @@ files:
112
122
  - lib/esearchy/useragent.rb
113
123
  - lib/esearchy.rb
114
124
  - README.rdoc
125
+ - bin/esearchy
115
126
  has_rdoc: true
116
127
  homepage: http://freedomcoder.com.ar/esearchy
117
128
  licenses: []
@@ -122,23 +133,27 @@ rdoc_options: []
122
133
  require_paths:
123
134
  - lib
124
135
  required_ruby_version: !ruby/object:Gem::Requirement
136
+ none: false
125
137
  requirements:
126
138
  - - ">="
127
139
  - !ruby/object:Gem::Version
140
+ hash: 3
128
141
  segments:
129
142
  - 0
130
143
  version: "0"
131
144
  required_rubygems_version: !ruby/object:Gem::Requirement
145
+ none: false
132
146
  requirements:
133
147
  - - ">="
134
148
  - !ruby/object:Gem::Version
149
+ hash: 3
135
150
  segments:
136
151
  - 0
137
152
  version: "0"
138
153
  requirements: []
139
154
 
140
155
  rubyforge_project:
141
- rubygems_version: 1.3.6
156
+ rubygems_version: 1.3.7
142
157
  signing_key:
143
158
  specification_version: 3
144
159
  summary: A library to search for emails in search engines