esearchy 0.2.0.7 → 0.2.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/bin/esearchy CHANGED
@@ -31,11 +31,11 @@ require 'getoptlong'
31
31
  require 'esearchy'
32
32
 
33
33
  ESearchy::log = true
34
-
35
34
  @yahoo_key = nil
36
35
  @bing_key = nil
37
36
  @maxhits = nil
38
37
  @docs = true
38
+ @profiling = false
39
39
  @params = {}
40
40
  @list = []
41
41
  @output = nil
@@ -64,6 +64,7 @@ opts = GetoptLong.new(
64
64
  [ '--enable-pgp', GetoptLong::NO_ARGUMENT ],
65
65
  [ '--enable-usenet', GetoptLong::NO_ARGUMENT ],
66
66
  [ '--enable-spider', GetoptLong::NO_ARGUMENT ],
67
+ [ '--profiling', GetoptLong::NO_ARGUMENT ],
67
68
  [ '--disable-google', GetoptLong::NO_ARGUMENT ],
68
69
  [ '--disable-yahoo', GetoptLong::NO_ARGUMENT ],
69
70
  [ '--disable-bing', GetoptLong::NO_ARGUMENT ],
@@ -148,7 +149,7 @@ def print_(list)
148
149
  print_linux(item)
149
150
  end
150
151
  when Array
151
- puts item[0].join " " + "-->" + item[1]
152
+ puts item.join " "
152
153
  end
153
154
  end
154
155
  end
@@ -210,6 +211,10 @@ class Output
210
211
  end
211
212
  end
212
213
 
214
+ def save_html(data)
215
+
216
+ end
217
+
213
218
  def save_sqlite(data)
214
219
  require 'sqlite3'
215
220
  @db = SQLite3::Database.new(@output)
@@ -257,10 +262,22 @@ def execute(p)
257
262
  end
258
263
  end
259
264
  end
265
+
266
+ if @profiling
267
+ puts "Gathering Profile data from people"
268
+ puts "----------------------------------\n"
269
+ res = ESearchy::Profiling.new(search.people.uniq)
270
+ res.search
271
+ search.people.concat(res.people)
272
+ search.results.concat(res.results)
273
+ end
274
+
260
275
  @output.save(search.results) if @output
261
- puts "-------FINAL RESULTS--------"
276
+ puts "\n-------==< FINAL RESULTS >==--------"
262
277
  print_ search.emails.uniq
263
- print_ search.people.uniq
278
+ search.people.uniq.each do |person, profile|
279
+ puts person.join(" ") + " -> " + profile
280
+ end
264
281
  end
265
282
  end
266
283
 
@@ -326,6 +343,8 @@ opts.each do |opt, arg|
326
343
  puts "\t Enables PGP searches.\n"
327
344
  puts "--enable-usenet"
328
345
  puts "\t Enables Usenet searches.\n"
346
+ puts "--profiling"
347
+ puts "\t Enables People's profiling.\n"
329
348
  puts "--disable-docs"
330
349
  puts "\t Disables searches inside docs.\n"
331
350
  puts "--disable-google"
@@ -358,11 +377,12 @@ opts.each do |opt, arg|
358
377
  #END OF HELP
359
378
  exit(0)
360
379
  when '--enable-all' then
361
- @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
380
+ @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
362
381
  :Spoke, :JigSaw, :Ziggs, :Plaxo]
363
382
  @email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
364
383
  when '--enable-people' then
365
- @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles, :Spoke]
384
+ @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
385
+ :Spoke, :JigSaw, :Ziggs, :Plaxo]
366
386
  when '--enable-emails' then
367
387
  @email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
368
388
  when '--enable-google' then
@@ -398,43 +418,45 @@ opts.each do |opt, arg|
398
418
  when '--enable-usenet' then
399
419
  @email_engines << :Usenet
400
420
  when '--disable-people' then
401
- @people_engines = []
402
- when '--disable-emails' then
403
- @email_engines = []
404
- when '--disable-google' then
405
- @email_engines.delete(:Google)
406
- when '--disable-yahoo' then
407
- @email_engines.delete(:Yahoo)
408
- when '--disable-bing' then
409
- @email_engines.delete(:Bing)
410
- when '--disable-altavisa' then
411
- @email_engines.delete(:Altavista)
412
- when '--disable-spider' then
413
- @email_engines.delete(:Spider)
414
- when '--disable-linkedin' then
415
- @people_engines.delete(:LinkedIn)
416
- when '--disable-gprofiles' then
417
- @people_engines.delete(:GoogleProfiles)
418
- when '--disable-naymz' then
419
- @people_engines.delete(:Naymz)
420
- when '--disable-classmates' then
421
- @email_engines.delete(:Classmates)
422
- when '--disable-ggroups' then
423
- @email_engines.delete(:GoogleGroups)
424
- when '--disable-spoke' then
425
- @people_engines.delete(:Spoke)
426
- when '--disable-jigsaw' then
427
- @people_engines.delete(:JigSaw)
428
- when '--disable-ziggs' then
429
- @people_engines.delete(:Ziggs)
430
- when '--disable-plaxo' then
431
- @people_engines.delete(:Plaxo)
432
- when '--disable-pgp' then
433
- @email_engines.delete(:PGP)
434
- when '--disable-usenet' then
435
- @email_engines.delete(:Usenet)
436
- when '--disable-docs' then
437
- @docs = false
421
+ @people_engines = []
422
+ when '--disable-emails' then
423
+ @email_engines = []
424
+ when '--disable-google' then
425
+ @email_engines.delete(:Google)
426
+ when '--disable-yahoo' then
427
+ @email_engines.delete(:Yahoo)
428
+ when '--disable-bing' then
429
+ @email_engines.delete(:Bing)
430
+ when '--disable-altavisa' then
431
+ @email_engines.delete(:Altavista)
432
+ when '--disable-spider' then
433
+ @email_engines.delete(:Spider)
434
+ when '--disable-linkedin' then
435
+ @people_engines.delete(:LinkedIn)
436
+ when '--disable-gprofiles' then
437
+ @people_engines.delete(:GoogleProfiles)
438
+ when '--disable-naymz' then
439
+ @people_engines.delete(:Naymz)
440
+ when '--disable-classmates' then
441
+ @email_engines.delete(:Classmates)
442
+ when '--disable-ggroups' then
443
+ @email_engines.delete(:GoogleGroups)
444
+ when '--disable-spoke' then
445
+ @people_engines.delete(:Spoke)
446
+ when '--disable-jigsaw' then
447
+ @people_engines.delete(:JigSaw)
448
+ when '--disable-ziggs' then
449
+ @people_engines.delete(:Ziggs)
450
+ when '--disable-plaxo' then
451
+ @people_engines.delete(:Plaxo)
452
+ when '--disable-pgp' then
453
+ @email_engines.delete(:PGP)
454
+ when '--disable-usenet' then
455
+ @email_engines.delete(:Usenet)
456
+ when '--disable-docs' then
457
+ @docs = false
458
+ when '--profiling' then
459
+ @profiling = true
438
460
  when '--query' then
439
461
  @params[:query] = arg
440
462
  when '--company' then
data/lib/esearchy.rb CHANGED
@@ -5,6 +5,7 @@ require 'cgi'
5
5
  require 'json'
6
6
  require 'digest/sha2'
7
7
  require 'zip/zip'
8
+ require 'uri'
8
9
  require 'zip/zipfilesystem'
9
10
  require 'pdf/reader'
10
11
  if RUBY_PLATFORM =~ /mingw|mswin/
@@ -20,5 +21,6 @@ require 'esearchy/socialengines'
20
21
  require 'esearchy/localengines'
21
22
  require 'esearchy/bugmenot'
22
23
  require 'esearchy/docs'
24
+ require 'esearchy/profiling'
23
25
  require 'esearchy/useragent'
24
26
  require 'esearchy/esearchy'
@@ -23,10 +23,10 @@ module ESearchy
23
23
 
24
24
  def crawl_people(html)
25
25
  html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) \|/).each do |profile|
26
- profile = profile[0].to_s
27
- person = profile[1].split(" ")
28
- @people << [ p, profile ]
29
- @results << [person, "P", profile, self.class.to_s.upcase, "N"]
26
+ pf = profile[0].to_s
27
+ p = profile[1].split(" ")
28
+ @people << [ p, pf ]
29
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
30
30
  end
31
31
  end
32
32
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
29
- profile = profile[0].to_s
30
- person = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [person, "P",profile, self.class.to_s.upcase, "N"]
29
+ pf = profile[0].to_s
30
+ p = profile[1].split(" ")
31
+ @people << [ p, pf ]
32
+ @results << [p, "P",profile, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -25,10 +25,10 @@ module ESearchy
25
25
 
26
26
  def crawl_people(text)
27
27
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
28
- profile = profile[0].to_s
28
+ pf = profile[0].to_s
29
29
  p = profile[1].split(" ")
30
- @people << [ p, profile ]
31
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
30
+ @people << [ p, pf ]
31
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
32
32
  end
33
33
  end
34
34
  end
@@ -24,12 +24,12 @@ module ESearchy
24
24
 
25
25
  def crawl_people(html)
26
26
  html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
27
- profile = profile[0].to_s
28
- person = profile[1].split(" ").delete_if do
27
+ pf = profile[0].to_s
28
+ p = profile[1].split(" ").delete_if do
29
29
  |x| x =~ /mr.|mr|ms.|ms|phd.|dr.|dr|phd|phd./i
30
30
  end
31
- @people << [ p, profile ]
32
- @results << [person, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)&#39;/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -22,13 +22,13 @@ module ESearchy
22
22
  @totalhits = totalhits(hits[0][0].gsub(",","").to_i)
23
23
  end
24
24
  end
25
-
25
+
26
26
  def crawl_people(text)
27
27
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
28
- profile = profile[0].to_s
28
+ pf = profile[0].to_s
29
29
  p = profile[1].split(" ")
30
- @people << [ p, profile ]
31
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
30
+ @people << [ p, pf ]
31
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
32
32
  end
33
33
  end
34
34
  end
@@ -26,10 +26,10 @@ module ESearchy
26
26
 
27
27
  def crawl_people(text)
28
28
  text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
29
- profile = profile[0].to_s
29
+ pf = profile[0].to_s
30
30
  p = profile[1].split(" ")
31
- @people << [ p, profile ]
32
- @results << [p, "P", profile, self.class.to_s.upcase, "N"]
31
+ @people << [ p, pf ]
32
+ @results << [p, "P", pf, self.class.to_s.upcase, "N"]
33
33
  end
34
34
  end
35
35
  end
@@ -3,7 +3,7 @@ def D m
3
3
  end
4
4
 
5
5
  module ESearchy
6
- VERSION = "0.2.0.7"
6
+ VERSION = "0.2.0.8"
7
7
  @@log = false
8
8
 
9
9
  def self.log
@@ -64,7 +64,7 @@ module ESearchy
64
64
  D "Error: Something went wrong :("
65
65
  end
66
66
  end
67
-
67
+
68
68
  def header
69
69
  begin
70
70
  return self.class::HEADER
@@ -0,0 +1,87 @@
1
module ESearchy
  # Second-pass crawler over people that were already found together with a
  # profile URL. Each profile page is downloaded and, depending on which site
  # the URL belongs to, scraped either for co-workers (Spoke, JigSaw) or for
  # links to the person's other social-network accounts (Google, Naymz, Plaxo).
  #
  # Co-workers are collected in #people and #results; social-network links
  # are only reported through the D logging helper.
  class Profiling
    # people  - Array of [name_words, profile_url] pairs collected by #search.
    # results - Array of rows [name_words, "P", profile_url, source, "N"],
    #           where source is the upcased name of the person whose profile
    #           produced the row.
    attr_accessor :people, :results

    # people - Array of [name_words, profile_url] pairs, where name_words is
    #          an Array of Strings. The list is shallow-copied, so entries
    #          appended by the caller afterwards are not crawled.
    def initialize(people)
      @peo = people.clone
      @people = []
      @results = []
    end

    # Crawls the profile of every person handed to the constructor.
    def search
      @peo.each { |person, profile| crawl(person, profile) }
    end

    private

    # Downloads a profile page, following at most `limit` HTTP redirects.
    #
    # uri_str - String URL to fetch.
    # limit   - Integer number of redirects still allowed.
    #
    # Returns the response body, or nil on any failure (network error,
    # non-success status, malformed URL, too many redirects). Fetching is
    # deliberately best-effort: one unreachable profile must not stop the
    # rest of the run.
    def get_profile(uri_str, limit = 10)
      raise ArgumentError, 'HTTP redirect too deep' if limit == 0

      response = Net::HTTP.get_response(URI.parse(uri_str))
      case response
      when Net::HTTPSuccess
        response.body
      when Net::HTTPRedirection
        # Location may be relative; resolve it against the URL just requested.
        get_profile(URI.join(uri_str, response['location']).to_s, limit - 1)
      else
        response.error!
      end
    rescue StandardError
      nil
    end

    # Fetches one profile and dispatches on the site found in its URL.
    #
    # person  - Array of name words.
    # profile - String profile URL.
    #
    # The branch order is significant when a URL mentions more than one site,
    # so the sites without a scraper keep their original position.
    def crawl(person, profile)
      text = get_profile(profile)
      name = person.join(" ")
      unless text
        D "Something went wrong Crawling #{name}'s Profile\n"
        return
      end

      case profile
      when /spoke\.com/
        D "Crawling #{name}'s profile for co-workers:"
        cw = text.scan(/<a class="personLinkTag" href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)">([\w\s]*)<\/a>/)
        add_persons(cw, person, "http://www.spoke.com")
      when /classmates?\.com/
        nil # no scraper for this site
      when /google\.com/
        D "Crawling #{name}'s Google profile for other Social Networks"
        text.scan(/<div class="link"><a class="url" href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" rel="me">([\w\s]*)<\/a>/).each do |url, network|
          D "\t-#{network.strip} : #{url.strip}"
        end
      when /jigsaw\.com/
        D "Crawling #{name}'s JigSaw profile for co-workers:"
        cw = text.scan(/<li><p style="margin-top: 15px"><a href='([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)'>([\w\s]*)<\/a>/)
        add_persons(cw, person, "http://www.jigsaw.com")
      when /linkedin\.com/
        nil # no scraper for this site
      when /naymz\.com/
        D "Crawling #{name}'s Naymz profile for other Social Networks"
        text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" rel="external">[\n\s]*([\w\s]*)\n/).each do |url, network|
          D "\t-#{network.strip} : #{url.strip} "
        end
      when /plaxo\.com/
        D "Crawling #{name}'s Plaxo profile for other Social Networks:"
        text.scan(/rel="me nofollow" title="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)">([\w\s]*)<\/a><\/div><\/td>/).each do |url, username|
          # nil when the title is not a "scheme://host." style URL; indexing
          # the scan result directly used to raise on such titles.
          network = url[/:\/\/(.*)\./, 1]
          D "\t-#{network} : #{username} : #{url} "
        end
      when /ziggs\.com/
        nil # no scraper for this site
      end
    end

    # Records scraped co-workers.
    #
    # cowork - Array of [path, full_name] regexp captures; duplicates ignored.
    # per    - Array of name words of the person whose profile was crawled.
    # url    - String site prefix prepended to each captured path.
    def add_persons(cowork, per, url)
      cowork.uniq.each do |path, full_name|
        pf = url + path.to_s
        name_words = full_name.split(" ")
        D "\t-#{name_words.join(" ")} -> #{pf}"
        @people << [ name_words, pf ]
        # Join explicitly: Array#to_s differs between Ruby versions.
        @results << [name_words, "P", pf, per.join(" ").upcase, "N"]
      end
    end
  end
end
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esearchy
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 79
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 2
8
9
  - 0
9
- - 7
10
- version: 0.2.0.7
10
+ - 8
11
+ version: 0.2.0.8
11
12
  platform: ruby
12
13
  authors:
13
14
  - Matias P. Brutti
@@ -15,16 +16,18 @@ autorequire:
15
16
  bindir: bin
16
17
  cert_chain: []
17
18
 
18
- date: 2010-03-22 00:00:00 -03:00
19
+ date: 2010-05-17 00:00:00 -07:00
19
20
  default_executable:
20
21
  dependencies:
21
22
  - !ruby/object:Gem::Dependency
22
23
  name: pdf-reader
23
24
  prerelease: false
24
25
  requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
25
27
  requirements:
26
28
  - - ">="
27
29
  - !ruby/object:Gem::Version
30
+ hash: 9
28
31
  segments:
29
32
  - 0
30
33
  - 7
@@ -36,9 +39,11 @@ dependencies:
36
39
  name: json
37
40
  prerelease: false
38
41
  requirement: &id002 !ruby/object:Gem::Requirement
42
+ none: false
39
43
  requirements:
40
44
  - - ">="
41
45
  - !ruby/object:Gem::Version
46
+ hash: 1
42
47
  segments:
43
48
  - 1
44
49
  - 1
@@ -50,9 +55,11 @@ dependencies:
50
55
  name: FreedomCoder-rubyzip
51
56
  prerelease: false
52
57
  requirement: &id003 !ruby/object:Gem::Requirement
58
+ none: false
53
59
  requirements:
54
60
  - - ">="
55
61
  - !ruby/object:Gem::Version
62
+ hash: 61
56
63
  segments:
57
64
  - 0
58
65
  - 9
@@ -64,9 +71,11 @@ dependencies:
64
71
  name: spidr
65
72
  prerelease: false
66
73
  requirement: &id004 !ruby/object:Gem::Requirement
74
+ none: false
67
75
  requirements:
68
76
  - - ">="
69
77
  - !ruby/object:Gem::Version
78
+ hash: 21
70
79
  segments:
71
80
  - 0
72
81
  - 2
@@ -95,6 +104,7 @@ files:
95
104
  - lib/esearchy/OtherEngines/spider.rb
96
105
  - lib/esearchy/OtherEngines/usenet.rb
97
106
  - lib/esearchy/otherengines.rb
107
+ - lib/esearchy/profiling.rb
98
108
  - lib/esearchy/SearchEngines/altavista.rb
99
109
  - lib/esearchy/SearchEngines/bing.rb
100
110
  - lib/esearchy/SearchEngines/google.rb
@@ -112,6 +122,7 @@ files:
112
122
  - lib/esearchy/useragent.rb
113
123
  - lib/esearchy.rb
114
124
  - README.rdoc
125
+ - bin/esearchy
115
126
  has_rdoc: true
116
127
  homepage: http://freedomcoder.com.ar/esearchy
117
128
  licenses: []
@@ -122,23 +133,27 @@ rdoc_options: []
122
133
  require_paths:
123
134
  - lib
124
135
  required_ruby_version: !ruby/object:Gem::Requirement
136
+ none: false
125
137
  requirements:
126
138
  - - ">="
127
139
  - !ruby/object:Gem::Version
140
+ hash: 3
128
141
  segments:
129
142
  - 0
130
143
  version: "0"
131
144
  required_rubygems_version: !ruby/object:Gem::Requirement
145
+ none: false
132
146
  requirements:
133
147
  - - ">="
134
148
  - !ruby/object:Gem::Version
149
+ hash: 3
135
150
  segments:
136
151
  - 0
137
152
  version: "0"
138
153
  requirements: []
139
154
 
140
155
  rubyforge_project:
141
- rubygems_version: 1.3.6
156
+ rubygems_version: 1.3.7
142
157
  signing_key:
143
158
  specification_version: 3
144
159
  summary: A library to search for emails in search engines