esearchy 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/esearchy +6 -7
- data/lib/esearchy/OtherEngines/amazonwishlist.rb +18 -0
- data/lib/esearchy/SocialEngines/classmates.rb +1 -0
- data/lib/esearchy/SocialEngines/googleprofiles.rb +1 -0
- data/lib/esearchy/SocialEngines/jigsaw.rb +1 -0
- data/lib/esearchy/SocialEngines/linkedin.rb +2 -1
- data/lib/esearchy/SocialEngines/naymz.rb +1 -0
- data/lib/esearchy/SocialEngines/plaxo.rb +1 -0
- data/lib/esearchy/SocialEngines/spoke.rb +1 -0
- data/lib/esearchy/SocialEngines/ziggs.rb +1 -0
- data/lib/esearchy/compare.rb +18 -0
- data/lib/esearchy/genericengine.rb +3 -3
- metadata +10 -8
data/bin/esearchy
CHANGED
@@ -192,7 +192,7 @@ class Output
|
|
192
192
|
private
|
193
193
|
def save_csv(data)
|
194
194
|
out = File.new(@output, "w")
|
195
|
-
out << "
|
195
|
+
out << "ENTITY, TYPE, SITE, CLASS, MATCH\n"
|
196
196
|
data.each { |r| out << "#{r[0].to_s},#{r[1]},#{r[2]},#{r[3]}\n"}
|
197
197
|
end
|
198
198
|
|
@@ -203,7 +203,7 @@ class Output
|
|
203
203
|
Prawn::Document.generate(@output) do
|
204
204
|
table data,
|
205
205
|
:position => :center,
|
206
|
-
:headers => ["
|
206
|
+
:headers => ["Entity", "Type", "Site", "Class", "Match"],
|
207
207
|
:header_color => "0046f9",
|
208
208
|
:row_colors => :pdf_writer, #["ffffff","ffff00"],
|
209
209
|
:font_size => 10,
|
@@ -221,14 +221,14 @@ class Output
|
|
221
221
|
@db = SQLite3::Database.new(@output)
|
222
222
|
@db.execute("CREATE TABLE IF NOT EXISTS results (
|
223
223
|
id integer primary key asc,
|
224
|
-
|
224
|
+
entity text,
|
225
225
|
type char,
|
226
226
|
site text,
|
227
227
|
class text,
|
228
228
|
match char);")
|
229
229
|
|
230
230
|
data.each do |r|
|
231
|
-
@db.execute("INSERT INTO results (
|
231
|
+
@db.execute("INSERT INTO results (entity,type,site,class,match)
|
232
232
|
VALUES (\"#{r[0].to_s}\",\"#{r[1]}\",\"#{r[2]}\",\"#{r[3]}\",\"#{r[4]}\");")
|
233
233
|
end
|
234
234
|
#@db.commit
|
@@ -380,8 +380,8 @@ opts.each do |opt, arg|
|
|
380
380
|
#END OF HELP
|
381
381
|
exit(0)
|
382
382
|
when '--enable-all' then
|
383
|
-
@people_engines = [:LinkedIn, :Naymz,
|
384
|
-
:Spoke,
|
383
|
+
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
384
|
+
:Spoke, :JigSaw, :Ziggs, :Plaxo]
|
385
385
|
@email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
|
386
386
|
when '--enable-people' then
|
387
387
|
@people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
|
@@ -504,7 +504,6 @@ puts "- if you want to use GoogleProfiles, LinkedIn, Classmates or Naymz,"
|
|
504
504
|
puts " you will need to use the --company (-c) <company_name> option"
|
505
505
|
puts "- If you want to spider a website you need to use the --website (-w) <URL> option"
|
506
506
|
|
507
|
-
|
508
507
|
unless File.exist?(ENV['HOME'] + "/.esearchyrc")
|
509
508
|
configure(@params[:maxhits],@yahoo_key,@bing_key)
|
510
509
|
else
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ESearchy
|
2
|
+
module OtherEngines
|
3
|
+
class AmazonWishList < ESearchy::GenericEngine
|
4
|
+
ENGINE = "" #Do not really need any of them.
|
5
|
+
PORT = 0
|
6
|
+
NUM = 0
|
7
|
+
TYPE = 1
|
8
|
+
|
9
|
+
def search
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse( html )
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -24,6 +24,7 @@ module ESearchy
|
|
24
24
|
def crawl_people(html)
|
25
25
|
html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) \|/).each do |profile|
|
26
26
|
pf = profile[0].to_s
|
27
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
27
28
|
p = profile[1].split(" ")
|
28
29
|
@people << [ p, pf ]
|
29
30
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -27,6 +27,7 @@ module ESearchy
|
|
27
27
|
def crawl_people(text)
|
28
28
|
text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
|
29
29
|
pf = profile[0].to_s
|
30
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
30
31
|
p = profile[1].split(" ")
|
31
32
|
@people << [ p, pf ]
|
32
33
|
@results << [p, "P",profile, self.class.to_s.upcase, "N"]
|
@@ -27,6 +27,7 @@ module ESearchy
|
|
27
27
|
def crawl_people(text)
|
28
28
|
text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
|
29
29
|
pf = profile[0].to_s
|
30
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
30
31
|
p = profile[1].split(" ")
|
31
32
|
@people << [ p, pf ]
|
32
33
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -7,7 +7,7 @@ module ESearchy
|
|
7
7
|
TYPE = 2
|
8
8
|
|
9
9
|
def search
|
10
|
-
@querypath = "/cse?q=site%3Awww.linkedin.com/in+%22at+" +
|
10
|
+
@querypath = "/cse?q=site%3Awww.linkedin.com/pub+in+%22at+" +
|
11
11
|
CGI.escape(@company) + "%22&hl=en&cof=&num=100&filter=0&safe=off&start=" or
|
12
12
|
raise ESearchyMissingCompany, "Mssing website url Object.company=(value)"
|
13
13
|
super
|
@@ -26,6 +26,7 @@ module ESearchy
|
|
26
26
|
def crawl_people(text)
|
27
27
|
text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
|
28
28
|
pf = profile[0].to_s
|
29
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
29
30
|
p = profile[1].split(" ")
|
30
31
|
@people << [ p, pf ]
|
31
32
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -25,6 +25,7 @@ module ESearchy
|
|
25
25
|
def crawl_people(html)
|
26
26
|
html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
|
27
27
|
pf = profile[0].to_s
|
28
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
28
29
|
p = profile[1].split(" ").delete_if do
|
29
30
|
|x| x =~ /mr.|mr|ms.|ms|phd.|dr.|dr|phd|phd./i
|
30
31
|
end
|
@@ -27,6 +27,7 @@ module ESearchy
|
|
27
27
|
def crawl_people(text)
|
28
28
|
text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)'/).each do |profile|
|
29
29
|
pf = profile[0].to_s
|
30
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
30
31
|
p = profile[1].split(" ")
|
31
32
|
@people << [ p, pf ]
|
32
33
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -26,6 +26,7 @@ module ESearchy
|
|
26
26
|
def crawl_people(text)
|
27
27
|
text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
|
28
28
|
pf = profile[0].to_s
|
29
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
29
30
|
p = profile[1].split(" ")
|
30
31
|
@people << [ p, pf ]
|
31
32
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -27,6 +27,7 @@ module ESearchy
|
|
27
27
|
def crawl_people(text)
|
28
28
|
text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
|
29
29
|
pf = profile[0].to_s
|
30
|
+
pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
|
30
31
|
p = profile[1].split(" ")
|
31
32
|
@people << [ p, pf ]
|
32
33
|
@results << [p, "P", pf, self.class.to_s.upcase, "N"]
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#module ESearchy
|
2
|
+
# class Compare
|
3
|
+
# def distance(s1,s2)
|
4
|
+
# return 100 if s1.strip.downcase == s2.strip.downcase
|
5
|
+
# b_s1 = s1.unpack("B*")[0]
|
6
|
+
# b_s2 = s2.unpack("B*")[0]
|
7
|
+
# b_s1.size > b_s2.size ? (b1 = b_s1; b2 = b_s2;) : (b1 = b_s2; b2 = b_s1;)
|
8
|
+
# p b1
|
9
|
+
# p b2
|
10
|
+
# dist = 100
|
11
|
+
# dist -= b1.size - b2.size
|
12
|
+
# b1[0,b2.size].each_with_index do |chr,idx|
|
13
|
+
# dist -= 1 if chr != b2[idx]
|
14
|
+
# end
|
15
|
+
# return dist
|
16
|
+
# end
|
17
|
+
# end
|
18
|
+
#end
|
@@ -54,8 +54,8 @@ module ESearchy
|
|
54
54
|
block.call(response)
|
55
55
|
when Net::HTTPRedirection
|
56
56
|
get(URI.parse(response['location']).host,
|
57
|
-
URI.parse(response['location']).port,
|
58
|
-
querystring, headers, limit - 1, block)
|
57
|
+
URI.parse(response['location']).port.to_i,
|
58
|
+
querystring, headers, limit - 1, &block)
|
59
59
|
else
|
60
60
|
return response.error!
|
61
61
|
end
|
@@ -65,7 +65,7 @@ module ESearchy
|
|
65
65
|
rescue Net::HTTPServerException
|
66
66
|
D "Error: Something went wrong with the HTTP request"
|
67
67
|
rescue
|
68
|
-
D "Error: Something went wrong :("
|
68
|
+
D "Error: Something went wrong :( + #{$!}"
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: esearchy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
-
|
10
|
-
version: 0.2.
|
9
|
+
- 2
|
10
|
+
version: 0.2.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matias P. Brutti
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-04-06 00:00:00 -07:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -51,19 +51,19 @@ dependencies:
|
|
51
51
|
type: :runtime
|
52
52
|
version_requirements: *id002
|
53
53
|
- !ruby/object:Gem::Dependency
|
54
|
-
name:
|
54
|
+
name: rubyzip
|
55
55
|
prerelease: false
|
56
56
|
requirement: &id003 !ruby/object:Gem::Requirement
|
57
57
|
none: false
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
hash:
|
61
|
+
hash: 51
|
62
62
|
segments:
|
63
63
|
- 0
|
64
64
|
- 9
|
65
|
-
-
|
66
|
-
version: 0.9.
|
65
|
+
- 4
|
66
|
+
version: 0.9.4
|
67
67
|
type: :runtime
|
68
68
|
version_requirements: *id003
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -108,11 +108,13 @@ extra_rdoc_files:
|
|
108
108
|
- README.rdoc
|
109
109
|
files:
|
110
110
|
- lib/esearchy/bugmenot.rb
|
111
|
+
- lib/esearchy/compare.rb
|
111
112
|
- lib/esearchy/docs.rb
|
112
113
|
- lib/esearchy/esearchy.rb
|
113
114
|
- lib/esearchy/genericengine.rb
|
114
115
|
- lib/esearchy/LocalEngines/directory.rb
|
115
116
|
- lib/esearchy/localengines.rb
|
117
|
+
- lib/esearchy/OtherEngines/amazonwishlist.rb
|
116
118
|
- lib/esearchy/OtherEngines/googlegroups.rb
|
117
119
|
- lib/esearchy/OtherEngines/ldap.rb
|
118
120
|
- lib/esearchy/OtherEngines/pgp.rb
|