esearchy 0.2.1 → 0.2.2
- data/bin/esearchy +6 -7
- data/lib/esearchy/OtherEngines/amazonwishlist.rb +18 -0
- data/lib/esearchy/SocialEngines/classmates.rb +1 -0
- data/lib/esearchy/SocialEngines/googleprofiles.rb +1 -0
- data/lib/esearchy/SocialEngines/jigsaw.rb +1 -0
- data/lib/esearchy/SocialEngines/linkedin.rb +2 -1
- data/lib/esearchy/SocialEngines/naymz.rb +1 -0
- data/lib/esearchy/SocialEngines/plaxo.rb +1 -0
- data/lib/esearchy/SocialEngines/spoke.rb +1 -0
- data/lib/esearchy/SocialEngines/ziggs.rb +1 -0
- data/lib/esearchy/compare.rb +18 -0
- data/lib/esearchy/genericengine.rb +3 -3
- metadata +10 -8
data/bin/esearchy
CHANGED
@@ -192,7 +192,7 @@ class Output
   private
   def save_csv(data)
     out = File.new(@output, "w")
-    out << "
+    out << "ENTITY, TYPE, SITE, CLASS, MATCH\n"
     data.each { |r| out << "#{r[0].to_s},#{r[1]},#{r[2]},#{r[3]}\n"}
   end

@@ -203,7 +203,7 @@ class Output
     Prawn::Document.generate(@output) do
       table data,
        :position => :center,
-       :headers => ["
+       :headers => ["Entity", "Type", "Site", "Class", "Match"],
        :header_color => "0046f9",
        :row_colors => :pdf_writer, #["ffffff","ffff00"],
        :font_size => 10,
@@ -221,14 +221,14 @@ class Output
     @db = SQLite3::Database.new(@output)
     @db.execute("CREATE TABLE IF NOT EXISTS results (
                  id integer primary key asc,
-
+                 entity text,
                  type char,
                  site text,
                  class text,
                  match char);")

     data.each do |r|
-      @db.execute("INSERT INTO results (
+      @db.execute("INSERT INTO results (entity,type,site,class,match)
                    VALUES (\"#{r[0].to_s}\",\"#{r[1]}\",\"#{r[2]}\",\"#{r[3]}\",\"#{r[4]}\");")
     end
     #@db.commit
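Note: even with the named columns, this INSERT still interpolates values directly into the SQL string. A sketch of the same insert using the sqlite3 gem's bound parameters (an alternative shown for illustration, not what the gem ships):

    require 'sqlite3'

    db = SQLite3::Database.new(@output)
    data.each do |r|
      # "?" placeholders let sqlite3 handle quoting/escaping of each value
      db.execute("INSERT INTO results (entity,type,site,class,match) VALUES (?,?,?,?,?)",
                 [r[0].to_s, r[1], r[2], r[3], r[4]])
    end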
@@ -380,8 +380,8 @@ opts.each do |opt, arg|
     #END OF HELP
     exit(0)
   when '--enable-all' then
-    @people_engines = [:LinkedIn, :Naymz,
-                       :Spoke,
+    @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
+                       :Spoke, :JigSaw, :Ziggs, :Plaxo]
     @email_engines = [:Google, :Bing, :Yahoo, :Altavista, :PGP, :Spider ,:Usenet, :GoogleGroups ]
   when '--enable-people' then
     @people_engines = [:LinkedIn, :Naymz, :Classmates, :GoogleProfiles,
@@ -504,7 +504,6 @@ puts "- if you want to use GoogleProfiles, LinkedIn, Classmates or Naymz,"
 puts " you will need to use the --company (-c) <company_name> option"
 puts "- If you want to spider a website you need to use the --website (-w) <URL> option"

-
 unless File.exist?(ENV['HOME'] + "/.esearchyrc")
   configure(@params[:maxhits],@yahoo_key,@bing_key)
 else
data/lib/esearchy/OtherEngines/amazonwishlist.rb
ADDED
@@ -0,0 +1,18 @@
+module ESearchy
+  module OtherEngines
+    class AmazonWishList < ESearchy::GenericEngine
+      ENGINE = "" #Do not really need any of them.
+      PORT = 0
+      NUM = 0
+      TYPE = 1
+
+      def search
+
+      end
+
+      def parse( html )
+      end
+
+    end
+  end
+end
data/lib/esearchy/SocialEngines/classmates.rb
CHANGED
@@ -24,6 +24,7 @@ module ESearchy
     def crawl_people(html)
       html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) \|/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
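This same line is added to several of the SocialEngines files below; it unwraps Google's /url?q= redirect wrapper before the profile URL is stored. A minimal illustration of the intent, using a made-up redirect URL rather than code from the gem:

    # Hypothetical input: Google result links sometimes arrive wrapped as /url?q=<target>&...
    redirect = "/url?q=http://www.example.com/profile/jane&sa=U&ei=abc"
    profile_url = redirect[/\/url\?q=([^&]+)/, 1]
    # => "http://www.example.com/profile/jane"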
data/lib/esearchy/SocialEngines/googleprofiles.rb
CHANGED
@@ -27,6 +27,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P",profile, self.class.to_s.upcase, "N"]
data/lib/esearchy/SocialEngines/jigsaw.rb
CHANGED
@@ -27,6 +27,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="(http\:\/\/www.jigsaw.com\/scid[0-9A-Za-z\/?&=@+%.;'_-]+\.xhtml)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
data/lib/esearchy/SocialEngines/linkedin.rb
CHANGED
@@ -7,7 +7,7 @@ module ESearchy
     TYPE = 2

     def search
-      @querypath = "/cse?q=site%3Awww.linkedin.com/in+%22at+" +
+      @querypath = "/cse?q=site%3Awww.linkedin.com/pub+in+%22at+" +
                    CGI.escape(@company) + "%22&hl=en&cof=&num=100&filter=0&safe=off&start=" or
                    raise ESearchyMissingCompany, "Mssing website url Object.company=(value)"
       super
@@ -26,6 +26,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
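The search change points the Google CSE query at linkedin.com/pub pages instead of linkedin.com/in. With a hypothetical company name, the request path built by the new line would expand roughly as follows (illustration only):

    require 'cgi'

    company = "Acme Corp"   # hypothetical value
    querypath = "/cse?q=site%3Awww.linkedin.com/pub+in+%22at+" +
                CGI.escape(company) +
                "%22&hl=en&cof=&num=100&filter=0&safe=off&start="
    # => "/cse?q=site%3Awww.linkedin.com/pub+in+%22at+Acme+Corp%22&hl=en&cof=&num=100&filter=0&safe=off&start="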
data/lib/esearchy/SocialEngines/naymz.rb
CHANGED
@@ -25,6 +25,7 @@ module ESearchy
     def crawl_people(html)
       html.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ").delete_if do
           |x| x =~ /mr.|mr|ms.|ms|phd.|dr.|dr|phd|phd./i
         end
data/lib/esearchy/SocialEngines/plaxo.rb
CHANGED
@@ -27,6 +27,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*)'/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
data/lib/esearchy/SocialEngines/spoke.rb
CHANGED
@@ -26,6 +26,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*),/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
data/lib/esearchy/SocialEngines/ziggs.rb
CHANGED
@@ -27,6 +27,7 @@ module ESearchy
     def crawl_people(text)
       text.scan(/<a href="([0-9A-Za-z:\\\/?&=@+%.;"'()_-]+)" class=l[\sonmousedown="return clk(this.href,'','','res','\d','')"]*>([\w\s]*) -/).each do |profile|
         pf = profile[0].to_s
+        pf = pf.scan(/\/url\?q=([0-9A-Za-z:\\\/?=@+%.;"'()_-]+)&/).to_s if pf.match(/\/url\?q=/)
         p = profile[1].split(" ")
         @people << [ p, pf ]
         @results << [p, "P", pf, self.class.to_s.upcase, "N"]
data/lib/esearchy/compare.rb
ADDED
@@ -0,0 +1,18 @@
+#module ESearchy
+#  class Compare
+#    def distance(s1,s2)
+#      return 100 if s1.strip.downcase == s2.strip.downcase
+#      b_s1 = s1.unpack("B*")[0]
+#      b_s2 = s2.unpack("B*")[0]
+#      b_s1.size > b_s2.size ? (b1 = b_s1; b2 = b_s2;) : (b1 = b_s2; b2 = b_s1;)
+#      p b1
+#      p b2
+#      dist = 100
+#      dist -= b1.size - b2.size
+#      b1[0,b2.size].each_with_index do |chr,idx|
+#        dist -= 1 if chr != b2[idx]
+#      end
+#      return dist
+#    end
+#  end
+#end
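The whole file is commented out as shipped. Assuming the intent is a rough bit-level similarity score (100 = identical strings), a working sketch of that idea might look like this; it is my reading of the intent, not code from the gem:

    # Hypothetical, uncommented variant of the idea above: 100 = identical,
    # one point subtracted per differing bit and per extra bit of length.
    def distance(s1, s2)
      return 100 if s1.strip.downcase == s2.strip.downcase
      b1, b2 = [s1, s2].map { |s| s.unpack("B*")[0] }
      b1, b2 = b2, b1 if b1.size < b2.size        # make b1 the longer bit string
      dist = 100 - (b1.size - b2.size)
      b2.size.times { |i| dist -= 1 if b1[i] != b2[i] }
      dist
    end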
data/lib/esearchy/genericengine.rb
CHANGED
@@ -54,8 +54,8 @@ module ESearchy
         block.call(response)
       when Net::HTTPRedirection
         get(URI.parse(response['location']).host,
-            URI.parse(response['location']).port,
-            querystring, headers, limit - 1, block)
+            URI.parse(response['location']).port.to_i,
+            querystring, headers, limit - 1, &block)
       else
         return response.error!
       end
@@ -65,7 +65,7 @@ module ESearchy
     rescue Net::HTTPServerException
       D "Error: Something went wrong with the HTTP request"
     rescue
-      D "Error: Something went wrong :("
+      D "Error: Something went wrong :( + #{$!}"
     end
   end

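The redirect fix passes the caller's block through to the recursive call (`&block`) instead of handing it over as a positional argument. A standalone sketch of that pattern, with simplified names and an assumed signature rather than the gem's exact code:

    require 'net/http'
    require 'uri'

    # Passing `block` as a plain argument only places the Proc in the parameter
    # list; `&block` re-attaches it as the block of the recursive call.
    def get(host, port, path, headers = {}, limit = 10, &block)
      raise "too many redirects" if limit.zero?
      response = Net::HTTP.start(host, port) { |http| http.get(path, headers) }
      case response
      when Net::HTTPSuccess then block.call(response)
      when Net::HTTPRedirection
        uri = URI.parse(response['location'])
        get(uri.host, uri.port.to_i, uri.request_uri, headers, limit - 1, &block)
      else
        response.error!
      end
    end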
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: esearchy
 version: !ruby/object:Gem::Version
-  hash:
+  hash: 19
   prerelease: false
   segments:
   - 0
   - 2
-  -
-  version: 0.2.
+  - 2
+  version: 0.2.2
 platform: ruby
 authors:
 - Matias P. Brutti
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []

-date:
+date: 2011-04-06 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -51,19 +51,19 @@ dependencies:
   type: :runtime
   version_requirements: *id002
 - !ruby/object:Gem::Dependency
-  name:
+  name: rubyzip
   prerelease: false
   requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash:
+        hash: 51
         segments:
         - 0
         - 9
-        -
-        version: 0.9.
+        - 4
+        version: 0.9.4
   type: :runtime
   version_requirements: *id003
 - !ruby/object:Gem::Dependency
@@ -108,11 +108,13 @@ extra_rdoc_files:
 - README.rdoc
 files:
 - lib/esearchy/bugmenot.rb
+- lib/esearchy/compare.rb
 - lib/esearchy/docs.rb
 - lib/esearchy/esearchy.rb
 - lib/esearchy/genericengine.rb
 - lib/esearchy/LocalEngines/directory.rb
 - lib/esearchy/localengines.rb
+- lib/esearchy/OtherEngines/amazonwishlist.rb
 - lib/esearchy/OtherEngines/googlegroups.rb
 - lib/esearchy/OtherEngines/ldap.rb
 - lib/esearchy/OtherEngines/pgp.rb