esearchy 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
# Debug helper: prints +m+ to standard output when ESearchy logging is on.
#
# The capitalised name is kept because the engines call it as a bare
# command (for example: D "Searching ...").
#
# m - the message to print; anything +puts+ accepts.
#
# Returns nil.
def D(m)
  puts m if ESearchy.log
end
4
+
5
# Namespace of the gem. Holds the global logging flag, the error classes and
# the Search front-end that fans a query out to the configured engines.
module ESearchy
  @@log = false
  #BUGMENOT = ESearchy::Bugmenot::fetch("linkedin.com")

  # Returns the logging flag (false until changed).
  def self.log
    @@log
  end

  # Sets the logging flag read by the top-level D helper.
  def self.log=(v)
    @@log = v
  end

  class ESearchyMissingAppID < StandardError; end
  class ESearchyMissingQuery < StandardError; end
  class ESearchyMissingWebsite < StandardError; end
  class ESearchyMissingCompany < StandardError; end

  # Entry point of a search session.
  #
  # Settings live in class variables and results in the globals $emails,
  # $people and $results, so they are shared by every Search instance and
  # are reset each time a new Search is created.
  class Search
    # Engine name => engine class, for engines that harvest e-mail addresses.
    EMAIL_ENGINES = {
      :Google => ESearchy::SearchEngines::Google,
      :Bing => ESearchy::SearchEngines::Bing,
      :Yahoo => ESearchy::SearchEngines::Yahoo,
      :Altavista => ESearchy::SearchEngines::Altavista,
      :PGP => ESearchy::OtherEngines::PGP,
      :Spider => ESearchy::OtherEngines::Spider,
      :Usenet => ESearchy::OtherEngines::Usenet,
      :GoogleGroups => ESearchy::OtherEngines::GoogleGroups
    }

    # Engine name => engine class, for engines that harvest people.
    PEOPLE_ENGINES = {
      :LinkedIn => ESearchy::SocialEngines::LinkedIn,
      :GoogleProfiles => ESearchy::SocialEngines::GoogleProfiles,
      :Naymz => ESearchy::SocialEngines::Naymz,
      :Classmates => ESearchy::SocialEngines::Classmates
    }

    # Defaults so the class-level readers below return nil instead of raising
    # NameError when they are called before any Search has been created.
    @@query = nil
    @@company = nil
    @@maxhits = nil
    @@start_at = nil
    @@website = nil

    # args  - Hash of settings; the keys read are :query, :company, :maxhits,
    #         :start_at, :website and :log.
    # block - optional; called with the new Search once it is set up.
    def initialize(args, &block)
      @@query = args[:query]
      @@company = args[:company]
      @@maxhits = args[:maxhits]
      @@start_at = args[:start_at]
      @@website = args[:website]
      # Compare against nil so that :log => false can switch logging off.
      ESearchy.log = args[:log] unless args[:log].nil?
      $emails = []
      $people = []
      $results = []
      block.call(self) if block_given?
    end

    def self.query
      @@query
    end

    def self.company
      @@company
    end

    def self.website
      @@website
    end

    def self.maxhits
      @@maxhits
    end

    def maxhits
      @@maxhits
    end

    # Yields this Search to the block; does nothing when no block is given.
    def start(&block)
      block.call(self) if block_given?
    end

    # E-mail addresses collected so far (the $emails global).
    def emails
      $emails
    end

    # People collected so far (the $people global).
    def people
      $people
    end

    # Result rows collected so far (the $results global).
    def results
      $results
    end

    # Builds an Emails group from the given EMAIL_ENGINES keys.
    def Emails(*args, &block)
      Emails.new(*args, &block)
    end

    # Builds a People group from the given PEOPLE_ENGINES keys.
    def People(*args, &block)
      People.new(*args, &block)
    end

    # Behaviour shared by Emails and People. The including class keeps its
    # engine instances in the @engines hash (engine name => engine).
    module MetaType
      def results
        $results
      end

      # Gathers the documents found by every engine, runs ESearchy::Docs over
      # them and appends what it finds to $emails and $results. The Docs
      # object is passed to the block when one is given.
      def docs(&block)
        @documents ||= [] # People never initialised this list
        @engines.each_value { |engine| @documents.concat(engine.documents) }
        res = ESearchy::Docs.new(@documents)
        res.search
        $emails.concat(res.emails)
        $results.concat(res.results)
        block.call(res) if block_given?
      end

      # Lets a configured engine be reached by name, e.g. group.Google.
      # Names that are not configured engines fall through to the normal
      # NoMethodError instead of silently returning nil.
      def method_missing(name, *args, &block)
        return @engines[name] if @engines && @engines.key?(name)
        super
      end

      def respond_to_missing?(name, include_private = false)
        (!@engines.nil? && @engines.key?(name)) || super
      end

      # Applies the same hit limit to every engine in the group.
      def maxhits=(v)
        @engines.each_value { |engine| engine.maxhits = v }
      end

      # Returns the engine stored under +v+, or nil.
      def [](v)
        @engines[v]
      end

      private

      def save_results(e)
        $results.concat(@engines[e].results)
      end
    end

    # A group of e-mail engines, all created with the current Search.query.
    class Emails
      include MetaType

      # args  - keys of Search::EMAIL_ENGINES to enable.
      # block - optional; called with the new group.
      #
      # Raises ArgumentError for a key that is not in EMAIL_ENGINES.
      def initialize(*args, &block)
        @engines = {}
        @documents = []
        args.each do |e|
          klass = ESearchy::Search::EMAIL_ENGINES.fetch(e) do
            raise ArgumentError, "unknown email engine: #{e.inspect}"
          end
          @engines[e] = klass.new(Search.query)
        end
        self.maxhits = Search.maxhits if Search.maxhits
        block.call(self) if block_given?
      end

      # E-mail addresses collected so far (the $emails global), matching
      # People#people. The previous reader exposed @emails, which was never
      # assigned and therefore always nil.
      def emails
        $emails
      end

      # Runs every engine in turn and stores its e-mails and result rows.
      # The block, when given, receives each engine after it has run.
      #
      # Returns nil.
      def search(&block)
        @engines.each_key do |e|
          @engines[e].search
          save_emails(e)
          save_results(e)
          block.call(@engines[e]) if block_given?
        end
        nil
      end

      private

      def save_emails(e)
        $emails.concat(@engines[e].emails)
      end
    end

    # A group of people engines, all created with the current Search.company.
    class People
      include MetaType

      # args  - keys of Search::PEOPLE_ENGINES to enable.
      # block - optional; called with the new group.
      #
      # Raises ArgumentError for a key that is not in PEOPLE_ENGINES.
      def initialize(*args, &block)
        @engines = {}
        @documents = [] # needed by MetaType#docs
        args.each do |e|
          klass = ESearchy::Search::PEOPLE_ENGINES.fetch(e) do
            raise ArgumentError, "unknown people engine: #{e.inspect}"
          end
          @engines[e] = klass.new(Search.company)
        end
        self.maxhits = Search.maxhits if Search.maxhits
        block.call(self) if block_given?
      end

      # People collected so far (the $people global).
      def people
        $people
      end

      # Runs every engine in turn and stores its people and result rows.
      # The block, when given, receives this group (not the engine) after
      # each engine has run.
      #
      # Returns nil.
      def search(&block)
        @engines.each_key do |e|
          @engines[e].search
          save_people(e)
          save_results(e)
          block.call(self) if block_given?
        end
        nil
      end

      private

      def save_people(e)
        $people.concat(@engines[e].people)
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
module ESearchy
  # Base class for the engines. It pages through an engine's results over
  # HTTP and collects document links plus e-mail addresses or people.
  #
  # Subclasses supply the constants ENGINE, PORT, NUM and TYPE (and
  # optionally HEADER) and the @querypath instance variable, which this
  # class reads but never assigns. TYPE < 2 marks an e-mail engine; any
  # other value marks a people engine, which must override crawl_people.
  class GenericEngine
    # Matches a trailing document extension and captures it with its dot.
    # The dots are escaped so "xpdf" is no longer mistaken for ".pdf".
    DOCUMENT_EXTENSION =
      /(\.(?:pdf|doc|docx|xlsx|pptx|odt|odp|ods|odb|txt|rtf|ans|csv))$/i

    # Matches plain and lightly obfuscated addresses:
    #   a@b.c, a @ b.c, a_at_b.c, a at b.c, a at b dot c
    # It is assembled from single-quoted pieces because "#$&" inside a regexp
    # literal is interpolated as the last-match global, which silently
    # removed '#', '$' and '&' from the character class.
    EMAIL_PATTERN = begin
      atom = '[a-z0-9!#$&\'*+=?^_`{|}~-]+'
      label = '[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'
      tld = '[a-z](?:[a-z-]*[a-z])?'
      local = "#{atom}(?:\\.#{atom})*"
      domain = "(?:#{label}\\.)+#{tld}"
      spelled_local = "#{atom}(?:\\sdot\\s#{atom})*"
      spelled_domain = "(?:#{label}\\sdot\\s)+#{tld}"
      Regexp.new([
        "#{local}_at_#{domain}",
        "#{local}\\sat\\s#{domain}",
        "#{local}@#{domain}",
        "#{local}\\s@\\s#{domain}",
        "#{spelled_local}\\sat\\s#{spelled_domain}"
      ].join("|"), Regexp::IGNORECASE)
    end

    # query - the search term; it is stored CGI-escaped.
    # start - offset of the first result to request.
    # stop  - maximum number of hits, or nil for no limit set yet.
    # block - optional; called with the new engine.
    #
    # Raises ESearchyMissingQuery when query is nil.
    def initialize(query, start = 0, stop = nil, &block)
      # CGI.escape(nil) raises on its own, so the old "or raise" never fired.
      raise ESearchyMissingQuery if query.nil?
      @query = CGI.escape(query)
      @company = ESearchy::Search.company || ""
      @start = start
      @totalhits = stop
      @documents = []
      @urls = [] # filled by parse_json
      if self.class::TYPE < 2
        @emails = []
      else
        @people = []
      end
      @results = []
      block.call(self) if block_given?
    end

    attr_reader :documents
    attr_reader :results
    attr_reader :emails
    attr_reader :people

    # Fetches one page of results, parses and crawls it, then moves on to the
    # next page (after a 4 second pause) while the hit limit has not been
    # reached. The follow-up request is issued after the current connection
    # has been closed rather than from inside it.
    def search
      more = false
      get(self.class::ENGINE, self.class::PORT,
          @querypath + @start.to_s, header) do |r|
        parse(r.body)
        D "Searching #{self.class} from #{@start} to #{calculate_top}.\n"
        crawler(r.body.gsub(/<em>|<\/em>/, "").gsub(/<b>|<\/b>/, "")) unless @totalhits == 0
        @start += self.class::NUM
        # With no limit set there is nothing to page towards.
        more = !@totalhits.nil? && @totalhits > @start
      end
      return unless more
      sleep(4)
      search
    end

    def start=(v)
      @start = v
    end

    def maxhits=(v)
      @totalhits = v
    end

    def company=(v)
      @company = v
    end

    private

    # Performs a GET request and passes the response to the block when it is
    # a success or a redirection. Any other response is raised through
    # Net::HTTPResponse#error!; only Net::HTTPFatalError is rescued (and
    # logged), other errors propagate to the caller.
    def get(url, port, querystring = "/", headers = {}, &block)
      Net::HTTP.new(url, port).start do |connection|
        response = connection.request(Net::HTTP::Get.new(querystring, headers))
        case response
        when Net::HTTPSuccess, Net::HTTPRedirection
          block.call(response)
        else
          response.error!
        end
      end
    rescue Net::HTTPFatalError
      D "Error: Something went wrong with the HTTP request"
    end

    # Request headers: the subclass's HEADER constant when it defines one,
    # otherwise a random User-Agent.
    def header
      return self.class::HEADER if self.class.const_defined?(:HEADER)
      {'User-Agent' => UserAgent.fetch}
    end

    # Index of the last result on the current page, capped at the hit limit
    # when one is set.
    def calculate_top
      page_end = @start + self.class::NUM
      !@totalhits.nil? && page_end > @totalhits ? @totalhits : page_end
    end

    # The smaller of the configured limit and the number of hits the engine
    # reports; the reported number when no limit is set.
    def totalhits(realhits)
      return realhits if @totalhits.nil?
      @totalhits > realhits ? realhits : @totalhits
    end

    # Default parser: only arrays of rows are handled here. The former
    # "when Json" branch named a constant this class does not define, so any
    # other input raised NameError; such input is now ignored. Engines that
    # receive JSON can call parse_json directly.
    def parse(object)
      parse_html(object) if object.is_a?(Array)
    end

    # Collects document links from rows of [label, url, link].
    def parse_html(array)
      array.each do |a|
        case a[0]
        when /(PDF|DOC|XLS|PPT|TXT)/
          @documents << [a[1], "." + $1.to_s.downcase]
        when nil
          @documents << [a[2], $1.to_s.downcase] if a[2] =~ DOCUMENT_EXTENSION
        when DOCUMENT_EXTENSION
          @documents << [CGI.unescape(a[2] || ""), $1.to_s.downcase]
        else
          #D "I do not parse this doc's \"#{a}\" filetype yet:)"
        end
      end
    end

    # Sorts each entry's "url" into @documents (known document extension) or
    # @urls (everything else, stored with an empty extension).
    def parse_json(json)
      @urls ||= []
      json.each do |j|
        case j["url"]
        when DOCUMENT_EXTENSION
          @documents << [j["url"], $1.to_s.downcase]
        else
          @urls << [j["url"], ""]
        end
      end
    end

    def crawler(text)
      self.class::TYPE < 2 ? crawl_emails(text) : crawl_people(text)
    end

    # Extracts the addresses found in +text+, adds them to @emails (kept
    # unique) and appends one [address, "E", ENGINE CLASS NAME, "T"/"F"] row
    # per address to @results. The flag is "T" when the address contains the
    # first dot-separated part of the query.
    def crawl_emails(text)
      found = fix(text.scan(EMAIL_PATTERN))
      @emails.concat(found).uniq!
      # Plain substring test: the fragment is no longer interpolated into a
      # regexp, so regexp metacharacters in the query cannot break it.
      fragment = CGI.unescape(@query).gsub("@", "").split(".")[0].to_s
      found.each do |email|
        @results << [email, "E", self.class.to_s.upcase,
                     email.include?(fragment) ? "T" : "F"]
      end
    end

    # Normalises the obfuscated forms in place and strips phone-like digit
    # runs. Returns the same list.
    def fix(list)
      list.each do |e|
        e.gsub!(/\s@\s/, "@")
        # Only spell out "at" when there is no real "@" yet, so a local part
        # that merely contains "_at_" is left intact.
        e.gsub!(/\sat\s|_at_/, "@") unless e.include?("@")
        e.gsub!(/\sdot\s/, ".")
        e.gsub!(/[+0-9]{0,3}[0-9()]{3,5}[-]{0,1}[0-9]{3,4}[-]{0,1}[0-9]{3,5}/, "")
      end
    end

    # People engines must override this.
    def crawl_people(text)
      raise "This is just a container"
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'esearchy/localengines/directory'
@@ -0,0 +1,5 @@
1
+ require 'esearchy/OtherEngines/pgp'
2
+ require 'esearchy/OtherEngines/usenet'
3
+ require 'esearchy/OtherEngines/spider'
4
+ require 'esearchy/OtherEngines/googlegroups'
5
+ require 'esearchy/OtherEngines/ldap'
@@ -0,0 +1,4 @@
1
+ require 'esearchy/searchengines/google'
2
+ require 'esearchy/searchengines/bing'
3
+ require 'esearchy/searchengines/yahoo'
4
+ require 'esearchy/searchengines/altavista'
@@ -0,0 +1,4 @@
1
+ require 'esearchy/socialengines/classmates'
2
+ require 'esearchy/socialengines/googleprofiles'
3
+ require 'esearchy/socialengines/linkedin'
4
+ require 'esearchy/socialengines/naymz'
@@ -0,0 +1,188 @@
1
+ class UserAgent
2
+ @USER_AGENTS=[
3
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Acoo Browser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
4
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
5
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Acoo Browser; InfoPath.2; .NET CLR 2.0.50727; Alexa Toolbar)",
6
+ "Mozilla/4.0 (compatible; MSIE 7.0; AOL 7.0; Windows NT 5.1; FunWebProducts)",
7
+ "Mozilla/4.0 (compatible; MSIE 6.0; AOL 8.0; Windows NT 5.1; SV1)",
8
+ "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.0; Windows NT 5.1; .NET CLR 1.1.4322; Zango 10.1.181.0)",
9
+ "Mozilla/4.0 (compatible; MSIE 6.0; AOL 6.0; Windows NT 5.1)",
10
+ "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
11
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
12
+ "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
13
+ "Mozilla/5.0 (X11; U; Linux; C -) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.5",
14
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; JyxoToolbar1.0; Embedded Web Browser from: http://bsalsa.com/; Avant Browser; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 1.1.4322)",
15
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GTB5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; Avant Browser)",
16
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Avant Browser; Avant Browser; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1)",
17
+ "Mozilla/5.0 (X11; 78; CentOS; US-en) AppleWebKit/527+ (KHTML, like Gecko) Bolt/0.862 Version/3.0 Safari/523.15",
18
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7.2) Gecko/20040825 Camino/0.8.1",
19
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X Mach-O; en; rv:1.8.1.12) Gecko/20080206 Camino/1.5.5",
20
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030111 Chimera/0.6",
21
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.10) Gecko/20070228 Camino/1.0.4",
22
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en; rv:1.8.1.4pre) Gecko/20070511 Camino/1.6pre",
23
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418.9 (KHTML, like Gecko, Safari) Safari/419.3 Cheshire/1.0.ALPHA",
24
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/419 (KHTML, like Gecko, Safari/419.3) Cheshire/1.0.ALPHA",
25
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.36 Safari/525.19",
26
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19",
27
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042815 Firefox/3.0.10 CometBird/3.0.10",
28
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.5) Gecko/2009011615 Firefox/3.0.5 CometBird/3.0.5",
29
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; Crazy Browser 3.0.0 Beta2)",
30
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; Crazy Browser 2.0.1)",
31
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Crazy Browser 1.0.5; .NET CLR 1.1.4322; InfoPath.1)",
32
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Deepnet Explorer 1.5.0; .NET CLR 1.0.3705)",
33
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Demeter/1.0.9 Safari/125",
34
+ "Mozilla/5.0 (X11; U; Linux i686; en; rv:1.8.1.12) Gecko/20080208 (Debian-1.8.1.12-2) Epiphany/2.20",
35
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.3) Gecko/20041007 Epiphany/1.4.7",
36
+ "Mozilla/5.0 (Windows; U; Win95; en-US; rv:1.5) Gecko/20031007 Firebird/0.7",
37
+ "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.5) Gecko/20031007 Firebird/0.7",
38
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; ko; rv:1.9.1b2) Gecko/20081201 Firefox/3.1b2",
39
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; cs; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8",
40
+ "Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.7.9) Gecko/20050711 Firefox/1.0.5",
41
+ "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5",
42
+ "Mozilla/5.0 (X11; U; OpenBSD i386; en-US; rv:1.8.0.5) Gecko/20060819 Firefox/1.5.0.5",
43
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3) Gecko/20090305 Firefox/3.1b3 GTB5",
44
+ "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12) Gecko/20080214 Firefox/2.0.0.12",
45
+ "Mozilla/5.0 (Windows; U; Windows NT 5.0; es-ES; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3",
46
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.9) Gecko/20071113 BonEcho/2.0.0.9",
47
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1) Gecko/20061026 BonEcho/2.0",
48
+ "Mozilla/5.0 (BeOS; U; Haiku BePC; en-US; rv:1.8.1.21pre) Gecko/20090227 BonEcho/2.0.0.21pre",
49
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko/2009033017 GranParadiso/3.0.8",
50
+ "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1b3pre) Gecko/20090109 Shiretoko/3.1b3pre",
51
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1b4pre) Gecko/20090311 Shiretoko/3.1b4pre",
52
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.1) Gecko/20060314 Flock/0.5.13.2",
53
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.2) Gecko/2008092122 Firefox/3.0.2 Flock/2.0b3",
54
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Fluid/0.9.4 Safari/525.13",
55
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) Gecko/20050929 Galeon/1.3.21",
56
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko/20090327 Galeon/2.0.7",
57
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; GreenBrowser)",
58
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; GreenBrowser)",
59
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; InfoPath.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; GreenBrowser)",
60
+ "Mozilla/3.0 (x86 [cs] Windows NT 5.1; Sun)",
61
+ "Mozilla/5.1 (X11; U; Linux i686; en-US; rv:1.8.0.3) Gecko/20060425 SUSE/1.5.0.3-7 Hv3/alpha",
62
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SIMBAR={CFBFDAEA-F21E-4D6E-A9B0-E100A69B860F}; Hydra Browser; .NET CLR 2.0.50727; .NET CLR 1.1.4322; .NET CLR 3.0.04506.30)",
63
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Hydra Browser; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)",
64
+ "Mozilla/5.0 (compatible; IBrowse 3.0; AmigaOS4.0)",
65
+ "Mozilla/4.5 (compatible; iCab 2.9.1; Macintosh; U; PPC)",
66
+ "iCab/3.0.2 (Macintosh; U; PPC Mac OS X)",
67
+ "iCab/4.0 (Macintosh; U; Intel Mac OS X)",
68
+ "Mozilla/5.0 (Java 1.6.0_01; Windows XP 5.1 x86; en) ICEbrowser/v6_1_2",
69
+ "ICE Browser/5.05 (Java 1.4.0; Windows 2000 5.0 x86)",
70
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.9) Gecko/20071030 Iceape/1.1.6 (Debian-1.1.6-3)",
71
+ "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.8) Gecko/20071008 Iceape/1.1.5 (Ubuntu-1.1.5-1ubuntu0.7.10)",
72
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.11) Gecko/20071203 IceCat/2.0.0.11-g1",
73
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092921 IceCat/3.0.3-g1",
74
+ "Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.5) Gecko/2008122011 Iceweasel/3.0.5 (Debian-3.0.5-1)",
75
+ "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.1) Gecko/20061205 Iceweasel/2.0.0.1 (Debian-2.0.0.1+dfsg-4)",
76
+ "Mozilla/5.0 (X11; U; Linux i686; it; rv:1.9.0.5) Gecko/2008122011 Iceweasel/3.0.5 (Debian-3.0.5-1)",
77
+ "Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)",
78
+ "Mozilla/4.0 (Mozilla/4.0; MSIE 7.0; Windows NT 5.1; FDM; SV1; .NET CLR 3.0.04506.30)",
79
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0; .NET CLR 2.0.50727)",
80
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT;)",
81
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; GTB5; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506; InfoPath.2; OfficeLiveConnector.1.3; OfficeLivePatch.0.0)",
82
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; iRider 2.21.1108; FDM)",
83
+ "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/528.7 (KHTML, like Gecko) Iron/1.0.155.0 Safari/528.7",
84
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Iron/0.2.152.0 Safari/12081672.525",
85
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/528.5 (KHTML, like Gecko) Iron/0.4.155.0 Safari/528.5",
86
+ "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.8.1.19) Gecko/20081217 K-Meleon/1.5.2",
87
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.5) Gecko/20060706 K-Meleon/1.0",
88
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060731 K-Ninja/2.0.2",
89
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
90
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
91
+ "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
92
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
93
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; KKman2.0)",
94
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; KKMAN3.2)",
95
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; KKman3.0)",
96
+ "Mozilla/5.0 (compatible; Konqueror/2.2.1; Linux)",
97
+ "Mozilla/5.0 (compatible; Konqueror/3.5; SunOS)",
98
+ "Mozilla/5.0 (compatible; Konqueror/4.1; OpenBSD) KHTML/4.1.4 (like Gecko)",
99
+ "Mozilla/5.0 (compatible; Konqueror/3.1-rc5; i686 Linux; 20020712)",
100
+ "Links (0.96; Linux 2.4.20-18.7 i586)",
101
+ "Links (0.98; Win32; 80x25)",
102
+ "Links (2.1pre18; Linux 2.4.31 i686; 100x37)",
103
+ "Links (2.1; Linux 2.6.18-gentoo-r6 x86_64; 80x24)",
104
+ "Links (2.2; Linux 2.6.25-gentoo-r9 sparc64; 166x52)",
105
+ "Mozilla/4.0 (compatible; MSIE 6.0; Linux 2.6.26-1-amd64) Lobo/0.98.3",
106
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows XP 5.1) Lobo/0.98.4",
107
+ "Mozilla/4.0 (compatible; Lotus-Notes/6.0; Windows-NT)",
108
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1b3pre) Gecko/2008 Lunascape/4.9.9.98",
109
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528+ (KHTML, like Gecko, Safari/528.0) Lunascape/5.0.2.0",
110
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; Lunascape 2.1.3)",
111
+ "Lynx/2.8.6rel.4 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.6.3",
112
+ "Lynx/2.8.3dev.6 libwww-FM/2.14",
113
+ "Lynx/2.8.5dev.16 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.7a",
114
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)",
115
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; MyIE2)",
116
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; MAXTHON 2.0)",
117
+ "Midori/0.1.5 (X11; Linux; U; en-gb) WebKit/532+",
118
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020919",
119
+ "Mozilla/5.0 (Windows; U; Windows NT 5.0; it-IT; rv:1.7.12) Gecko/20050915",
120
+ "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.2.1; MultiZilla v1.1.32 final) Gecko/20021130",
121
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4; MultiZilla v1.5.0.0f) Gecko/20030624",
122
+ "NCSA_Mosaic/2.0 (Windows 3.1)",
123
+ "NCSA_Mosaic/3.0 (Windows 95)",
124
+ "NCSA_Mosaic/2.6 (X11; SunOS 4.1.3 sun4m)",
125
+ "Mozilla/3.01 (compatible; Netbox/3.5 R92; Linux 2.2)",
126
+ "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0; NetCaptor 6.5.0RC1)",
127
+ "Mozilla/4.04 [en] (X11; I; IRIX 5.3 IP22)",
128
+ "Mozilla/5.0 (Windows; U; Win 9x 4.90; de-DE; rv:0.9.2) Gecko/20010726 Netscape6/6.1",
129
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.12) Gecko/20080219 Firefox/2.0.0.12 Navigator/9.0.0.6",
130
+ "Mozilla/4.08 [en] (WinNT; U ;Nav)",
131
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.0.2) Gecko/20030208 Netscape/7.02",
132
+ "Mozilla/3.0 (Win95; I)",
133
+ "Mozilla/4.51 [en] (Win98; U)",
134
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1",
135
+ "NetSurf/2.0 (RISC OS; armv3l)",
136
+ "NetSurf/1.2 (Linux; i686)",
137
+ "Mozilla/4.7 (compatible; OffByOne; Windows 2000)",
138
+ "Mozilla/4.7 (compatible; OffByOne; Windows 98)",
139
+ "OmniWeb/2.7-beta-3 OWF/1.0",
140
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/420+ (KHTML, like Gecko, Safari) OmniWeb/v595",
141
+ "Mozilla/4.5 (compatible; OmniWeb/4.1.1-v424.6; Mac_PowerPC)",
142
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Opera 7.10 [en]",
143
+ "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.2.15 Version/10.00",
144
+ "Opera/5.11 (Windows 98; U) [en]",
145
+ "Opera/9.51 (Macintosh; Intel Mac OS X; U; en)",
146
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 4.0) Opera 6.01 [en]",
147
+ "Opera/9.02 (Windows XP; U; ru)",
148
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows 98) Opera 5.12 [en]",
149
+ "Opera/9.70 (Linux i686 ; U; en) Presto/2.2.1",
150
+ "Opera/7.03 (Windows NT 5.0; U) [en]",
151
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.24",
152
+ "Opera/6.0 (Windows 2000; U) [fr]",
153
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009030821 Firefox/3.0.7 Orca/1.1 build 2",
154
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009022300 Firefox/3.0.6 Orca/1.1 build 1",
155
+ "Mozilla/1.10 [en] (Compatible; RISC OS 3.70; Oregano 1.10)",
156
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; PhaseOut-www.phaseout.net)",
157
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.4a) Gecko/20030411 Phoenix/0.5",
158
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2b) Gecko/20021029 Phoenix/0.4",
159
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; cs-CZ) AppleWebKit/527+ (KHTML, like Gecko) QtWeb Internet Browser/2.5 http://www.QtWeb.net",
160
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/527+ (KHTML, like Gecko) QtWeb Internet Browser/1.2 http://www.QtWeb.net",
161
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/527+ (KHTML, like Gecko) QtWeb Internet Browser/1.7 http://www.QtWeb.net",
162
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_6; it-it) AppleWebKit/528.16 (KHTML, like Gecko) Version/4.0 Safari/528.16",
163
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; cs-CZ) AppleWebKit/523.15 (KHTML, like Gecko) Version/3.0 Safari/523.15",
164
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/125.2 (KHTML, like Gecko) Safari/125.7",
165
+ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/528.16 (KHTML, like Gecko) Version/4.0 Safari/528.16",
166
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; fi-fi) AppleWebKit/420+ (KHTML, like Gecko) Safari/419.3",
167
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/312.8 (KHTML, like Gecko) Safari/312.6",
168
+ "Mozilla/5.0 (X11; U; Linux i686; rv:1.9.1a2pre) Gecko/20080824052448 SeaMonkey/2.0a1pre",
169
+ "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.6) Gecko/20070802 SeaMonkey/1.1.4",
170
+ "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1b3pre) Gecko/20081208 SeaMonkey/2.0a3pre",
171
+ "Mozilla/5.0 (BeOS; U; BeOS BePC; en-US; rv:1.9a1) Gecko/20060702 SeaMonkey/1.5a",
172
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1b3pre) Gecko/20081202 SeaMonkey/2.0a2",
173
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.13) Gecko/20080313 SeaMonkey/1.1.9",
174
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; ja-jp) AppleWebKit/419 (KHTML, like Gecko) Shiira/1.2.3 Safari/125",
175
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/417.9 (KHTML, like Gecko, Safari) Shiira/1.1",
176
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; fr) AppleWebKit/418.9.1 (KHTML, like Gecko) Shiira Safari/125",
177
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Sleipnir/2.8.1",
178
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; InfoPath.1; .NET CLR 2.0.50727) Sleipnir/2.8.4",
179
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_5; en-us) AppleWebKit/525.27.1 (KHTML, like Gecko) Stainless/0.4 Safari/525.20.1",
180
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-us) AppleWebKit/528.16 (KHTML, like Gecko) Stainless/0.5.3 Safari/525.20.1",
181
+ "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.18 (KHTML, like Gecko) Sunrise/1.7.4 like Safari/4525.22",
182
+ "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/125.5.7 (KHTML, like Gecko) SunriseBrowser/0.853",
183
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.10pre) Gecko/2009041814 Firefox/3.0.10pre (Swiftfox)",
184
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 1.1.4322; TheWorld)"]
185
+ def self.fetch
186
+ @USER_AGENTS[rand(@USER_AGENTS.size)]
187
+ end
188
+ end
data/lib/esearchy.rb ADDED
@@ -0,0 +1,24 @@
1
+ #EXTERNAL REQUIRES
2
+ require 'rubygems'
3
+ require 'net/http'
4
+ require 'cgi'
5
+ require 'json'
6
+ require 'digest/sha2'
7
+ require 'zip/zip'
8
+ require 'zip/zipfilesystem'
9
+ require 'pdf/reader'
10
+ if RUBY_PLATFORM =~ /mingw|mswin/
11
+ require 'win32ole'
12
+ end
13
+ require 'ldap' # gem install ruby-ldap
14
+
15
+ #ESEARCHY REQUIRES
16
+ require 'esearchy/genericengine'
17
+ require 'esearchy/searchengines'
18
+ require 'esearchy/otherengines'
19
+ require 'esearchy/socialengines'
20
+ require 'esearchy/localengines'
21
+ require 'esearchy/bugmenot'
22
+ require 'esearchy/docs'
23
+ require 'esearchy/useragent'
24
+ require 'esearchy/esearchy'