snapsearch-client-ruby 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +20 -0
- data/README.md +109 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/examples/rack/Gemfile +5 -0
- data/examples/rack/config.ru +88 -0
- data/examples/rack/public/index.html +15 -0
- data/examples/sinatra/Gemfile +5 -0
- data/examples/sinatra/Gemfile.lock +90 -0
- data/examples/sinatra/config.ru +15 -0
- data/examples/sinatra/lib/sinatra_snap_search.rb +19 -0
- data/examples/sinatra/public/index.html +15 -0
- data/lib/rack/snap_search.rb +143 -0
- data/lib/rack/snap_search/config.rb +85 -0
- data/lib/snap_search.rb +14 -0
- data/lib/snap_search/client.rb +147 -0
- data/lib/snap_search/connection_exception.rb +15 -0
- data/lib/snap_search/detector.rb +248 -0
- data/lib/snap_search/exception.rb +8 -0
- data/lib/snap_search/interceptor.rb +66 -0
- data/lib/snap_search/validation_exception.rb +17 -0
- data/resources/cacert.pem +3785 -0
- data/resources/extensions.json +26 -0
- data/resources/robots.json +208 -0
- data/snapsearch.gemspec +31 -0
- data/spec/lib/rack/qs_spec.rb +34 -0
- data/spec/lib/rack/snap_search/config_spec.rb +56 -0
- data/spec/lib/snap_search/detector_spec.rb +362 -0
- data/spec/lib/snap_search/interceptor_spec.rb +116 -0
- data/spec/spec_helper.rb +6 -0
- metadata +216 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"generic": [
|
3
|
+
"html",
|
4
|
+
"htm",
|
5
|
+
"shtml",
|
6
|
+
"xhtml",
|
7
|
+
"xht",
|
8
|
+
"cgi"
|
9
|
+
],
|
10
|
+
"php": [
|
11
|
+
"php"
|
12
|
+
],
|
13
|
+
"python" : [
|
14
|
+
"py"
|
15
|
+
],
|
16
|
+
"asp": [
|
17
|
+
"asp",
|
18
|
+
"aspx"
|
19
|
+
],
|
20
|
+
"java": [
|
21
|
+
"jsp"
|
22
|
+
],
|
23
|
+
"perl": [
|
24
|
+
"pl"
|
25
|
+
]
|
26
|
+
}
|
@@ -0,0 +1,208 @@
|
|
1
|
+
{
|
2
|
+
"ignore": [
|
3
|
+
"SnapSearch"
|
4
|
+
],
|
5
|
+
"match": [
|
6
|
+
"008",
|
7
|
+
"ABACHOBot",
|
8
|
+
"Accoona-AI-Agent",
|
9
|
+
"AddSugarSpiderBot",
|
10
|
+
"Adsbot-Google",
|
11
|
+
"altavista",
|
12
|
+
"AnyApexBot",
|
13
|
+
"Arachmo",
|
14
|
+
"archiver",
|
15
|
+
"B-l-i-t-z-B-O-T",
|
16
|
+
"Baiduspider",
|
17
|
+
"BecomeBot",
|
18
|
+
"BeslistBot",
|
19
|
+
"BillyBobBot",
|
20
|
+
"Bimbot",
|
21
|
+
"Bingbot",
|
22
|
+
"BingPreview",
|
23
|
+
"bitlybot",
|
24
|
+
"BlitzBOT",
|
25
|
+
"boitho.com-dc",
|
26
|
+
"boitho.com-robot",
|
27
|
+
"borg",
|
28
|
+
"BrowserMob",
|
29
|
+
"btbot",
|
30
|
+
"CatchBot",
|
31
|
+
"Catchpoint bot",
|
32
|
+
"Cerberian Drtrs",
|
33
|
+
"Charlotte",
|
34
|
+
"ConveraCrawler",
|
35
|
+
"cosmos",
|
36
|
+
"Covario IDS",
|
37
|
+
"crawler",
|
38
|
+
"DataparkSearch",
|
39
|
+
"DiamondBot",
|
40
|
+
"Discobot",
|
41
|
+
"Dotbot",
|
42
|
+
"Embedly",
|
43
|
+
"EmeraldShield.com WebBot",
|
44
|
+
"envolk[ITS]spider",
|
45
|
+
"EsperanzaBot",
|
46
|
+
"Exabot",
|
47
|
+
"facebookexternalhit",
|
48
|
+
"FAST-WebCrawler",
|
49
|
+
"FAST Enterprise Crawler",
|
50
|
+
"Feedfetcher",
|
51
|
+
"FDSE robot",
|
52
|
+
"FindLinks",
|
53
|
+
"FurlBot",
|
54
|
+
"FyberSpider",
|
55
|
+
"g2crawler",
|
56
|
+
"Gaisbot",
|
57
|
+
"GalaxyBot",
|
58
|
+
"genieBot",
|
59
|
+
"Gigabot",
|
60
|
+
"Girafabot",
|
61
|
+
"GomezA",
|
62
|
+
"Googlebot",
|
63
|
+
"GurujiBot",
|
64
|
+
"HappyFunBot",
|
65
|
+
"hl_ftien_spider",
|
66
|
+
"Holmes",
|
67
|
+
"htdig",
|
68
|
+
"http client",
|
69
|
+
"HttpMonitor",
|
70
|
+
"iaskspider",
|
71
|
+
"ia_archiver",
|
72
|
+
"iCCrawler",
|
73
|
+
"ichiro",
|
74
|
+
"igdeSpyder",
|
75
|
+
"IRLbot",
|
76
|
+
"IssueCrawler",
|
77
|
+
"Jaxified Bot",
|
78
|
+
"Jyxobot",
|
79
|
+
"KoepaBot",
|
80
|
+
"L.webis",
|
81
|
+
"LapozzBot",
|
82
|
+
"Larbin",
|
83
|
+
"LDSpider",
|
84
|
+
"LexxeBot",
|
85
|
+
"Linguee Bot",
|
86
|
+
"LinkedInBot",
|
87
|
+
"LinkWalker",
|
88
|
+
"lmspider",
|
89
|
+
"lwp-trivial",
|
90
|
+
"lycos",
|
91
|
+
"mabontland",
|
92
|
+
"magpie-crawler",
|
93
|
+
"Mediapartners",
|
94
|
+
"MJ12bot",
|
95
|
+
"Mnogosearch",
|
96
|
+
"mogimogi",
|
97
|
+
"MojeekBot",
|
98
|
+
"Moreoverbot",
|
99
|
+
"Morning Paper",
|
100
|
+
"msnbot",
|
101
|
+
"MSRBot",
|
102
|
+
"MVAClient",
|
103
|
+
"mxbot",
|
104
|
+
"netresearch",
|
105
|
+
"NetResearchServer",
|
106
|
+
"NetSeer Crawler",
|
107
|
+
"NewsGator",
|
108
|
+
"NG-Search",
|
109
|
+
"nicebot",
|
110
|
+
"noxtrumbot",
|
111
|
+
"Nusearch Spider",
|
112
|
+
"NutchCVS",
|
113
|
+
"Nymesis",
|
114
|
+
"obot",
|
115
|
+
"oegp",
|
116
|
+
"omgilibot",
|
117
|
+
"OmniExplorer_Bot",
|
118
|
+
"OOZBOT",
|
119
|
+
"openbot",
|
120
|
+
"Orbiter",
|
121
|
+
"PageBitesHyperBot",
|
122
|
+
"PagePeeker",
|
123
|
+
"Peew",
|
124
|
+
"Pinterest",
|
125
|
+
"polybot",
|
126
|
+
"Pompos",
|
127
|
+
"PostPost",
|
128
|
+
"Psbot",
|
129
|
+
"PycURL",
|
130
|
+
"Qseero",
|
131
|
+
"Radian6",
|
132
|
+
"RAMPyBot",
|
133
|
+
"Read Later",
|
134
|
+
"rogerbot",
|
135
|
+
"RufusBot",
|
136
|
+
"SandCrawler",
|
137
|
+
"SBIder",
|
138
|
+
"scooter",
|
139
|
+
"ScoutJet",
|
140
|
+
"Scrubby",
|
141
|
+
"SearchSight",
|
142
|
+
"Seekbot",
|
143
|
+
"semanticdiscovery",
|
144
|
+
"Sensis Web Crawler",
|
145
|
+
"SEOChat::Bot",
|
146
|
+
"SeznamBot",
|
147
|
+
"Shim-Crawler",
|
148
|
+
"ShopWiki",
|
149
|
+
"Shoula robot",
|
150
|
+
"silk",
|
151
|
+
"Sitebot",
|
152
|
+
"slurp",
|
153
|
+
"Snappy",
|
154
|
+
"sogou spider",
|
155
|
+
"Sosospider",
|
156
|
+
"speedy",
|
157
|
+
"Speedy Spider",
|
158
|
+
"spider",
|
159
|
+
"Sqworm",
|
160
|
+
"StackRambler",
|
161
|
+
"suggybot",
|
162
|
+
"SurveyBot",
|
163
|
+
"SynooBot",
|
164
|
+
"Teoma",
|
165
|
+
"TerrawizBot",
|
166
|
+
"TheSuBot",
|
167
|
+
"Thumbnail.CZ robot",
|
168
|
+
"ThumbShotsBot",
|
169
|
+
"TinEye",
|
170
|
+
"truwoGPS",
|
171
|
+
"TurnitinBot",
|
172
|
+
"TweetedTimes Bot",
|
173
|
+
"TwengaBot",
|
174
|
+
"Twitterbot",
|
175
|
+
"updated",
|
176
|
+
"URL2PNG",
|
177
|
+
"Urlfilebot",
|
178
|
+
"Vagabondo",
|
179
|
+
"voila",
|
180
|
+
"VoilaBot",
|
181
|
+
"Vortex",
|
182
|
+
"voyager",
|
183
|
+
"VYU2",
|
184
|
+
"webcollage",
|
185
|
+
"Websquash.com",
|
186
|
+
"WebThumb",
|
187
|
+
"wf84",
|
188
|
+
"Willow Internet Crawler",
|
189
|
+
"WoFindeIch Robot",
|
190
|
+
"WomlpeFactory",
|
191
|
+
"Xaldon_WebSpider",
|
192
|
+
"yacy",
|
193
|
+
"yahoo",
|
194
|
+
"YandexBot",
|
195
|
+
"YandexImages",
|
196
|
+
"Yasaklibot",
|
197
|
+
"Yeti",
|
198
|
+
"YodaoBot",
|
199
|
+
"yoogliFetchAgent",
|
200
|
+
"YottaaMonitor",
|
201
|
+
"YoudaoBot",
|
202
|
+
"Zao",
|
203
|
+
"Zealbot",
|
204
|
+
"ZooShot",
|
205
|
+
"zspider",
|
206
|
+
"ZyBorg"
|
207
|
+
]
|
208
|
+
}
|
data/snapsearch.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
# Gem specification for snapsearch-client-ruby.
#
# File lookups are anchored to the directory that holds this gemspec instead
# of the process working directory, so evaluating the spec from another
# directory still finds the VERSION file and the packaged file list.
gem_root = File.expand_path('..', __FILE__)

# Returns the version string stored in the first VERSION* file under +root+.
# Falls back to '0.0.0' when no such file exists or it cannot be read; only
# filesystem errors are treated as "missing", other failures still surface.
read_version = lambda do |root|
  version_file = Dir[File.join(root, 'VERSION*')].sort.first
  begin
    version_file ? File.read(version_file).strip : '0.0.0'
  rescue SystemCallError
    '0.0.0'
  end
end

Gem::Specification.new do |s|

  # Variables
  s.author      = 'Ryan Scott Lewis'
  s.email       = 'roger.qiu@polycademy.com'
  s.name        = 'snapsearch-client-ruby'
  s.summary     = 'Ruby HTTP Client Middleware Libraries for SnapSearch. Search engine optimisation for single page applications.'
  s.homepage    = 'https://github.com/SnapSearch/SnapSearch-Client-Ruby'
  s.license     = 'MIT'

  # Dependencies
  s.add_dependency 'version', '~> 1.0.0'
  s.add_dependency 'httpi', '~> 2.1.0'
  s.add_dependency 'addressable', '~> 2.0.0'
  s.add_dependency 'rack', '~> 1.5.0'
  s.add_development_dependency 'rake', '~> 10.1.1'
  s.add_development_dependency 'rspec', '~> 2.14.1'
  s.add_development_dependency 'guard-rspec', '~> 4.2.5'
  s.add_development_dependency 'guard-yard', '~> 2.1.0'
  s.add_development_dependency 'fuubar', '~> 1.3.2'

  # Programmatically set variables
  s.version       = read_version.call(gem_root)
  s.description   = s.summary
  s.require_paths = ['lib']

  # The globs must yield paths relative to the gem root, so evaluate them
  # from there rather than from wherever the caller happens to be.
  Dir.chdir(gem_root) do
    s.files      = Dir['{{Rake,Gem}file{.lock,},README*,VERSION,LICENSE,*.gemspec,{lib,bin,examples,resources,spec,test}/**/*}']
    s.test_files = Dir['{examples,spec,test}/**/*']
  end

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
# Checks that Rack::Request#params percent-decodes both the keys and the
# values of the query string, including the `_escaped_fragment_` parameter.
describe 'Rack application QS splitting' do

  # CGI-style server variables of the simulated request.
  let(:server_vars) do
    {
      'HTTP_HOST'         => 'localhost',
      'HTTP_USER_AGENT'   => 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0',
      'SERVER_NAME'       => 'localhost',
      'SERVER_PORT'       => '80',
      'REMOTE_ADDR'       => '::1',
      'DOCUMENT_ROOT'     => 'C:/www',
      'REQUEST_SCHEME'    => 'http',
      'GATEWAY_INTERFACE' => 'CGI/1.1',
      'SERVER_PROTOCOL'   => 'HTTP/1.1'
    }
  end

  # The request line under test: an encoded key/value pair plus an encoded
  # escaped fragment.
  let(:request_vars) do
    {
      'REQUEST_METHOD' => 'GET',
      'QUERY_STRING'   => 'key%201=value+1&_escaped_fragment_=%2Fpath2%3Fkey2=value2',
      'PATH_INFO'      => '/snapsearch/path1'
    }
  end

  let(:env) do
    server_vars.merge(request_vars).merge(
      'rack.url_scheme' => 'http',
      'rack.input'      => StringIO.new
    )
  end

  subject { Rack::Request.new(env) }

  it 'should decode the key/values when splitting the params of a request' do
    decoded = {
      'key 1'              => 'value 1',
      '_escaped_fragment_' => '/path2?key2=value2'
    }

    subject.params.should == decoded
  end

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Rack::SnapSearch::Config: every entry in ATTRIBUTES has a
# getter/setter pair, and each callback method stores a block when one is
# given and returns the stored block otherwise.
describe Rack::SnapSearch::Config do

  subject { described_class.new }

  # An arbitrary block to hand to the callback methods.
  let(:callback) { Proc.new {} }

  context 'For each attribute' do

    described_class::ATTRIBUTES.each do |attribute|
      it { should respond_to(attribute) }        # Have a getter method
      it { should respond_to("#{attribute}=") }  # Have a setter method
    end

  end

  # The three callbacks share one contract, so the same examples are
  # generated for each of them.
  [:on_exception, :before_intercept, :after_intercept].each do |hook|

    describe "##{hook}" do

      it { should respond_to(hook) }

      it 'should be a setter if a block is given and a getter if not' do
        subject.public_send(hook).should == nil
        subject.public_send(hook, &callback) # Same as giving a block with do/end or curly brackets
        subject.public_send(hook).should == callback
      end

    end

  end

end
|
@@ -0,0 +1,362 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
# Specs for SnapSearch::Detector: which Rack requests #detect reports as
# needing interception, and how #get_encoded_url / #get_decoded_path rebuild
# URLs from a request.
describe SnapSearch::Detector do

  # User agents shared by the fixtures below.
  let(:browser_agent) { 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0' }
  let(:adsbot_agent)  { 'AdsBot-Google ( http://www.google.com/adsbot.html)' }

  # Builds a fresh Rack environment for a plain browser GET of /snapsearch/.
  # Each fixture passes only the keys in which it differs via +overrides+.
  # A new hash (and a new StringIO for rack.input) is returned on every call.
  def rack_env(overrides = {})
    {
      'HTTP_HOST'         => 'localhost',
      'HTTP_USER_AGENT'   => browser_agent,
      'SERVER_NAME'       => 'localhost',
      'SERVER_PORT'       => '80',
      'REMOTE_ADDR'       => '::1',
      'DOCUMENT_ROOT'     => 'C:/www',
      'GATEWAY_INTERFACE' => 'CGI/1.1',
      'SERVER_PROTOCOL'   => 'HTTP/1.1',
      'REQUEST_METHOD'    => 'GET',
      'QUERY_STRING'      => '',
      'PATH_INFO'         => '/snapsearch/',
      'rack.url_scheme'   => 'http',
      'rack.input'        => StringIO.new
    }.merge(overrides)
  end

  # Same as #rack_env but also sets REQUEST_SCHEME. Only some fixtures carry
  # this key. NOTE(review): whether the detector reads REQUEST_SCHEME cannot
  # be told from this spec, so the distinction is kept deliberately.
  def rack_env_with_scheme(overrides = {})
    rack_env({ 'REQUEST_SCHEME' => 'http' }.merge(overrides))
  end

  let(:normal_browser)   { rack_env }
  let(:search_engine)    { rack_env('HTTP_USER_AGENT' => adsbot_agent) }
  let(:snapsearch_robot) { rack_env('HTTP_USER_AGENT' => 'SnapSearch') }
  let(:non_get_route)    { rack_env('HTTP_USER_AGENT' => adsbot_agent, 'REQUEST_METHOD' => 'POST') }
  let(:ignored_route)    { rack_env('HTTP_USER_AGENT' => 'Googlebot-Video/1.0', 'PATH_INFO' => '/ignored/') }

  let(:matched_route) do
    rack_env_with_scheme(
      'HTTP_USER_AGENT' => 'msnbot/1.1 ( http://search.msn.com/msnbot.htm)',
      'PATH_INFO'       => '/matched/'
    )
  end

  let(:basic_escaped_fragment_route) do
    rack_env_with_scheme(
      'QUERY_STRING' => '_escaped_fragment_',
      'PATH_INFO'    => '/snapsearch'
    )
  end

  let(:escaped_fragment_route) do
    rack_env_with_scheme(
      'QUERY_STRING' => 'key1=value1&_escaped_fragment_=%2Fpath2%3Fkey2=value2',
      'PATH_INFO'    => '/snapsearch/path1'
    )
  end

  let(:valid_file_extension_route) do
    rack_env_with_scheme(
      'HTTP_USER_AGENT' => adsbot_agent,
      'PATH_INFO'       => '/snapsearch/song.html?key=value'
    )
  end

  let(:invalid_file_extension_route) do
    rack_env_with_scheme(
      'HTTP_USER_AGENT' => adsbot_agent,
      'PATH_INFO'       => '/snapsearch/song.html.mp3?key=value'
    )
  end

  let(:nonexistent_file_extension_route) do
    rack_env_with_scheme('HTTP_USER_AGENT' => adsbot_agent)
  end

  # Same request as :invalid_file_extension_route; kept under its own name
  # for the custom-extensions example.
  let(:mp3_file_extension_route) do
    rack_env_with_scheme(
      'HTTP_USER_AGENT' => adsbot_agent,
      'PATH_INFO'       => '/snapsearch/song.html.mp3?key=value'
    )
  end

  subject { described_class.new }

  describe '#detect' do

    describe 'When a request from a normal browser comes through' do

      let(:request) { Rack::Request.new(normal_browser) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When a request from a search engine robot comes through' do

      let(:request) { Rack::Request.new(search_engine) }

      it('should be intercepted') { subject.detect(request: request).should == true }

    end

    describe 'When a request from a SnapSearch robot comes through' do

      let(:request) { Rack::Request.new(snapsearch_robot) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When a non-GET request comes through' do

      let(:request) { Rack::Request.new(non_get_route) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When an ignored route request comes through' do

      let(:request) { Rack::Request.new(ignored_route) }

      it('should not be intercepted') { subject.detect(ignored_routes: [/^\/ignored/], request: request).should == false }

    end

    describe 'When a non-matched route request comes through' do

      let(:request) { Rack::Request.new(matched_route) }

      it('should not be intercepted') { subject.detect(matched_routes: [/^\/non_matched_route/], request: request).should == false }

    end

    describe 'When a matched route request comes through' do

      let(:request) { Rack::Request.new(matched_route) }

      it('should be intercepted') { subject.detect(matched_routes: [/^\/matched/], request: request).should == true }

    end

    describe 'When an escaped fragmented request comes through' do

      let(:request) { Rack::Request.new(basic_escaped_fragment_route) }

      it('should be intercepted') { subject.detect(request: request).should == true }

    end

    describe 'When other factors allow it and a valid file extension comes through' do

      let(:request) { Rack::Request.new(valid_file_extension_route) }

      it('should be intercepted') { subject.detect(request: request, check_file_extensions: true).should == true }

    end

    describe 'When an invalid file extension comes through' do

      let(:request) { Rack::Request.new(invalid_file_extension_route) }

      it('should not be intercepted') { subject.detect(request: request, check_file_extensions: true).should == false }

    end

    describe 'When other factors allow it and a nonexistent file extension comes through' do

      let(:request) { Rack::Request.new(nonexistent_file_extension_route) }

      it('should be intercepted') { subject.detect(request: request, check_file_extensions: true).should == true }

    end

    describe 'When custom extensions are set' do

      let(:request) { Rack::Request.new(mp3_file_extension_route) }

      it('should be intercepted correctly') do
        # Not intercepted until 'mp3' is registered as an extension...
        subject.detect(request: request, check_file_extensions: true).should == false

        subject.extensions['generic'] << 'mp3'

        subject.detect(request: request, check_file_extensions: true).should == true

        # ...and no longer intercepted once it is removed again.
        subject.extensions['generic'].delete('mp3')

        subject.detect(request: request, check_file_extensions: true).should == false
      end

    end

  end

  describe '#get_encoded_url' do

    let(:request) { Rack::Request.new(escaped_fragment_route) }
    let(:uri) { Addressable::URI.parse(request.url) }

    it 'should convert the escaped fragment route back to hash fragment' do
      subject.get_encoded_url(request.params, uri).should == 'http://localhost/snapsearch/path1?key1=value1#!/path2?key2=value2'
    end

  end

  describe '#get_decoded_path' do

    let(:uri) { Addressable::URI.parse('http://localhost:8080/some%20path/an%2Fother+path/path1.htm?key1=value%201%3F') }

    it 'should return the decoded path with the correct query and hash fragment' do
      subject.get_decoded_path( {}, uri ).should == '/some path/an/other+path/path1.htm?key1=value 1?'
    end

  end

end
|